# Iris Classification

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import preprocessing

## Iris Classification Dataset

_Input Features:_
sepal_length, sepal_width, petal_length, petal_width

_Target:_
Iris plant class

_Objective:_ Predict iris plant class for a given sepal_length, sepal_width, petal_length, petal_width

**Reference: [Iris Dataset](https://archive.ics.uci.edu/ml/datasets/iris)**

In [None]:
# Column order for the output files: the encoded target first, then the
# four input features.
columns = [
    'encoded_class',
    'sepal_length',
    'sepal_width',
    'petal_length',
    'petal_width',
]

In [None]:
# Build a label encoder fitted on the three iris class names so the string
# labels can later be converted to integer codes.
iris_class_names = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']
le = preprocessing.LabelEncoder()
le.fit(iris_class_names)

In [None]:
le.classes_ # inspect the class labels held by the fitted encoder

In [None]:
# Load the full iris dataset from the local CSV file.
df = pd.read_csv('iris_all.csv')

In [None]:
df['class'].value_counts() # number of rows for each value of the 'class' column

In [None]:
df.head() # data check: preview the first rows of the loaded data

In [None]:
df.tail() # data check: preview the last rows of the loaded data

In [None]:
le.transform(df['class'])[-5:] # preview only: encoded labels for the last five rows (result is not stored)

In [None]:
# Store the integer-encoded class labels next to the original string labels.
encoded_labels = le.transform(df['class'])
df['encoded_class'] = encoded_labels

In [None]:
df.head() # confirm the encoded_class column has been added

## Visualize the Dataset

In [None]:
# Boolean row masks, one per iris class, used to select rows when plotting.
species = df['class']
setosa = species.eq('Iris-setosa')
versicolor = species.eq('Iris-versicolor')
virginica = species.eq('Iris-virginica')

### By Sepal Attributes

In [None]:
# Sepal length (x) against sepal width (y), one colour per class.
for mask, name, colour in (
    (setosa, 'setosa', 'g'),
    (versicolor, 'versicolor', 'r'),
    (virginica, 'virginica', 'b'),
):
    subset = df[mask]
    plt.scatter(subset.sepal_length, subset.sepal_width, label=name, color=colour)

plt.title('Sepal')
plt.xlabel('length')
plt.ylabel('width')
plt.grid(True)
plt.legend()
plt.show()

### By Petal Attributes

In [None]:
# Petal length (x) against petal width (y), one colour per class.
for mask, name, colour in (
    (setosa, 'setosa', 'g'),
    (versicolor, 'versicolor', 'r'),
    (virginica, 'virginica', 'b'),
):
    subset = df[mask]
    plt.scatter(subset.petal_length, subset.petal_width, label=name, color=colour)

plt.title('Petal')
plt.xlabel('length')
plt.ylabel('width')
plt.grid(True)
plt.legend()
plt.show()

### Sepal Length vs. Petal Length

In [None]:
# Petal length (x) against sepal length (y), one colour per class.
for mask, name, colour in (
    (setosa, 'setosa', 'g'),
    (versicolor, 'versicolor', 'r'),
    (virginica, 'virginica', 'b'),
):
    subset = df[mask]
    plt.scatter(subset.petal_length, subset.sepal_length, label=name, color=colour)

plt.title('Petal-Sepal')
plt.xlabel('petal length')
plt.ylabel('sepal length')
plt.grid(True)
plt.legend()
plt.show()

## Training and Validation Set

### Target Variable as First Column, Followed by Input Features:
_encoded_class,sepal_length,sepal_width,petal_length,petal_width_

### Training, Validation Files Do Not Have a Column Header

In [None]:
# Shuffle the rows before splitting them 70% training / 30% validation.
# Seeding NumPy's global generator first makes the shuffle repeatable.
np.random.seed(378)

# NOTE: the index values are used as row positions by iloc below; this
# assumes df still carries its default 0..n-1 integer index.
shuffled_positions = list(df.index)
np.random.shuffle(shuffled_positions)
df = df.iloc[shuffled_positions]

In [None]:
# Row counts for the split: the training share is 70% of the rows truncated
# to a whole number, and validation takes whatever remains.
rows = len(df)
train = int(rows * 0.7)
validation = rows - train

In [None]:
rows, train, validation # data check: total rows, training rows, validation rows

### Writing Training Set, Validation Set

In [None]:
# Training set: the first `train` rows, written without index or header and
# with the columns in the order given by `columns`.
training_rows = df.iloc[:train]
training_rows.to_csv('irisTrain.csv', columns=columns, header=False, index=False)

In [None]:
# Validation set: every row after the first `train` rows, written without
# index or header and with the columns in the order given by `columns`.
validation_rows = df.iloc[train:]
validation_rows.to_csv('irisValidation.csv', columns=columns, header=False, index=False)

### Write the Column List

In [None]:
# The CSV files carry no header, so save the column order separately as a
# single comma-separated line.
column_line = ','.join(columns)
with open('iris_TrainColumns.txt', 'w') as f:
    f.write(column_line)