# Setup

In [43]:
# All the needed imports
import pandas as pd 
from sklearn import preprocessing
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, confusion_matrix

## Download the data and create a dataframe

In [3]:
# UCI data location
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"

names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'type']

iris = pd.read_csv(url, names=names)

## Look at the data for sanity check

In [4]:
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,type
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


## Create dummies (technical term) for iris types

Because we want to estimate a categorical value not a numeric one, we need to numericly encode the distinct values of the target variable (iris in this case).  This technique is also referred to as one-hot encoding

Neural Networks perform best on numeric values so we'll convert them here

In [26]:
target = pd.get_dummies (iris['type'])

In [27]:
target.head()

Unnamed: 0,Iris-setosa,Iris-versicolor,Iris-virginica
0,1,0,0
1,1,0,0
2,1,0,0
3,1,0,0
4,1,0,0


## Seperate the inputs from the full data set

In [68]:
# first four columns of data
inputs = iris.iloc[:, 0:4]


le = preprocessing.LabelEncoder()
le.fit(iris.type)
print(le.classes_)
target = le.transform(iris.type)
print(target[45:52], target.shape)

['Iris-setosa' 'Iris-versicolor' 'Iris-virginica']
[0 0 0 0 0 1 1] (150,)


## Split the data into training and test

Creating a `training` and `validation` (some times called a `test`) set help prevent overfitting of the model. A model that is overfit will not be useful in predicting future behavior, which is the point of this modeling in the first place

In [69]:
input_train, input_test, target_train, target_test = train_test_split(inputs, target, test_size = 0.20)

In [70]:
print(input_train.shape, input_test.shape, target_train.shape, target_test.shape)

(120, 4) (30, 4) (120,) (30,)


## Scale the inputs

We need to scale the inputs to improve model performance

In [71]:
scaler = StandardScaler()
scaler.fit(input_train)

input_train = scaler.transform(input_train)
input_test = scaler.transform(input_test)

## Training the model

In [81]:
mlp = MLPClassifier(hidden_layer_sizes=(10,10), max_iter=1000,verbose=False)
mlp.fit(input_train, target_train)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(10, 10), learning_rate='constant',
       learning_rate_init=0.001, max_iter=1000, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=None, shuffle=True, solver='adam', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False)

In [77]:
predictions = mlp.predict(input_test)

In [78]:
print(confusion_matrix(target_test,predictions))
print(classification_report(target_test,predictions))

[[13  0  0]
 [ 0  3  1]
 [ 0  2 11]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        13
           1       0.60      0.75      0.67         4
           2       0.92      0.85      0.88        13

   micro avg       0.90      0.90      0.90        30
   macro avg       0.84      0.87      0.85        30
weighted avg       0.91      0.90      0.90        30

