In [133]:
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import validation_curve
from sklearn.metrics import classification_report, confusion_matrix
from sklearn import datasets
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score

In [3]:
# https://scikit-learn.org/stable/auto_examples/datasets/plot_iris_dataset.html
iris = datasets.load_iris()

In [4]:
#x[sample][feature]
#y[sample_classification]
#Setosa, Versicolor, Virginica
x = iris.data
y = iris.target
#iris

In [5]:
#Turn x into a dataframe for ease
X = pd.DataFrame(x)
X.columns=['sepal_length','sepal_width','petal_length','petal_width']
#X

In [6]:
#Normalize features
X[X.columns] = (X[X.columns] - X[X.columns].mean()) / (X[X.columns].max() - X[X.columns].min())
X = X[X.columns].values
Y=y
#X

In [7]:
#Split the data into train and test
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.20, random_state=12)
print(X_train.shape)
print(X_test.shape)

(120, 4)
(30, 4)


In [8]:
#Split the training data into train and validation sets
X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, test_size=0.20, random_state=12)

In [31]:
layer_units = np.arange(1, 10)

train_scores, valid_scores = validation_curve(
  MLPClassifier(solver='lbfgs', activation='logistic', max_iter=5000, verbose=True, random_state=77, learning_rate_init=0.01),
  X, Y, param_name="hidden_layer_sizes", param_range=layer_units
)

train_scores_mean = np.mean(train_scores, axis=1)
valid_scores_mean = np.mean(valid_scores, axis=1)

In [32]:
train_df = pd.DataFrame(dict(
    Hidden_Layer_Units = layer_units,
    Accuracy = train_scores_mean
))
train_df['type'] = "Training Score"

valid_df = pd.DataFrame(dict(
    Hidden_Layer_Units = layer_units,
    Accuracy = valid_scores_mean
))
valid_df['type'] = "Cross Validation Score"

acc_df = pd.concat([train_df, valid_df])

In [33]:
px.line(acc_df, x="Hidden_Layer_Units", y="Accuracy", labels={"Hidden_Layer_Units" : "HL Units"}, title="Error as Affected by Number of Units in hidden layer", color="type", markers=True)

In [115]:
# The above graph makes me think that 1 unit in the hidden layer and 6 units in the hidden layer would be interesting to continue to explore

# Model with 1 Unit
learningRate = np.arange(0.5, 1, 0.05)

train_scores, valid_scores = validation_curve(
  MLPClassifier(solver='lbfgs', activation='logistic', max_iter=5000, verbose=True, hidden_layer_sizes=(1,)),
  X, Y, param_name="learning_rate_init", param_range=learningRate
)

train_scores_mean = np.mean(train_scores, axis=1)
valid_scores_mean = np.mean(valid_scores, axis=1)

In [116]:
train_df = pd.DataFrame(dict(
    Learning_Rates = learningRate,
    Accuracy = train_scores_mean
))
train_df['type'] = "Training Score"

valid_df = pd.DataFrame(dict(
    Learning_Rates = learningRate,
    Accuracy = valid_scores_mean
))
valid_df['type'] = "Cross Validation Score"

Unit1_df = pd.concat([train_df, valid_df])

In [117]:
px.line(Unit1_df, x="Learning_Rates", y="Accuracy", labels={"Learning_Rates" : "Lambda"}, title="Error as Affected by Learning Rate for a model with 1 Unit in the Hidden Layer", color="type", markers=True)

In [127]:
# Model with 6 Units
learningRate = np.arange(3, 5, 0.05)

train_scores, valid_scores = validation_curve(
  MLPClassifier(solver='lbfgs', activation='logistic', max_iter=5000, verbose=True, hidden_layer_sizes=(6,)),
  X, Y, param_name="learning_rate_init", param_range=learningRate
)

train_scores_mean = np.mean(train_scores, axis=1)
valid_scores_mean = np.mean(valid_scores, axis=1)

In [128]:
train_df = pd.DataFrame(dict(
    Learning_Rates = learningRate,
    Accuracy = train_scores_mean
))
train_df['type'] = "Training Score"

valid_df = pd.DataFrame(dict(
    Learning_Rates = learningRate,
    Accuracy = valid_scores_mean
))
valid_df['type'] = "Cross Validation Score"

Unit1_df = pd.concat([train_df, valid_df])

In [129]:
px.line(Unit1_df, x="Learning_Rates", y="Accuracy", labels={"Learning_Rates" : "Lambda"}, title="Error as Affected by Learning Rate for a model with 6 Units in the Hidden Layer", color="type", markers=True)

After zeroing in a bit, it seems like the model with 1 unit is the best model since the score of the training and cross validation sets are closer and are higher than the model with 6 models. I think the 1 unit model becomes stable at about 0.85 for the learning rate but becomes unstable if the learning rate is turned higher.

In [135]:
chosen_model = MLPClassifier(solver='lbfgs', activation='logistic', max_iter=5000, verbose=True, hidden_layer_sizes=(1,), learning_rate_init=0.85)
chosen_model.fit(X_train, Y_train)

MLPClassifier(activation='logistic', hidden_layer_sizes=(1,),
              learning_rate_init=0.85, max_iter=5000, solver='lbfgs',
              verbose=True)

In [136]:
predictions = chosen_model.predict(X_test)
print(accuracy_score(predictions, Y_test))

0.9666666666666667


It seems that the model with 1 hidden unit performs with a 0.967% accuracy on the fisher's iris dataset.