In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import metrics
from sklearn.metrics import classification_report, zero_one_loss, confusion_matrix, accuracy_score

In [2]:
from sklearn.datasets import fetch_openml
# Load data from https://www.openml.org/d/554
X, y = fetch_openml('mnist_784', version=1, return_X_y=True)
X = X / 255.
X.shape

(70000, 784)

In [3]:
# rescale the data, use the traditional train/test split

# (60K: Train) and (10K: Test)
X_train, X_test = X[:60000], X[60000:]
y_train, y_test = y[:60000], y[60000:]

In [4]:
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [5]:
mlp1 = MLPClassifier(hidden_layer_sizes=(100,), max_iter=500, alpha=0.0001)
#sgd refers to stochastic gradient descent.
mlp1.fit(X_train, y_train)
y_pred = mlp1.predict(X_test)

In [7]:
print("Report: " + str(classification_report(y_test, y_pred)))
print("Accuracy: " + str(accuracy_score(y_test,y_pred)))
print("Error: " + str(zero_one_loss(y_test, y_pred)))

Report:               precision    recall  f1-score   support

           0       0.98      0.99      0.99       980
           1       0.99      0.99      0.99      1135
           2       0.97      0.97      0.97      1032
           3       0.98      0.98      0.98      1010
           4       0.98      0.98      0.98       982
           5       0.98      0.98      0.98       892
           6       0.98      0.97      0.98       958
           7       0.97      0.98      0.98      1028
           8       0.98      0.97      0.97       974
           9       0.97      0.97      0.97      1009

    accuracy                           0.98     10000
   macro avg       0.98      0.98      0.98     10000
weighted avg       0.98      0.98      0.98     10000

Accuracy: 0.9785
Error: 0.021499999999999964


In [11]:
# For lbfgs solver:
# 50 hidden layers, activation function is identity, learning rate is set to constant and max iterations is set to 450 
mlp2 = MLPClassifier(hidden_layer_sizes=(50), activation='identity', solver='lbfgs', alpha=0.001, 
              learning_rate='constant', learning_rate_init=0.001, 
              max_iter=450, tol=0.00001, 
              validation_fraction=0.2, beta_1=0.9, beta_2=0.999, 
              max_fun=15000)

mlp2.fit(X_train, y_train)
y_pred2 = mlp2.predict(X_test)

print("Report: " + str(classification_report(y_test, y_pred2)))
print("Accuracy: " + str(accuracy_score(y_test,y_pred2)))
print("Error: " + str(zero_one_loss(y_test, y_pred2)))

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Report:               precision    recall  f1-score   support

           0       0.95      0.97      0.96       980
           1       0.96      0.98      0.97      1135
           2       0.93      0.90      0.91      1032
           3       0.90      0.92      0.91      1010
           4       0.93      0.93      0.93       982
           5       0.90      0.87      0.88       892
           6       0.94      0.95      0.95       958
           7       0.93      0.92      0.92      1028
           8       0.88      0.89      0.88       974
           9       0.91      0.91      0.91      1009

    accuracy                           0.92     10000
   macro avg       0.92      0.92      0.92     10000
weighted avg       0.92      0.92      0.92     10000

Accuracy: 0.9235
Error: 0.07650000000000001


In [12]:
# For lbfgs solver:
# 100 hidden layers, activation function is logistic, learning rate is set to adaptive, 
# beta1 is 0.95 and max iterations is set to 500 
mlp3 = MLPClassifier(hidden_layer_sizes=(100), activation='logistic', solver='lbfgs', alpha=0.001, 
              learning_rate='adaptive', learning_rate_init=0.001, 
              max_iter=500, tol=0.00001, 
              validation_fraction=0.2, beta_1=0.95, beta_2=0.999, 
              max_fun=15000)

mlp3.fit(X_train, y_train)
y_pred3 = mlp3.predict(X_test)

print("Report: " + str(classification_report(y_test, y_pred3)))
print("Accuracy: " + str(accuracy_score(y_test,y_pred3)))
print("Error: " + str(zero_one_loss(y_test, y_pred3)))

Report:               precision    recall  f1-score   support

           0       0.98      0.99      0.98       980
           1       0.99      0.99      0.99      1135
           2       0.97      0.97      0.97      1032
           3       0.96      0.97      0.97      1010
           4       0.97      0.97      0.97       982
           5       0.96      0.95      0.95       892
           6       0.98      0.98      0.98       958
           7       0.97      0.97      0.97      1028
           8       0.96      0.96      0.96       974
           9       0.97      0.96      0.97      1009

    accuracy                           0.97     10000
   macro avg       0.97      0.97      0.97     10000
weighted avg       0.97      0.97      0.97     10000

Accuracy: 0.9709
Error: 0.029100000000000015


In [13]:
# For lbfgs solver:
# 200 hidden layers, activation function is relu, learning rate is set to invscaling, beta1 is 0.9 and max iterations is set to 550 
mlp4 =  MLPClassifier(hidden_layer_sizes=(200), activation='relu', solver='lbfgs', alpha=0.001, 
              learning_rate='invscaling', learning_rate_init=0.01, 
              max_iter=550, tol=0.00001, 
              validation_fraction=0.2, beta_1=0.9, beta_2=0.999, 
              max_fun=15000)

mlp4.fit(X_train, y_train)
y_pred4 = mlp4.predict(X_test)

print("Report: " + str(classification_report(y_test, y_pred4)))
print("Accuracy: " + str(accuracy_score(y_test,y_pred4)))
print("Error: " + str(zero_one_loss(y_test, y_pred4)))

Report:               precision    recall  f1-score   support

           0       0.98      0.99      0.99       980
           1       0.99      0.99      0.99      1135
           2       0.98      0.97      0.98      1032
           3       0.97      0.98      0.98      1010
           4       0.98      0.98      0.98       982
           5       0.98      0.97      0.98       892
           6       0.98      0.98      0.98       958
           7       0.98      0.98      0.98      1028
           8       0.97      0.97      0.97       974
           9       0.98      0.97      0.98      1009

    accuracy                           0.98     10000
   macro avg       0.98      0.98      0.98     10000
weighted avg       0.98      0.98      0.98     10000

Accuracy: 0.9791
Error: 0.02090000000000003


In [15]:
# For sgd solver:
# 50 hidden layers, activation function is logistic, learning rate is set to constant, batch size is auto
# epochs is set to 10 and max iterations is set to 500 
mlp5 = MLPClassifier(hidden_layer_sizes=(50), activation='logistic', solver='sgd', alpha=0.001, 
              batch_size='auto', learning_rate='constant', learning_rate_init=0.001, max_iter = 500,
              tol=0.0001, validation_fraction=0.1, beta_1=0.9, beta_2=0.999, n_iter_no_change=10)

mlp5.fit(X_train, y_train)
y_pred5 = mlp5.predict(X_test)

print("Report: " + str(classification_report(y_test, y_pred5)))
print("Accuracy: " + str(accuracy_score(y_test,y_pred5)))
print("Error: " + str(zero_one_loss(y_test, y_pred5)))



Report:               precision    recall  f1-score   support

           0       0.96      0.98      0.97       980
           1       0.98      0.98      0.98      1135
           2       0.96      0.95      0.95      1032
           3       0.94      0.96      0.95      1010
           4       0.95      0.96      0.95       982
           5       0.96      0.93      0.94       892
           6       0.95      0.96      0.96       958
           7       0.96      0.95      0.95      1028
           8       0.95      0.94      0.94       974
           9       0.94      0.93      0.94      1009

    accuracy                           0.95     10000
   macro avg       0.95      0.95      0.95     10000
weighted avg       0.95      0.95      0.95     10000

Accuracy: 0.9541
Error: 0.04590000000000005


In [16]:
# For sgd solver:
# 100 hidden layers, activation function is tanh, learning rate is set to invscaling, batch size is auto
# epochs is set to 20 and max iterations is set to 500 
mlp6 = MLPClassifier(hidden_layer_sizes=(100), activation='tanh', solver='sgd', alpha=0.001, 
              batch_size='auto', learning_rate='invscaling', learning_rate_init=0.01, power_t=0.5,
              tol=0.0001, validation_fraction=0.1, beta_1=0.9, beta_2=0.999, 
              verbose = 10, early_stopping=True, max_iter=500, n_iter_no_change=20)

mlp6.fit(X_train, y_train)
y_pred6 = mlp6.predict(X_test)

print("Report: " + str(classification_report(y_test, y_pred6)))
print("Accuracy: " + str(accuracy_score(y_test,y_pred6)))
print("Error: " + str(zero_one_loss(y_test, y_pred6)))

Iteration 1, loss = 0.59241646
Validation score: 0.894333
Iteration 2, loss = 0.36175796
Validation score: 0.896500
Iteration 3, loss = 0.36016917
Validation score: 0.896167
Iteration 4, loss = 0.35933324
Validation score: 0.896167
Iteration 5, loss = 0.35878133
Validation score: 0.896167
Iteration 6, loss = 0.35837312
Validation score: 0.896167
Iteration 7, loss = 0.35804900
Validation score: 0.896000
Iteration 8, loss = 0.35778327
Validation score: 0.895833
Iteration 9, loss = 0.35755356
Validation score: 0.896000
Iteration 10, loss = 0.35735198
Validation score: 0.895833
Iteration 11, loss = 0.35717199
Validation score: 0.895833
Iteration 12, loss = 0.35700865
Validation score: 0.895667
Iteration 13, loss = 0.35685820
Validation score: 0.895667
Iteration 14, loss = 0.35671876
Validation score: 0.895833
Iteration 15, loss = 0.35658746
Validation score: 0.895833
Iteration 16, loss = 0.35646395
Validation score: 0.895833
Iteration 17, loss = 0.35634755
Validation score: 0.895833
Iterat

In [18]:
# For sgd solver:
# 200 hidden layers, activation function is relu, learning rate is set to adaptive , batch size is auto
# epochs is set to 20, early stopping is used and max iterations is set to 550 
mlp7 = MLPClassifier(hidden_layer_sizes=(200), activation='relu', solver='sgd', 
                     alpha=0.0001, batch_size='auto', learning_rate='adaptive', 
                     learning_rate_init=0.001, power_t=0.5, max_iter=550, tol=0.0001, early_stopping=True,
                     validation_fraction=0.1, beta_1=0.9, beta_2=0.999, n_iter_no_change=20)

mlp7.fit(X_train, y_train)
y_pred7 = mlp7.predict(X_test)

print("Report: " + str(classification_report(y_test, y_pred7)))
print("Accuracy: " + str(accuracy_score(y_test,y_pred7)))
print("Error: " + str(zero_one_loss(y_test, y_pred7)))

Report:               precision    recall  f1-score   support

           0       0.98      0.98      0.98       980
           1       0.99      0.99      0.99      1135
           2       0.97      0.98      0.98      1032
           3       0.98      0.98      0.98      1010
           4       0.98      0.98      0.98       982
           5       0.98      0.97      0.98       892
           6       0.98      0.98      0.98       958
           7       0.98      0.98      0.98      1028
           8       0.97      0.98      0.97       974
           9       0.98      0.97      0.97      1009

    accuracy                           0.98     10000
   macro avg       0.98      0.98      0.98     10000
weighted avg       0.98      0.98      0.98     10000

Accuracy: 0.9786
Error: 0.021399999999999975


In [19]:
# For adam solver:
# 250 hidden layers, activation function is logistic, learning rate is set to invscaling, batch size is auto
# epochs is set to 20, early stopping is used and max iterations is set to 450 
mlp8 = MLPClassifier(hidden_layer_sizes=(250), activation='logistic', solver='adam', alpha=0.0001, 
              learning_rate='invscaling', learning_rate_init=0.001, 
              max_iter=450, tol=0.0001, early_stopping=True,
              validation_fraction=0.1, beta_1=0.95, beta_2=0.999, 
              n_iter_no_change=20)

mlp8.fit(X_train, y_train)
y_pred8 = mlp8.predict(X_test)

print("Report: " + str(classification_report(y_test, y_pred8)))
print("Accuracy: " + str(accuracy_score(y_test,y_pred8)))
print("Error: " + str(zero_one_loss(y_test, y_pred8)))

Report:               precision    recall  f1-score   support

           0       0.98      0.99      0.99       980
           1       0.99      0.99      0.99      1135
           2       0.98      0.98      0.98      1032
           3       0.97      0.99      0.98      1010
           4       0.98      0.98      0.98       982
           5       0.98      0.97      0.97       892
           6       0.99      0.98      0.98       958
           7       0.98      0.98      0.98      1028
           8       0.98      0.98      0.98       974
           9       0.98      0.97      0.97      1009

    accuracy                           0.98     10000
   macro avg       0.98      0.98      0.98     10000
weighted avg       0.98      0.98      0.98     10000

Accuracy: 0.9794
Error: 0.02059999999999995


In [20]:
# For adam solver:
# 250 hidden layers, activation function is tanh, learning rate is set to adaptive, 
# epochs is set to 20, early stopping is used and max iterations is set to 550 
mlp9 = MLPClassifier(hidden_layer_sizes=(250), activation='tanh', solver='adam', alpha=0.0001, 
              learning_rate='adaptive', learning_rate_init=0.001, 
              max_iter=550, tol=0.0001, early_stopping=True,
              validation_fraction=0.1, beta_1=0.95, beta_2=0.999, 
              n_iter_no_change=20)

mlp9.fit(X_train, y_train)
y_pred9 = mlp9.predict(X_test)

print("Report: " + str(classification_report(y_test, y_pred9)))
print("Accuracy: " + str(accuracy_score(y_test,y_pred9)))
print("Error: " + str(zero_one_loss(y_test, y_pred9)))

Report:               precision    recall  f1-score   support

           0       0.98      0.99      0.99       980
           1       0.99      0.99      0.99      1135
           2       0.97      0.97      0.97      1032
           3       0.97      0.98      0.97      1010
           4       0.97      0.98      0.98       982
           5       0.98      0.98      0.98       892
           6       0.98      0.98      0.98       958
           7       0.98      0.97      0.98      1028
           8       0.97      0.97      0.97       974
           9       0.97      0.97      0.97      1009

    accuracy                           0.98     10000
   macro avg       0.98      0.98      0.98     10000
weighted avg       0.98      0.98      0.98     10000

Accuracy: 0.9778
Error: 0.022199999999999998
