In [3]:
# Widen the notebook container to the full browser width.
# `display` and `HTML` come from the public `IPython.display` module; importing
# them from `IPython.core.display` is a deprecated path.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [4]:
from mnist import MNIST
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import numpy as np

#### Loading train data

In [5]:
# Load the MNIST training split from ./data and one-hot encode its labels.
mndata = MNIST('./data')
mndata.gz = True  # presumably tells the loader to read the gzipped files -- confirm against python-mnist
train_images, train_labels = mndata.load_training()
# Ask for a numeric dtype explicitly: the default dtype of pd.get_dummies is
# bool in pandas >= 2.0, and these columns are used as Keras training targets.
train_mod_labels = pd.get_dummies(train_labels, dtype=np.float32).to_numpy()

#### Loading test data

In [6]:
# Load the MNIST test split; keep the integer labels as an array for the
# sklearn metrics and also build a one-hot version.
test_images, test_labels = mndata.load_testing()
test_labels = np.array(test_labels)
# Ask for a numeric dtype explicitly: the default dtype of pd.get_dummies is
# bool in pandas >= 2.0.
test_mod_labels = pd.get_dummies(test_labels, dtype=np.float32).to_numpy()

# Multi-layer perceptron using Keras

#### Importing library and modules

In [10]:
import tensorflow.keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, Flatten

### Neural network with 2 hidden layers, 1 dropout layer and relu, sigmoid, relu activations

In [11]:
# Layer stack: Dense 16 (relu) -> Dense 1000 (sigmoid) -> Dropout 0.25
# -> Dense 1000 (relu) -> Dense 10 (softmax).
# The input width is taken from the length of the first training image.
# NOTE(review): three Dense layers precede the softmax output, although the
# section heading says "2 hidden layers".
model1 = Sequential([
    Dense(16, input_dim=len(train_images[0]), activation='relu'),
    Dense(1000, activation='sigmoid'),
    Dropout(0.25),
    Dense(1000, activation='relu'),
    Dense(10, activation='softmax'),
])

In [12]:
# Adam optimiser with categorical cross-entropy on the one-hot labels;
# accuracy is reported during training.
model1.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [13]:
# Train model1 for 20 epochs in mini-batches of 500 images.
model1.fit(
    x=np.array(train_images),
    y=train_mod_labels,
    batch_size=500,
    epochs=20,
)

Train on 60000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7f37c05aba20>

In [14]:
# Softmax scores for every test image (one row per image, one column per class).
y_pred = model1.predict(np.array(test_images))
# Predicted digit = index of the highest score in each row, computed for all
# rows in one vectorised call instead of a Python loop.
pred1 = np.argmax(y_pred, axis=1)

### Accuracy and classification report

In [15]:
# Per-digit precision, recall and F1 of model1 on the test set.
print(classification_report(y_true=test_labels, y_pred=pred1))

              precision    recall  f1-score   support

           0       0.98      0.98      0.98       980
           1       0.98      0.99      0.98      1135
           2       0.95      0.97      0.96      1032
           3       0.95      0.96      0.96      1010
           4       0.97      0.97      0.97       982
           5       0.96      0.96      0.96       892
           6       0.96      0.98      0.97       958
           7       0.97      0.97      0.97      1028
           8       0.97      0.94      0.96       974
           9       0.97      0.95      0.96      1009

    accuracy                           0.97     10000
   macro avg       0.97      0.97      0.97     10000
weighted avg       0.97      0.97      0.97     10000



### Confusion matrix

In [16]:
# Confusion matrix for model1: rows are true digits, columns are predicted digits.
print(confusion_matrix(y_true=test_labels, y_pred=pred1))

[[ 962    1    1    4    0    2    5    2    3    0]
 [   0 1121    3    1    0    1    6    1    2    0]
 [   3    2 1004    7    5    1    2    5    3    0]
 [   0    1   15  974    0    5    0    7    2    6]
 [   1    1    3    0  949    1    9    2    1   15]
 [   1    0    1   14    2  855    7    3    5    4]
 [   6    3    3    1    4    6  935    0    0    0]
 [   0    8   12    4    0    0    1  995    3    5]
 [   7    2   11   13    4    9    6    7  913    2]
 [   1    6    0   11   16    8    1    6    5  955]]


### Neural network with 2 hidden layers and sigmoid, tanh, relu activations

In [17]:
# Layer stack: Dense 100 (sigmoid) -> Dense 100 (tanh) -> Dense 100 (relu)
# -> Dense 10 (softmax).
# The input width is taken from the length of the first training image.
# NOTE(review): three Dense layers precede the softmax output, although the
# section heading says "2 hidden layers".
model2 = Sequential([
    Dense(100, input_dim=len(train_images[0]), activation='sigmoid'),
    Dense(100, activation='tanh'),
    Dense(100, activation='relu'),
    Dense(10, activation='softmax'),
])

In [18]:
# RMSprop optimiser with categorical cross-entropy on the one-hot labels;
# accuracy is reported during training.
model2.compile(
    optimizer='RMSprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [19]:
# Train model2 for 40 epochs in mini-batches of 1000 images.
model2.fit(
    x=np.array(train_images),
    y=train_mod_labels,
    batch_size=1000,
    epochs=40,
)

Train on 60000 samples
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<tensorflow.python.keras.callbacks.History at 0x7f37b864f978>

In [20]:
# Softmax scores for every test image (one row per image, one column per class).
y_pred = model2.predict(np.array(test_images))
# Predicted digit = index of the highest score in each row, computed for all
# rows in one vectorised call instead of a Python loop.
pred2 = np.argmax(y_pred, axis=1)

### Accuracy and classification report

In [21]:
# Per-digit precision, recall and F1 of model2 on the test set.
print(classification_report(y_true=test_labels, y_pred=pred2))

              precision    recall  f1-score   support

           0       0.98      0.99      0.98       980
           1       0.99      0.99      0.99      1135
           2       0.98      0.96      0.97      1032
           3       0.93      0.98      0.95      1010
           4       0.97      0.97      0.97       982
           5       0.95      0.95      0.95       892
           6       0.96      0.97      0.97       958
           7       0.97      0.97      0.97      1028
           8       0.96      0.95      0.96       974
           9       0.98      0.93      0.95      1009

    accuracy                           0.97     10000
   macro avg       0.97      0.97      0.97     10000
weighted avg       0.97      0.97      0.97     10000



### Confusion matrix

In [22]:
# Confusion matrix for model2: rows are true digits, columns are predicted digits.
print(confusion_matrix(y_true=test_labels, y_pred=pred2))

[[ 966    0    1    0    0    3    5    1    3    1]
 [   0 1124    1    4    0    1    2    0    3    0]
 [   7    1  994    8    3    0    5    8    6    0]
 [   0    1    5  986    0    6    0    7    5    0]
 [   2    0    2    1  954    3    9    2    2    7]
 [   2    0    1   25    1  845    8    1    6    3]
 [   3    2    0    3    1   13  932    1    3    0]
 [   0    6    9    3    3    0    0  994    3   10]
 [   5    1    3   15    3   10    4    3  927    3]
 [   5    4    0   13   21    9    1    9    4  943]]


### Neural network with 3 hidden layers, 1 dropout layer (25% rate) and sigmoid, tanh, relu, exp activations

In [23]:
# Layer stack: Dense 100 (sigmoid) -> Dense 100 (tanh) -> Dense 100 (relu)
# -> Dropout 0.25 -> Dense 100 (exponential) -> Dense 10 (softmax).
# The input width is taken from the length of the first training image.
# NOTE(review): four Dense layers precede the softmax output, although the
# section heading says "3 hidden layers".
model3 = Sequential([
    Dense(100, input_dim=len(train_images[0]), activation='sigmoid'),
    Dense(100, activation='tanh'),
    Dense(100, activation='relu'),
    Dropout(0.25),
    Dense(100, activation='exponential'),
    Dense(10, activation='softmax'),
])

In [24]:
# Adam optimiser with categorical cross-entropy on the one-hot labels;
# accuracy is reported during training.
model3.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [25]:
# Train model3 for 40 epochs in mini-batches of 1000 images.
model3.fit(
    x=np.array(train_images),
    y=train_mod_labels,
    batch_size=1000,
    epochs=40,
)

Train on 60000 samples
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<tensorflow.python.keras.callbacks.History at 0x7f37b84573c8>

In [26]:
# Softmax scores for every test image (one row per image, one column per class).
y_pred = model3.predict(np.array(test_images))
# Predicted digit = index of the highest score in each row, computed for all
# rows in one vectorised call instead of a Python loop.
pred3 = np.argmax(y_pred, axis=1)

### Accuracy and classification report

In [27]:
# Per-digit precision, recall and F1 of model3 on the test set.
print(classification_report(y_true=test_labels, y_pred=pred3))

              precision    recall  f1-score   support

           0       0.97      0.98      0.98       980
           1       0.99      0.99      0.99      1135
           2       0.96      0.96      0.96      1032
           3       0.95      0.95      0.95      1010
           4       0.96      0.96      0.96       982
           5       0.95      0.94      0.95       892
           6       0.96      0.97      0.97       958
           7       0.97      0.97      0.97      1028
           8       0.95      0.95      0.95       974
           9       0.96      0.94      0.95      1009

    accuracy                           0.96     10000
   macro avg       0.96      0.96      0.96     10000
weighted avg       0.96      0.96      0.96     10000



### Confusion matrix

In [28]:
# Confusion matrix for model3: rows are true digits, columns are predicted digits.
print(confusion_matrix(y_true=test_labels, y_pred=pred3))

[[ 964    0    2    2    0    6    5    1    0    0]
 [   0 1122    2    1    0    1    4    0    4    1]
 [   8    1  987   10    5    0    7    7    6    1]
 [   3    0   10  964    0    9    0    9   10    5]
 [   1    0    4    1  943    1    7    5    3   17]
 [   5    0    1   16    2  841    8    3   11    5]
 [   8    2    0    1    5    4  932    0    5    1]
 [   2    4   14    6    1    0    0  994    2    5]
 [   3    2    4   11    2   13    4    3  927    5]
 [   3    4    1    8   20    9    1    5   10  948]]


# Convolutional neural network with Keras

#### Adjusting input data dimensions

In [29]:
# Reshape every flat image into a 28x28 single-channel array, matching the
# Conv2D input_shape=(28, 28, 1) used by the convolutional model.
# One vectorised reshape per split replaces the per-image append loops;
# -1 lets NumPy infer the number of images.
unravelled_train_images = np.array(train_images).reshape(-1, 28, 28, 1)
unravelled_test_images = np.array(test_images).reshape(-1, 28, 28, 1)

In [30]:
# Convolutional stack: Conv2D 32 (3x3, relu) -> Conv2D 64 (3x3, relu)
# -> MaxPooling 2x2 -> Dropout 0.25 -> Flatten -> Dense 128 (relu)
# -> Dropout 0.5 -> Dense 10 (softmax).
model4 = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax'),
])

In [32]:
# RMSprop optimiser with categorical cross-entropy on the one-hot labels;
# accuracy is reported during training.
model4.compile(
    optimizer='RMSprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
# Train the CNN on the reshaped images for 5 epochs in mini-batches of 1000.
model4.fit(
    x=np.array(unravelled_train_images),
    y=train_mod_labels,
    batch_size=1000,
    epochs=5,
)

Train on 60000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f377651e978>

In [33]:
# Softmax scores for every reshaped test image (one row per image, one column per class).
y_pred = model4.predict(np.array(unravelled_test_images))
# Predicted digit = index of the highest score in each row, computed for all
# rows in one vectorised call instead of a Python loop.
pred4 = np.argmax(y_pred, axis=1)

### Accuracy and classification report

In [34]:
# Per-digit precision, recall and F1 of the CNN (model4) on the test set.
print(classification_report(y_true=test_labels, y_pred=pred4))

              precision    recall  f1-score   support

           0       0.99      1.00      0.99       980
           1       0.99      0.99      0.99      1135
           2       1.00      0.98      0.99      1032
           3       1.00      0.99      0.99      1010
           4       0.98      1.00      0.99       982
           5       0.98      0.99      0.99       892
           6       0.99      0.99      0.99       958
           7       0.98      0.99      0.99      1028
           8       0.99      0.99      0.99       974
           9       1.00      0.98      0.99      1009

    accuracy                           0.99     10000
   macro avg       0.99      0.99      0.99     10000
weighted avg       0.99      0.99      0.99     10000



### Confusion matrix

In [35]:
# Confusion matrix for the CNN (model4): rows are true digits, columns are predicted digits.
print(confusion_matrix(y_true=test_labels, y_pred=pred4))

[[ 976    1    0    0    0    0    2    1    0    0]
 [   0 1129    2    1    0    1    1    0    1    0]
 [   2    3 1010    1    4    0    1   10    1    0]
 [   0    0    1 1000    0    5    0    2    2    0]
 [   0    0    0    0  978    0    1    0    2    1]
 [   2    0    0    1    0  887    1    0    1    0]
 [   4    1    0    0    2    4  947    0    0    0]
 [   0    2    2    0    4    0    0 1019    1    0]
 [   5    0    0    0    4    0    3    1  960    1]
 [   1    3    0    1   10    5    0    4    1  984]]


# Support Vector Machine

In [36]:
from sklearn.svm import SVC

#### Rbf kernel and C=2

In [37]:
# Support vector classifier with an RBF kernel and C=2, fitted on the flat
# pixel vectors and the integer training labels.
clf1 = SVC(kernel='rbf', C=2)
clf1.fit(X=np.array(train_images), y=train_labels)

SVC(C=2, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

#### Classification report

In [38]:
# Predict test digits with the RBF-kernel SVM, then print per-digit
# precision, recall and F1.
pred5 = clf1.predict(X=np.array(test_images))
print(classification_report(y_true=test_labels, y_pred=pred5))

              precision    recall  f1-score   support

           0       0.98      0.99      0.99       980
           1       0.99      0.99      0.99      1135
           2       0.98      0.98      0.98      1032
           3       0.98      0.99      0.98      1010
           4       0.98      0.98      0.98       982
           5       0.98      0.98      0.98       892
           6       0.99      0.99      0.99       958
           7       0.98      0.98      0.98      1028
           8       0.98      0.98      0.98       974
           9       0.98      0.97      0.97      1009

    accuracy                           0.98     10000
   macro avg       0.98      0.98      0.98     10000
weighted avg       0.98      0.98      0.98     10000



### Confusion matrix

In [39]:
# Confusion matrix for the RBF-kernel SVM: rows are true digits, columns are predicted digits.
print(confusion_matrix(y_true=test_labels, y_pred=pred5))

[[ 973    0    1    0    0    3    0    1    2    0]
 [   0 1127    3    1    0    1    1    1    1    0]
 [   6    1 1012    0    1    0    1    7    3    1]
 [   0    0    1  997    0    3    0    4    3    2]
 [   0    0    4    0  966    0    3    0    1    8]
 [   2    0    0    6    1  876    3    0    3    1]
 [   4    2    0    0    2    3  946    0    1    0]
 [   0    5   10    2    1    0    0 1005    0    5]
 [   3    0    2    3    4    3    1    2  951    5]
 [   2    2    0    7   10    1    1    7    1  978]]


#### Poly kernel and C=2

In [40]:
# Support vector classifier with a polynomial kernel and C=2, fitted on the
# flat pixel vectors and the integer training labels.
clf2 = SVC(kernel='poly', C=2)
clf2.fit(X=np.array(train_images), y=train_labels)

SVC(C=2, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='poly',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

#### Classification report

In [41]:
# Predict test digits with the polynomial-kernel SVM, then print per-digit
# precision, recall and F1.
pred6 = clf2.predict(X=np.array(test_images))
print(classification_report(y_true=test_labels, y_pred=pred6))

              precision    recall  f1-score   support

           0       0.98      0.99      0.99       980
           1       0.98      0.99      0.99      1135
           2       0.98      0.97      0.98      1032
           3       0.98      0.98      0.98      1010
           4       0.98      0.98      0.98       982
           5       0.98      0.97      0.97       892
           6       0.98      0.98      0.98       958
           7       0.98      0.97      0.98      1028
           8       0.98      0.98      0.98       974
           9       0.97      0.96      0.97      1009

    accuracy                           0.98     10000
   macro avg       0.98      0.98      0.98     10000
weighted avg       0.98      0.98      0.98     10000



### Confusion matrix

In [42]:
# Confusion matrix for the polynomial-kernel SVM: rows are true digits, columns are predicted digits.
print(confusion_matrix(y_true=test_labels, y_pred=pred6))

[[ 971    0    1    1    0    3    1    1    2    0]
 [   0 1128    2    1    0    0    3    0    1    0]
 [   7    3 1006    0    2    0    4    8    2    0]
 [   0    2    2  986    0    6    0    5    5    4]
 [   2    0    2    0  965    0    4    0    0    9]
 [   2    0    1    9    1  867    4    1    5    2]
 [   4    5    2    0    3    4  938    0    2    0]
 [   0   11    8    1    1    0    0  999    0    8]
 [   3    0    1    4    4    3    1    3  953    2]
 [   2    6    1    4   12    5    1    4    2  972]]


In [43]:
def _summary_row(label, predictions):
    """Build one row of the summary table for a fitted model.

    Parameters
    ----------
    label : str
        Human-readable model description shown in the 'Model' column.
    predictions : sequence of int
        Predicted digit for every test image, aligned with `test_labels`.

    Returns
    -------
    list
        [label, accuracy, weighted F1, weighted precision, weighted recall],
        all computed against the global `test_labels`.
    """
    return [
        label,
        accuracy_score(test_labels, predictions),
        f1_score(test_labels, predictions, average='weighted'),
        precision_score(test_labels, predictions, average='weighted'),
        recall_score(test_labels, predictions, average='weighted'),
    ]


# One row per model; the same four metrics are computed for each, so the
# shared helper replaces six copy-pasted expressions.
l1 = _summary_row("MLP with 2 hidden layers, 1 dropout layer and relu, sigmoid, relu activations", pred1)
l2 = _summary_row("MLP with 2 hidden layers and sigmoid, tanh, relu activations", pred2)
l3 = _summary_row("MLP with 3 hidden layers, 1 dropout layer (25% rate) and sigmoid, tanh, relu, exp activations", pred3)
l4 = _summary_row("Convolutional neural network", pred4)
l5 = _summary_row("SVM with rbf kernel and C=2", pred5)
l6 = _summary_row("SVM with poly kernel and C=2", pred6)

## Summary

In [44]:
# Collect the per-model metric rows into one comparison table.
data = [l1, l2, l3, l4, l5, l6]
df = pd.DataFrame(
    data,
    columns=[
        'Model',
        'Accuracy',
        'F1 score (weighted average)',
        'Precision (weighted average)',
        'Recall (weighted average)',
    ],
)
# Allow up to 100 characters per cell so the long model descriptions are not truncated.
pd.set_option('display.max_colwidth', 100)
df

Unnamed: 0,Model,Accuracy,F1 score (weighted average),Precision (weighted average),Recall (weighted average)
0,"MLP with 2 hidden layers, 1 dropout layer and relu, sigmoid, relu activations",0.9663,0.966271,0.966388,0.9663
1,"MLP with 2 hidden layers and sigmoid, tanh, relu activations",0.9665,0.966489,0.966698,0.9665
2,"MLP with 3 hidden layers, 1 dropout layer (25% rate) and sigmoid, tanh, relu, exp activations",0.9622,0.962175,0.962202,0.9622
3,Convolutional neural network,0.989,0.988999,0.989076,0.989
4,SVM with rbf kernel and C=2,0.9831,0.983092,0.983098,0.9831
5,SVM with poly kernel and C=2,0.9785,0.978481,0.978501,0.9785
