# Lab 07: Handwritten Digit Recognition Using MLPClassifier 

## 1. Import Necessary Libraries

In [None]:
import numpy as np 
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.io import loadmat

from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV

from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score


## 2. Load Dataset

In [None]:
import os

# Location of the MNIST .mat file. The original hard-coded path stays the
# default so existing setups keep working; set the MNIST_MAT_PATH environment
# variable to load the file from elsewhere without editing the notebook.
mnist_path = os.environ.get(
    "MNIST_MAT_PATH",
    "D:/FTI/Module 3 Supervised Machine Learning/Lecture 7 NN/mnist-original.mat",
)

# The cells below read the "data" and "label" entries of the loaded object.
mnist_raw = loadmat(mnist_path)

MNIST (“Modified National Institute of Standards and Technology”) is a database of handwritten digits (0 through 9). It is commonly used as a baseline for testing image processing systems.

In [None]:
# Display the whole object returned by loadmat to see which entries it holds.
mnist_raw

In [None]:
# Inspect the values stored under the "data" key.
mnist_raw["data"]

In [None]:
# Shape of the "data" entry exactly as it is stored in the file.
mnist_raw["data"].shape

In [None]:
# Shape after transposing; the transposed layout is what the notebook uses
# as the feature matrix further down.
mnist_raw["data"].T.shape

In [None]:
# First row of the "label" entry; this row is used as the target vector below.
mnist_raw["label"][0]

In [None]:
# Shape of the first row of the "label" entry.
mnist_raw["label"][0].shape

In [None]:
# Repackage the raw contents in a data/target layout: the "data" entry is
# transposed and the first row of "label" becomes the target.
mnist = dict(
    data=mnist_raw["data"].T,
    target=mnist_raw["label"][0],
)

# Feature matrix and label vector used by the rest of the notebook.
X = mnist["data"]
y = mnist["target"]
(X.shape, y.shape)

## 3. Train/Test Split

In [None]:
# Shuffle samples and labels with the same permutation so each sample keeps
# its label. A seeded generator makes the split (and therefore the scores
# reported below) reproducible, and the permutation length follows the data
# instead of a hard-coded 70000.
rng = np.random.default_rng(42)
shuffle_index = rng.permutation(len(X))
X, y = X[shuffle_index], y[shuffle_index]

# The first 60000 shuffled samples are used for training, the rest for testing.
n_train = 60000

# MLPClassifier is sensitive to feature scaling, so the features are divided
# by 255 before training.
# NOTE(review): assumes the stored pixel intensities lie in 0-255, which maps
# them to [0, 1] - confirm against the data file.
X_train, X_test = X[:n_train] / 255.0, X[n_train:] / 255.0
y_train, y_test = y[:n_train], y[n_train:]
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

In [None]:
# Display the label array after shuffling.
y

In [None]:
# Shape of the full (train + test) label array.
y.shape

## 4. Dataset Visualization 

In [None]:
# Pick one sample (row 36000 of the shuffled X) to inspect and plot below.
some_digit = X[36000]

In [None]:
# Shape of the selected sample; it must hold 784 values for the 28x28
# reshape that follows to succeed.
some_digit.shape

In [None]:
# Display the raw values of the selected sample.
some_digit

In [None]:
# Arrange the sample's values as a 28x28 grid so it can be drawn as an image.
some_digit_image = some_digit.reshape(28, 28)

In [None]:
# Display the 28x28 array.
some_digit_image

In [None]:
# Draw the 28x28 sample with the binary colormap, hide the axes, and use the
# sample's label as the title.
plt.imshow(some_digit_image, cmap=plt.cm.binary)
plt.axis("off")
plt.title(y[36000])
plt.show()

### View First 100 Training Data Points

In [None]:
# Draw the first 100 training samples on a 10x10 grid of tick-less axes.
fig, axes = plt.subplots(
    10,
    10,
    figsize=(8, 8),
    subplot_kw=dict(xticks=[], yticks=[]),
    gridspec_kw={"hspace": 0.1, "wspace": 0.1},
)

# zip stops once the 100 axes are used up, so only the first 100 samples
# and labels are consumed.
for ax, image, label in zip(axes.flat, X_train, y_train):
    ax.imshow(image.reshape((28, 28)), cmap="binary", interpolation="nearest")
    # Write the label in the lower-left corner, in axes coordinates.
    ax.text(0.05, 0.05, str(int(label)), transform=ax.transAxes, color="black")

plt.show()

## 5. Training MLPClassifier

In [None]:
# Check the shape of the training matrix before fitting.
X_train.shape

In [None]:
# One hidden layer of 100 units, trained with the SGD solver.
# max_iter=10 caps training at 10 iterations, verbose=True prints progress
# while fitting, and random_state=1 fixes the estimator's random seed.
mlp = MLPClassifier(
    hidden_layer_sizes=(100,),
    solver="sgd",
    learning_rate_init=0.001,
    alpha=1e-4,
    max_iter=10,
    tol=1e-5,
    random_state=1,
    verbose=True,
)

mlp.fit(X_train, y_train)

In [None]:
# Score the fitted model on each split, then report both values.
train_score = mlp.score(X_train, y_train)
test_score = mlp.score(X_test, y_test)
print("Training set score: %f" % train_score)
print("Test set score: %f" % test_score)

In [None]:
# Predict a label for every test sample with the fitted model; the result is
# reused by the report, confusion matrix and plots below.
yfit = mlp.predict(X_test)

In [None]:
# Shape of the test label array.
y_test.shape

In [None]:
# Display names for the ten digit classes; also reused for the tick labels
# of the confusion-matrix plot.
tn = [str(digit) for digit in range(10)]

# Per-class summary of the test-set predictions.
# classification_report is already imported in the notebook's import cell,
# so it is not re-imported here.
print(classification_report(y_test, yfit, target_names=tn))

In [None]:
# Confusion matrix of true labels versus predictions on the test set.
mat = confusion_matrix(y_test, yfit)

# The matrix is transposed before plotting so that the axis labels below
# (true label on x, predicted label on y) match the heatmap.
fig, ax = plt.subplots(figsize=(8, 8))
sns.heatmap(
    mat.T,
    ax=ax,
    annot=True,
    fmt="d",
    square=True,
    cbar=False,
    cmap="viridis",
    xticklabels=tn,
    yticklabels=tn,
)
ax.set_title("Confusion matrix")
ax.set_xlabel("true label")
ax.set_ylabel("predicted label");

In [None]:
# Overall test accuracy, reported as a percentage.
# accuracy_score is already imported in the notebook's import cell, so it is
# not re-imported here.
print("Accuracy", accuracy_score(y_test, yfit) * 100)

In [None]:
# Show the first 100 test samples on a 10x10 grid, each annotated with its
# actual and predicted class. matplotlib.pyplot is already imported as plt in
# the notebook's import cell, so it is not re-imported here.
fig, axes = plt.subplots(10, 10, figsize=(8, 8),
                         subplot_kw={'xticks': [], 'yticks': []},
                         gridspec_kw=dict(hspace=0.1, wspace=0.1))

# zip stops once the 100 axes are used up, so only the first 100 test
# samples, labels and predictions are consumed.
for ax, image, actual, predicted in zip(axes.flat, X_test, y_test, yfit):
    ax.imshow(image.reshape((28, 28)), cmap='binary', interpolation='nearest')
    # Actual class, lower-left corner.
    ax.text(0.05, 0.05, str(int(actual)),
            transform=ax.transAxes,
            color='black')
    # Predicted class, lower-right corner; red marks a misclassification.
    ax.text(0.75, 0.05, str(int(predicted)),
            transform=ax.transAxes,
            color='black' if predicted == actual else 'red')

plt.show()