# Classification on the MNIST dataset

In this notebook, I will try to classify the digits in the MNIST dataset using basic neural networks implemented in PyTorch.

In a second step, I will try to interpret the models with different methods (SHAP, LIME, etc.).

## Classification

In [None]:
import numpy as np

from mnist import MNIST

# MNIST reader pointed at the raw data directory.
mndata = MNIST("../data/raw/")

# Training split: each part returned by the loader becomes a NumPy array.
images, labels = (np.array(part) for part in mndata.load_training())

# Test split, converted the same way.
images_test, labels_test = (np.array(part) for part in mndata.load_testing())

In [None]:
import random

import matplotlib.pyplot as plt


def plot_digit(image, label):
    """Draw one MNIST digit with pyplot.

    The image is reshaped to 28x28 and drawn with the "Greys" colormap,
    the axis is switched off, and ``label`` is used as the title.
    """
    pixels = image.reshape(28, 28)
    render_options = {"cmap": "Greys", "interpolation": "nearest"}
    plt.imshow(pixels, **render_options)
    plt.axis("off")
    plt.title(label)


def plot_random_digits(n=10, x=5, y=2, figsize=4):
    """Plot a grid of randomly chosen MNIST training digits.

    Reads the module-level ``images`` and ``labels`` arrays.

    Args:
        n: Number of digits to draw. No more than ``x * y`` digits (the
            grid capacity) and no more than ``len(images)`` are drawn.
        x: Number of grid columns.
        y: Number of grid rows.
        figsize: Size of each grid cell; the figure is created with
            ``figsize=(figsize * x, figsize * y)``.
    """
    # Never draw more digits than there are grid cells or available images.
    count = min(n, x * y, len(images))
    index = random.sample(range(len(images)), count)

    # squeeze=False keeps `axs` a 2-D array even for a 1x1 grid, so `.flat`
    # is always available.
    _, axs = plt.subplots(
        y, x, figsize=(figsize * x, figsize * y), squeeze=False
    )
    for position, ax in enumerate(axs.flat):
        ax.axis("off")
        if position >= len(index):
            # Grid cell without a sampled digit: leave it blank.
            continue
        sample = index[position]
        ax.imshow(
            images[sample].reshape(28, 28), cmap="Greys", interpolation="nearest"
        )
        ax.set_title(labels[sample])
    plt.show()

In [None]:
# Visual sanity check: with the function's defaults this draws 10 random
# training digits on a 2-row by 5-column grid.
plot_random_digits()

In [None]:
# Split the positions of the training images into a hold-out part and a
# training part. A fixed seed keeps the split identical across
# "Restart Kernel -> Run All" executions.
SPLIT_SEED = 42
TEST_FRACTION = 0.2  # share of the training images that is held out

# Randomly ordered indexes 0 .. len(images) - 1.
indexes = np.random.default_rng(SPLIT_SEED).permutation(len(images))
test_size = int(len(images) * TEST_FRACTION)
test_indexes = indexes[:test_size]
train_indexes = indexes[test_size:]

In [None]:
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# Short aliases for the training data loaded above.
X = images
y = labels

# Same aliases for the separate test split.
X_test = images_test
y_test = labels_test

In [None]:
# Baseline model: k-nearest-neighbours classifier with k=5, fitted on the
# training arrays X and y.
estimator = KNeighborsClassifier(n_neighbors=5)
estimator.fit(X, y)

In [None]:
# Evaluate the fitted baseline on the test split (for scikit-learn
# classifiers, `score` reports mean accuracy).
estimator.score(X_test, y_test)

In [None]:
from mlxtend.evaluate import confusion_matrix
from mlxtend.plotting import plot_confusion_matrix

# NOTE: `confusion_matrix` is mlxtend's function imported in this cell (it
# takes `y_target` / `y_predicted`); it shadows the scikit-learn function of
# the same name imported in an earlier cell.

# Keep the predictions in a variable so they can be reused without
# re-running the prediction step.
y_pred = estimator.predict(X_test)

cm = confusion_matrix(y_target=y_test, y_predicted=y_pred)
fig, ax = plot_confusion_matrix(conf_mat=cm)
# plt.show() renders the figure in the notebook; Figure.show() only warns
# when the active backend is not a GUI backend.
plt.show()

In [None]:
# Candidate neighbourhood sizes to compare.
param_grid = {"n_neighbors": [3, 5, 7, 11, 15]}
estimator = KNeighborsClassifier()

# 5-fold cross-validated grid search scored with negative log loss.
# Train scores are kept, progress is logged verbosely, and n_jobs=-1
# requests parallel execution.
selector = GridSearchCV(
    estimator=estimator,
    param_grid=param_grid,
    cv=5,
    scoring="neg_log_loss",
    return_train_score=True,
    n_jobs=-1,
    verbose=3,
).fit(X, y)

In [None]:
import pandas as pd

# Tabulate the grid-search results stored on the fitted selector.
results = pd.DataFrame(data=selector.cv_results_)

In [None]:
fig, ax = plt.subplots(figsize=(10, 6))
ax.set_title("GridSearchCV results")