# Exercise 1
Try to build a classifier for the MNIST dataset that achieves over 97% accuracy on the test set.

In [None]:
from __future__ import division, print_function

import matplotlib.pyplot as plt
import numpy as np

from sklearn.datasets import fetch_mldata
from sklearn.model_selection import train_test_split, cross_val_predict, cross_val_score

%matplotlib inline

Utility functions used to measure the performance of each classifier:

In [None]:
from matplotlib import cm
from sklearn.metrics import confusion_matrix, f1_score, precision_score, recall_score


def measure_performance(clf, X, y):
    """Print the accuracy of ``clf`` on ``(X, y)`` and plot its error matrix.

    Parameters
    ----------
    clf : fitted estimator exposing ``predict``.
    X : samples to evaluate on.
    y : true labels for ``X``.

    The plotted matrix is the row-normalized confusion matrix with its
    diagonal zeroed (see ``normalize_confusion_matrix``), so only the
    misclassifications are visible.
    """
    accuracy = get_accuracy(clf, X, y)
    print('Accuracy: ', accuracy)
    # Predict on the data that was passed in; previously this line read the
    # global y_train and an undefined y_pred, which raised a NameError.
    y_pred = clf.predict(X)
    conf_matrix = confusion_matrix(y, y_pred)
    conf_matrix_norm = normalize_confusion_matrix(conf_matrix)
    plot_confusion_matrix(conf_matrix_norm)

    
def get_accuracy(clf, X, y):
    """Return the fraction of samples in ``X`` that ``clf`` labels as in ``y``.

    ``clf`` must expose ``predict``; ``y`` must expose ``shape`` (its first
    dimension is used as the sample count).
    """
    predictions = clf.predict(X)
    n_correct = np.sum(predictions == y)
    n_samples = y.shape[0]
    return n_correct / n_samples


def normalize_confusion_matrix(conf_matrix):
    """Return per-row error rates of a confusion matrix.

    Each row is divided by its sum, then the diagonal is set to 0 so that
    only the off-diagonal entries (the misclassifications) remain.

    Parameters
    ----------
    conf_matrix : 2-D array-like of counts.

    Returns
    -------
    numpy.ndarray of floats with the same shape. The input is not modified.
    Rows whose sum is 0 are returned as all zeros rather than NaN.
    """
    counts = np.asarray(conf_matrix, dtype=float)
    row_sums = counts.sum(axis=1, keepdims=True)
    # Divide only where the row total is non-zero; rows with no samples keep
    # the zeros from `out`, avoiding 0/0 -> NaN and the RuntimeWarning.
    conf_matrix_norm = np.divide(
        counts, row_sums, out=np.zeros_like(counts), where=row_sums != 0
    )
    np.fill_diagonal(conf_matrix_norm, 0)
    return conf_matrix_norm


def plot_confusion_matrix(matrix):
    """Draw ``matrix`` as a colour-coded image with a colour bar.

    Creates a new 8x8-inch figure. Axis labels follow the scikit-learn
    ``confusion_matrix`` convention (rows are true labels, columns are
    predicted labels).
    """
    fig = plt.figure(figsize=(8, 8))
    ax = fig.add_subplot(111)
    # Pass the colormap by name: cm.get_cmap() is deprecated and no longer
    # available in recent Matplotlib releases.
    cax = ax.matshow(matrix, cmap='viridis')
    ax.set_xlabel('Predicted label')
    ax.set_ylabel('True label')
    fig.colorbar(cax)

Loading and preparing the data:

In [None]:
# Fetch the 'MNIST original' data set and split it 80/20 into train and test
# sets; random_state fixes the split so that re-runs are comparable.
# NOTE(review): fetch_mldata appears to be deprecated/removed in recent
# scikit-learn releases -- confirm the pinned version or plan a migration.
mnist = fetch_mldata('MNIST original')
X = mnist['data']
y = mnist['target']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=2
)

## Neural network
First, we search for the best hyperparameters for the neural network using GridSearchCV:

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Candidate values explored by the grid search.
alpha_options = 10.0 ** -np.arange(1, 7)  # 1e-1 down to 1e-6
solvers = ['lbfgs', 'sgd', 'adam']
hidden_layer_options = [(100,), (100, 100), (200,), (200,150), (200, 125, 50)]

param_grid = [
    dict(
        alpha=alpha_options,
        solver=solvers,
        hidden_layer_sizes=hidden_layer_options,
    ),
]

# Standardize the training features; the search runs on the scaled copy.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train, y_train)

# 3-fold cross-validated search over every combination in param_grid.
mlp_clf = MLPClassifier()
grid = GridSearchCV(estimator=mlp_clf, param_grid=param_grid, cv=3)
grid.fit(X_train_scaled, y_train)
grid.best_params_



Now we can create an MLPClassifier with the hyperparameters found above and measure its performance on the test set:

In [None]:
# Build the final model as a single pipeline so that exactly the same
# standardization is applied at training time and at prediction time.
mlp_clf = MLPClassifier(hidden_layer_sizes=(200,), alpha=10.0 ** -3, solver='adam')
pipeline = Pipeline([('scaler', StandardScaler()), ('neural_net', mlp_clf)])

# Fit the whole pipeline on the raw training data. Previously the network was
# fit on unscaled X_train while the pipeline fed it scaled inputs at predict
# time, so train and test features were on different scales.
pipeline.fit(X_train, y_train)

measure_performance(pipeline, X_test, y_test)