In [None]:
# %load 'https://grmnd.ch/jupyter/init.py'
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 500)
pd.set_option('display.float_format', lambda x: '%.2f' % x)

np.random.seed(42)

%matplotlib inline
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)
plt.rcParams['figure.figsize'] = [10, 5]
plt.style.use('fivethirtyeight')

%reload_ext autoreload
%autoreload 2

## Exercise 1

Try to build a classifier for the MNIST dataset that achieves over 97% accuracy on the test set. Hint: the `KNeighborsClassifier` works quite well for this task; you just need to find good hyperparameter values (try a grid search on the `weights` and `n_neighbors` hyperparameters).

## Setup

In [None]:
from sklearn.datasets import fetch_openml

# Fetch version 1 of the OpenML "mnist_784" dataset.
# as_frame=False asks for plain NumPy arrays. Without it, recent scikit-learn
# releases return pandas objects for this dataset, which changes what the
# slicing and dtype conversion in the following cells operate on.
mnist = fetch_openml('mnist_784', version=1, as_frame=False)

In [None]:
# Pull the inputs and the targets out of the fetched dataset bundle.
X = mnist["data"]
y = mnist["target"]

# Show both shapes as a quick sanity check.
(X.shape, y.shape)

In [None]:
# Convert the targets to unsigned 8-bit integers.
y = y.astype("uint8")

In [None]:
# Positional split: the first 60000 rows are used for training, the rest for testing.
X_train, X_test = X[:60000], X[60000:]
y_train, y_test = y[:60000], y[60000:]

## Evaluation

In [None]:
import joblib
# Load a previously saved search object; the relative path is resolved against the working directory.
# NOTE(review): presumably a fitted GridSearchCV written by a separate training run —
# the code that produced this file is not visible here; confirm.
# joblib.load unpickles the file, which can execute arbitrary code: only load files from a trusted source.
grid_search = joblib.load('grid_search.pkl')
# Use the search's best_estimator_ as the model for every evaluation cell below.
model = grid_search.best_estimator_
model

In [None]:
from sklearn.model_selection import cross_val_score

# 3-fold cross-validated accuracy of the loaded model on the training split.
cross_val_score(
    estimator=model,
    X=X_train,
    y=y_train,
    scoring="accuracy",
    cv=3,
)

In [None]:
# Score the loaded model on the held-out test split.
# NOTE(review): for scikit-learn classifiers `score` is mean accuracy; the concrete
# estimator type comes from the pickle and is not visible here — confirm.
model.score(X_test, y_test)

In [None]:
# Predictions of the loaded model for the test split; reused by the confusion matrix
# and classification report cells below.
y_test_predict = model.predict(X_test)

In [None]:
# Display the test-split predictions as the cell output.
y_test_predict

In [None]:
from sklearn.model_selection import cross_val_predict

# Cross-validated (3-fold) predictions for the training split, computed on all available cores.
# NOTE(review): y_train_pred is not used by any cell visible in this section —
# confirm a later cell needs it before keeping this computation.
y_train_pred = cross_val_predict(
    estimator=model,
    X=X_train,
    y=y_train,
    n_jobs=-1,
    cv=3,
)

In [None]:
from sklearn.metrics import confusion_matrix

# Confusion matrix comparing the test labels with the model's test predictions.
cm = confusion_matrix(y_true=y_test, y_pred=y_test_predict)

In [None]:
# Draw the confusion matrix as a heat map. scikit-learn's confusion_matrix puts
# the true classes on the rows and the predicted classes on the columns, so the
# axes are labelled accordingly to let the figure stand on its own.
plt.matshow(cm)
plt.title("Confusion matrix (test set)")
plt.xlabel("Predicted label")
# The trailing semicolon suppresses the text repr that would otherwise be echoed as cell output.
plt.ylabel("True label");

In [None]:
from sklearn.metrics import classification_report

# The report is a pre-formatted multi-line string, so print it rather than
# relying on the cell's repr (which would show escaped newlines).
print(classification_report(y_true=y_test, y_pred=y_test_predict))