In [None]:
import numpy as np
import pandas as pd

import matplotlib as mlp
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

from sklearn.datasets import fetch_openml

from sklearn.pipeline import Pipeline

from sklearn.preprocessing import StandardScaler

from sklearn.neighbors import KNeighborsClassifier

from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score

## Exercise 01
Try to build a classifier for the MNIST dataset that achieves over 97% accuracy on the test set. (Hint: use `KNeighborsClassifier`.)

In [None]:
# Download MNIST from OpenML. `as_frame=False` asks for plain NumPy arrays;
# without it recent scikit-learn versions hand back a pandas DataFrame, and the
# row-wise iteration / `.reshape` calls further down would not work on that.
mnist = fetch_openml("mnist_784", version=1, as_frame=False)

X, y = mnist["data"], mnist["target"]

# The labels are cast to small unsigned integers so they can be used as
# numeric class ids.
y = y.astype(np.uint8)

# Split used throughout this notebook: the first 60,000 rows are the training
# set and everything after that is the test set.
X_train, X_test, y_train, y_test = X[:60000], X[60000:], y[:60000], y[60000:]

In [None]:
# Single-step pipeline: the only stage is a k-nearest-neighbours classifier,
# registered under the name "clf" so its hyper-parameters can be addressed
# as "clf__<param>" during the search.
knn_step = ("clf", KNeighborsClassifier())
pipe = Pipeline(steps=[knn_step])

In [None]:
# Hyper-parameter grid for the "clf" pipeline step: both neighbour weighting
# schemes, and every neighbour count from 1 to 19 inclusive.
param_grid = dict(
    clf__weights=["uniform", "distance"],
    clf__n_neighbors=range(1, 20),
)

In [None]:
# Exhaustive search over `param_grid` with 3-fold cross-validation, run in a
# single process. `fit` returns the search object itself, so `g` and `clf`
# refer to the same fitted GridSearchCV instance.
g = GridSearchCV(
    estimator=pipe,
    param_grid=param_grid,
    cv=3,
    n_jobs=1,
)
clf = g.fit(X_train, y_train)

# Mean cross-validated score of the best parameter combination.
clf.best_score_

In [None]:
# Score the fitted search object on the held-out test set.
clf.score(X=X_test, y=y_test)

## Exercise 02
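Write a function that can shift an MNIST image in any direction (left, right, up, or down) by one pixel. You can use the `shift()` function from `scipy.ndimage` (the older `scipy.ndimage.interpolation` import path is deprecated). For example, `shift(image, [2, 1], cval=0)` shifts the image two pixels down and one pixel to the right.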

In [None]:
# `shift` lives in the public `scipy.ndimage` namespace; the
# `scipy.ndimage.interpolation` path is a deprecated alias.
from scipy.ndimage import shift


def shift_img(image):
    """Return four copies of a 28x28 image, each shifted by one pixel.

    Parameters
    ----------
    image : array-like with 784 elements
        A single image, either flat or already shaped (28, 28). Anything
        `np.asarray` accepts works (NumPy array, list, pandas Series).

    Returns
    -------
    list of numpy.ndarray
        Four flat arrays of length 784, in the order: shifted left, shifted
        right, shifted up, shifted down. Pixels shifted in from outside the
        image are filled with 0.
    """
    # Coerce first so inputs without a `.reshape` method are accepted too.
    pixels = np.asarray(image).reshape(28, 28)

    # Offsets are (rows, cols): a negative column offset moves the image
    # left, a negative row offset moves it up.
    offsets = ([0, -1], [0, 1], [-1, 0], [1, 0])

    return [shift(pixels, offset, cval=0).reshape(-1) for offset in offsets]

In [None]:
# Build the augmented training set: every original image plus its four
# one-pixel shifts. Iterating over `np.asarray(X_train)` guarantees we walk
# over rows (images); iterating a pandas DataFrame directly would yield
# column labels instead.
new_images = [
    shifted
    for img in np.asarray(X_train)
    for shifted in shift_img(img)
]
augmented_images = np.array(new_images)
X_train_augmented = np.concatenate((X_train, augmented_images), axis=0)

# `shift_img` emits the four shifts of one image consecutively, so each label
# is repeated four times in place to stay aligned with `augmented_images`.
y_train_augmented = np.concatenate((y_train, np.repeat(y_train, 4)), axis=0)

# Sanity check: exactly one label per image.
assert len(X_train_augmented) == len(y_train_augmented)

In [None]:
# Show the shapes of the augmented feature matrix and label vector side by side.
print(*(arr.shape for arr in (X_train_augmented, y_train_augmented)))

In [None]:
# Refit the best pipeline found by the grid search, in place, on the
# augmented training set.
# NOTE(review): this mutates `clf.best_estimator_`, so re-running the earlier
# test-score cell afterwards will report the augmented model's score.
clf.best_estimator_.fit(X=X_train_augmented, y=y_train_augmented)

# Score on the untouched test set.
clf.score(X=X_test, y=y_test)