# Chapter 3 - Exercise 2
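Write a function that can shift an MNIST image in any direction (up, down, left, or right) by one pixel. Then use it to add four shifted copies of every training image to the training set, tune a k-nearest-neighbours classifier on the expanded set, and measure its accuracy on the test set.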

In [1]:
from sklearn.datasets import fetch_openml
import numpy as np
from scipy.ndimage import shift

In [2]:
# Fetch version 1 of the 'mnist_784' dataset from OpenML. as_frame=False asks for
# array outputs instead of pandas objects.
mnist = fetch_openml(
    name='mnist_784',
    version=1,
    parser='auto',
    as_frame=False,
)

# Display the fields available on the returned dict-like object.
mnist.keys()

dict_keys(['data', 'target', 'frame', 'categories', 'feature_names', 'target_names', 'DESCR', 'details', 'url'])

In [3]:
# Feature matrix and label vector of the whole dataset.
X = mnist['data']
y = mnist['target']

# The first 60000 rows are used for training; every remaining row is held out for testing.
X_train, X_test = X[:60000], X[60000:]
y_train, y_test = y[:60000], y[60000:]

In [4]:
def augment_data(X, y, shifts=None, image_shape=(28, 28)):
    """Extend a set of flattened images with pixel-shifted copies of each image.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_pixels)
        Flattened images, one per row. ``n_pixels`` must equal the product of
        ``image_shape``.
    y : sequence of length n_samples
        Label of each image.
    shifts : sequence of offsets, optional
        Whole-pixel offsets, one entry per image axis, applied to every image.
        Defaults to one pixel down, up, right and left:
        ``[[1, 0], [-1, 0], [0, 1], [0, -1]]``.
    image_shape : tuple of int, default (28, 28)
        Shape each row of ``X`` is reshaped to before shifting.

    Returns
    -------
    X_augmented : ndarray of shape (n_samples * (1 + len(shifts)), n_pixels)
        The original rows first, followed, image by image, by that image's
        shifted copies in the order given by ``shifts``. Pixels shifted in from
        outside the image are 0.
    y_augmented : ndarray
        Labels aligned row-for-row with ``X_augmented``.

    Raises
    ------
    ValueError
        If ``X`` and ``y`` do not contain the same number of samples.
    """
    if shifts is None:
        shifts = [[1, 0], [-1, 0], [0, 1], [0, -1]]
    shifts = list(shifts)

    X = np.asarray(X)
    n_samples = len(X)
    if n_samples != len(y):
        # zip() would silently drop the surplus rows and return misaligned data.
        raise ValueError(
            f'X and y must have the same length, got {n_samples} and {len(y)}'
        )

    n_copies = len(shifts)

    # Allocate the result once instead of growing a Python list of row arrays
    # and converting it afterwards.
    X_augmented = np.empty(
        (n_samples * (1 + n_copies),) + X.shape[1:], dtype=X.dtype
    )
    X_augmented[:n_samples] = X
    y_augmented = list(y)

    row = n_samples
    for image, label in zip(X, y):
        picture = image.reshape(image_shape)
        for direction in shifts:
            # order=0 (nearest neighbour): the offsets are whole pixels, so no
            # spline interpolation is needed and pixel values are copied exactly.
            moved = shift(picture, shift=direction, order=0)
            X_augmented[row] = moved.reshape(-1)
            row += 1
        y_augmented.extend([label] * n_copies)

    return X_augmented, np.array(y_augmented)

In [5]:
# Build the augmented training set: the originals plus their shifted copies.
X_train_shifted, y_train_shifted = augment_data(X_train, y_train)

# Shuffle images and labels with the same permutation so they stay aligned.
# The generator is seeded so that a fresh Restart & Run All reproduces the same
# row order (and therefore the same cross-validation folds downstream).
shuffle = np.random.default_rng(42).permutation(len(X_train_shifted))
X_train_shifted = X_train_shifted[shuffle]
y_train_shifted = y_train_shifted[shuffle]

In [7]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV


# Hyper-parameter grid for k-NN: 3 neighbour counts x 2 weighting schemes = 6 candidates.
param_distrib = dict(
    n_neighbors=[2, 3, 4],
    weights=['uniform', 'distance'],
)
knn_clf = KNeighborsClassifier()

# NOTE(review): X_train_shifted holds every original image together with its shifted
# copies, so a plain 5-fold split can place copies of the same digit in both the
# training and validation folds. The cross-validation accuracy is therefore probably
# optimistic relative to the test score - consider grouping folds by source image.
grid = GridSearchCV(
    estimator=knn_clf,
    param_grid=param_distrib,
    scoring='accuracy',
    cv=5,
    verbose=3,
)
grid.fit(X_train_shifted, y_train_shifted)

Fitting 5 folds for each of 6 candidates, totalling 30 fits
[CV 1/5] END ....n_neighbors=2, weights=uniform;, score=0.979 total time= 3.5min
[CV 2/5] END ....n_neighbors=2, weights=uniform;, score=0.979 total time= 3.5min
[CV 3/5] END ....n_neighbors=2, weights=uniform;, score=0.979 total time= 3.5min
[CV 4/5] END ....n_neighbors=2, weights=uniform;, score=0.979 total time= 3.5min
[CV 5/5] END ....n_neighbors=2, weights=uniform;, score=0.978 total time= 3.6min
[CV 1/5] END ...n_neighbors=2, weights=distance;, score=0.986 total time= 3.5min
[CV 2/5] END ...n_neighbors=2, weights=distance;, score=0.986 total time= 3.5min
[CV 3/5] END ...n_neighbors=2, weights=distance;, score=0.986 total time= 3.5min
[CV 4/5] END ...n_neighbors=2, weights=distance;, score=0.986 total time= 3.5min
[CV 5/5] END ...n_neighbors=2, weights=distance;, score=0.986 total time= 3.5min
[CV 1/5] END ....n_neighbors=3, weights=uniform;, score=0.984 total time= 4.1min
[CV 2/5] END ....n_neighbors=3, weights=uniform;,

In [8]:
from sklearn.metrics import accuracy_score

# GridSearchCV was created without overriding `refit` (default True), so after the
# search it has already retrained the best parameter combination on the whole
# augmented training set. Reuse that fitted model instead of building and fitting
# an identical classifier a second time.
knn_clf = grid.best_estimator_
knn_clf

In [9]:
# Predict labels for the held-out test images with the tuned classifier.
y_pred = knn_clf.predict(X_test)

# Fraction of test images whose predicted label matches the true label.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Final accuracy: {accuracy}')

Final accuracy: 0.9727
