In [1]:
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.datasets import fetch_mldata
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, precision_recall_curve, roc_curve, roc_auc_score
from sklearn.model_selection import cross_val_score, cross_val_predict, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

%matplotlib inline

import warnings
# warnings.filterwarnings('ignore') # To ignore warnings entirely
warnings.filterwarnings(action="once") # To see warning only once

In [None]:
# Load MNIST and split it into a training part and a test part.
# NOTE(review): fetch_mldata was deprecated in scikit-learn 0.20 and removed in
# 0.22; on newer versions use fetch_openml("mnist_784", version=1) instead.
mnist = fetch_mldata("MNIST original")
X, y = mnist["data"], mnist["target"]
# The first 60000 samples are used for training, the remainder for testing.
X_train, X_test, y_train, y_test = X[:60000], X[60000:], y[:60000], y[60000:]
# Shuffle the training samples with a dedicated, seeded generator so that a
# "Restart & Run All" reproduces the same order without touching the global
# NumPy random state.
shuffle_index = np.random.RandomState(42).permutation(len(X_train))
X_train, y_train = X_train[shuffle_index], y_train[shuffle_index]

## Exercise 1

In [None]:
# Hyper-parameter grid explored for the k-nearest-neighbours classifier.
param_grid = {
    'n_neighbors': [1, 3, 5, 10, 20],
    'weights': ['uniform', 'distance'],
    'algorithm': ['ball_tree', 'kd_tree', 'brute'],
    'p': [1, 2, 3]
}

knn_clf = KNeighborsClassifier()
# This is a classification task, so candidates are ranked by accuracy.
# A regression metric such as mean squared error would treat the digit labels
# as quantities (mistaking a 0 for an 8 would count as "worse" than mistaking
# it for a 1), which is meaningless for class labels.
grid_search = GridSearchCV(knn_clf, param_grid, cv=5, scoring="accuracy")
# Only the first 1000 training samples are used to keep the search tractable.
grid_search.fit(X_train[:1000], y_train[:1000])

In [None]:
# Refit the best configuration found by the grid search on the full training set.
best_model = grid_search.best_estimator_
best_model.fit(X_train, y_train)
# Evaluate on the held-out test set with the model fitted above.
# cross_val_predict would instead refit clones of the estimator on folds of the
# test data and ignore the fit on X_train entirely.
y_test_knn_pred = best_model.predict(X_test)
# Compare the predictions against the true test labels.
f1_score(y_test, y_test_knn_pred, average="macro")

## Exercise 2

Because KNeighborsClassifier takes FOREVER to train and predict, I will use only SGDClassifier for this exercise.

In [None]:
def MoveUpOne(image):
    """Return a flattened 28x28 image shifted up by one pixel.

    Parameters
    ----------
    image : array-like with 784 elements
        Flattened 28x28 image. It is not modified.

    Returns
    -------
    numpy.ndarray of shape (784,)
        The image with every row moved up by one; the original top row is
        dropped and the bottom row is filled with zeros.
    """
    # reshape gives a view, and np.delete / np.r_ below allocate new arrays,
    # so the caller's data is never modified and no explicit copy is needed.
    grid = np.asarray(image).reshape((28, 28))
    # Removing the top row moves every remaining row up by one position.
    rows_below_top = np.delete(grid, 0, 0)
    # The blank row must be appended at the bottom; a 2-D (1, 28) block is
    # required so np.r_ can stack it with the 2-D image.
    new_image = np.r_[rows_below_top, np.zeros((1, 28))]
    return new_image.reshape((784))

def MoveDownOne(image):
    """Return a flattened 28x28 image shifted down by one pixel.

    Parameters
    ----------
    image : array-like with 784 elements
        Flattened 28x28 image. It is not modified.

    Returns
    -------
    numpy.ndarray of shape (784,)
        The image with every row moved down by one; the original bottom row
        is dropped and the top row is filled with zeros.
    """
    # reshape gives a view, and np.delete / np.r_ below allocate new arrays,
    # so the caller's data is never modified and no explicit copy is needed.
    grid = np.asarray(image).reshape((28, 28))
    # Drop the bottom row; the remaining rows will sit one position lower.
    rows_above_bottom = np.delete(grid, 27, 0)
    # The blank row must be inserted at the top; a 2-D (1, 28) block is
    # required so np.r_ can stack it with the 2-D image.
    new_image = np.r_[np.zeros((1, 28)), rows_above_bottom]
    return new_image.reshape((784))

def MoveLeftOne(image):
    """Return a flattened 28x28 image shifted left by one pixel.

    Parameters
    ----------
    image : array-like with 784 elements
        Flattened 28x28 image. It is not modified.

    Returns
    -------
    numpy.ndarray of shape (784,)
        The image with every column moved left by one; the original leftmost
        column is dropped and the rightmost column is filled with zeros.
    """
    # reshape gives a view, and np.delete / np.c_ below allocate new arrays,
    # so the caller's data is never modified and no explicit copy is needed.
    grid = np.asarray(image).reshape((28, 28))
    # Removing the leftmost column moves every remaining column left by one.
    cols_right_of_first = np.delete(grid, 0, 1)
    # The blank column must be appended on the right-hand side.
    new_image = np.c_[cols_right_of_first, np.zeros((28, 1))]
    return new_image.reshape((784))

def MoveRightOne(image):
    """Return a flattened 28x28 image shifted right by one pixel.

    Parameters
    ----------
    image : array-like with 784 elements
        Flattened 28x28 image. It is not modified.

    Returns
    -------
    numpy.ndarray of shape (784,)
        The image with every column moved right by one; the original rightmost
        column is dropped and the leftmost column is filled with zeros.
    """
    # reshape gives a view, and np.delete / np.c_ below allocate new arrays,
    # so the caller's data is never modified and no explicit copy is needed.
    grid = np.asarray(image).reshape((28, 28))
    # Drop the rightmost column; the remaining columns will sit one further right.
    cols_left_of_last = np.delete(grid, 27, 1)
    # The blank column must be inserted on the left-hand side.
    new_image = np.c_[np.zeros((28, 1)), cols_left_of_last]
    return new_image.reshape((784))
    

In [None]:
def AddMoreTrain(X, y):
    """Augment a training set with one-pixel shifted copies of every image.

    For each image, four extra samples are produced by MoveUpOne, MoveDownOne,
    MoveLeftOne and MoveRightOne (in that order), each carrying the label of
    the image it was derived from.

    Parameters
    ----------
    X : sequence of flattened images
        The original samples; each one is passed to the Move*One helpers.
    y : sequence of labels
        Labels aligned with X by position.

    Returns
    -------
    tuple (X_augmented, y_augmented)
        The original samples followed by the shifted copies, and the matching
        labels.
    """
    shifters = (MoveUpOne, MoveDownOne, MoveLeftOne, MoveRightOne)
    new_X = []
    new_y = []
    # range (rather than the Python-2-only xrange) works on Python 2 and 3.
    for ix in range(len(X)):
        image = X[ix]
        label = y[ix]
        for shift in shifters:
            new_X.append(shift(image))
            new_y.append(label)
    if not new_X:
        # Nothing to augment: np.r_ cannot join a 2-D X with an empty list.
        return np.array(X), np.array(y)
    return np.r_[X, new_X], np.r_[y, new_y]
        