**Importing modules**

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mlp
import matplotlib.pyplot as plt

sns.set_style("whitegrid")
from sklearn.preprocessing import StandardScaler


In [2]:
# Fetch version 1 of the 'mnist_784' dataset from OpenML using scikit-learn's
# dataset loader, then list the fields available on the returned object.
from sklearn.datasets import fetch_openml
mnist = fetch_openml('mnist_784', version=1)
mnist.keys()


dict_keys(['data', 'target', 'frame', 'categories', 'feature_names', 'target_names', 'DESCR', 'details', 'url'])

In [3]:
# The fetched MNIST object already keeps the features and the labels in
# separate fields, so bind each one to its own name and then check the
# dimensions of the feature matrix.
X = mnist['data']
y = mnist['target']
X.shape

(70000, 784)

In [4]:
# Check the dimensions of the label vector (it should have one entry per row of X).
y.shape


(70000,)

In [5]:
# The target labels are stored as objects rather than numbers, so convert
# them to 8-bit unsigned integers before they are used for training.
y = y.astype(np.uint8)

In [6]:
# Separate the data into a training set and a test set.
# The first 60,000 rows become the training set and the remaining rows the
# test set (assumes the dataset is already ordered train-then-test, so no
# shuffling is done here -- TODO confirm against the dataset description).
X_train, y_train = X[:60000], y[:60000]
X_test, y_test = X[60000:], y[60000:]

In [7]:
# Display the training features (the notebook renders a truncated preview).
X_train

Unnamed: 0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,pixel10,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59995,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
59996,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
59997,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
59998,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
from sklearn.neighbors import KNeighborsClassifier

# Standardize the pixel features. The scaler is fitted on the training set
# only, and that same fitted transformation is then applied to the test set.
# NOTE: X_train and X_test are rebound to the scaled values from here on.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# k-nearest-neighbours classifier created with its default hyperparameters.
knn_class = KNeighborsClassifier()


In [9]:
# Fit the classifier on the scaled training features and their labels.
knn_class.fit(X_train, y_train)

KNeighborsClassifier()

In [10]:
# Score the fitted classifier on the data it was trained on and on the
# held-out test data, then report both numbers.
train_accuracy = knn_class.score(X_train, y_train)
test_accuracy = knn_class.score(X_test, y_test)
print("Train Score: ", train_accuracy)
print("Test Score: ", test_accuracy)


Train Score:  0.96425
Test Score:  0.9443


In [11]:
# Predicted digits for the first 9 test samples.
# The input is sliced (rather than the output) so the nearest-neighbour
# search only runs for the 9 rows that are displayed instead of the whole
# test set; each row is predicted independently, so the result is the same.
knn_class.predict(X_test[:9])

array([7, 2, 1, 0, 4, 1, 4, 9, 4], dtype=uint8)

In [12]:
# Per-class probability estimates for the first 9 test samples
# (one row per sample, one column per class).
# The input is sliced (rather than the output) so the nearest-neighbour
# search only runs for the 9 rows that are displayed instead of the whole
# test set; each row is evaluated independently, so the result is the same.
knn_class.predict_proba(X_test[:9])

array([[0. , 0. , 0. , 0. , 0. , 0. , 0. , 1. , 0. , 0. ],
       [0. , 0. , 1. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
       [0. , 1. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
       [1. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 1. , 0. , 0. , 0. , 0. , 0. ],
       [0. , 1. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 1. , 0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0.4, 0. , 0. , 0. , 0. , 0.6],
       [0. , 0. , 0. , 0. , 0.6, 0.4, 0. , 0. , 0. , 0. ]])

In [13]:
from sklearn.model_selection import GridSearchCV

# Candidate hyperparameter values. Every combination is evaluated:
# 4 neighbour counts x 2 weighting schemes x 2 distance metrics = 16 candidates.
grid_params = {
    'n_neighbors': [3, 5, 7, 9],
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan'],
}

# Arguments passed to the grid search:
#   estimator  - knn_class, the KNeighborsClassifier being tuned
#   param_grid - the candidate hyperparameter values defined above
#   cv         - the number of cross-validation folds used for each candidate
#   verbose    - prints progress messages while the search is fitting
# n_jobs (the number of jobs to run in parallel) is not set here, so the
# library default is used.
gs = GridSearchCV(estimator=knn_class, param_grid=grid_params, cv=3, verbose=1)


In [14]:
# Run the grid search on the scaled training data and keep the returned
# object so the results can be inspected in the cells below.
gs_results = gs.fit(X_train, y_train)

Fitting 3 folds for each of 16 candidates, totalling 48 fits


In [16]:
# Best score found by the grid search (scored by cross-validation on the
# training data, not on the held-out test set).
gs_results.best_score_

0.9568166666666666

In [17]:
# The classifier configured with the best-scoring hyperparameter combination.
gs_results.best_estimator_

KNeighborsClassifier(metric='manhattan', n_neighbors=3, weights='distance')

In [19]:
# The best-scoring hyperparameter combination as a plain dictionary.
gs_results.best_params_

{'metric': 'manhattan', 'n_neighbors': 3, 'weights': 'distance'}