<a href="https://colab.research.google.com/github/heshamg124/mltest/blob/master/MNIST_KNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# MNIST KNN Classifier with 97% Accuracy

## Initialize

In [2]:
# Common imports
import numpy as np
import os

# Fix NumPy's global random seed so repeated runs of the notebook give the same output
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt

# Figures are saved in an "images" folder directly under the project root.
PROJECT_ROOT_DIR = "."
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images")

# exist_ok=True lets this cell be re-run when the folder is already there
os.makedirs(name=IMAGES_PATH, exist_ok=True)


## Fetch Data

In [3]:
from sklearn.datasets import fetch_openml

# Load the MNIST digits (OpenML dataset "mnist_784", version 1).
# as_frame=False pins the result to NumPy arrays: newer scikit-learn releases
# default to as_frame='auto', which would hand back a pandas DataFrame/Series
# for this dataset and change the types the cells below operate on.
mnist = fetch_openml('mnist_784', version=1, as_frame=False)

# Show which fields the returned Bunch exposes
mnist.keys()

dict_keys(['data', 'target', 'frame', 'feature_names', 'target_names', 'DESCR', 'details', 'categories', 'url'])

## Prepare Data

In [4]:
# Rows before this index are used for training; the rest are held out for testing.
TRAIN_SIZE = 60000

X = mnist["data"]
y = mnist["target"]

X_train, y_train = X[:TRAIN_SIZE], y[:TRAIN_SIZE]
X_test, y_test = X[TRAIN_SIZE:], y[TRAIN_SIZE:]

# Sanity-check the split sizes
print('Training: ', X_train.shape)
print('Test: ', X_test.shape)


Training:  (60000, 784)
Test:  (10000, 784)


## Train KNN Model

In [5]:
from sklearn.neighbors import KNeighborsClassifier

# Build a 3-nearest-neighbours classifier and fit it on the training split.
# fit() returns the estimator itself, so construction and fitting can be chained.
knn_clf = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)

# Display the fitted estimator as the cell output
knn_clf


KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=3, p=2,
                     weights='uniform')

## Accuracy Score

In [8]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Predict the test split once; every metric below reuses these predictions.
y_pred = knn_clf.predict(X_test)

# Keyword arguments shared by all of the metric calls
truth_and_pred = dict(y_true=y_test, y_pred=y_pred)

knn_acc = accuracy_score(**truth_and_pred)
knn_conf = confusion_matrix(**truth_and_pred)

# average=None yields one score per class instead of a single averaged value
knn_prc = precision_score(average=None, **truth_and_pred)
knn_rec = recall_score(average=None, **truth_and_pred)
knn_f1 = f1_score(average=None, **truth_and_pred)

# Report: overall accuracy first, then the per-class scores and the confusion matrix
print('Accuracy: {:.4f}'.format(knn_acc))
print('\nPrecision: ', knn_prc)
print('\nRecall: ', knn_rec)
print('\nF1-score: ', knn_f1)
print('\nConfusion Matrix: ', knn_conf)


Accuracy: 0.9705

Precision:  [0.96626984 0.95773457 0.98224852 0.96347483 0.97535934 0.96625422
 0.98333333 0.96494645 0.98917749 0.96031746]

Recall:  [0.99387755 0.99823789 0.96511628 0.96633663 0.96741344 0.96300448
 0.98538622 0.96400778 0.93839836 0.95936571]

F1-score:  [0.97987928 0.97756687 0.97360704 0.96490361 0.97137014 0.96462661
 0.98435871 0.96447689 0.96311907 0.95984135]

Confusion Matrix:  [[ 974    1    1    0    0    1    2    1    0    0]
 [   0 1133    2    0    0    0    0    0    0    0]
 [  10    9  996    2    0    0    0   13    2    0]
 [   0    2    4  976    1   13    1    7    3    3]
 [   1    6    0    0  950    0    4    2    0   19]
 [   6    1    0   11    2  859    5    1    3    4]
 [   5    3    0    0    3    3  944    0    0    0]
 [   0   21    5    0    1    0    0  991    0   10]
 [   8    2    4   16    8   11    3    4  914    4]
 [   4    5    2    8    9    2    1    8    2  968]]
