In [1]:
#Importing Libraries
import pandas as pd
import numpy as np
import cv2
import matplotlib as plt
import operator 
from operator import itemgetter
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

In [2]:
#Loading the MNIST dataset as (features, labels) pairs for the train and test splits
#Source : https://www.askpython.com/python/examples/load-and-plot-mnist-dataset-in-python
from keras.datasets import mnist
train_split, test_split = mnist.load_data()
X_train, y_train = train_split
X_test, y_test = test_split

In [3]:
#Calculating Euclidean Distance
def euc_dist(x1, x2):
    """Return the Euclidean (L2) distance between two equally shaped arrays.

    Both inputs are converted to float64 before they are subtracted. Without
    that conversion, unsigned integer inputs (e.g. uint8 pixel data) wrap
    around on subtraction and squaring and yield a wrong distance.

    Parameters
    ----------
    x1, x2 : array-like
        Numeric arrays (any rank) with broadcast-compatible shapes.

    Returns
    -------
    numpy.float64
        Square root of the sum of squared element-wise differences.
    """
    diff = np.asarray(x1, dtype=np.float64) - np.asarray(x2, dtype=np.float64)
    return np.sqrt(np.sum(diff**2))

In [4]:
#Defining KNN Class
#Source : https://medium.com/analytics-vidhya/a-beginners-guide-to-knn-and-mnist-handwritten-digits-recognition-using-knn-from-scratch-df6fb982748a
class KNN:
    """Brute-force K-nearest-neighbours classifier using Euclidean distance.

    Each sample may have any shape (e.g. a 2-D image); samples are flattened
    to 1-D feature vectors before distances are computed.
    """

    #Initializing K Value
    def __init__(self, K=31):
        """Store the number of neighbours that vote on each prediction.

        Raises
        ------
        ValueError
            If K is smaller than 1 (no neighbour could vote).
        """
        if K < 1:
            raise ValueError("K must be a positive integer")
        self.K = K

    def fit(self, x_train, y_train):
        """Memorise the training samples and their labels (no computation)."""
        self.X_train = x_train
        self.Y_train = y_train

    def predict(self, X_test):
        """Predict a label for every sample in X_test by majority vote.

        Parameters
        ----------
        X_test : sequence of array-like
            Query samples; each must have as many elements as one training
            sample.

        Returns
        -------
        list
            One label per query sample, taken from the stored training labels.
            When several labels share the top vote count, the label whose
            first voter is nearest to the query wins.

        Raises
        ------
        ValueError
            If the stored training set is empty.
        """
        #Casting to float64 avoids wrap-around when the data are unsigned ints
        train = np.asarray(self.X_train, dtype=np.float64)
        if len(train) == 0:
            raise ValueError("KNN.predict requires a non-empty training set")
        train = train.reshape(len(train), -1)

        predictions = []
        for i in range(len(X_test)):
            query = np.asarray(X_test[i], dtype=np.float64).reshape(-1)

            #Squared distances to all training samples in one vectorised step;
            #the square root is skipped because it does not change the ranking
            sq_dist = np.sum((train - query) ** 2, axis=1)

            #Indices of the K closest samples; the stable sort resolves equal
            #distances by training order
            nearest = np.argsort(sq_dist, kind="stable")[:self.K]

            #Counting the Neighbours
            neigh_count = {}
            for idx in nearest:
                label = self.Y_train[idx]
                neigh_count[label] = neigh_count.get(label, 0) + 1

            #Predicting the label with the most votes; max keeps the first
            #maximal entry, i.e. the label seen earliest among the neighbours
            winner = max(neigh_count.items(), key=operator.itemgetter(1))
            predictions.append(winner[0])
        return predictions

In [5]:
#Restricting the working set to the first 6800 training samples and their labels
first_n = slice(None, 6800)
X_train, y_train = X_train[first_n], y_train[first_n]
#train_test_split produces the train/test partitions of this subset later on
from sklearn.model_selection import train_test_split

In [6]:
#X (features) and y (labels) are the arrays that get re-split for every evaluation run
X, y = X_train, y_train

In [7]:
#Evaluating KNN for every K and accumulating a 10*10 confusion matrix
SEED = 42                       #Fixed seed so the sequence of random splits can be reproduced
K_VALUES = range(1,32)          #Neighbour counts to evaluate (1..31)
DIGIT_LABELS = np.arange(10)    #Forces every confusion matrix to cover all ten classes

#One generator is shared by all splits: each K still gets a different random
#80/20 partition, but re-running the cell repeats the same partitions
split_rng = np.random.RandomState(SEED)

#Initializing 10*10 base confusion matrix
cm_prev = np.zeros((10,10))
cm_now = cm_prev
for i in K_VALUES:
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=split_rng,shuffle=True)

    #Scaling the features by 255 (maps 8-bit pixel values to [0, 1]); labels are not changed
    X_train=X_train/255
    X_test=X_test/255

    #Model fitting and predictions
    model = KNN(K = i)
    model.fit(X_train, y_train)
    y_pred= model.predict(X_test)

    #Calculating Confusion matrix and accuracy for each value of K
    #labels= keeps the matrix 10*10 even when a class is absent from this split,
    #otherwise adding it to the running total would fail on a shape mismatch
    acc=accuracy_score(y_test,y_pred)
    cm = confusion_matrix(y_test, y_pred, labels=DIGIT_LABELS)
    print("Accuracy when k=",i," is",acc)
    print("\n")
    print("Confusion matrix when k=",i)
    print("\n")
    print(cm)
    print("\n")
    cm_now = cm_prev + cm
    cm_prev = cm_now

#Formatting the elements in the matrix
#Source:https://stackoverflow.com/questions/63675559/how-can-i-adjust-space-between-elements-of-numpy-array
#Calculating and printing Average Confusion Matrix
#NOTE: set_printoptions changes NumPy's global print format for the rest of the session
np.set_printoptions(formatter={'all': lambda x: " {:.2f} ".format(x)})
print("\n Average confusion matrix without using libraries \n")
#Dividing by the number of evaluated K values instead of a hard-coded 31
print(cm_now/len(K_VALUES))

Accuracy when k= 1  is 0.9485294117647058


Confusion matrix when k= 1


[[125   0   0   0   0   0   0   0   0   1]
 [  0 154   0   0   0   0   0   0   0   0]
 [  0   5 135   2   0   0   0   0   2   0]
 [  0   1   1 133   1   2   0   1   2   1]
 [  0   2   0   0 129   0   3   1   0   5]
 [  0   1   0   3   0  98   1   0   1   1]
 [  0   0   0   0   0   2 129   0   0   0]
 [  1   1   0   0   1   0   0 152   0   2]
 [  0   2   0   2   0   0   4   1  93   1]
 [  2   1   1   0   6   0   1   5   0 142]]


Accuracy when k= 2  is 0.9419117647058823


Confusion matrix when k= 2


[[144   0   0   0   0   0   1   1   1   1]
 [  0 143   0   0   0   0   0   1   0   0]
 [  1   5 128   0   0   0   0   3   3   0]
 [  0   0   2 144   0   2   1   1   1   2]
 [  0   1   0   0 131   0   2   0   0   8]
 [  0   0   0   3   2 102   6   1   2   1]
 [  1   0   0   0   0   2 111   0   0   0]
 [  0   3   0   0   0   0   0 139   0   3]
 [  0   2   2   2   0   2   2   1 112   1]
 [  1   0   0   0   4   0   0   1 