**K Nearest Neighbours Model using MNIST Dataset**

1. Importing the libraries required for the model.

In [0]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats

2. K-Nearest Neighbours Model

In [0]:
class k_nearest_neighbours:
    """Brute-force k-nearest-neighbours classifier using Euclidean distance.

    Call ``fit`` to store the training data and ``k``; ``predict`` then labels
    each test row by majority vote among its ``k`` closest training rows.
    """

    def fit(self, X_train, y_train, k):
        """Store the training data and the neighbour count.

        Nothing is computed here; all the work happens in ``predict``.

        Parameters
        ----------
        X_train : array of shape (m_train, n)
            Training feature rows.
        y_train : array of shape (m_train,) or (m_train, 1)
            Label of each training row.
        k : int
            Number of nearest neighbours that vote; must be at least 1.

        Raises
        ------
        ValueError
            If ``k`` is smaller than 1.
        """
        if k < 1:
            raise ValueError("k must be at least 1")
        self.k = k
        self.X_train = X_train
        self.y_train = y_train

    def predict(self, X_test):
        """Predict a label for every row of ``X_test``.

        Parameters
        ----------
        X_test : array of shape (m_test, n)
            Rows to classify; must have the same number of columns as the
            training features.

        Returns
        -------
        numpy.ndarray of shape (m_test,), dtype float64
            Predicted label per test row. When several labels tie for the
            majority, the smallest label value wins.

        Raises
        ------
        ValueError
            If the number of labels differs from the number of training rows.
        """
        # Work in float64 so that unsigned inputs (e.g. uint8 pixels) cannot
        # wrap around when subtracted or squared.
        train = np.asarray(self.X_train, dtype=np.float64)
        queries = np.asarray(X_test, dtype=np.float64)

        # Accept labels as either a flat vector or an (m_train, 1) column.
        labels = np.asarray(self.y_train).reshape(-1)
        if labels.shape[0] != train.shape[0]:
            raise ValueError("X_train and y_train must have the same number of rows")

        m_test = queries.shape[0]
        y_pred = np.zeros(m_test)
        for i in range(m_test):
            # Euclidean distance from this test row to every training row.
            distances = np.sqrt(np.sum((queries[i, :] - train) ** 2, axis=1))
            nearest = np.argsort(distances)[:self.k]
            # Majority vote. np.unique returns sorted values, and argmax picks
            # the first maximum, so ties resolve to the smallest label.
            values, counts = np.unique(labels[nearest], return_counts=True)
            y_pred[i] = values[np.argmax(counts)]
        return y_pred

    def accuracy(self, y_test, y_pred):
        """Return the percentage (0-100) of predictions that equal the truth.

        Both arguments may be flat vectors or single-column 2-D arrays.

        Raises
        ------
        ValueError
            If the two inputs do not contain the same number of labels.
        ZeroDivisionError
            If the inputs are empty.
        """
        truth = np.asarray(y_test).reshape(-1)
        guess = np.asarray(y_pred).reshape(-1)
        if truth.shape != guess.shape:
            raise ValueError("y_test and y_pred must contain the same number of labels")
        hits = int(np.count_nonzero(truth == guess))
        return (hits / truth.size) * 100

    def test_train_split(self, X, y, size):
        """Split ``X`` and ``y`` into train and test parts without shuffling.

        The first ``int(X.shape[0] * size)`` rows become the test set and the
        remaining rows become the training set.

        Parameters
        ----------
        X : array with one sample per row
        y : array with one label per sample, aligned with ``X``
        size : float
            Fraction of the rows to place in the test set.

        Returns
        -------
        tuple
            ``(X_train, X_test, y_train, y_test)``
        """
        m_test = int(X.shape[0] * size)
        return X[m_test:], X[:m_test], y[m_test:], y[:m_test]
                  

3. Reading the Training Dataset using pandas

In [0]:
# header=None: every row of the file is read as data and columns get integer positions.
df = pd.read_csv(
    'sample_data/mnist_train_small.csv',
    header=None,
)

In [0]:
# Columns 1..784 become the feature matrix; column 0 is taken as the label.
X_train = np.array(df.iloc[:, 1:785])
# Selecting with a list keeps the labels 2-D, i.e. an (m, 1) column.
y_train = np.array(df.iloc[:, [0]])

4. Reading the Test Dataset

In [0]:
# Same column layout as the training file: column 0 is the label, the rest are features.
df1 = pd.read_csv(
    'sample_data/mnist_test.csv',
    header=None,
)
X_test = np.array(df1.iloc[:, 1:785])
# A scalar column index yields a flat (m,) label vector here.
y_test = np.array(df1.iloc[:, 0])

5. Creating the model `a` and fitting it on the training set with k = 3.

In [0]:
# fit() only stores the training data and k; the real work happens in predict().
a = k_nearest_neighbours()
a.fit(X_train, y_train, k=3)

In [0]:
# Each test row is compared against every training row, so this cell is the slow one.
y_pred = a.predict(X_test=X_test)

In [0]:
# Display-only: astype returns a new integer array; y_pred itself is left unchanged.
y_pred.astype(np.int64)

array([7, 2, 1, ..., 4, 5, 6])

In [0]:
# accuracy() returns a percentage in the range 0-100.
print(
    "Test Accuracy:",
    a.accuracy(y_test=y_test, y_pred=y_pred),
)

Test Accuracy: 95.95
