In [37]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn

class CustomKNN:
    def __init__(self,n_neighbours=5):
        self.n_neighbours=n_neighbours
        
    def fit(self,X,y):
        self._X=(X-X.mean())/X.std()
        self._y=y
        
    def predict(self,X):
        results=[]
        X=(X-X.mean())/X.std()
        for point in X:
            results.append(self.predict_point(point))
        return np.array(results,dtype=int)
    
    def score(self,X,y):
        return sum(self.predict(X)==y)/len(y)
    
    def predict_point(self,point):
        list_dist=[]
        for x_point,y_point in zip(self._X,self._y):
            dist_point=((point-x_point)**2).sum()
            list_dist.append([dist_point,y_point])
            
        sorted_dist=sorted(list_dist)
        top_k=sorted_dist[:self.n_neighbours]
        items,counts=np.unique(np.array(top_k)[:1],return_counts=True)
        ans=items[np.argmax(counts)]
        return ans

In [38]:
model=CustomKNN()

In [39]:
data=np.load('mnist_train_small.npy')

In [40]:
data

array([[5, 0, 0, ..., 0, 0, 0],
       [7, 0, 0, ..., 0, 0, 0],
       [9, 0, 0, ..., 0, 0, 0],
       ...,
       [2, 0, 0, ..., 0, 0, 0],
       [9, 0, 0, ..., 0, 0, 0],
       [5, 0, 0, ..., 0, 0, 0]], dtype=uint8)

In [41]:
from sklearn.model_selection import train_test_split

In [42]:
X=data[:,1:]
y=data[:,0]

In [43]:
X,y

(array([[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]], dtype=uint8),
 array([5, 7, 9, ..., 2, 9, 5], dtype=uint8))

In [44]:
x_train,x_test,y_train,y_test=train_test_split(X,y,random_state=42,test_size=0.33)
x_test

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=uint8)

In [45]:
model.fit(x_train,y_train)

In [46]:
model.predict(x_test[:10])

array([1, 7, 0, 9, 4, 5, 4, 6, 9, 2])

In [47]:
y_test[:10]

array([7, 7, 0, 9, 4, 5, 4, 6, 9, 2], dtype=uint8)

In [49]:
model.score(x_test[:400],y_test[:400])

0.955