In [1]:
import numpy as np
from random import shuffle
from sklearn.datasets import load_iris

In [6]:
def accuracy(preds, targets):
    """Return the fraction of predictions that equal their targets.

    Args:
        preds: Predicted labels; any array-like accepted by ``np.asarray``.
        targets: Ground-truth labels with the same shape as ``preds``.

    Returns:
        The share of matching positions as a Python ``float``.

    Raises:
        ValueError: If ``preds`` and ``targets`` have different shapes.
    """
    # Coerce first: ``==`` on two plain lists yields a single bool, not an
    # element-wise mask, so ``.mean()`` would not exist on the result.
    preds = np.asarray(preds)
    targets = np.asarray(targets)
    if preds.shape != targets.shape:
        # Refuse to broadcast: e.g. (n,) against (n, 1) would silently
        # compare every prediction with every target.
        raise ValueError(
            f"shape mismatch: preds {preds.shape} vs targets {targets.shape}"
        )
    return (preds == targets).mean().item()

In [3]:
class KNN:
    """Brute-force k-nearest-neighbours classifier.

    Neighbours are ranked by squared Euclidean distance to the stored
    training samples and the predicted label is the majority vote among
    the ``k`` closest ones.
    """

    def __init__(self, k: int):
        """Store the number of neighbours that take part in each vote.

        Args:
            k: Number of neighbours; must be at least 1.

        Raises:
            ValueError: If ``k`` is smaller than 1.
        """
        if k < 1:
            raise ValueError(f"k must be a positive integer, got {k}")
        self.k = k

    def fit(self, X: np.ndarray, y: np.ndarray):
        """Memorise the training set; no model is actually trained.

        Args:
            X: Training samples, one row per sample.
            y: Labels, one per row of ``X``. ``predict`` feeds them to
                ``np.bincount``, so they must be non-negative integers.

        Raises:
            ValueError: If ``X`` is empty or ``X`` and ``y`` differ in length.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if len(X) == 0:
            raise ValueError("cannot fit on an empty training set")
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}"
            )
        self.X = X
        self.y = y

    def predict(self, x: np.ndarray) -> int:
        """Predict the label of a single sample.

        Args:
            x: One sample with as many features as the training rows.

        Returns:
            The most frequent label among the nearest neighbours. Ties go
            to the smallest label, because ``argmax`` returns the first
            maximum.
        """
        # Squared distances keep the same ordering as true Euclidean ones,
        # so the square root is skipped.
        distance = ((self.X - np.asarray(x).reshape(1, -1)) ** 2).sum(axis=1)
        # Never ask for more neighbours than there are training samples.
        k = min(self.k, len(distance))
        # kth = k - 1 places the k smallest distances in the first k slots
        # and stays in bounds when k equals the training-set size
        # (kth = k would raise there).
        k_neighbours = np.argpartition(distance, k - 1)[:k]
        return int(np.bincount(self.y[k_neighbours]).argmax())

In [4]:
# Load the Iris dataset and pull the feature matrix and target vector out
# of the returned frames as NumPy arrays.
data = load_iris(as_frame=True)
X = data["data"].values
y = data["target"].values

# Shuffle with a fixed seed so the train/validation split is identical on
# every Restart & Run All; change SEED to draw a different split.
SEED = 42
idx = np.random.default_rng(SEED).permutation(len(X))
X = X[idx]
y = y[idx]

# Hold out the first 10% of the shuffled rows for validation and train on
# the rest.
val_size = int(len(X)*0.1)
train_X, val_X = X[val_size:], X[:val_size]
train_y, val_y = y[val_size:], y[:val_size]

In [16]:
# Fit a 5-neighbour classifier on the training split, then score it on that
# same split (each sample is among its own neighbours here).
classifier = KNN(k=5)
classifier.fit(train_X, train_y)

preds = np.array(list(map(classifier.predict, train_X)))
accuracy(preds, train_y)

0.9777777777777777

In [17]:
# Score the already-fitted classifier on the held-out validation split.
preds = np.array(list(map(classifier.predict, val_X)))
accuracy(preds, val_y)

0.9333333333333333