# **KNN from scratch**

In [4]:
from sklearn.metrics import accuracy_score, confusion_matrix
from collections import Counter
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler

class KNearestNeighbors:
    """k-nearest-neighbours classifier based on the chi-square distance.

    The model is lazy: ``fit`` only stores the training data, and every
    prediction compares the query against all stored samples.

    NOTE: the chi-square distance divides by ``x1 + x2``, so it is only
    meaningful for non-negative features (e.g. raw measurements or
    histograms). Features that can be negative, such as standardized
    data, may produce negative or exploding terms.
    """

    def __init__(self, k):
        """Create a classifier that votes among the ``k`` nearest samples.

        Args:
            k: Number of neighbours consulted per prediction (>= 1).

        Raises:
            ValueError: If ``k`` is smaller than 1.
        """
        if k < 1:
            raise ValueError(f"k must be a positive integer, got {k!r}")
        self.k = k

    def chi_square(self, x1, x2, eps=1e-10):
        """Return the chi-square distance ``sum((x1 - x2)**2 / (x1 + x2))``.

        The sum runs over the last axis and the inputs are broadcast
        against each other, so ``x2`` may be a single vector (scalar
        result) or a 2-D array of samples (one distance per row).

        Args:
            x1: Feature vector (array-like).
            x2: Feature vector or array of feature vectors (array-like).
            eps: Small constant added to the denominator to avoid a
                division by zero when a feature is 0 in both inputs.

        Returns:
            The distance as a NumPy scalar, or an array of distances when
            one of the inputs is 2-D.
        """
        x1 = np.asarray(x1, dtype=float)
        x2 = np.asarray(x2, dtype=float)
        # The denominator must use both samples (x1 + x2); this also makes
        # the distance symmetric in its arguments.
        return np.sum((x1 - x2) ** 2 / (x1 + x2 + eps), axis=-1)

    def fit(self, x_train, y_train):
        """Store the training samples and their labels.

        Args:
            x_train: Sequence of feature vectors, one per sample.
            y_train: Sequence of labels, aligned with ``x_train``.

        Raises:
            ValueError: If the two sequences differ in length.
        """
        if len(x_train) != len(y_train):
            raise ValueError(
                "x_train and y_train must have the same length, got "
                f"{len(x_train)} and {len(y_train)}"
            )
        self.x_train = x_train
        self.y_train = y_train

    def predict_single(self, x):
        """Return the majority label among the ``k`` samples nearest to ``x``.

        Equal distances are resolved in favour of the sample that appears
        first in the training set, and equal vote counts in favour of the
        label of the nearer neighbour.
        """
        # One broadcast call yields the distance to every training sample.
        distances = self.chi_square(x, self.x_train)

        # A stable sort keeps training order among equal distances, which
        # matches sorting (index, distance) pairs by distance.
        nearest = np.argsort(distances, kind="stable")[:self.k]
        votes = Counter(self.y_train[idx] for idx in nearest)

        # most_common lists equal counts in first-seen order, i.e. the
        # label of the closest neighbour wins a tie.
        return votes.most_common(1)[0][0]

    def predict(self, x_test):
        """Return an array holding the predicted label of each query sample."""
        return np.array([self.predict_single(x) for x in x_test])


# Load the Iris dataset and separate the feature matrix from the targets.
iris = load_iris()
x, y = iris.data, iris.target

# Hold out a fraction of 0.2 of the data for evaluation. No random_state is
# passed, so the split (and therefore the scores) can differ between runs.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

# Fit a 3-nearest-neighbour model and label the held-out samples.
knn = KNearestNeighbors(k=3)
knn.fit(x_train, y_train)
y_pred = knn.predict(x_test)

# Report the accuracy first, then the confusion matrix.
for metric in (accuracy_score, confusion_matrix):
    print(metric(y_test, y_pred))

1.0
[[11  0  0]
 [ 0  7  0]
 [ 0  0 12]]
