# KNN from scratch using numpy

Predicted class = the majority label among the k nearest neighbors in the training set

In [None]:
import numpy as np
from collections import Counter

In [None]:
def euclideanDistance(x1, x2):
  """Return the Euclidean (L2) distance between two points.

  Args:
    x1: Coordinates of the first point (array-like of numbers).
    x2: Coordinates of the second point, broadcast-compatible with x1.

  Returns:
    The square root of the sum of squared coordinate differences.
  """
  # Work in float so plain lists are accepted and unsigned integer inputs
  # cannot wrap around when subtracted.
  diff = np.asarray(x1, dtype=float) - np.asarray(x2, dtype=float)
  # Square each difference *before* summing. Squaring the sum instead lets
  # differences of opposite sign cancel and under-reports the distance.
  return np.sqrt(np.sum(diff ** 2))

In [None]:
class KNN:
  """k-nearest-neighbors classifier that predicts by majority vote.

  `fit` only stores the training data. Each prediction measures the distance
  from the query to every stored sample with `euclideanDistance` and returns
  the most frequent label among the `k` closest ones.
  """

  def __init__(self, k=30):
    """Store the number of neighbors that vote on each prediction.

    Args:
      k: Number of nearest training samples to consult.

    Raises:
      ValueError: If k is smaller than 1.
    """
    # k == 0 leaves no votes to count, and a negative k would silently turn
    # the `[:k]` slice in `_predict` into "all but the last |k| neighbors".
    if k < 1:
      raise ValueError("k must be at least 1, got {!r}".format(k))
    self.k = k

  def fit(self, X, y):
    """Remember the training samples and their labels.

    Args:
      X: Sequence of training samples.
      y: Sequence of labels, one per sample in X.

    Raises:
      ValueError: If X and y do not have the same length.
    """
    if len(X) != len(y):
      raise ValueError(
          "X and y must have the same length, got {} and {}".format(
              len(X), len(y)))
    self.XTrain = X
    self.yTrain = y

  def predict(self, X):
    """Return a list with the predicted label for each sample in X."""
    return [self._predict(x) for x in X]

  def _predict(self, x):
    """Return the majority label among the k training samples nearest to x."""
    distances = [euclideanDistance(x, sample) for sample in self.XTrain]

    # Indices of the k smallest distances; if k exceeds the number of stored
    # samples the slice simply returns all of them.
    kIdxs = np.argsort(distances)[:self.k]
    kLabels = [self.yTrain[i] for i in kIdxs]

    # Only the single most frequent label is needed.
    return Counter(kLabels).most_common(1)[0][0]

In [None]:
from sklearn.model_selection import train_test_split
from sklearn import datasets

# Load the iris dataset and pull out its feature matrix and label vector.
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Split into train and test parts with test_size=0.2; random_state is pinned
# to 0 so the split is the same on every run.
XTrain, XTest, YTrain, YTest = train_test_split(X, y, test_size=0.2, random_state=0)

# Fit the from-scratch classifier with 5 neighbors, then label the test split.
classifier = KNN(k=5)
classifier.fit(XTrain, YTrain)
predictions = classifier.predict(XTest)


def accuracy(yTest, yPred):
  """Return the fraction of predictions that equal the true labels.

  Args:
    yTest: True labels (array-like).
    yPred: Predicted labels (array-like) with the same shape as yTest.

  Returns:
    Number of matching positions divided by the total number of labels.

  Raises:
    ValueError: If the inputs differ in shape or are empty.
  """
  # Convert first: `==` on two plain lists yields a single bool rather than an
  # element-wise comparison, which would make the score meaningless.
  yTest = np.asarray(yTest)
  yPred = np.asarray(yPred)
  if yTest.shape != yPred.shape:
    raise ValueError(
        "yTest and yPred must have the same shape, got {} and {}".format(
            yTest.shape, yPred.shape))
  if yTest.size == 0:
    raise ValueError("accuracy is undefined for empty inputs")
  return np.sum(yTest == yPred) / yTest.size


# Score the classifier: share of test labels in YTest matched by `predictions`.
acc = accuracy(YTest, predictions)

In [None]:
# Bare expression; presumably the notebook cell shows the accuracy as its output.
acc

In [None]:
from matplotlib import pyplot as plt
from matplotlib.colors import ListedColormap

# Three-entry color map (red, green, blue) used to color points by label.
cmap = ListedColormap(["#F00", "#0F0", "#00F"])

# Scatter every sample using feature columns 2 and 3, colored by its label in y.
plt.figure()
plt.scatter(
    X[:, 2],
    X[:, 3],
    c=y,
    cmap=cmap,
    edgecolor='k',
    s=20,
)
plt.show()