In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from collections import Counter

In [2]:
# Load scikit-learn's bundled Iris dataset; the returned object exposes the
# feature matrix as .data and the class labels as .target.
iris = load_iris()

In [3]:
# Feature matrix and class labels taken from the loaded dataset.
X = iris.data
y = iris.target

In [4]:
# Hold out 20% of the samples for evaluation; the fixed random_state keeps the
# split (and therefore every result below) reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1234,
)

In [5]:
# Sanity-check the split by reporting the shape of each train/test array.
print(f"Shape of X_train is {X_train.shape}")
print(f"Shape of X_test is {X_test.shape}")
print(f"Shape of y_train is {y_train.shape}")
print(f"Shape of y_test is {y_test.shape}")

Shape of X_train is (120, 4)
Shape of X_test is (30, 4)
Shape of y_train is (120,)
Shape of y_test is (30,)


In [6]:
def euclidean_distance(x1,x2):
    """Return the Euclidean (L2) distance between two points.

    Parameters
    ----------
    x1, x2 : array-like of numbers
        Coordinates of the two points (ndarray, list or tuple). They must
        have the same shape or be broadcastable against each other.

    Returns
    -------
    numpy.float64
        Square root of the summed squared coordinate differences.
    """
    # Cast to float before subtracting: plain lists do not support "-", and
    # unsigned integer arrays would wrap around (uint8: 0 - 16 == 240) and
    # overflow again when squared, giving a silently wrong distance.
    diff = np.asarray(x1,dtype=float) - np.asarray(x2,dtype=float)
    return np.sqrt(np.sum(diff**2))


In [7]:
class KNN:
    """k-nearest-neighbours classifier using Euclidean distance and a majority vote.

    Parameters
    ----------
    k : int, default=3
        Number of closest training samples that vote on each prediction.
        Must be an integer >= 1.

    Raises
    ------
    ValueError
        If ``k`` is not a positive integer.
    """

    def __init__(self,k=3):
        # k == 0 leaves no voters, and a negative k would silently slice away
        # the farthest samples instead of selecting the nearest ones, so both
        # are rejected at construction time.
        if not isinstance(k,(int,np.integer)) or k < 1:
            raise ValueError(f"k must be a positive integer, got {k!r}")
        self.k = k

    def fit(self,X,y):
        """Memorise the training set; all distance work happens at predict time.

        Parameters
        ----------
        X : array-like, one sample per row
            Training features (ndarray, nested list or DataFrame).
        y : array-like of length ``len(X)``
            Class label of each training sample.

        Raises
        ------
        ValueError
            If ``X`` is empty or ``X`` and ``y`` differ in length.
        """
        # Plain ndarrays make every later lookup positional: a pandas Series
        # with a non-default index would otherwise be indexed by label, and
        # iterating a DataFrame would yield column names instead of rows.
        X = np.asarray(X,dtype=float)
        y = np.asarray(y)
        if len(X) == 0:
            raise ValueError("cannot fit KNN on an empty training set")
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}"
            )
        # One flat feature vector per sample so distances reduce along axis 1.
        self.X_train = X.reshape(len(X),-1)
        self.y_train = y

    def predict(self,X):
        """Predict a class label for every sample (row) of ``X``.

        Returns
        -------
        numpy.ndarray
            Predicted labels, one per row of ``X``, in the same order.
        """
        samples = np.asarray(X,dtype=float)
        predicted_labels = [self._predict(x) for x in samples]
        return np.array(predicted_labels)

    def _predict(self,x):
        """Return the majority label among the ``k`` training samples nearest to ``x``."""
        # Euclidean distance from x to every training row in one vectorised step.
        x = np.ravel(np.asarray(x,dtype=float))
        distances = np.sqrt(np.sum((self.X_train - x)**2,axis=1))
        # Nearest first. A stable sort resolves equal distances by training
        # order, so the selected neighbours do not depend on the sort backend.
        k_indices = np.argsort(distances,kind="stable")[:self.k]
        k_nearest_labels = self.y_train[k_indices]
        # Majority vote for the most common class label. On a tied vote the
        # label encountered first (the closest of the tied neighbours) wins.
        most_common = Counter(k_nearest_labels).most_common(1)
        return most_common[0][0]
        

In [8]:
# Instantiate the from-scratch classifier with k=3 voting neighbours.
clf = KNN(k=3)

In [9]:
# Fit on the training split; KNN.fit stores the training arrays for use at predict time.
clf.fit(X_train,y_train)

In [10]:
# Predict a label for every held-out test sample.
predictions = clf.predict(X_test)

In [11]:
# Accuracy is the fraction of test samples whose predicted label matches the
# true one, i.e. the mean of the element-wise equality mask.
accuracy = np.mean(predictions == y_test)
print(accuracy)

1.0


In [12]:
# Inspect the labels predicted by the from-scratch classifier.
predictions

array([1, 1, 2, 0, 1, 0, 0, 0, 1, 2, 1, 0, 2, 1, 0, 1, 2, 0, 2, 1, 1, 1,
       1, 1, 2, 0, 2, 1, 2, 0])

# Comparison with scikit-learn's KNeighborsClassifier

In [13]:
from sklearn.neighbors import KNeighborsClassifier

In [14]:
# scikit-learn's KNN with the same neighbour count (3) as the from-scratch model.
model = KNeighborsClassifier(n_neighbors=3)

In [15]:
# Fit the scikit-learn model on the same training split.
model.fit(X_train,y_train)

KNeighborsClassifier(n_neighbors=3)

In [16]:
# Score the scikit-learn model on the held-out test split.
model.score(X_test,y_test)

1.0

In [17]:
# Labels predicted by scikit-learn, kept for comparison with the from-scratch predictions.
sk_predictions = model.predict(X_test)

In [18]:
# Inspect the labels predicted by the scikit-learn model.
sk_predictions

array([1, 1, 2, 0, 1, 0, 0, 0, 1, 2, 1, 0, 2, 1, 0, 1, 2, 0, 2, 1, 1, 1,
       1, 1, 2, 0, 2, 1, 2, 0])

# Confusion Matrix

In [19]:
from sklearn.metrics import confusion_matrix


In [20]:
# Confusion matrix for the scikit-learn predictions: rows are true classes,
# columns are predicted classes, so off-diagonal entries are misclassifications.
confusion_matrix(y_test,sk_predictions)

array([[ 9,  0,  0],
       [ 0, 13,  0],
       [ 0,  0,  8]])

In [21]:
# Confusion matrix for the from-scratch KNN predictions: rows are true classes,
# columns are predicted classes, so off-diagonal entries are misclassifications.
confusion_matrix(y_test,predictions)

array([[ 9,  0,  0],
       [ 0, 13,  0],
       [ 0,  0,  8]])