In [1]:
import numpy as np
from statistics import mode

class KNN:
    def __init__(self, k=3):
        self.k = k

    def fit(self, X, y): #k-NN is a lazy learner, meaning it doesn’t "learn" during training but memorizes the data to use later during prediction
        self.X_train = X
        self.y_train = y

    def euclidean_distance(self, x1, x2):
        return np.sqrt(np.sum((x1 - x2) ** 2))

    def predict(self, X):
        y_pred = [self._predict(x) for x in X]
        return np.array(y_pred)

    def _predict(self, x):
        distances = [self.euclidean_distance(x, x_train) for x_train in self.X_train]
        k_indices = np.argsort(distances)[:self.k]
        k_nearest_labels = [self.y_train[i] for i in k_indices]
        return mode(k_nearest_labels)

In [2]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
X, y = make_classification(n_samples = 50,
                                       n_features = 2,
                                       n_informative = 2,
                                       n_redundant = 0,
                                       n_classes = 2,
                                       weights = [0.51, .49])

X_train, X_test,y_train, y_test = train_test_split(X,y ,
                                   random_state=104, 
                                   test_size=0.25, 
                                   shuffle=True)

knn = KNN(k=2)
knn.fit(X_train, y_train)
prediction = knn.predict(X_test)

print("Predicted class:", prediction)
print("Actual class:", y_test)

Predicted class: [1 1 1 1 0 1 0 0 0 1 1 1 1]
Actual class: [1 1 1 1 0 1 0 0 0 1 1 0 0]
