## Implement KNN in Python from Scratch 

In [7]:
from collections import Counter
import numpy as np

## Define the knn function 
def knn(X_train, y_train, X_test, k):
    """
    Classify test points by majority vote among their k nearest training points.

    Nearness is measured with the Euclidean distance. Inputs may be numpy
    arrays or plain nested lists; features are converted to float so that
    subtracting unsigned-integer arrays cannot wrap around.

    Parameters:
    X_train - training features, array-like of shape (n_train, n_features)
    y_train - training labels, array-like of length n_train
    X_test - test features, array-like of shape (n_test, n_features)
    k - number of nearest neighbors to consider (must be >= 1); a value
        larger than n_train is treated as n_train

    Ties are broken deterministically: equidistant training points are taken
    in training-set order, and labels with equal vote counts are resolved in
    favor of the label whose first vote came from the nearer neighbor.

    Returns a numpy array of predicted labels for the test set, with the same
    dtype as the training labels.

    Raises ValueError if the training set is empty, if X_train and y_train
    have different lengths, or if k is smaller than 1.
    """
    X_train = np.asarray(X_train, dtype=float)
    X_test = np.asarray(X_test, dtype=float)
    y_train = np.asarray(y_train)

    n_train = len(X_train)
    if n_train == 0:
        raise ValueError("X_train must contain at least one sample")
    if len(y_train) != n_train:
        raise ValueError(
            "X_train and y_train must have the same length "
            f"(got {n_train} and {len(y_train)})"
        )
    if k < 1:
        # A negative k would silently drop the farthest points via slicing,
        # and k == 0 would leave no votes to count.
        raise ValueError(f"k must be at least 1 (got {k})")

    # Asking for more neighbors than exist simply means every point votes.
    k = min(k, n_train)

    y_pred = []
    for test_point in X_test:
        # Euclidean distance from this test point to every training point
        distances = np.sqrt(np.sum((X_train - test_point) ** 2, axis=1))

        # Stable sort keeps equidistant points in training-set order
        nearest_neighbors = np.argsort(distances, kind="stable")[:k]

        # Labels of the k nearest neighbors, ordered nearest first
        labels = y_train[nearest_neighbors].tolist()

        # Counter keeps first-seen order among equal counts, so vote ties
        # go to the label of the nearer neighbor
        most_common_label = Counter(labels).most_common(1)[0][0]
        y_pred.append(most_common_label)

    # Preserve the label dtype even when there are no test points
    return np.array(y_pred, dtype=y_train.dtype)

In [8]:
## Toy dataset: four 2-D training points in two classes, plus four query points
X_train = np.array([
    [1, 2],
    [2, 1],
    [3, 4],
    [4, 3],
])
y_train = np.array([0, 0, 1, 1])
X_test = np.array([
    [1, 1],
    [3, 3],
    [2, 3],
    [4, 3],
])

## Classify each query point from its single nearest neighbor (k=1)
y_pred = knn(X_train, y_train, X_test, k=1)

## Show the predicted labels
print(y_pred)

[0 1 0 1]


In [9]:
## Classify the query points again, this time letting the two nearest neighbors vote (k=2)
y_pred = knn(X_train, y_train, X_test, k=2)

## Show the predicted labels
print(y_pred)

[0 1 0 1]


In [10]:
## Classify the query points with k=10; k exceeds the four training points,
## so every training point takes part in the vote
y_pred = knn(X_train, y_train, X_test, k=10)

## Show the predicted labels
print(y_pred)

[0 1 0 1]
