# K-Nearest Neighbors

Why Machines Learn: Chapter 5

In [2]:
pip install numpy

Collecting numpy
  Downloading numpy-2.2.2-cp313-cp313-macosx_11_0_arm64.whl.metadata (116 kB)
Downloading numpy-2.2.2-cp313-cp313-macosx_11_0_arm64.whl (14.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.1/14.1 MB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Installing collected packages: numpy
Successfully installed numpy-2.2.2

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m25.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [3]:
import numpy as np
from collections import Counter

## Euclidean Distance

$\text{Euclidean Distance} = \sqrt{\sum_{i=1}^{n} (x_i - y_i)^2}$

In [4]:
class KNN:
    """
    k-nearest-neighbors classifier using Euclidean distance.

    fit() stores the training set; predict() labels each query sample with the
    most common label among its k closest training samples.
    """

    def __init__(self, k=3):
        """
        Initialize k-NN classifier.

        Args:
            k (int): Number of nearest neighbors that vote on each prediction.

        Raises:
            TypeError: If k is not an integer.
            ValueError: If k is smaller than 1.
        """
        if not isinstance(k, (int, np.integer)):
            raise TypeError(f"k must be an integer, got {type(k).__name__}")
        if k < 1:
            # k == 0 would leave no voters, and a negative k would make the
            # slice in _predict drop the farthest samples instead of keeping
            # the nearest ones.
            raise ValueError(f"k must be at least 1, got {k}")
        self.k = k

    def fit(self, X, y):
        """
        Store the training data.

        Args:
            X: Training features, shape (n_samples, n_features).
            y: Training labels, shape (n_samples,).

        Raises:
            ValueError: If X is not a non-empty 2-D array, or if y is not a
                1-D array with one label per row of X.
        """
        X_train = np.array(X)
        y_train = np.array(y)
        if X_train.ndim != 2 or X_train.shape[0] == 0:
            raise ValueError(
                "X must be a non-empty 2-D array of shape (n_samples, n_features), "
                f"got shape {X_train.shape}"
            )
        if y_train.ndim != 1 or y_train.shape[0] != X_train.shape[0]:
            raise ValueError(
                f"y must hold one label per training sample: X has {X_train.shape[0]} "
                f"rows but y has shape {y_train.shape}"
            )
        self.X_train = X_train
        self.y_train = y_train

    def predict(self, X):
        """
        Predict the class labels for the input data.

        Args:
            X: Test features, shape (n_samples, n_features).

        Returns: Predicted labels, shape (n_samples,).

        Raises:
            ValueError: If X is not 2-D or its feature count differs from the
                training data.
        """
        queries = np.asarray(X)
        if queries.ndim == 1 and queries.size == 0:
            # An empty batch (e.g. []) has nothing to classify.
            return np.array([])
        n_features = self.X_train.shape[1]
        if queries.ndim != 2 or queries.shape[1] != n_features:
            # Without this check a 1-D sample or a single-feature row would be
            # broadcast against the training matrix and yield wrong labels.
            raise ValueError(
                f"X must have shape (n_samples, {n_features}), got shape {queries.shape}"
            )
        predictions = [self._predict(x) for x in queries]
        return np.array(predictions)

    def _predict(self, x):
        """
        Predict the label for a single sample.

        Args:
            x: Test sample, shape (n_features,).

        Returns: Predicted label for the sample.
        """
        distances = np.linalg.norm(self.X_train - x, axis=1)  # Euclidean distance
        # Find the k nearest neighbors. A stable sort keeps equally distant
        # samples in training order, so ties are resolved deterministically.
        # If k exceeds the number of training samples, every sample votes.
        k_indices = np.argsort(distances, kind="stable")[:self.k]
        k_nearest_labels = self.y_train[k_indices]
        # Majority vote; labels with equal counts are ranked in the order they
        # were first seen, i.e. the label of the closer neighbor wins.
        most_common = Counter(k_nearest_labels).most_common(1)
        return most_common[0][0]

In [7]:
if __name__ == "__main__":
    # Toy data set: three class-0 points followed by three class-1 points.
    y_train = [0, 0, 0, 1, 1, 1]
    X_train = [[1, 2], [2, 3], [3, 4], [6, 7], [7, 8], [8, 9]]

    # Query points to classify.
    X_test = [[5, 5], [2, 2], [7, 7]]

    # Build the classifier with three voting neighbors and store the training set.
    k = 3
    knn = KNN(k)
    knn.fit(X=X_train, y=y_train)

    # Classify the query points and show the resulting labels.
    predictions = knn.predict(X=X_test)
    print("Predictions:", predictions)

Predictions: [0 0 1]


## Manhattan Distance

$\text{Manhattan Distance} = \sum_{i=1}^{n} |x_i - y_i|$

In [8]:
import numpy as np
from collections import Counter

class KNN:
    """
    k-nearest-neighbors classifier using Manhattan distance.

    fit() stores the training set; predict() labels each query sample with the
    most common label among its k closest training samples.
    """

    def __init__(self, k=2):
        """
        Initialize the k-NN classifier.

        Args:
            k (int): Number of nearest neighbors that vote on each prediction.

        Raises:
            TypeError: If k is not an integer.
            ValueError: If k is smaller than 1.
        """
        if not isinstance(k, (int, np.integer)):
            raise TypeError(f"k must be an integer, got {type(k).__name__}")
        if k < 1:
            # k == 0 would leave no voters, and a negative k would make the
            # slice in _predict drop the farthest samples instead of keeping
            # the nearest ones.
            raise ValueError(f"k must be at least 1, got {k}")
        self.k = k

    def fit(self, X, y):
        """
        Store the training data.

        Args:
            X: Training features, shape (n_samples, n_features).
            y: Training labels, shape (n_samples,).

        Raises:
            ValueError: If X is not a non-empty 2-D array, or if y is not a
                1-D array with one label per row of X.
        """
        X_train = np.array(X)
        y_train = np.array(y)
        if X_train.ndim != 2 or X_train.shape[0] == 0:
            raise ValueError(
                "X must be a non-empty 2-D array of shape (n_samples, n_features), "
                f"got shape {X_train.shape}"
            )
        if y_train.ndim != 1 or y_train.shape[0] != X_train.shape[0]:
            raise ValueError(
                f"y must hold one label per training sample: X has {X_train.shape[0]} "
                f"rows but y has shape {y_train.shape}"
            )
        self.X_train = X_train
        self.y_train = y_train

    def predict(self, X):
        """
        Predict the class labels for the input data.

        Args:
            X: Test features, shape (n_samples, n_features).

        Returns: Predicted labels, shape (n_samples,).

        Raises:
            ValueError: If X is not 2-D or its feature count differs from the
                training data.
        """
        queries = np.asarray(X)
        if queries.ndim == 1 and queries.size == 0:
            # An empty batch (e.g. []) has nothing to classify.
            return np.array([])
        n_features = self.X_train.shape[1]
        if queries.ndim != 2 or queries.shape[1] != n_features:
            # Without this check a 1-D sample or a single-feature row would be
            # broadcast against the training matrix and yield wrong labels.
            raise ValueError(
                f"X must have shape (n_samples, {n_features}), got shape {queries.shape}"
            )
        predictions = [self._predict(x) for x in queries]
        return np.array(predictions)

    def _predict(self, x):
        """
        Predict the label for a single sample.

        Args:
            x: Test sample, shape (n_features,).

        Returns: Predicted label for the sample.
        """
        distances = np.sum(np.abs(self.X_train - x), axis=1)  # Manhattan distance
        # Find the k nearest neighbors. A stable sort keeps equally distant
        # samples in training order, so ties are resolved deterministically.
        # If k exceeds the number of training samples, every sample votes.
        k_indices = np.argsort(distances, kind="stable")[:self.k]
        k_nearest_labels = self.y_train[k_indices]
        # Majority vote; labels with equal counts are ranked in the order they
        # were first seen, i.e. the label of the closer neighbor wins.
        most_common = Counter(k_nearest_labels).most_common(1)
        return most_common[0][0]

In [9]:
if __name__ == "__main__":
    # Toy data set: three class-0 points followed by three class-1 points.
    y_train = [0, 0, 0, 1, 1, 1]
    X_train = [[1, 2], [2, 3], [3, 4], [6, 7], [7, 8], [8, 9]]

    # Query points to classify.
    X_test = [[5, 5], [2, 2], [7, 7]]

    # Build the classifier with two voting neighbors and store the training set.
    k = 2
    knn = KNN(k)
    knn.fit(X=X_train, y=y_train)

    # Classify the query points and show the resulting labels.
    predictions = knn.predict(X=X_test)
    print("Predictions:", predictions)

Predictions: [0 0 1]
