# k-Nearest Neighbor

1) Implement the k-Nearest Neighbor algorithm with Python!
2) It should be applicable for classification and regression tasks!
3) Extend your algorithm with a weighting function (see the lecture slides)!

In [6]:
import numpy as np
from collections import Counter
from sklearn.metrics import mean_squared_error, accuracy_score
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor


In [15]:
# YOUR CODE HERE

class PythonKNN:
    """Brute-force k-Nearest-Neighbor estimator for classification and regression.

    Every prediction scans the complete training set, picks the ``k`` closest
    samples and combines their labels, optionally weighted by a user-supplied
    function of the neighbor distances.

    Parameters
    ----------
    k : int, default 3
        Number of neighbors taken into account. Must be a positive integer.
        If ``k`` exceeds the number of training samples, all samples are used.
    task : str, default 'classification'
        Either ``'classification'`` (weighted majority vote) or
        ``'regression'`` (weighted mean). Stored lower-cased.
    distance_metric : str, default 'euclidean'
        Name of the distance metric. Only ``'euclidean'`` is implemented.
    weight_function : callable or None, default None
        Called with the array of the k nearest distances; must return one
        weight per distance. ``None`` gives every neighbor the weight 1.
    """

    def __init__(self, k=3, task='classification', distance_metric='euclidean', weight_function=None): # default k = 3
        # Reject k <= 0 up front: an empty neighbor set only fails much later
        # inside predict, and a negative k would silently slice the wrong neighbors.
        if isinstance(k, bool) or not isinstance(k, (int, np.integer)) or k < 1:
            raise ValueError("k must be a positive integer")

        self.k = k
        self.task = task.lower()
        self.distance_metric = distance_metric
        self.weight_function = weight_function

        # Training data, populated by fit()
        self.X_train = None
        self.y_train = None

    def fit(self, X, y):
        """Store the training samples and their labels.

        Parameters
        ----------
        X : array-like
            Training samples, one sample per entry along the first axis.
        y : array-like
            Labels / target values, one per training sample.

        Returns
        -------
        PythonKNN
            The estimator itself, so calls can be chained.

        Raises
        ------
        ValueError
            If ``X`` is empty or ``X`` and ``y`` differ in length.
        """
        # Convert to arrays so that lists work too: predict() subtracts samples
        # and indexes the labels with an index array.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)

        if X.ndim == 0 or X.shape[0] == 0:
            raise ValueError("X must contain at least one training sample")
        if y.ndim == 0 or y.shape[0] != X.shape[0]:
            raise ValueError("X and y must contain the same number of samples")

        self.X_train = X
        self.y_train = y
        return self

    def distance(self, x1, x2):
        """Return the distance between the two samples ``x1`` and ``x2``.

        Raises
        ------
        ValueError
            If ``distance_metric`` is anything other than ``'euclidean'``.
        """
        if self.distance_metric == 'euclidean':
            diff = np.asarray(x1, dtype=float) - np.asarray(x2, dtype=float)
            return np.sqrt(np.sum(diff ** 2))
        else:
            raise ValueError("Unsupported distance metric")

    def predict(self, X):
        """Predict a label (classification) or value (regression) per sample.

        Parameters
        ----------
        X : array-like
            Query samples, one sample per entry along the first axis.

        Returns
        -------
        list
            One prediction per query sample, in input order.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        ValueError
            If the task or distance metric is unsupported, or the weight
            function does not return one weight per neighbor.
        """
        if self.X_train is None or self.y_train is None:
            raise RuntimeError("fit must be called before predict")
        # Check the task once up front instead of after the distance computation.
        if self.task not in ('classification', 'regression'):
            raise ValueError("Unsupported task")

        predictions = []

        for x in np.asarray(X, dtype=float):
            # Distance from the query sample to every training sample
            distances = np.array([self.distance(x, x_train) for x_train in self.X_train])

            # Indices of the k closest training samples. The stable sort keeps
            # equidistant samples in training order, so the choice is deterministic.
            k_indices = np.argsort(distances, kind='stable')[:self.k]
            k_nearest_labels = self.y_train[k_indices]
            k_nearest_distance = distances[k_indices]

            if self.weight_function is not None:
                weights = np.asarray(self.weight_function(k_nearest_distance), dtype=float)
                if weights.shape != k_nearest_distance.shape:
                    raise ValueError("weight_function must return one weight per neighbor")
            else:
                weights = np.ones(len(k_nearest_distance)) # default weight = 1

            if self.task == 'classification':
                # Weighted vote. On a tie the label that was inserted first wins,
                # i.e. the label of the nearer neighbor.
                weighted_counter = Counter()
                for label, weight in zip(k_nearest_labels, weights):
                    weighted_counter[label] += weight
                prediction = max(weighted_counter, key=weighted_counter.get)
            else:
                # Weighted mean of the neighbor targets
                prediction = np.dot(k_nearest_labels, weights) / np.sum(weights)

            predictions.append(prediction)

        return predictions
    

In [8]:
# Weighting function for inverse distance
def inverse_distance_weight(distance, eps=1e-9):
    """Return inverse-distance weights ``1 / (distance + eps)``.

    Parameters
    ----------
    distance : float or array-like of float
        Distance(s) to weight; closer points get larger weights.
    eps : float, default 1e-9
        Small offset added to every distance so that a distance of exactly 0
        does not cause a division by zero. With ``eps=0`` a zero distance
        yields ``inf``.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        One weight per input distance, in the same shape as the input.
    """
    # asarray lets plain lists and scalars through the same code path.
    return 1.0 / (np.asarray(distance, dtype=float) + eps)

## Compare the results with the sklearn kNN algorithm

In [17]:
# Reference inputs: your implementation has to work with the data below.
# The *_X / *_y arrays are the reference data points and labels to fit on.

# Classification data: nine 1-D samples (0..8), three consecutive samples per class
cl_X = np.arange(9).reshape(-1, 1)
cl_y = np.repeat([0, 1, 2], 3)

# Regression data: five 2-D samples (i, 2*i) with target i, for i = 1..5
r_y = np.arange(1, 6)
r_X = np.column_stack((r_y, 2 * r_y))

print('Classification input:\n', cl_X, '\tShape:', cl_X.shape)
print('Classification labels:\n', cl_y, '\tShape:', cl_y.shape)
print('Regression input:\n', r_X, '\tShape:', r_X.shape)
print('Regression labels:\n', r_y, '\tShape:', r_y.shape)


# Query points used to test the classification task
cl_X_test = np.array([[2.1], [5.2], [7.2]])

# Query points used to test the regression task
r_X_test = np.array([[2.1, 5.1], [2.6, 6.2]])

Classification input:
 [[0]
 [1]
 [2]
 [3]
 [4]
 [5]
 [6]
 [7]
 [8]] 	Shape: (9, 1)
Classification labels:
 [0 0 0 1 1 1 2 2 2] 	Shape: (9,)
Regression input:
 [[ 1  2]
 [ 2  4]
 [ 3  6]
 [ 4  8]
 [ 5 10]] 	Shape: (5, 2)
Regression labels:
 [1 2 3 4 5] 	Shape: (5,)


In [16]:
# Classification: custom kNN vs. sklearn, both with inverse-distance weighting
print("Classification Predictions")

# Custom implementation
python_knn_cl = PythonKNN(
    k=3,
    task='classification',
    weight_function=inverse_distance_weight,
)
python_knn_cl.fit(cl_X, cl_y)
python_knn_cl_predictions = python_knn_cl.predict(cl_X_test)
print(f"Python KNN: {python_knn_cl_predictions}")

# sklearn reference (fit returns the fitted estimator)
sklearn_knn_cl = KNeighborsClassifier(n_neighbors=3, weights='distance').fit(cl_X, cl_y)
sklearn_cl_predictions = sklearn_knn_cl.predict(cl_X_test)
print(f"sklearn KNN: {sklearn_cl_predictions}")

# "Accuracy" here is the agreement with sklearn: the sklearn predictions serve
# as the reference labels, not a ground truth for the test points.
classification_accuracy = accuracy_score(
    y_true=sklearn_cl_predictions,
    y_pred=python_knn_cl_predictions,
)
print(f"Accuracy: {classification_accuracy}")

Classification Predictions
Python KNN: [0, 1, 2]
sklearn KNN: [0 1 2]
Accuracy: 1.0


In [37]:
# Regression: custom kNN vs. sklearn, both with inverse-distance weighting
print("Regression Predictions")

# Custom implementation
python_knn_r = PythonKNN(
    k=3,
    task='regression',
    weight_function=inverse_distance_weight,
)
python_knn_r.fit(r_X, r_y)
python_knn_r_predictions = python_knn_r.predict(r_X_test)
print(f"Python KNN: {python_knn_r_predictions}")

# sklearn reference (fit returns the fitted estimator)
sklearn_knn_r = KNeighborsRegressor(n_neighbors=3, weights='distance').fit(r_X, r_y)
sklearn_r_predictions = sklearn_knn_r.predict(r_X_test)
print(f"sklearn KNN: {sklearn_r_predictions}")

# Mean squared error (MSE) between the two implementations; the sklearn
# predictions serve as the reference values.
regression_mse = mean_squared_error(
    y_true=sklearn_r_predictions,
    y_pred=python_knn_r_predictions,
)
print(f"MSE: {regression_mse}")

Regression Predictions
Python KNN: [2.2414313012651355, 3.0]
sklearn KNN: [2.2414313 3.       ]
MSE: 3.042402328611397e-21
