In [13]:
import torch
from scipy.stats import mode
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [32]:
class KNN:
    def __init__(self, k):
        """
        k: number of neighbors
        """
        self.k = k
    
    def distance(self, point1, point2, meansure='euclidean', p=2):
        if meansure == 'euclidean':
            return torch.norm(point1 - point2, 2, 0)
        elif meansure == 'manhattan':
            return torch.norm(torch.abs(point1 - point2))
        elif meansure == 'minkowski':
            return torch.norm(torch.sum(torch.abs(point1 - point2)**p), 1/p)
        else:
            raise ValueError("Unknown similarity distance type")

    def fit_predict(self, X, y, item):
        """
        - Iterate through each datapoints (item/y_test) that needs to be classified
        - Find distance between all train data points and each datapoint (item/y_test)
            using D distance with D in [euclidean, manhattan, minkowski]
        - Sort the distance using argsort, it gives indices of the y_test
        - Find the majority label whose distance closest to each datapoint of y_test.

        X: input tensor
        y: ground truth label
        item: tensors to be classified

        return: predicted labels
        """
        y_pred = []
        for i in item:
            point_distances = []
            for ipt in range(X.shape[0]):
                distances = self.distance(X[ipt,:], i) # computing all distances between the test data point and all training data points
                point_distances.append(distances)

            point_distances = torch.tensor(point_distances)
            k_neighbors = torch.argsort(point_distances)[:self.k] # sort ascending and return indices of k neighbors
            y_label = y[k_neighbors]
            major_class = torch.mode(y_label).values
            y_pred.append(major_class)

        return torch.tensor(y_pred)
            

In [None]:
iris = load_iris()
X = torch.tensor(iris.data)
y = torch.tensor(iris.target)

torch.manual_seed(42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
model = KNN(k = 3)
y_pred = model.fit_predict(X_train, y_train, X_test)
print("Accuracy score: {:.4f}".format(accuracy_score(y_test, y_pred)))


Accuracy score: 0.9667
