In [7]:
import numpy as np
from scipy.spatial import distance
from sklearn.metrics import accuracy_score, f1_score

from dataset import get_2D_normalised, get_dimensionlly_reduced

In [8]:
class KNN:
    """Brute-force k-nearest-neighbours classifier using Euclidean distance.

    Each query sample receives the label that occurs most often among its
    ``k`` closest training samples. When several labels tie for the highest
    count, the first one in ``np.unique`` (sorted) order is returned.
    """

    def __init__(self, k):
        """Store the neighbourhood size.

        Args:
            k: Number of nearest training samples that vote on a prediction.
        """
        self.k = k

    def fit(self, X, y):
        """Memorise the training set (no parameters are learned).

        Args:
            X: Training samples, one row per sample.
            y: Labels aligned with the rows of ``X``.

        Raises:
            ValueError: If ``X`` and ``y`` do not contain the same number of
                samples.
        """
        # Coerce to ndarrays so that the positional fancy-indexing in
        # ``predict`` works for lists and pandas objects as well.
        X = np.asarray(X)
        y = np.asarray(y)
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same number of samples, "
                f"got {len(X)} and {len(y)}"
            )
        self.X_train = X
        self.y_train = y

    def predict(self, X):
        """Predict a label for every row of ``X``.

        Args:
            X: Query samples, one row per sample, with the same number of
                features as the training data.

        Returns:
            A NumPy array with one predicted label per query sample.

        Raises:
            ValueError: If ``k`` is smaller than 1 or larger than the number
                of stored training samples.
        """
        n_train = len(self.X_train)
        if not 1 <= self.k <= n_train:
            raise ValueError(
                f"k must be between 1 and the number of training samples "
                f"({n_train}), got {self.k}"
            )

        y_pred = []

        # Iterate over rows of an ndarray; iterating a DataFrame directly
        # would yield column names instead of samples.
        for x in np.asarray(X):
            distances = distance.cdist([x], self.X_train, metric='euclidean')[0]
            # kth = k - 1 places the k smallest distances in the first k
            # slots and, unlike kth = k, stays in bounds when k == n_train.
            k_nearest_indices = np.argpartition(distances, self.k - 1)[:self.k]
            k_nearest_labels = self.y_train[k_nearest_indices]
            unique_labels, counts = np.unique(k_nearest_labels, return_counts=True)
            most_common = unique_labels[np.argmax(counts)]
            y_pred.append(most_common)

        return np.array(y_pred)

# Without PCA

In [9]:
# Load the train/test splits for the baseline run without PCA. The helper
# returns ((x_train, y_train), (x_test, y_test)), as the unpacking shows.
# NOTE(review): presumably each sample is a flattened, normalised feature
# vector -- confirm against dataset.get_2D_normalised.
(x_train_wo_pca, y_train_wo_pca), (x_test_wo_pca, y_test_wo_pca) = get_2D_normalised()

In [10]:
# Baseline classifier: 5 nearest neighbours on the features loaded without PCA.
# fit() only stores the training arrays, so this cell is cheap.
knn_wo_pca = KNN(5)
knn_wo_pca.fit(x_train_wo_pca, y_train_wo_pca)

In [12]:
# Predict a label for every test sample. KNN.predict computes distances to the
# whole training set one query at a time in a Python loop, so this cell is
# likely the slow step of the section.
y_pred_wo_pca = knn_wo_pca.predict(x_test_wo_pca)

In [14]:
# Fraction of test samples whose predicted label equals the true label.
accuracy_wo_pca = accuracy_score(y_test_wo_pca, y_pred_wo_pca)

In [15]:
# Report the baseline accuracy (no PCA).
print("Accuracy without PCA: ", accuracy_wo_pca)

Accuracy without PCA:  0.3398


# With PCA

In [21]:
# Load the train/test splits for the PCA run; same nested-tuple layout as the
# baseline, ((x_train, y_train), (x_test, y_test)).
# NOTE(review): presumably PCA is computed with 1024 components and the first
# 128 are kept as features -- confirm the meaning of `components` and `needed`
# against dataset.get_dimensionlly_reduced.
(x_train_w_pca, y_train_w_pca), (x_test_w_pca, y_test_w_pca) = get_dimensionlly_reduced(components=1024, needed=128)

In [22]:
# Same k (5) as the baseline so the two accuracies are directly comparable.
# fit() only stores the training arrays.
knn_w_pca = KNN(5)
knn_w_pca.fit(x_train_w_pca, y_train_w_pca)

In [23]:
# Predict a label for every test sample using the dimensionality-reduced features.
y_pred_w_pca = knn_w_pca.predict(x_test_w_pca)

In [24]:
# Fraction of test samples whose predicted label equals the true label.
accuracy_w_pca = accuracy_score(y_test_w_pca, y_pred_w_pca)

In [25]:
# Report the accuracy obtained with the PCA-reduced features.
print("Accuracy with PCA: ", accuracy_w_pca)

Accuracy with PCA:  0.3764
