# In [None]:
import numpy as np
from tensorflow.keras.datasets import mnist
import tqdm

class KNN(object):
  """Brute-force k-nearest-neighbour classifier.

  ``train`` only memorises the data.  ``dist_calculator`` builds the full
  (num_test, num_train) distance matrix and ``label_predictor`` turns such a
  matrix into labels by majority vote among the ``k`` closest training samples.
  """

  # Number of test rows handled per vectorised batch in the Euclidean fast
  # path; bounds the size of the temporaries to (_BATCH_ROWS, num_train).
  _BATCH_ROWS = 256

  def __init__(self):
    # Populated by train(); None means "not trained yet".
    self.X_train = None
    self.y_train = None

  def train(self, X, y):
    """Memorise the training samples ``X`` and their labels ``y``.

    Args:
      X: array-like whose first axis indexes samples.
      y: array-like of labels, one per sample.  ``label_predictor`` requires
        them to be non-negative integers (it counts votes with
        ``np.bincount``).
    """
    # asarray is a no-op for ndarrays and lets plain lists be fancy-indexed
    # later in label_predictor.
    self.X_train = np.asarray(X)
    self.y_train = np.asarray(y)

  @staticmethod
  def _as_float(a):
    """Return ``a`` as an array, promoting integer/bool data to float64.

    Differences of unsigned integers wrap around (uint8: 0 - 20 == 236) and
    their squares overflow, which silently corrupts distances.
    """
    a = np.asarray(a)
    if a.dtype.kind in "biu":
      return a.astype(np.float64)
    return a

  def _euclidean_dists(self, test, train, dists):
    """Fill ``dists`` with pairwise Euclidean distances and return it.

    Uses ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b so the heavy lifting is a
    matrix product instead of one Python call per (test, train) pair.  The
    result can differ from the direct formula by floating-point rounding.
    """
    num_test, num_train = dists.shape
    # Samples may be multi-dimensional (e.g. unflattened images); the
    # Euclidean norm over all entries equals the norm of the flattened sample.
    n_features = int(np.prod(train.shape[1:]))
    test2d = test.reshape(num_test, n_features)
    train2d = train.reshape(num_train, n_features)

    train_sq = np.einsum("ij,ij->i", train2d, train2d)
    for start in range(0, num_test, self._BATCH_ROWS):
      stop = start + self._BATCH_ROWS
      rows = test2d[start:stop]
      rows_sq = np.einsum("ij,ij->i", rows, rows)
      sq = rows_sq[:, None] + train_sq[None, :] - 2.0 * (rows @ train2d.T)
      # Cancellation can leave tiny negative values for (near-)identical
      # points; clamp them so sqrt does not produce NaN.
      np.maximum(sq, 0.0, out=sq)
      dists[start:stop] = np.sqrt(sq, out=sq)
    return dists

  def dist_calculator(self, X, norm=None):
    """Compute the distance from every row of ``X`` to every training sample.

    Args:
      X: array-like of test samples; first axis indexes samples.
      norm: optional callable mapping a difference array (train sample minus
        test sample) to a scalar.  ``None`` selects the Euclidean (L2) norm.

    Returns:
      Float array of shape (num_test, num_train) where entry [i, j] is the
      distance between test sample i and training sample j.

    Raises:
      RuntimeError: if ``train`` has not been called.
    """
    if self.X_train is None:
      raise RuntimeError("train() must be called before dist_calculator()")

    test = self._as_float(X)
    train = self._as_float(self.X_train)
    num_test = test.shape[0]
    num_train = train.shape[0]
    dists = np.zeros((num_test, num_train))

    # Vectorised path: default norm, real floating data, matching sample
    # shapes.  Anything else falls back to the generic pairwise loop below.
    if (norm is None
        and test.shape[1:] == train.shape[1:]
        and test.dtype.kind == "f"
        and train.dtype.kind == "f"):
      return self._euclidean_dists(test, train, dists)

    if norm is None:
      norm = lambda x: np.sqrt(np.sum(x**2))

    for i in range(num_test):
      for j in range(num_train):
        dists[i, j] = norm(train[j] - test[i])
    return dists

  def label_predictor(self, dists, k=1):
    """Predict a label for each row of a distance matrix by majority vote.

    Args:
      dists: array of shape (num_test, num_train) as returned by
        ``dist_calculator``.
      k: number of nearest training samples that vote; values larger than
        num_train use all training samples.

    Returns:
      Float array of length num_test holding the predicted labels.  Ties are
      resolved in favour of the smallest label (``np.argmax`` returns the
      first maximum of the vote counts).

    Raises:
      ValueError: if ``k`` is smaller than 1.
      RuntimeError: if ``train`` has not been called.
    """
    if self.y_train is None:
      raise RuntimeError("train() must be called before label_predictor()")
    # A negative k would silently slice off the farthest neighbours instead of
    # selecting the nearest ones, and k == 0 leaves nobody to vote.
    if k < 1:
      raise ValueError(f"k must be a positive integer, got {k}")

    dists = np.asarray(dists)
    num_test = dists.shape[0]
    prediction_y = np.zeros(num_test)

    for i in range(num_test):
      nearest = np.argsort(dists[i])[:k]
      votes = np.bincount(self.y_train[nearest])
      prediction_y[i] = np.argmax(votes)
    return prediction_y


# Load MNIST as (images, labels) pairs for the train and test splits.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Flatten the images
num_pixels = X_train.shape[1] * X_train.shape[2]
X_train = X_train.reshape((X_train.shape[0], num_pixels))
X_test = X_test.reshape((X_test.shape[0], num_pixels))

# Ensure the labels are integers
y_train = y_train.astype(int)
y_test = y_test.astype(int)

# Normalize pixel values to be between 0 and 1
X_train, X_test = X_train / 255.0, X_test / 255.0

# Norms to evaluate, keyed by the name reported in the results.  ``None``
# selects KNN.dist_calculator's built-in Euclidean norm.  Other norms can be
# enabled by adding entries, e.g.
#   "L1": lambda x: np.linalg.norm(x, ord=1)
#   "Linf": lambda x: np.linalg.norm(x, ord=np.inf)
norms = {"L2": None}

# Baseline: 1-nearest-neighbour error on the held-out test split.
knn = KNN()
knn.train(X=X_train, y=y_train)
L2_dists = knn.dist_calculator(X=X_test)
prediction_y = knn.label_predictor(dists=L2_dists, k=1)
error = np.mean(prediction_y != y_test)
print(f"Error: {error:.4f}")

# Cross-validation over k: each fold serves once as the validation set while
# the remaining folds form the training set.
num_folds = 5
X_train_folds = np.array_split(X_train, num_folds)
y_train_folds = np.array_split(y_train, num_folds)

k_values = [1, 2, 3, 5, 7, 10, 15, 20, 25, 30]
errors = []

for norm, norm_fn in norms.items():
  # Summed validation error per k; divided by num_folds once all folds ran.
  fold_errors = {k: 0.0 for k in k_values}

  for i in tqdm.tqdm(range(num_folds)):
    X_train2 = np.concatenate(X_train_folds[:i] + X_train_folds[i + 1:], axis=0)
    y_train2 = np.concatenate(y_train_folds[:i] + y_train_folds[i + 1:], axis=0)
    X_test2, y_test2 = X_train_folds[i], y_train_folds[i]

    knn = KNN()
    knn.train(X=X_train2, y=y_train2)
    # The distance matrix does not depend on k, so compute it once per fold
    # and reuse it for every candidate k.
    dists_2 = knn.dist_calculator(X=X_test2, norm=norm_fn)
    for k in k_values:
      prediction_y = knn.label_predictor(dists=dists_2, k=k)
      fold_errors[k] += np.mean(prediction_y != y_test2)

  for k in k_values:
    error = fold_errors[k] / num_folds
    errors.append({"k": k, "norm": norm, "error": error})
    print(f"k: {k}, norm: {norm}, error: {error:.4f}")

print(errors)