# Intro to NN & k-NN.
Use CIFAR10 dataset.

Please do not use any library for NN / k-NN.

Task 1: Implement Nearest Neighbor with L1 and L2 metrics.

Task 2: Implement k-NN with L1 & L2, where k={1,3,5,7} and 4-folds cross-validation to select k.

To run experiments, you can take every 100th element due the size of the dataset.



In [74]:
import numpy as np
from keras.datasets import cifar10

Loading dataset:

In [75]:
(X_train, y_train) , (X_test, y_test) = cifar10.load_data()

Choosing every 100th element:


In [76]:
X_train.shape, X_test.shape

((50000, 32, 32, 3), (10000, 32, 32, 3))

In [77]:
X_train = X_train[::100]
y_train = y_train[::100]
X_test = X_test[::100]
y_test = y_test[::100]

In [78]:
X_train.shape, X_test.shape

((500, 32, 32, 3), (100, 32, 32, 3))

In [79]:
X_train = X_train.reshape(X_train.shape[0], 32 * 32 * 3)
X_test = X_test.reshape(X_test.shape[0], 32 * 32 * 3)

In [80]:
X_train.shape, X_test.shape

((500, 3072), (100, 3072))

In [81]:
y_test = [item[0] for item in y_test]

Implementation of NN


In [82]:
class NN(object):
  def __init__(self, distance):
    self.distance = distance

  def train(self, X_train, y_train):
    self.X_train = X_train
    self.y_train = y_train
  
  def predict(self, X_test):
    num_test_cases = X_test.shape[0]
    prediction = np.zeros(num_test_cases, dtype=self.y_train.dtype)

    for i in range(num_test_cases):

      if self.distance == 'l1':
        distances = np.sum(np.abs(self.X_train - X_test[i, :]), axis=1)
        idx = np.argmin(distances)
      elif self.distance == 'l2':
        distances = np.sqrt(np.sum(np.square(self.X_train - X_test[i, :]), axis=1))
        idx = np.argmax(distances)
      
      prediction[i] = self.y_train[idx]

    return prediction

In [83]:
nn = NN(distance='l1')
nn.train(X_train, y_train)
pred = nn.predict(X_test)
acc = np.mean(pred == y_test)
print(f'Accuracy for Nearest Neighbours for L1 distance: {acc * 100} %')

Accuracy for Nearest Neighbours for L1 distance: 24.0 %


In [84]:
nn = NN(distance='l2')
nn.train(X_train, y_train)
pred = nn.predict(X_test)
acc = np.mean(pred == y_test)
print(f'Accuracy for Nearest Neighbours for L2 distance: {acc * 100} %')

Accuracy for Nearest Neighbours for L2 distance: 17.0 %


Implementation of kNN

In [85]:
class kNN(object):
  def __init__(self, distance):
    self.distance = distance

  def train(self, X_train, y_train):
    self.X_train = X_train
    self.y_train = y_train
  
  def predict(self, X_test, k=1):
    num_test_cases = X_test.shape[0]
    prediction = np.zeros(num_test_cases, dtype=self.y_train.dtype)

    for i in range(num_test_cases):

      if self.distance == 'l1':
        distances = np.sum(np.abs(self.X_train - X_test[i, :]), axis=1)
      elif self.distance == 'l2':
        distances = np.sqrt(np.sum(np.square(self.X_train - X_test[i, :]), axis=1))
    
      idx_sort = np.argsort(distances)

      closest_y = self.y_train[idx_sort[:k]]
      closest_y = [item[0] for item in closest_y]
      count = np.bincount(closest_y)
      idx = np.argmax(count)

      prediction[i] = idx

    return prediction

In [86]:
for k in [1, 3, 5, 7]:
  knn = kNN(distance='l1')
  knn.train(X_train, y_train)
  pred = knn.predict(X_test, k=k)
  acc = np.mean(pred == y_test)
  print(f'Accuracy for {k} - Nearest Neighbours for L1 distance: {acc * 100} %')

Accuracy for 1 - Nearest Neighbours for L1 distance: 24.0 %
Accuracy for 3 - Nearest Neighbours for L1 distance: 23.0 %
Accuracy for 5 - Nearest Neighbours for L1 distance: 22.0 %
Accuracy for 7 - Nearest Neighbours for L1 distance: 21.0 %


In [87]:
for k in [1, 3, 5, 7]:
  knn = kNN(distance='l2')
  knn.train(X_train, y_train)
  pred = knn.predict(X_test, k=k)
  acc = np.mean(pred == y_test)
  print(f'Accuracy for {k} - Nearest Neighbours for L2 distance: {acc * 100} %')

Accuracy for 1 - Nearest Neighbours for L2 distance: 12.0 %
Accuracy for 3 - Nearest Neighbours for L2 distance: 15.0 %
Accuracy for 5 - Nearest Neighbours for L2 distance: 12.0 %
Accuracy for 7 - Nearest Neighbours for L2 distance: 12.0 %


kNN with 4-fold cross validation

In [88]:
num_folds = 4

X_train_folds = np.array_split(X_train, num_folds)
y_train_folds = np.array_split(y_train, num_folds)

In [89]:
for k in [1, 3, 5, 7]:
  for i in range(num_folds):
    knn = kNN(distance='l1')
    X = np.concatenate(X_train_folds[:i] + X_train_folds[i+1:])
    y = np.concatenate(y_train_folds[:i] + y_train_folds[i+1:])
    knn.train(X, y)
    X_val = X_train_folds[i]
    y_val = y_train_folds[i]
    pred = knn.predict(X_val, k=k)
    y_val = [item[0] for item in y_val]
    acc = np.mean(pred == y_val)
    print(f'Accuracy for {k} - Nearest Neighbours for L1 distance: {acc * 100} %')

Accuracy for 1 - Nearest Neighbours for L1 distance: 13.600000000000001 %
Accuracy for 1 - Nearest Neighbours for L1 distance: 23.200000000000003 %
Accuracy for 1 - Nearest Neighbours for L1 distance: 25.6 %
Accuracy for 1 - Nearest Neighbours for L1 distance: 21.6 %
Accuracy for 3 - Nearest Neighbours for L1 distance: 12.8 %
Accuracy for 3 - Nearest Neighbours for L1 distance: 24.0 %
Accuracy for 3 - Nearest Neighbours for L1 distance: 20.0 %
Accuracy for 3 - Nearest Neighbours for L1 distance: 15.2 %
Accuracy for 5 - Nearest Neighbours for L1 distance: 13.600000000000001 %
Accuracy for 5 - Nearest Neighbours for L1 distance: 23.200000000000003 %
Accuracy for 5 - Nearest Neighbours for L1 distance: 18.4 %
Accuracy for 5 - Nearest Neighbours for L1 distance: 19.2 %
Accuracy for 7 - Nearest Neighbours for L1 distance: 11.200000000000001 %
Accuracy for 7 - Nearest Neighbours for L1 distance: 20.8 %
Accuracy for 7 - Nearest Neighbours for L1 distance: 19.2 %
Accuracy for 7 - Nearest Neigh

In [90]:
for k in [1, 3, 5, 7]:
  for i in range(num_folds):
    knn = kNN(distance='l2')
    X = np.concatenate(X_train_folds[:i] + X_train_folds[i+1:])
    y = np.concatenate(y_train_folds[:i] + y_train_folds[i+1:])
    knn.train(X, y)
    X_val = X_train_folds[i]
    y_val = y_train_folds[i]
    pred = knn.predict(X_val, k=k)
    y_val = [item[0] for item in y_val]
    acc = np.mean(pred == y_val)
    print(f'Accuracy for {k} - Nearest Neighbours for L2 distance: {acc * 100} %')

Accuracy for 1 - Nearest Neighbours for L2 distance: 10.4 %
Accuracy for 1 - Nearest Neighbours for L2 distance: 16.0 %
Accuracy for 1 - Nearest Neighbours for L2 distance: 15.2 %
Accuracy for 1 - Nearest Neighbours for L2 distance: 20.0 %
Accuracy for 3 - Nearest Neighbours for L2 distance: 7.199999999999999 %
Accuracy for 3 - Nearest Neighbours for L2 distance: 13.600000000000001 %
Accuracy for 3 - Nearest Neighbours for L2 distance: 19.2 %
Accuracy for 3 - Nearest Neighbours for L2 distance: 16.8 %
Accuracy for 5 - Nearest Neighbours for L2 distance: 12.8 %
Accuracy for 5 - Nearest Neighbours for L2 distance: 16.0 %
Accuracy for 5 - Nearest Neighbours for L2 distance: 20.0 %
Accuracy for 5 - Nearest Neighbours for L2 distance: 18.4 %
Accuracy for 7 - Nearest Neighbours for L2 distance: 16.8 %
Accuracy for 7 - Nearest Neighbours for L2 distance: 16.8 %
Accuracy for 7 - Nearest Neighbours for L2 distance: 17.599999999999998 %
Accuracy for 7 - Nearest Neighbours for L2 distance: 19.2 %