In [None]:
import numpy as np
import tensorflow as tf

# Load and Prepare the MNIST Dataset

In [None]:
# Load the MNIST dataset and split it into training and test sets
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Convert images and labels to float so that the differences computed later
# (pixel distances, label errors) are ordinary signed floating-point values.
# NOTE(review): the raw arrays are presumably unsigned 8-bit integers, where
# subtraction would wrap around -- confirm against the Keras documentation.
x_train = x_train.astype(float)
y_train = y_train.astype(float)
x_test = x_test.astype(float)
y_test = y_test.astype(float)

# Only use the first 2000 training examples
x_train = x_train[0:2000,:,:]
y_train = y_train[0:2000]

# Nearest Neighbor Classifier

## Nearest neighbor classification function

In [None]:
def nearest_neighbor(_x_test):
  """Predict a label for one image using the 1-nearest-neighbor rule.

  The image is compared against every image in the global ``x_train`` array
  and the label (from the global ``y_train``) of the closest one is returned.
  Distance is the norm of the pixel-wise difference taken over the two image
  axes (the Frobenius norm, i.e. the l2 distance between flattened images).

  Args:
    _x_test: A single image with the same shape as one training image.

  Returns:
    The entry of ``y_train`` belonging to the closest training image. If
    several training images are equally close, the one with the lowest index
    wins (``np.argmin`` returns the first minimum).

  Raises:
    AssertionError: If ``_x_test`` does not have the shape of ``x_train[0]``.
  """
  # Check to make sure input is the correct shape (28x28)
  assert np.shape(_x_test) == np.shape(x_train[0])
  # np.asarray returns an existing ndarray as-is, so the training set is not
  # copied on every call (np.array would duplicate it each time).
  differences = np.asarray(x_train) - np.asarray(_x_test)
  # Nearest neighbor using l2 distance: one distance per training image
  distances = np.linalg.norm(differences, axis=(1, 2))
  nn = np.argmin(distances)
  return y_train[nn]

## Predict the classes for the first 1000 test images

In [None]:
# Predict a label for each of the first 1000 test images with the 1-NN rule.
y_hat = [nearest_neighbor(x_test[i]) for i in range(1000)]

# Per-image difference between predicted and true label; subtracting an
# ndarray from the prediction list yields an ndarray.
label_diff = y_hat - y_test[:1000]
# Fraction of images whose predicted label differs from the true one.
misclassification_risk = np.count_nonzero(label_diff) / 1000
# Mean of the squared label differences.
squared_error_risk = sum(label_diff**2) / 1000

print('The empirical risk when using the missclassification loss function =', misclassification_risk)
print('The empirical risk when using the squared error loss function =', squared_error_risk)

The empirical risk when using the missclassification loss function = 0.127
The empirical risk when using the squared error loss function = 2.103


# K Nearest Neighbor Classifier

## K nearest neighbor classification function

In [None]:
def k_nearest_neighbor(_k, _x_test):
  """Predict a label for one image by majority vote of its k nearest neighbors.

  The image is compared against every image in the global ``x_train`` array
  using the norm of the pixel-wise difference over the two image axes (the
  l2 distance between flattened images). The labels (from the global
  ``y_train``) of the ``_k`` closest training images are cast to int and the
  most frequent one is returned.

  Args:
    _k: Number of neighbors that vote; must satisfy 1 <= _k <= len(x_train).
    _x_test: A single image with the same shape as one training image.

  Returns:
    The most common integer label among the ``_k`` nearest training images.
    When several labels are equally common, the smallest label wins
    (``argmax`` returns the first maximum of the label counts).

  Raises:
    AssertionError: If ``_x_test`` does not have the shape of ``x_train[0]``.
    ValueError: If ``_k`` is smaller than 1 or larger than the training set.
  """
  # Check to make sure input is the correct shape (28x28)
  assert np.shape(_x_test) == np.shape(x_train[0])
  # Fail early with a clear message instead of a cryptic error further down
  if _k < 1 or _k > len(x_train):
    raise ValueError('_k must be between 1 and %d, got %r' % (len(x_train), _k))
  # np.asarray returns an existing ndarray as-is, so the training set is not
  # copied on every call (np.array would duplicate it each time).
  distances = np.linalg.norm(np.asarray(x_train) - np.asarray(_x_test), axis=(1, 2))
  # k nearest neighbor using l2 distance: partitioning at positions 0.._k-1
  # puts the indices of the _k smallest distances first.
  nn = np.argpartition(distances, np.arange(_k))[:_k]
  # Majority vote over the neighbor labels
  y_pred = np.bincount(y_train[nn].astype(int)).argmax()
  return y_pred

## Predict the classes for the first 1000 test images

In [None]:
# Number of neighbors that vote on each prediction.
k = 10
# Predict a label for each of the first 1000 test images with the k-NN rule.
y_hat = [k_nearest_neighbor(k, x_test[i]) for i in range(1000)]

# Per-image difference between predicted and true label; subtracting an
# ndarray from the prediction list yields an ndarray.
label_diff = y_hat - y_test[:1000]
# Fraction of images whose predicted label differs from the true one.
misclassification_risk = np.count_nonzero(label_diff) / 1000
# Mean of the squared label differences.
squared_error_risk = sum(label_diff**2) / 1000

print('The empirical risk when using the missclassification loss function =', misclassification_risk)
print('The empirical risk when using the squared error loss function =', squared_error_risk)

The empirical risk when using the missclassification loss function = 0.136
The empirical risk when using the squared error loss function = 2.394
