In [1]:
import numpy as np
import time

In [2]:
# Load the data set. The class label in column 6 is coded by its position
# in `labels`: 0 = 'NO', 1 = 'DH', 2 = 'SL'.
labels = ['NO', 'DH', 'SL']


def _label_to_code(token):
    """Map a raw label token from the file to its index in `labels`.

    np.loadtxt hands converters bytes when its `encoding` is 'bytes' (the
    default before NumPy 2.0) and str otherwise, so both are accepted here.
    Raises ValueError if the token is not one of `labels`.
    """
    if isinstance(token, bytes):
        # loadtxt encodes the field as latin1 before calling the converter.
        token = token.decode('latin1')
    return labels.index(token)


data = np.loadtxt('spine-data.txt', converters={6: _label_to_code})

In [3]:
# The first 250 rows are used for training, the remaining rows for testing.
# Columns 0-5 are the features; column 6 is the class code, cast to int.
train_data, train_labels = data[:250, :6], data[:250, 6].astype(int)
test_data, test_labels = data[250:, :6], data[250:, 6].astype(int)

In [4]:
# L2 Distance
def L2_distance(a, b):
    """Return the Euclidean (L2) distance between `a` and `b`.

    Computed as the square root of the sum of squared element-wise
    differences.
    """
    delta = a - b
    sum_of_squares = np.sum(delta ** 2)
    return np.sqrt(sum_of_squares)

# L1 Distance
def L1_distance(a, b):
    """Return the Manhattan (L1) distance between `a` and `b`.

    Computed as the sum of absolute element-wise differences.
    """
    gaps = np.abs(a - b)
    return np.sum(gaps)

In [5]:
# L1 Nearest Neighbor Classifier
def L1_find_NN(x):
    """Return the row index in `train_data` of the point closest to `x` in L1 distance.

    Every training row (one per entry of `train_labels`) is compared with
    `x`; np.argmin returns the first index on ties.
    """
    dists = []
    for row in range(len(train_labels)):
        dists.append(L1_distance(x, train_data[row]))
    return np.argmin(dists)

def L1_NN_classifier(x):
    """Classify `x` with the label of its L1 nearest neighbour in the training set."""
    return train_labels[L1_find_NN(x)]

In [6]:
# L2 Nearest Neighbor Classifier
def L2_find_NN(x):
    """Return the row index in `train_data` of the point closest to `x` in L2 distance.

    Every training row (one per entry of `train_labels`) is compared with
    `x`; np.argmin returns the first index on ties.
    """
    dists = []
    for row in range(len(train_labels)):
        dists.append(L2_distance(x, train_data[row]))
    return np.argmin(dists)

def L2_NN_classifier(x):
    """Classify `x` with the label of its L2 nearest neighbour in the training set."""
    return train_labels[L2_find_NN(x)]

In [7]:
## Spot check: L1 prediction vs. true label for the first test point.
print("A success case:")
l1_guess = L1_NN_classifier(test_data[0])
print("L1 NN classification: ", l1_guess)
print("True label: ", test_labels[0])


A success case:
L1 NN classification:  2
True label:  2


In [8]:
## Spot check: L2 prediction vs. true label for one test point.
# Use a single index for both lookups. Previously the prediction was made
# for test point 40 but compared against the label of test point 0.
# NOTE(review): the "success" heading only holds if the two printed values
# agree for this point -- re-run the cell to confirm.
sample_idx = 40
print("A success case:")
print("L2 NN classification: ", L2_NN_classifier(test_data[sample_idx,]))
print("True label: ", test_labels[sample_idx])


A success case:
L2 NN classification:  1
True label:  2


In [9]:
## Predict on each test data point (and time it!)
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# short intervals; time.time() is wall-clock time and can be coarse or jump.
t_before = time.perf_counter()
l1_test_predictions = [L1_NN_classifier(point) for point in test_data]
t_after = time.perf_counter()

## Compute the error: the fraction of test points whose prediction differs
## from the true label.
err_positions = np.not_equal(l1_test_predictions, test_labels)
error = float(np.sum(err_positions))/len(test_labels)

print("Error of l1 nearest neighbor classifier: ", error)
print("Classification time (seconds): ", t_after - t_before)

Error of l1 nearest neighbor classifier:  0.21666666666666667
Classification time (seconds):  0.02718210220336914


In [10]:
## Predict on each test data point (and time it!)
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# short intervals; time.time() is wall-clock time and can be coarse or jump.
t_before = time.perf_counter()
l2_test_predictions = [L2_NN_classifier(point) for point in test_data]
t_after = time.perf_counter()

## Compute the error: the fraction of test points whose prediction differs
## from the true label.
err_positions = np.not_equal(l2_test_predictions, test_labels)
error = float(np.sum(err_positions))/len(test_labels)

print("Error of l2 nearest neighbor classifier: ", error)
print("Classification time (seconds): ", t_after - t_before)

Error of l2 nearest neighbor classifier:  0.23333333333333334
Classification time (seconds):  0.03246593475341797


In [11]:
# 3x3 count table: the row is the true label, the column is the L1 prediction.
l1_confusion_matrix = np.zeros((3,3), dtype=int)
# Unbuffered in-place add, so repeated (true, predicted) pairs each count once.
np.add.at(l1_confusion_matrix, (test_labels, l1_test_predictions), 1)

print("Confusion Matrix for L1:")
print(l1_confusion_matrix)

Confusion Matrix for L1:
[[14  0  2]
 [ 9  9  0]
 [ 1  1 24]]


In [12]:
# 3x3 count table: the row is the true label, the column is the L2 prediction.
l2_confusion_matrix = np.zeros((3,3), dtype=int)
# Unbuffered in-place add, so repeated (true, predicted) pairs each count once.
np.add.at(l2_confusion_matrix, (test_labels, l2_test_predictions), 1)

print("Confusion Matrix for L2:")
print(l2_confusion_matrix)

Confusion Matrix for L2:
[[12  1  3]
 [ 9  9  0]
 [ 1  0 25]]
