In [20]:
import numpy as np
import scipy.io as spio
import scipy.stats as stats
import math

def EuclideanDistance(x,y):
    """Return the Euclidean (L2) distance between ``x`` and ``y``.

    Args:
        x: Array-like of numbers.
        y: Array-like of numbers, broadcastable against ``x``.

    Returns:
        The L2 norm of ``y - x`` as a NumPy float.
    """
    # Cast to float64 before subtracting: with unsigned or narrow integer
    # arrays (e.g. uint8 pixel rows) ``y - x`` wraps around instead of going
    # negative, which silently produces a wrong distance.
    diff = np.asarray(y, dtype=np.float64) - np.asarray(x, dtype=np.float64)
    return np.linalg.norm(diff)

def L1Distance(x,y):
    """Return the L1 (Manhattan) distance between ``x`` and ``y``.

    Args:
        x: Array-like of numbers.
        y: Array-like of numbers, broadcastable against ``x``.

    Returns:
        The sum of the absolute element-wise differences, as a NumPy float.
    """
    # Cast to float64 before subtracting: with unsigned or narrow integer
    # arrays (e.g. uint8 pixel rows) ``y - x`` wraps around instead of going
    # negative, and ``abs`` cannot undo that.
    diff = np.asarray(y, dtype=np.float64) - np.asarray(x, dtype=np.float64)
    return np.sum(np.abs(diff))

def LinfDistance(x,y):
    """Return the L-infinity (Chebyshev) distance between ``x`` and ``y``.

    Args:
        x: Array-like of numbers.
        y: Array-like of numbers, broadcastable against ``x``.

    Returns:
        The largest absolute element-wise difference, as a NumPy float.

    Raises:
        ValueError: If the inputs are empty (there is no maximum to take).
    """
    # Cast to float64 before subtracting: with unsigned or narrow integer
    # arrays (e.g. uint8 pixel rows) ``y - x`` wraps around instead of going
    # negative, which would make the wrapped value look like the maximum.
    diff = np.asarray(y, dtype=np.float64) - np.asarray(x, dtype=np.float64)
    # np.max reduces over every element, so scalar and multi-dimensional
    # inputs work too (the builtin max only handles 1-D sequences).
    return np.max(np.abs(diff))

class kNNModel:
    """k-nearest-neighbour classifier with a pluggable distance function.

    The neighbourhood size ``k`` is not chosen by the caller: ``train_model``
    sets it to ``log(n)`` rounded to the nearest integer (and never below 1),
    where ``n`` is the number of training samples.
    """

    def __init__(self, dist_function):
        """Store the distance function.

        Args:
            dist_function: Callable ``f(x, e)`` returning the distance between
                a query vector ``x`` and a stored training vector ``e``.
        """
        self.dist_function = dist_function

    # computes the distance from x to each element in lst, which is a list of vectors
    def __dist(self, x, lst, lst_labels):
        """Return a list of ``(vector, label, distance_to_x)`` tuples."""
        dists = [self.dist_function(x, e) for e in lst]
        return list(zip(lst, lst_labels, dists))

    def train_model(self,training_data,labels):
        """Memorise the training set and derive ``k`` from its size.

        Args:
            training_data: Sequence of sample vectors (NumPy array or nested
                list); it is stored as a nested Python list.
            labels: Sequence of labels, one per training sample.

        Raises:
            ValueError: If ``training_data`` is empty or ``labels`` has a
                different length.
        """
        # np.asarray lets plain lists through as well as arrays.
        samples = np.asarray(training_data).tolist()
        if not samples:
            raise ValueError("training_data must contain at least one sample")
        if len(labels) != len(samples):
            # zip() in __dist would otherwise silently drop the extras.
            raise ValueError("labels must have one entry per training sample")
        # k will be log(# of data points) rounded to the nearest integer;
        # clamp to 1 because log(1) rounds to 0, which would leave predict()
        # with no neighbours to vote.
        self.k = max(1, int(np.log(len(samples)) + 0.5))
        self.data = samples
        self.labels = labels

    def predict(self,x):
        """Return the majority label among the ``k`` nearest training samples.

        Ties between labels are resolved in favour of the label that is
        encountered first when walking the neighbours from nearest to
        farthest.
        """
        neighbours = self.__dist(x, self.data, self.labels)
        # list.sort is stable, so equidistant samples keep training order
        neighbours.sort(key=lambda entry: entry[2])
        votes = {}
        for _, label, _ in neighbours[:self.k]:
            votes[label] = votes.get(label, 0) + 1
        return max(votes, key=votes.get)

    def testing_error(self,test_data,labels):
        """Return the fraction of ``test_data`` samples that are misclassified.

        Args:
            test_data: Sequence of sample vectors to classify.
            labels: Expected label for each entry of ``test_data``.

        Raises:
            ValueError: If ``test_data`` is empty.
        """
        total = len(test_data)
        if total == 0:
            raise ValueError("test_data must contain at least one sample")
        misses = 0
        for i, sample in enumerate(test_data):
            if self.predict(sample) != labels[i]:
                misses += 1
        return float(misses) / total
    
# Load the dataset; 'X' is used below as the sample matrix and 'Y' as the labels.
mat = spio.loadmat('hw1data.mat', squeeze_me=True)
image_matrix = mat['X']
label_array = mat['Y']

# Every model trains on the first 1000 rows and is scored on rows 1001-1100.
# NOTE(review): row 1000 falls in neither split -- confirm that is intended.
train_rows, train_labels = image_matrix[:1000, :], label_array[:1000]
test_rows, test_labels = image_matrix[1001:1101, :], label_array[1001:1101]

# Euclidean (L2) metric
kNN_model_L2 = kNNModel(EuclideanDistance)
kNN_model_L2.train_model(train_rows, train_labels)
L2_test_error = kNN_model_L2.testing_error(test_rows, test_labels)
print("L2 Test error: " + str(L2_test_error))

# Manhattan (L1) metric
kNN_model_L1 = kNNModel(L1Distance)
kNN_model_L1.train_model(train_rows, train_labels)
L1_test_error = kNN_model_L1.testing_error(test_rows, test_labels)
print("L1 Test error: " + str(L1_test_error))

# Chebyshev (L-infinity) metric
kNN_model_Linf = kNNModel(LinfDistance)
kNN_model_Linf.train_model(train_rows, train_labels)
Linf_test_error = kNN_model_Linf.testing_error(test_rows, test_labels)
print("Linf Test error: " + str(Linf_test_error))

L2 Test error: 0.11
L1 Test error: 0.14
Linf Test error: 0.47
