In [1]:
from collections import Counter
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt

In [2]:
%matplotlib inline

In [3]:
def read_preprocess(filename):
    f = open(filename)
    raw_data = f.readlines()
    
    point_lst = []
    for ele in raw_data:
        point = []
        for value in ele.split():
            point.append(float(value))
        point_lst.append(point)
        
    return np.array(point_lst)

In [4]:
filename = './zip.train'
training_array = read_preprocess(filename)

In [5]:
filename = './zip.test'
testing_array = read_preprocess(filename)

In [6]:
X_train = training_array[:,1:]
Y_train = training_array[:,0]

X_test = testing_array[:,1:]
Y_test = testing_array[:, 0]

In [7]:
print X_train.shape
print Y_train.shape
print X_test.shape
print Y_test.shape

(7291, 256)
(7291,)
(2007, 256)
(2007,)


In [8]:
def dist(X_test , X_train):
    dist_lst = []
    for point in X_test:
        dist_lst.append(np.sum(np.power(point - X_train, 2), axis = 1))
    return np.array(dist_lst)

In [9]:
def knn(k, dist_lst):
    
    prediction = []
    
    index = np.argsort(dist_lst)
    
    knn_points =  index[:, :k]
    
    for row in Y_train[knn_points]:
        prediction.append(Counter(row).most_common(1)[0][0])
    
    return np.array(prediction)

In [10]:
dist_lst = dist(X_test, X_train)

In [11]:
kone_predictions = knn(1, dist_lst)
ktwo_predictions = knn(2, dist_lst)
kthree_predictions = knn(3, dist_lst)

In [13]:
kone_confusion_matrix= confusion_matrix(Y_test, kone_predictions)
np.around(kone_confusion_matrix/np.sum(kone_confusion_matrix, axis = 1, dtype='float'), decimals=3)

array([[ 0.989,  0.   ,  0.01 ,  0.   ,  0.   ,  0.   ,  0.   ,  0.007,
         0.   ,  0.006],
       [ 0.   ,  0.966,  0.   ,  0.   ,  0.03 ,  0.   ,  0.012,  0.007,
         0.   ,  0.   ],
       [ 0.017,  0.004,  0.924,  0.012,  0.005,  0.   ,  0.   ,  0.014,
         0.018,  0.   ],
       [ 0.008,  0.   ,  0.01 ,  0.928,  0.   ,  0.031,  0.   ,  0.   ,
         0.   ,  0.011],
       [ 0.   ,  0.011,  0.005,  0.   ,  0.91 ,  0.006,  0.012,  0.014,
         0.006,  0.045],
       [ 0.006,  0.004,  0.01 ,  0.024,  0.   ,  0.906,  0.012,  0.   ,
         0.018,  0.006],
       [ 0.   ,  0.   ,  0.005,  0.   ,  0.01 ,  0.019,  0.965,  0.   ,
         0.   ,  0.   ],
       [ 0.   ,  0.004,  0.005,  0.006,  0.02 ,  0.   ,  0.   ,  0.946,
         0.   ,  0.006],
       [ 0.014,  0.   ,  0.005,  0.036,  0.005,  0.006,  0.   ,  0.007,
         0.892,  0.017],
       [ 0.   ,  0.   ,  0.005,  0.   ,  0.01 ,  0.   ,  0.   ,  0.027,
         0.006,  0.955]])

In [14]:
ktwo_confusion_matrix= confusion_matrix(Y_test, ktwo_predictions)
np.around(ktwo_confusion_matrix/np.sum(ktwo_confusion_matrix, axis = 1, dtype='float'), decimals= 3)

array([[ 0.989,  0.   ,  0.015,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ,
         0.   ,  0.006],
       [ 0.   ,  0.981,  0.   ,  0.   ,  0.015,  0.   ,  0.006,  0.007,
         0.   ,  0.   ],
       [ 0.028,  0.004,  0.899,  0.006,  0.005,  0.   ,  0.   ,  0.014,
         0.03 ,  0.   ],
       [ 0.008,  0.   ,  0.01 ,  0.928,  0.   ,  0.019,  0.   ,  0.   ,
         0.012,  0.011],
       [ 0.   ,  0.011,  0.02 ,  0.   ,  0.865,  0.006,  0.012,  0.014,
         0.006,  0.079],
       [ 0.011,  0.004,  0.01 ,  0.048,  0.   ,  0.856,  0.   ,  0.   ,
         0.024,  0.023],
       [ 0.011,  0.   ,  0.005,  0.   ,  0.01 ,  0.012,  0.941,  0.   ,
         0.006,  0.   ],
       [ 0.   ,  0.008,  0.005,  0.006,  0.02 ,  0.   ,  0.   ,  0.905,
         0.006,  0.028],
       [ 0.014,  0.   ,  0.015,  0.006,  0.   ,  0.006,  0.   ,  0.007,
         0.922,  0.011],
       [ 0.003,  0.   ,  0.005,  0.   ,  0.01 ,  0.   ,  0.   ,  0.027,
         0.006,  0.949]])

In [15]:
kthree_confusion_matrix= confusion_matrix(Y_test, kthree_predictions)
np.around(kthree_confusion_matrix/np.sum(kthree_confusion_matrix, axis = 1, dtype='float'), decimals= 3)

array([[ 0.989,  0.   ,  0.01 ,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ,
         0.006,  0.006],
       [ 0.   ,  0.977,  0.   ,  0.   ,  0.015,  0.   ,  0.012,  0.007,
         0.   ,  0.   ],
       [ 0.022,  0.   ,  0.924,  0.006,  0.005,  0.   ,  0.   ,  0.014,
         0.018,  0.   ],
       [ 0.008,  0.   ,  0.01 ,  0.922,  0.   ,  0.038,  0.   ,  0.007,
         0.   ,  0.006],
       [ 0.   ,  0.008,  0.   ,  0.   ,  0.91 ,  0.012,  0.012,  0.014,
         0.006,  0.051],
       [ 0.014,  0.   ,  0.015,  0.018,  0.   ,  0.9  ,  0.   ,  0.   ,
         0.006,  0.023],
       [ 0.008,  0.004,  0.005,  0.   ,  0.01 ,  0.   ,  0.959,  0.   ,
         0.   ,  0.   ],
       [ 0.   ,  0.004,  0.005,  0.006,  0.02 ,  0.   ,  0.   ,  0.939,
         0.006,  0.006],
       [ 0.011,  0.   ,  0.005,  0.024,  0.   ,  0.006,  0.   ,  0.007,
         0.916,  0.017],
       [ 0.003,  0.   ,  0.   ,  0.   ,  0.015,  0.   ,  0.   ,  0.027,
         0.006,  0.949]])