# Nearest Neighbor Classifier
------------


## Pen-Based Recognition of Handwritten Digits Data Set 
---

[image01]: data_abstract.PNG

![Data] [image01]

## Import Dependencies

In [60]:
import numpy as np
from collections import Counter 

## Load Data

In [59]:
def load_data(file):
    return np.loadtxt(file)

## Build the KNN Class

In [104]:
class KNN(object):
    
    def __init__(self, train_data, test_data, k):
        self.train_data = train_data[:, :-1]
        self.train_label = train_data[:, -1]
        self.test_data = test_data[:, :-1]
        self.test_label = test_data[:, -1]
        self.k = k
        self.correct = 0
        
    def normalize(self):
        mean = np.mean(self.train_data, axis=0)
        std_dev = np.std(self.train_data, axis=0)
        
        for i in range(0, self.train_data.shape[1]):
            self.train_data[:, i] = self.train_data[:, i] - mean[i]
            self.train_data[:, i] = self.train_data[:, i] / std_dev[i]
            self.test_data[:, i] = self.test_data[:, i] - mean[i]
            self.test_data[:, i] = self.test_data[:, i] / std_dev[i]
            
    def classify(self):
        for i in range(0, len(self.test_data)):
        
            distance = calculate_distance(self.test_data[i, :], self.train_data)
            distance = np.concatenate([[distance, self.train_label]])
            distance = distance.T
            distance = np.array(sorted(distance, key=lambda x:x[0]))
            self.show_results(distance, i)
            
    def show_results(self, distance, row_number):
        if self.k == 1:
            k_neighbors = distance[self.k, :]
            predicted = k_neighbors[1]
            true = self.test_label[row_number]
        
        elif self.k > 1:
            k_neighbors = distance[0: self.k, :]
            counts = Counter(k_neighbors[:, 1])
            predicted = counts.most_common(1)[0][0]
            true = self.test_label[row_number]
            
        if true == predicted:
            self.correct += 1
            print("correct predictions = ", self.correct)
            
    def show_acc(self):
        print("accuracy when k", self.k, "is ", self.correct/self.test_label.shape[0])
        
        

## Calculate Euclidean Distance

In [94]:
def calculate_distance(x, y):
    distance = x - y
    distance = np.square(distance)
    distance = np.sum(distance, axis = 1)
    distance = np.sqrt(distance)
    return distance


## Main Function calls

In [95]:
k_value = 1
train_file = "pendigits_training.txt"
test_file = "pendigits_test.txt"
train = load_data(train_file)
test = load_data(test_file)

## Print Values of Loaded Data

In [96]:
print(train[:5, :])

[[  47.  100.   27.   81.   57.   37.   26.    0.    0.   23.   56.   53.
   100.   90.   40.   98.    8.]
 [   0.   89.   27.  100.   42.   75.   29.   45.   15.   15.   37.    0.
    69.    2.  100.    6.    2.]
 [   0.   57.   31.   68.   72.   90.  100.  100.   76.   75.   50.   51.
    28.   25.   16.    0.    1.]
 [   0.  100.    7.   92.    5.   68.   19.   45.   86.   34.  100.   45.
    74.   23.   67.    0.    4.]
 [   0.   67.   49.   83.  100.  100.   81.   80.   60.   60.   40.   40.
    33.   20.   47.    0.    1.]]


In [105]:
knn = KNN(train, test, k_value)
knn.normalize()

In [106]:
knn.classify()

correct predictions =  1
correct predictions =  2
correct predictions =  3
correct predictions =  4
correct predictions =  5
correct predictions =  6
correct predictions =  7
correct predictions =  8
correct predictions =  9
correct predictions =  10
correct predictions =  11
correct predictions =  12
correct predictions =  13
correct predictions =  14
correct predictions =  15
correct predictions =  16
correct predictions =  17
correct predictions =  18
correct predictions =  19
correct predictions =  20
correct predictions =  21
correct predictions =  22
correct predictions =  23
correct predictions =  24
correct predictions =  25
correct predictions =  26
correct predictions =  27
correct predictions =  28
correct predictions =  29
correct predictions =  30
correct predictions =  31
correct predictions =  32
correct predictions =  33
correct predictions =  34
correct predictions =  35
correct predictions =  36
correct predictions =  37
correct predictions =  38
correct predictions =

correct predictions =  312
correct predictions =  313
correct predictions =  314
correct predictions =  315
correct predictions =  316
correct predictions =  317
correct predictions =  318
correct predictions =  319
correct predictions =  320
correct predictions =  321
correct predictions =  322
correct predictions =  323
correct predictions =  324
correct predictions =  325
correct predictions =  326
correct predictions =  327
correct predictions =  328
correct predictions =  329
correct predictions =  330
correct predictions =  331
correct predictions =  332
correct predictions =  333
correct predictions =  334
correct predictions =  335
correct predictions =  336
correct predictions =  337
correct predictions =  338
correct predictions =  339
correct predictions =  340
correct predictions =  341
correct predictions =  342
correct predictions =  343
correct predictions =  344
correct predictions =  345
correct predictions =  346
correct predictions =  347
correct predictions =  348
c

correct predictions =  618
correct predictions =  619
correct predictions =  620
correct predictions =  621
correct predictions =  622
correct predictions =  623
correct predictions =  624
correct predictions =  625
correct predictions =  626
correct predictions =  627
correct predictions =  628
correct predictions =  629
correct predictions =  630
correct predictions =  631
correct predictions =  632
correct predictions =  633
correct predictions =  634
correct predictions =  635
correct predictions =  636
correct predictions =  637
correct predictions =  638
correct predictions =  639
correct predictions =  640
correct predictions =  641
correct predictions =  642
correct predictions =  643
correct predictions =  644
correct predictions =  645
correct predictions =  646
correct predictions =  647
correct predictions =  648
correct predictions =  649
correct predictions =  650
correct predictions =  651
correct predictions =  652
correct predictions =  653
correct predictions =  654
c

correct predictions =  925
correct predictions =  926
correct predictions =  927
correct predictions =  928
correct predictions =  929
correct predictions =  930
correct predictions =  931
correct predictions =  932
correct predictions =  933
correct predictions =  934
correct predictions =  935
correct predictions =  936
correct predictions =  937
correct predictions =  938
correct predictions =  939
correct predictions =  940
correct predictions =  941
correct predictions =  942
correct predictions =  943
correct predictions =  944
correct predictions =  945
correct predictions =  946
correct predictions =  947
correct predictions =  948
correct predictions =  949
correct predictions =  950
correct predictions =  951
correct predictions =  952
correct predictions =  953
correct predictions =  954
correct predictions =  955
correct predictions =  956
correct predictions =  957
correct predictions =  958
correct predictions =  959
correct predictions =  960
correct predictions =  961
c

correct predictions =  1221
correct predictions =  1222
correct predictions =  1223
correct predictions =  1224
correct predictions =  1225
correct predictions =  1226
correct predictions =  1227
correct predictions =  1228
correct predictions =  1229
correct predictions =  1230
correct predictions =  1231
correct predictions =  1232
correct predictions =  1233
correct predictions =  1234
correct predictions =  1235
correct predictions =  1236
correct predictions =  1237
correct predictions =  1238
correct predictions =  1239
correct predictions =  1240
correct predictions =  1241
correct predictions =  1242
correct predictions =  1243
correct predictions =  1244
correct predictions =  1245
correct predictions =  1246
correct predictions =  1247
correct predictions =  1248
correct predictions =  1249
correct predictions =  1250
correct predictions =  1251
correct predictions =  1252
correct predictions =  1253
correct predictions =  1254
correct predictions =  1255
correct predictions 

correct predictions =  1520
correct predictions =  1521
correct predictions =  1522
correct predictions =  1523
correct predictions =  1524
correct predictions =  1525
correct predictions =  1526
correct predictions =  1527
correct predictions =  1528
correct predictions =  1529
correct predictions =  1530
correct predictions =  1531
correct predictions =  1532
correct predictions =  1533
correct predictions =  1534
correct predictions =  1535
correct predictions =  1536
correct predictions =  1537
correct predictions =  1538
correct predictions =  1539
correct predictions =  1540
correct predictions =  1541
correct predictions =  1542
correct predictions =  1543
correct predictions =  1544
correct predictions =  1545
correct predictions =  1546
correct predictions =  1547
correct predictions =  1548
correct predictions =  1549
correct predictions =  1550
correct predictions =  1551
correct predictions =  1552
correct predictions =  1553
correct predictions =  1554
correct predictions 

correct predictions =  1816
correct predictions =  1817
correct predictions =  1818
correct predictions =  1819
correct predictions =  1820
correct predictions =  1821
correct predictions =  1822
correct predictions =  1823
correct predictions =  1824
correct predictions =  1825
correct predictions =  1826
correct predictions =  1827
correct predictions =  1828
correct predictions =  1829
correct predictions =  1830
correct predictions =  1831
correct predictions =  1832
correct predictions =  1833
correct predictions =  1834
correct predictions =  1835
correct predictions =  1836
correct predictions =  1837
correct predictions =  1838
correct predictions =  1839
correct predictions =  1840
correct predictions =  1841
correct predictions =  1842
correct predictions =  1843
correct predictions =  1844
correct predictions =  1845
correct predictions =  1846
correct predictions =  1847
correct predictions =  1848
correct predictions =  1849
correct predictions =  1850
correct predictions 

correct predictions =  2110
correct predictions =  2111
correct predictions =  2112
correct predictions =  2113
correct predictions =  2114
correct predictions =  2115
correct predictions =  2116
correct predictions =  2117
correct predictions =  2118
correct predictions =  2119
correct predictions =  2120
correct predictions =  2121
correct predictions =  2122
correct predictions =  2123
correct predictions =  2124
correct predictions =  2125
correct predictions =  2126
correct predictions =  2127
correct predictions =  2128
correct predictions =  2129
correct predictions =  2130
correct predictions =  2131
correct predictions =  2132
correct predictions =  2133
correct predictions =  2134
correct predictions =  2135
correct predictions =  2136
correct predictions =  2137
correct predictions =  2138
correct predictions =  2139
correct predictions =  2140
correct predictions =  2141
correct predictions =  2142
correct predictions =  2143
correct predictions =  2144
correct predictions 

correct predictions =  2405
correct predictions =  2406
correct predictions =  2407
correct predictions =  2408
correct predictions =  2409
correct predictions =  2410
correct predictions =  2411
correct predictions =  2412
correct predictions =  2413
correct predictions =  2414
correct predictions =  2415
correct predictions =  2416
correct predictions =  2417
correct predictions =  2418
correct predictions =  2419
correct predictions =  2420
correct predictions =  2421
correct predictions =  2422
correct predictions =  2423
correct predictions =  2424
correct predictions =  2425
correct predictions =  2426
correct predictions =  2427
correct predictions =  2428
correct predictions =  2429
correct predictions =  2430
correct predictions =  2431
correct predictions =  2432
correct predictions =  2433
correct predictions =  2434
correct predictions =  2435
correct predictions =  2436
correct predictions =  2437
correct predictions =  2438
correct predictions =  2439
correct predictions 

correct predictions =  2703
correct predictions =  2704
correct predictions =  2705
correct predictions =  2706
correct predictions =  2707
correct predictions =  2708
correct predictions =  2709
correct predictions =  2710
correct predictions =  2711
correct predictions =  2712
correct predictions =  2713
correct predictions =  2714
correct predictions =  2715
correct predictions =  2716
correct predictions =  2717
correct predictions =  2718
correct predictions =  2719
correct predictions =  2720
correct predictions =  2721
correct predictions =  2722
correct predictions =  2723
correct predictions =  2724
correct predictions =  2725
correct predictions =  2726
correct predictions =  2727
correct predictions =  2728
correct predictions =  2729
correct predictions =  2730
correct predictions =  2731
correct predictions =  2732
correct predictions =  2733
correct predictions =  2734
correct predictions =  2735
correct predictions =  2736
correct predictions =  2737
correct predictions 

correct predictions =  3005
correct predictions =  3006
correct predictions =  3007
correct predictions =  3008
correct predictions =  3009
correct predictions =  3010
correct predictions =  3011
correct predictions =  3012
correct predictions =  3013
correct predictions =  3014
correct predictions =  3015
correct predictions =  3016
correct predictions =  3017
correct predictions =  3018
correct predictions =  3019
correct predictions =  3020
correct predictions =  3021
correct predictions =  3022
correct predictions =  3023
correct predictions =  3024
correct predictions =  3025
correct predictions =  3026
correct predictions =  3027
correct predictions =  3028
correct predictions =  3029
correct predictions =  3030
correct predictions =  3031
correct predictions =  3032
correct predictions =  3033
correct predictions =  3034
correct predictions =  3035
correct predictions =  3036
correct predictions =  3037
correct predictions =  3038
correct predictions =  3039
correct predictions 

correct predictions =  3299
correct predictions =  3300
correct predictions =  3301
correct predictions =  3302
correct predictions =  3303
correct predictions =  3304
correct predictions =  3305
correct predictions =  3306
correct predictions =  3307
correct predictions =  3308
correct predictions =  3309
correct predictions =  3310
correct predictions =  3311
correct predictions =  3312
correct predictions =  3313
correct predictions =  3314
correct predictions =  3315
correct predictions =  3316
correct predictions =  3317
correct predictions =  3318
correct predictions =  3319
correct predictions =  3320
correct predictions =  3321
correct predictions =  3322
correct predictions =  3323
correct predictions =  3324
correct predictions =  3325
correct predictions =  3326
correct predictions =  3327
correct predictions =  3328
correct predictions =  3329
correct predictions =  3330
correct predictions =  3331
correct predictions =  3332
correct predictions =  3333
correct predictions 

In [107]:
knn.show_acc()

accuracy when k 1 is  0.9745568896512292


## Data - Pendigits.txt  16 Dimensional Data 
-----------------

[image01]: data.PNG

![Data] [image01]
