In [1]:
!pip install tqdm numpy

You should consider upgrading via the 'pip install --upgrade pip' command.


In [2]:
# List the working directory to check which data files are available.
!ls

Untitled1.ipynb      lost+found      mnist_train.csv
cifar-10-batches-py  mnist_test.csv


In [14]:
import numpy as np
from tqdm import tqdm
from collections import Counter

class KNNClassifier:
    """Brute-force k-nearest-neighbours classifier using Euclidean distance.

    ``fit`` only stores the training data. ``predict`` compares each query
    sample against every stored sample and takes a majority vote among the
    ``k`` closest ones.

    Parameters
    ----------
    k : int, default 1
        Number of neighbours that take part in the vote. Must be a positive
        integer.

    Raises
    ------
    ValueError
        If ``k`` is not a positive integer.
    """

    def __init__(self, k=1):
        # Reject non-integers up front: a float such as 1.5 satisfies `k > 0`
        # but would only fail later, when used as a slice bound in predict().
        if isinstance(k, (int, np.integer)) and k > 0:
            self.k = int(k)
        else:
            raise ValueError('Please provide a valid value for k')

    @staticmethod
    def _flat_labels(y):
        """Return labels as a 1-D array; for column-shaped input use column 0."""
        y = np.asarray(y)
        return y[:, 0] if y.ndim > 1 else y

    def fit(self, X, y):
        """Store the training samples and their labels.

        Parameters
        ----------
        X : array-like, shape (n_samples, ...)
            Training samples, one per entry along the first axis.
        y : array-like, shape (n_samples,) or (n_samples, 1)
            Label of each training sample.

        Raises
        ------
        AssertionError
            If ``X`` and ``y`` do not contain the same number of entries.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        assert X.shape[0] == y.shape[0], "Equal number of samples and labels expected"

        self.X = X
        self.y = y

    def predict(self, X):
        """Predict a label for every sample in ``X``.

        Parameters
        ----------
        X : iterable of array-like
            Query samples. A single sample must be wrapped in a list.

        Returns
        -------
        list
            One predicted label per query sample, in input order. When the
            vote is tied, the label of the closest tied neighbour wins.

        Raises
        ------
        ValueError
            If the classifier holds no training samples.
        """
        train = np.asarray(self.X)
        if train.shape[0] == 0:
            raise ValueError('Cannot predict without training samples')
        # One flat feature row per training sample, so a single vectorised
        # norm along axis 1 replaces a Python-level loop over all rows.
        train = train.reshape(train.shape[0], -1)
        train_labels = self._flat_labels(self.y)

        labels = []

        for x in tqdm(X):
            # A float query makes the subtraction promote to float64, so
            # unsigned pixel data (e.g. uint8) cannot wrap around.
            query = np.asarray(x, dtype=float).ravel()

            # Allocates an (n_train, n_features) float temporary per query.
            dist = np.linalg.norm(train - query, axis=1)

            # A stable sort keeps equidistant neighbours in training order,
            # which makes tie handling deterministic.
            k_samples = np.argsort(dist, kind='stable')[:self.k]

            # Neighbours are counted nearest-first, and most_common() returns
            # the first label to reach the top count.
            label = Counter(train_labels[i] for i in k_samples).most_common(1)

            labels.append(label[0][0])

        return labels

    def accuracy(self, X, y):
        """Predict ``X``, print the accuracy and return it as a percentage.

        Parameters
        ----------
        X : array-like, shape (n_samples, ...)
            Samples to classify.
        y : array-like, shape (n_samples,) or (n_samples, 1)
            True label of each sample.

        Returns
        -------
        float
            Percentage (0-100) of samples whose prediction equals the label.

        Raises
        ------
        AssertionError
            If ``X`` and ``y`` do not contain the same number of entries.
        ValueError
            If ``X`` is empty, since accuracy is undefined for zero samples.
        """
        X = np.asarray(X)
        truth = self._flat_labels(y)
        assert X.shape[0] == truth.shape[0], "Equal number of samples and labels expected"

        labels = self.predict(X)

        total = len(labels)
        if total == 0:
            raise ValueError('Cannot compute accuracy on an empty dataset')

        # Element-wise comparison against the flattened labels works for both
        # 1-D and column-shaped ``y``.
        correct = int(np.count_nonzero(np.asarray(labels) == truth))
        score = correct / total * 100

        print('\n\nAccuracy {}%'.format(score))

        return score

In [4]:
import pandas as pd
import numpy as np

In [5]:
# Load the MNIST training split from the CSV in the working directory and preview its first rows.
train = pd.read_csv('mnist_train.csv')
train.head()

Unnamed: 0,label,1x1,1x2,1x3,1x4,1x5,1x6,1x7,1x8,1x9,...,28x19,28x20,28x21,28x22,28x23,28x24,28x25,28x26,28x27,28x28
0,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [6]:
# Load the MNIST test split from the CSV in the working directory and preview its first rows.
test = pd.read_csv('mnist_test.csv')
test.head()

Unnamed: 0,label,1x1,1x2,1x3,1x4,1x5,1x6,1x7,1x8,1x9,...,28x19,28x20,28x21,28x22,28x23,28x24,28x25,28x26,28x27,28x28
0,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [7]:
# Split the training frame into a feature matrix and a column vector of labels.
# Row counts are taken from the data instead of being hard-coded, so the cell
# also works on a subsampled or differently sized CSV.
X_train = train.drop(columns=['label']).to_numpy()
y_train = train['label'].to_numpy().reshape(-1, 1)

# The pixel columns run from 1x1 to 28x28, i.e. one feature per pixel.
assert X_train.shape[1] == 28 * 28, "expected 784 pixel features per sample"
assert X_train.shape[0] == y_train.shape[0], "one label per training sample expected"

In [18]:
# The brute-force classifier needs over a second per query, so only a small
# prefix of the test split is evaluated. Raise N_TEST_SAMPLES for a fuller run.
N_TEST_SAMPLES = 100

X_test = test.drop(columns=['label']).to_numpy()[:N_TEST_SAMPLES]
y_test = test['label'].to_numpy()[:N_TEST_SAMPLES].reshape(-1, 1)

# The pixel columns run from 1x1 to 28x28, i.e. one feature per pixel.
assert X_test.shape[1] == 28 * 28, "expected 784 pixel features per sample"
assert X_test.shape[0] == y_test.shape[0], "one label per test sample expected"

In [10]:
# Print the shapes of the feature and label arrays so their row counts can be compared.
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(60000, 784) (60000, 1)
(10000, 784) (10000, 1)


In [15]:
# Build a classifier that votes among the 3 nearest training samples.
clf = KNNClassifier(k=3)

In [16]:
# fit() only records the training set on the classifier for later lookups.
clf.fit(X_train, y_train)

In [19]:
# Predict every test sample, print the accuracy percentage and return it as the cell output.
clf.accuracy(X_test, y_test)


  0%|          | 0/100 [00:00<?, ?it/s][A
  1%|          | 1/100 [00:01<02:48,  1.70s/it][A
  2%|▏         | 2/100 [00:03<02:46,  1.70s/it][A
  3%|▎         | 3/100 [00:05<02:44,  1.70s/it][A
  4%|▍         | 4/100 [00:06<02:43,  1.70s/it][A
  5%|▌         | 5/100 [00:08<02:42,  1.71s/it][A
  6%|▌         | 6/100 [00:10<02:40,  1.71s/it][A
  7%|▋         | 7/100 [00:11<02:39,  1.71s/it][A
  8%|▊         | 8/100 [00:13<02:36,  1.70s/it][A
  9%|▉         | 9/100 [00:15<02:34,  1.70s/it][A
 10%|█         | 10/100 [00:17<02:32,  1.69s/it][A
 11%|█         | 11/100 [00:18<02:33,  1.72s/it][A
 12%|█▏        | 12/100 [00:20<02:30,  1.72s/it][A
 13%|█▎        | 13/100 [00:22<02:29,  1.71s/it][A
 14%|█▍        | 14/100 [00:23<02:28,  1.73s/it][A
 15%|█▌        | 15/100 [00:25<02:28,  1.74s/it][A
 16%|█▌        | 16/100 [00:27<02:26,  1.74s/it][A
 17%|█▋        | 17/100 [00:29<02:23,  1.73s/it][A
 18%|█▊        | 18/100 [00:30<02:20,  1.71s/it][A
 19%|█▉        | 19/100 [00:3



Accuracy 99.0%





99.0

In [21]:
# predict() iterates over its argument, so a single sample is wrapped in a list.
clf.predict([X_test[50]])


  0%|          | 0/1 [00:00<?, ?it/s][A
100%|██████████| 1/1 [00:01<00:00,  1.73s/it][A


[6]

In [22]:
# True label of the same test sample, for comparison with the prediction above.
y_test[50]

array([6])