# KNN Digits
Implementing a simple k-nearest-neighbors (KNN) classifier to recognize the number shown in an image.

In [1]:
import cv2 as cv
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

#### Data Prep

In [2]:
# Read the label table; later cells use its `numbers` and `file_path` columns.
LABELS_CSV = 'numbers.csv'
df = pd.read_csv(LABELS_CSV)

# Peek at the last few rows.
df.tail(3)

Unnamed: 0,numbers,file_path
1401,11,media/kills_only/loop_01401.jpg
1402,19,media/kills_only/loop_01402.jpg
1403,19,media/kills_only/loop_01403.jpg


Remove rows whose label is a 'null' placeholder (`'n'`, `'e'`, or `'b'`). This improves accuracy by a few percentage points.

In [3]:
# Labels 'n', 'e' and 'b' are the placeholder ("null") values to discard.
null_labels = ['n', 'e', 'b']
df = df.loc[~df.numbers.isin(null_labels)]

# Renumber rows from 0; the previous index is kept as a new `index` column.
df = df.reset_index()

df.tail(3)

Unnamed: 0,index,numbers,file_path
1197,1401,11,media/kills_only/loop_01401.jpg
1198,1402,19,media/kills_only/loop_01402.jpg
1199,1403,19,media/kills_only/loop_01403.jpg


Make a list of image arrays, calling `.flatten()` on each one so that it is 1D.

In [4]:
def _load_flat_image(fp):
    """Read the image at `fp` with OpenCV and return its pixels as a 1D array.

    Raises:
        OSError: if OpenCV could not read the file. `cv.imread` signals
            failure by returning None instead of raising, which would
            otherwise surface as an opaque AttributeError on `.flatten()`.
    """
    img = cv.imread(fp)
    if img is None:
        raise OSError(f'cv.imread could not read image: {fp!r}')
    return img.flatten()


# One flattened pixel vector per row of `df`, with the matching label.
X = [_load_flat_image(fp) for fp in df.file_path.values]
y = df.numbers.values

# Fix the seed so the split -- and every score computed from it -- is the
# same on each Restart & Run All.
SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.8, random_state=SEED
)

#### Create & Train Model
Then output an array of predictions (just to see what they look like).

In [5]:
# Number of nearest training samples consulted for each prediction.
N_NEIGHBORS = 3
knn = KNeighborsClassifier(n_neighbors=N_NEIGHBORS)

In [6]:
# Fit on the training split; the value returned by `fit` is shown as the cell output.
knn.fit(X=X_train, y=y_train)

KNeighborsClassifier(n_neighbors=3)

In [7]:
# Display the predicted label for every held-out sample.
knn.predict(X=X_test)

array(['6', '5', '11', '6', '3', '5', '1', '2', '11', '10', '14', '12',
       '11', '6', '11', '11', '12', '4', '8', '12', '1', '1', '11', '5',
       '9', '6', '12', '6', '12', '10', '10', '1', '7', '4', '2', '11',
       '9', '9', '17', '10', '9', '8', '1', '10', '14', '8', '13', '6',
       '11', '7', '13', '2', '0', '9', '10', '10', '0', '1', '12', '8',
       '13', '6', '10', '5', '10', '7', '11', '5', '10', '9', '8', '5',
       '6', '6', '10', '9', '6', '6', '0', '12', '11', '11', '11', '10',
       '6', '10', '9', '4', '6', '0', '11', '10', '10', '5', '6', '10',
       '5', '6', '1', '1', '11', '6', '10', '5', '1', '0', '10', '1', '6',
       '12', '1', '11', '4', '8', '10', '6', '9', '0', '9', '13', '2',
       '1', '14', '10', '12', '0', '12', '5', '5', '6', '10', '1', '10',
       '9', '3', '3', '11', '0', '3', '1', '9', '9', '7', '12', '11',
       '10', '7', '0', '10', '5', '13', '1', '1', '5', '11', '1', '13',
       '2', '0', '9', '14', '10', '5', '2', '9', '11', '3', '

#### Score Model

In [8]:
# Predict on the held-out split and compare against the true labels.
preds = knn.predict(X_test)

n_possible = len(y_test)
is_match = preds == y_test  # elementwise: True where prediction equals label
n_correct = np.sum(is_match)

print(f'n_correct: {n_correct}\nn_possible: {n_possible}\n% correct: {n_correct/n_possible*100}%')

n_correct: 220
n_possible: 240
% correct: 91.66666666666666%


## What's wrong? Predicted vs. Actual
Incorrect predictions on the left, actual values (labels) on the right. (Assumes labels are correct.)

In [9]:
# Walk predictions and labels in lockstep; print each disagreeing pair as
# "<predicted> <actual>" followed by a blank line.
for predicted, actual in zip(preds, y_test):
    if predicted != actual:
        print(predicted, actual)
        print()

11 21

12 11

1 10

10 12

6 5

10 18

6 0

10 11

0 6

5 6

6 0

1 10

0 7

0 9

10 12

1 5

9 0

0 6

0 2

0 1

