In [61]:

import math
from skimage import io
from skimage.transform import resize
from scipy import ndimage as ndi
import matplotlib.pyplot as plt
import numpy as np
import math

def imgToBinaryVector(image, size=(20, 20), threshold=150):
    """Load an image file and turn it into a flat, smoothed binary vector.

    The image is read, resized to ``size``, binarized against ``threshold``,
    passed through a 3x3 mean filter and flattened row by row.

    Args:
        image: Location of the image, handed as-is to ``io.imread``.
        size: ``(rows, cols)`` the image is resized to before binarization.
        threshold: Pixels strictly greater than this value become 1.0,
            all others 0.0.

    Returns:
        A 1-D numpy array of ``size[0] * size[1]`` floats.
    """
    pixels = io.imread(image)
    # preserve_range=True keeps the source intensity scale, so the threshold
    # is compared against original pixel values rather than rescaled ones.
    pixels = resize(pixels, size, preserve_range=True).astype('uint8')

    # Multi-channel images: only the first channel is used.  Single-channel
    # (grayscale) images are already 2-D and must not be indexed further.
    if pixels.ndim == 3:
        pixels = pixels[:, :, 0]

    binarized = 1.0 * (pixels > threshold)

    # Replace every pixel by the mean of its 3x3 neighbourhood.
    meanKernel = np.full((3, 3), 1.0 / 9)
    binarized = ndi.correlate(binarized, meanKernel)
    return binarized.ravel()

def euclideanDistance(v1, v2):
    """Return the Euclidean (L2) distance between two equal-length vectors.

    Args:
        v1: Sequence of numbers.
        v2: Sequence of numbers with the same length as ``v1``.

    Returns:
        The distance as a float (0.0 for two empty vectors).

    Raises:
        ValueError: If the two vectors do not have the same length.
    """
    if len(v1) != len(v2):
        # Measuring only a common prefix would silently under-estimate the
        # distance, so mismatched inputs are rejected outright.
        raise ValueError(
            f"vectors must have the same length, got {len(v1)} and {len(v2)}"
        )
    # math.pow converts each difference to a float before squaring it.
    squared_sum = sum(math.pow(a - b, 2) for a, b in zip(v1, v2))
    return math.sqrt(squared_sum)

def getTrainingDistanceForTestSample(X_train, test_sample):
    """Measure how far ``test_sample`` is from every training sample.

    Returns a list holding one ``euclideanDistance`` result per element of
    ``X_train``, in the order ``X_train`` yields them.
    """
    distances = []
    for candidate in X_train:
        distances.append(euclideanDistance(candidate, test_sample))
    return distances

def get_most_frequent_element(l):
    """Return the element that occurs most often in ``l``.

    When several elements share the highest count, the one that appears
    first in ``l`` is returned.  ``l`` must provide a ``count`` method.
    """
    occurrences = l.count  # bound method: number of times an item is in l
    winner = max(l, key=occurrences)
    return winner

def knn(X_train, Y_train, sample, k=3):
    """Predict the label of ``sample`` from its ``k`` closest training samples.

    Args:
        X_train: Iterable of training vectors.
        Y_train: Labels, indexable by the position of the matching vector
            in ``X_train``.
        sample: Vector to classify.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The most frequent label among the ``k`` nearest neighbours.
    """
    distances = getTrainingDistanceForTestSample(X_train, sample)

    # Positions ordered from closest to farthest.  The sort is stable, so
    # equally distant samples keep their original relative order.
    nearest_first = sorted(range(len(distances)), key=distances.__getitem__)

    votes = []
    for position in nearest_first[:k]:
        votes.append(Y_train[position])
    return get_most_frequent_element(votes)


def getNumberOfFileName(s):
    """Extract the numeric label that prefixes an image file name.

    The label is the run of decimal digits at the start of the last path
    component, e.g. ``'baseProjetOCR/3_7.png'`` gives ``3``.

    Args:
        s: File path using ``/`` as separator; the directory part is
            optional and may be nested.

    Returns:
        The leading number of the file name as an int.

    Raises:
        ValueError: If the file name does not start with a digit.
    """
    # Only the last component carries the label, so the number of
    # directories in front of it must not matter.
    file_name = s.rsplit("/", 1)[-1]

    digits = []
    for ch in file_name:
        if ch not in "0123456789":
            break
        digits.append(ch)

    if not digits:
        raise ValueError(f"no leading number in file name: {s!r}")
    return int("".join(digits))

def getValuesWithoutSample(dictionnary, key):
    """Return a shallow copy of ``dictionnary`` with ``key`` removed.

    The mapping passed in is left untouched.

    Args:
        dictionnary: Mapping to copy.
        key: Entry to leave out of the copy.

    Returns:
        The copy, without ``key``.

    Raises:
        KeyError: If ``key`` is not present in ``dictionnary``.
    """
    remaining = dictionnary.copy()
    remaining.pop(key)
    return remaining

def main():
    """Evaluate the k-NN digit classifier with leave-one-out validation.

    Loads ``baseProjetOCR/{digit}_{index}.png`` for digits 0-9 and indices
    1-10, classifies every image against all the other ones, then prints
    the percentage of correct predictions and the confusion matrix
    (row: digit taken from the file name, column: predicted digit).
    """
    fileNames = [
        f'baseProjetOCR/{digit}_{index}.png'
        for digit in range(10)
        for index in range(1, 11)
    ]
    imageData = {name: imgToBinaryVector(name) for name in fileNames}
    mat = np.zeros(shape=(10, 10))

    for key, sample in imageData.items():
        # Leave-one-out: the sample being classified is removed from the
        # training set so it cannot vote for itself.
        imageDataTest = getValuesWithoutSample(imageData, key)
        labels = [getNumberOfFileName(name) for name in imageDataTest]
        predicted = knn(list(imageDataTest.values()), labels, sample)
        mat[getNumberOfFileName(key), predicted] += 1

    # The diagonal counts the correct predictions; dividing by the total
    # keeps the figure a real percentage whatever the number of samples.
    accuracy = 100.0 * np.trace(mat) / mat.sum()
    print("Percentage : " + str(accuracy))
    print("Confusion matrix :\n" + str(mat))

# Run the evaluation only when executed directly (script or notebook cell),
# not when this file is imported from another module.
if __name__ == "__main__":
    main()

Percentage : 88.0
Confusion matrix :
[[ 9.  0.  0.  0.  0.  0.  0.  0.  0.  1.]
 [ 0.  9.  0.  0.  0.  0.  0.  1.  0.  0.]
 [ 0.  0.  9.  0.  0.  0.  1.  0.  0.  0.]
 [ 0.  0.  0.  9.  0.  0.  0.  1.  0.  0.]
 [ 0.  0.  0.  0.  9.  0.  1.  0.  0.  0.]
 [ 0.  0.  0.  0.  0. 10.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0. 10.  0.  0.  0.]
 [ 0.  0.  0.  0.  1.  0.  0.  9.  0.  0.]
 [ 0.  0.  1.  2.  0.  0.  0.  0.  6.  1.]
 [ 0.  0.  0.  2.  0.  0.  0.  0.  0.  8.]]
