In [3]:
import math
from skimage import io
from skimage.transform import resize
from scipy import ndimage as ndi
import matplotlib.pyplot as plt
import numpy as np


def imgToBinaryMatrix(image, sizeX=20, sizeY=20):
    image = resize(image, (sizeX, sizeY), preserve_range=True).astype('uint8')
    threshold = 150
    # make all pixels < threshold black
    binarized = 1.0 * (image > threshold)

    # convert 3d array to 2d array
    return binarized[:, :, 0]


def getNumberOfFileName(s):
    return int(s.split("/", 1)[1][0])

def euclideanDistance(v1, v2):
    distance = 0
    for i in range(len(v1)):
        distance += math.pow((v1[i] - v2[i]), 2)
    return math.sqrt(distance)

#region KNN
def getTrainingDistanceForTestSample(X_train, test_sample):
    return [euclideanDistance(train_sample,test_sample) for train_sample in X_train]

def get_most_frequent_element(l):
    return max(l, key=l.count)

def knn(X_train, Y_train, sample, k=3):

    training_disance = getTrainingDistanceForTestSample(X_train, sample)
    sorted_distance_indices = [ 
        pair[0]
        for pair in sorted(enumerate(training_disance), key=lambda x: x[1])
    ]

    candidates = [
        Y_train[idx]
        for idx in sorted_distance_indices[:k]
    ]
    top_candidate = get_most_frequent_element(candidates)
    return top_candidate

def getValuesWithoutSample(dictionnary, key):
    dic = {}
    dic = dictionnary.copy()
    dic.pop(key)
    return dic

def getKnnConfusionMatrix():
    fileNames = []
    imageData = {}
    mat = np.zeros(shape=(10, 10))

    for i in range(0,10):
        for x in range(1,11):
                name = f'baseProjetOCR/{i}_{x}.png'
                fileNames.append(name)


    for name in fileNames:
        image = io.imread(name)
        imMatrix= imgToBinaryMatrix(image, 35, 6)
        meanKernel = np.full((3, 3), 1.0/9)
        imMatrix = ndi.correlate(imMatrix, meanKernel)
        imageData[name]=np.concatenate(imMatrix)

    for key in imageData:
        imageDataTest = getValuesWithoutSample(imageData, key)
        n = knn(imageDataTest.values(),
        [getNumberOfFileName(name) for name in imageDataTest.keys()],
        imageData[key],5)
        mat[getNumberOfFileName(key)][n]+=1
        return mat


#endregion

#region zoning

def convertMatrixTo4x4(matrix):
    xs = matrix.shape[0]//4  # division lines for the picture
    ys = matrix.shape[1]//4

    # now slice up the image (in a shape that works well with subplots)
    newMatrix = [[matrix[0:xs, 0:ys], matrix[0:xs, ys:ys*2], matrix[0:xs, ys*2:ys*3], matrix[0:xs, ys*3:ys*4]],
                 [matrix[xs:xs*2, 0:ys], matrix[xs:xs*2, ys:ys*2],
                     matrix[xs:xs*2, ys*2:ys*3], matrix[xs:xs, ys*3:ys*4]],
                 [matrix[xs*2:xs*3, 0:ys], matrix[xs*2:xs*3, ys:ys*2],
                     matrix[xs*2:xs*3, ys*2:ys*3], matrix[xs*2:xs*3, ys*3:ys*4]],
                 [matrix[xs*3:xs*4, 0:ys], matrix[xs*3:xs*4, ys:ys*2],
                  matrix[xs*3:xs*4, ys*3:ys*4], matrix[xs*3:xs*4, ys*3:ys*4]]
                 ]
    return newMatrix

def convert4x4ToVector(matrix):
    vector = []
    for i in range(4):
        for j in range(4):
            vector.append(np.sum(matrix[i][j]))
    return vector

def getComparedVectorsExcept(number, nth):
    vectors = []
    for i in range(10):
        for j in range(1, 11):
            if i == number and j == nth:
                continue
            name = f'baseProjetOCR/{i}_{j}.png'
            matrixImg = imgToBinaryMatrix(io.imread(name))
            matrixImg = convertMatrixTo4x4(matrixImg)
            vectors.append((name, convert4x4ToVector(matrixImg)))
    return vectors



def getFileNameCorrespondingTo(img, comparedVectors):
    matrixImg = imgToBinaryMatrix(img)
    meanKernel = np.full((3, 3), 1.0/9)
    matrixImg = ndi.correlate(matrixImg, meanKernel)
    matrixImg = convertMatrixTo4x4(matrixImg)
    sourceVector = convert4x4ToVector(matrixImg)

    allDistancesZoning = [euclideanDistance(sourceVector, comparedVectors[i][1]) for i in range(len(comparedVectors))]

    indexOfMinDistance = allDistancesZoning.index(min(allDistancesZoning))
    return comparedVectors[indexOfMinDistance][0]


def getZoningConfusionMatrix():
    mat = np.zeros(shape=(10, 10))
    for i in range(10):
        for j in range(11):
            if j == 0:
                continue
            vectors = getComparedVectorsExcept(i, j)
            n = getNumberOfFileName(getFileNameCorrespondingTo(
                io.imread(f'baseProjetOCR/{i}_{j}.png'), vectors))
            mat[i][n] += 1
    return mat

#endregion

#region profilVH

SIZE = 60
THRESHOLD = 150
FIRST = 0
SECOND = 0
confusionMatrix = [[0 for col in range(10)] for row in range(10)]


def bold(str):
    return '\033[1m' + str + '\033[0m'

def MapToHProfile(matrix: np.ndarray):
    return SIZE - matrix.sum()
    
def getHorizontaleProfile(img):
    img = resize(img, (SIZE, SIZE), preserve_range=True).astype('uint8')
    binarized = 1.0 * (img > THRESHOLD)
    binarized = binarized[:,:,0]
    return (list(map(MapToHProfile, binarized)),binarized)

def getAllVectors():
    vectors = []
    for i in range(0,10):
        for j in range(1,11):
            name = f'baseProjetOCR/{i}_{j}.png'
            img = io.imread(name)
            vector = getHorizontaleProfile(img)[0]
            vectors.append((name,vector))
    return vectors

ALL_VECTORS = getAllVectors()
    

# read in image as 8 bit grayscale
# img = io.imread(f'baseProjetOCR/{FIRST}_{SECOND}.png')

# hProfile = getHorizontaleProfile(img)
# vector = hProfile[0]
# img = hProfile[1]

def getProfileVHConfusionMatrix():
    global FIRST, SECOND
    for i in range(0,10):
        for j in range(1,11):
            FIRST = i
            SECOND = j
            unknownImageFile = f'baseProjetOCR/{i}_{j}.png'
            #print(f'reading file { bold(unknownImageFile.split("/")[1]) }', end=' ')
            unknownIMG = io.imread(unknownImageFile)
            unknownHProfile = getHorizontaleProfile(unknownIMG)
            found = guessNumber(unknownHProfile[0], ALL_VECTORS)
            #print('-- ' + ("success" if int(found) == i else ("wrong (found " + found + " instead of " + str(i) + ")")) + " !")
            confusionMatrix[i][int(found)] += 1
    return (np.matrix(confusionMatrix))

def findMatch(aVector, vectors):
    global FIRST,SECOND
    highest = 'null'
    highScore = 0
    for item in vectors:
        filename = item[0]
        numbers = numbersFromFile(filename)
        if (numbers[0] == FIRST and numbers[1] == SECOND):
            continue
        vals = item[1]
        score = 0
        for index in range(SIZE):
            difference = max(aVector[index],vals[index]) - min(aVector[index],vals[index])
            score += difference if (difference <= 3) else 0
        if (score > highScore):
            highest = item[0]
            highScore = score
    #print(f'-- best match: { bold(highest.split("/")[1]) }', end=' ')
    return highest

def numbersFromFile(name):
    exactName = name.split('/')[1]
    first = exactName[0]
    second = exactName[2]
    return (first, second)

def guessNumber(aVector, vectors):
    return numbersFromFile(findMatch(aVector, vectors))[0]
    
def getSuccessRate():
    sum = 0
    for i in range(10):
        sum += confusionMatrix[i][i]
    print(f'Taux de succès : {sum} %')
    return sum
#endregion

KNNconfusion = getKnnConfusionMatrix()
VHProfileConfusion = getProfileVHConfusionMatrix()
ZoningConfusion = getZoningConfusionMatrix()

print(KNNconfusion)
print(VHProfileConfusion)
print(ZoningConfusion)

[[1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
[[7 1 0 0 0 0 1 0 0 1]
 [0 6 0 0 3 0 0 1 0 0]
 [0 0 7 0 0 0 1 0 0 2]
 [0 0 0 8 0 1 1 0 0 0]
 [0 2 0 0 3 0 0 5 0 0]
 [0 0 2 0 0 8 0 0 0 0]
 [0 0 1 0 1 1 7 0 0 0]
 [0 1 1 0 2 1 0 5 0 0]
 [2 0 0 0 0 0 0 0 8 0]
 [0 0 1 0 0 1 0 0 1 7]]
[[10.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  8.  0.  0.  1.  0.  0.  0.  0.  1.]
 [ 0.  0. 10.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  8.  0.  0.  0.  1.  0.  1.]
 [ 0.  0.  0.  0.  9.  0.  1.  0.  0.  0.]
 [ 0.  0.  0.  0.  0. 10.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0. 10.  0.  0.  0.]
 [ 0.  0.  0.  1.  0.  0.  0.  9.  0.  0.]
 [ 1.  0.  1.  1.  0.  0.  0.  0.  7.  0.]
 [ 0.  0.  0.  2.  0.  0.  0.  0.  1.  7.]]
