In [39]:
from skimage import io
from skimage.transform import resize
import numpy as np
import scipy.ndimage as ndi
import matplotlib.pyplot as plt
import math

def imgToBinaryMatrix(image):
    resize(image, (60, 60), preserve_range=True).astype('uint8')

    # specify a threshold 0-255
    threshold = 150

    # make all pixels < threshold black
    binarized = 1.0 * (image > threshold)

    #convert 3d array to 2d array
    return binarized[:,:,0]


In [40]:
def convertMatrixTo3x3(matrix):
    xs = matrix.shape[0]//3  # division lines for the picture
    ys = matrix.shape[1]//3

    # now slice up the image (in a shape that works well with subplots)
    newMatrix = [[matrix[0:xs, 0:ys], matrix[0:xs, ys:ys*2], matrix[0:xs, ys*2:ys*3]], 
    [matrix[xs:xs*2, 0:ys], matrix[xs:xs*2, ys:ys*2], matrix[xs:xs*2, ys*2:ys*3]], 
    [matrix[xs*2:xs*3, 0:ys], matrix[xs*2:xs*3, ys:ys*2], matrix[xs*2:xs*3, ys*2:ys*3]], 
    ]
    return newMatrix

def convert3x3ToVector(matrix):
    vector = []
    for i in range(3):
        for j in range(3):
            vector.append(np.count_nonzero(matrix[i][j] == 0))
    return vector

"""
fig, axs = plt.subplots(3, 3)
vector = []
for i in range(3):
    for j in range(3):
       axs[i][j].imshow(splits[i][j])
"""

def euclideanDistance(v1, v2):
    distance = 0
    for i in range(len(v1)):
        distance += math.pow((v1[i] - v2[i]),2)
    return math.sqrt(distance)
    
def getComparedVectorsExcept(number, nth):
    vectors = []
    for i in range(10):
        for j in range(1,11):
            if i == number and j == nth:
                    continue
            name = f'baseProjetOCR/{i}_{j}.png'
            matrixImg = imgToBinaryMatrix(io.imread(name))
            matrixImg = convertMatrixTo3x3(matrixImg)
            vectors.append((name,convert3x3ToVector(matrixImg)))
    return vectors

def getFileNameCorrespondingTo(img, comparedVectors):
    matrixImg = imgToBinaryMatrix(img)
    matrixImg = convertMatrixTo3x3(matrixImg)
    sourceVector = convert3x3ToVector(matrixImg)

    allDistances = [euclideanDistance(sourceVector, comparedVectors[i][1]) for i in range(len(comparedVectors))]
    index = allDistances.index(min(allDistances))
    return comparedVectors[index][0]

def getNumberOfImage(s):
    return int(s.split("/",1)[1][0])

def confusionMatrix():
    mat = []
    for i in range(10):
        vectorTmp = [0]*10
        for j in range(11):
            if j == 0:
                continue
            vectors = getComparedVectorsExcept(i, j)
            n = getNumberOfImage(getFileNameCorrespondingTo(io.imread(f'baseProjetOCR/{i}_{j}.png'), vectors))
            vectorTmp[n] += 1
        mat.append(vectorTmp)
    return mat

confMatrix = confusionMatrix()
s = sum([confMatrix[i][i] for i in range(10)])
print(s)
print(confMatrix)

77
[[10, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 7, 0, 1, 0, 0, 1, 0, 0, 1], [0, 0, 9, 0, 0, 0, 1, 0, 0, 0], [0, 0, 0, 6, 0, 0, 0, 1, 0, 3], [0, 0, 0, 0, 8, 0, 0, 2, 0, 0], [0, 0, 0, 0, 0, 10, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 10, 0, 0, 0], [0, 1, 0, 2, 1, 0, 0, 5, 0, 1], [0, 0, 1, 0, 0, 0, 1, 0, 8, 0], [0, 0, 0, 3, 0, 1, 0, 2, 0, 4]]
