In [45]:
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
from os import listdir
from os.path import isfile, join
import string


In [46]:
# load single example
def load_example( img_path ):

    Y = img_path[img_path.rfind('_')+1:-4]

    img = Image.open( img_path )
    img_mat = np.asarray( img )
    
    n_letters = len( Y )
    im_height = int(img_mat.shape[0])
    im_width = int(img_mat.shape[1]/n_letters)
    n_pixels = im_height*im_width
    
    X = np.zeros( [int(n_pixels+n_pixels*(n_pixels-1)/2),n_letters])
    for i in range(n_letters):
        
        # single letter
        letter = img_mat[:,i*im_width:(i+1)*im_width]/255
        
        # compute features
        x = letter.flatten()
        X[0:len(x),i] = x
        cnt = n_pixels 
        for j in range(0,n_pixels-1):
            for k in range(j+1,n_pixels):
                X[cnt,i] = x[j]*x[k]
                cnt = cnt + 1
           
        X[:,i] = X[:,i]/np.linalg.norm(X[:,i])
        
    return X, Y, img

# load all examples from a folder    
def load_examples( image_folder ):
    
    files = [f for f in listdir(image_folder) if isfile(join(image_folder, f))]

    X = []
    Y = []
    img = []
    for file in listdir(image_folder):
        path = join(image_folder, file)
        if isfile( path ):
                        
            X_,Y_,img_ = load_example( path )
            X.append( X_ )
            Y.append( Y_ )
            img.append( img_ )
        
    return X, Y, img

In [47]:
# Global Variables

training_size = 1000
testing_size = 500

alpha = 0.01

epochs = 30
features = 8256
labels = 26
letterDict = {}
L = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
for i in range(len(L)):
    letterDict[L[i]] = i

In [58]:
# Training Phase
def train(trn_X, trn_Y):
    
    # Add one column to each element of trn_X (for biases)
    X = [np.vstack([x, np.ones(x.shape[1])]) for x in trn_X]
    
    # Convert letters into numbers
    Y = [np.array([letterDict.get(char) for char in word]) for word in trn_Y]
    
    # Parameters
    WB = np.zeros((labels, features+1)) # Weight and Biases
    G = np.zeros((labels, labels)) # Matrix for pair-wise dependency
    
    for i in range(epochs):
        print(f"Epoch {i+1}", end="\r")
        flag = True
        
        # For each word
        for fm, word in zip(X, Y):
            
            # Predict the word
            p = predict(fm, WB, G)

            # Use tuple to compare all elements with =
            if p != tuple(word):   
                for char in range(len(word)):
                    if p[char] != word[char]:
                        # Update parameters
                        WB[p[char], :] -= fm[:, char]
                        WB[word[char], :] += fm[:, char]
                        
                        # If we're not looking at the first character, update G
                        if char > 0:
                            G[word[char-1], p[char]] -= 0.1
                            G[word[char-1], word[char]] += 0.1   
            flag = False
        
        # If no more misclassifications
        if flag:
            print("Optimum reached")
            break
        
    return WB, G

# Classifier
def predict(fm, WB, G):
    
    Letters = fm.shape[1] # number of characters in this word
    
    # Matrix multiplication
    q = np.matmul(WB, fm) 
    F = np.zeros((26, Letters))
    F[:, 0] = q[:, 0]
    
    # For each character
    for l in range(1, Letters):
        # For each letter of the alphabet
        for i in range(26):
            qi = q[i, l] # extract the relative result of the matrix multiplication
            maxF = np.max(F[:, l-1] + G[:, i])  # get the max over all y
            F[i, l] = qi + maxF
    
    # Get the argmax considering F and G, except for the last character
    predictions = np.zeros(Letters, dtype=int)
    for i in range(Letters-1, 0, -1):
        predictions[i-1] = np.argmax(F[:, i-1] + G[:, predictions[i]])
    predictions[-1] = np.argmax(F[:, -1])
    
    # Use tuple to compare all elements with =
    return tuple(predictions)


# Testing Phase
def test(tst_X, WB, G):
    
    # Add one column to each element of trn_X (for biases)
    X = [np.vstack([x, np.ones(x.shape[1])]) for x in tst_X]
    
    predictions = []
    for fm in X:
        predictions.append(predict(fm, WB, G))
    
    return predictions
        
    

In [49]:
# load data

# load training examples
trn_X, trn_Y, trn_img = load_examples( 'ocr_names_images/trn' )

# load testing examples
tst_X, tst_Y, tst_img = load_examples( 'ocr_names_images/tst' )

In [59]:
print("Training")
print("...")
WB, G = train(trn_X, trn_Y)
print()
print("Weights, biases and pair-wise dependency learned!")
print()
print("Testing")
res = test(tst_X, WB, G)
print("Predictions done")

Training
...
Epoch 30
Weights and Biases Learned!

Testing
Predictions done


In [63]:
# Convert letters into numbers
Y = [np.array([letterDict.get(char) for char in word]) for word in tst_Y]

# Sequence Error
num = 0
for i in range(500):
    if(tuple(res[i]) != tuple(Y[i])):
        num+=1
print("Sequence error: ", f"{num/500:.4f}")

# Charachter Error
num = 0
word = 0
for y in Y:
    y = list(y)
    for char in range(len(y)):
        if(y[char] != list(res[word])[char]):
            num+=1
    word+=1
print("Charachter error: ", f"{num/sum([len(l) for l in tst_Y]):.4f}")

Sequence error:  0.2340
Charachter error:  0.1062
