In [1]:
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
from os import listdir
from os.path import isfile, join
import string

In [2]:
# load single example
def load_example( img_path ):

    Y = img_path[img_path.rfind('_')+1:-4]

    img = Image.open( img_path )
    img_mat = np.asarray( img )
    
    n_letters = len( Y )
    im_height = int(img_mat.shape[0])
    im_width = int(img_mat.shape[1]/n_letters)
    n_pixels = im_height*im_width
    
    X = np.zeros( [int(n_pixels+n_pixels*(n_pixels-1)/2),n_letters])
    for i in range(n_letters):
        
        # single letter
        letter = img_mat[:,i*im_width:(i+1)*im_width]/255
        
        # compute features
        x = letter.flatten()
        X[0:len(x),i] = x
        cnt = n_pixels 
        for j in range(0,n_pixels-1):
            for k in range(j+1,n_pixels):
                X[cnt,i] = x[j]*x[k]
                cnt = cnt + 1
           
        X[:,i] = X[:,i]/np.linalg.norm(X[:,i])
        
    return X, Y, img

# load all examples from a folder    
def load_examples( image_folder ):
    
    files = [f for f in listdir(image_folder) if isfile(join(image_folder, f))]

    X = []
    Y = []
    img = []
    for file in listdir(image_folder):
        path = join(image_folder, file)
        if isfile( path ):
                        
            X_,Y_,img_ = load_example( path )
            X.append( X_ )
            Y.append( Y_ )
            img.append( img_ )
        
    return X, Y, img

In [3]:
# Global Variables

training_size = 1000
testing_size = 500

alpha = 0.01

epochs = 30
features = 8256
labels = 26
letterDict = {}
L = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
for i in range(len(L)):
    letterDict[L[i]] = i

In [4]:
# Training Phase
def train(trn_X, trn_Y):
    
    # Add one column to each element of trn_X (for biases)
    X = [np.vstack([x, np.ones(x.shape[1])]) for x in trn_X]
    
    # Convert letters into numbers
    Y = [tuple(np.array([letterDict.get(char) for char in word])) for word in trn_Y]

    # Weight and Biases matrix
    WB = np.zeros((labels, features+1))
    
    # Create a dictionary for the sequences (grouped by length)
    vDict = dict()
    for word in Y:
        numL = len(word)
        if numL not in vDict:
            vDict[numL] = dict()
        values = vDict[numL]
        if word not in values:
            values[word] = 0

    # Learn
    for i in range(epochs+1):
        print(f"Epoch: {i}", end="\r")
        flag = True
        
        for fm, word in zip(X, Y):
            
            # Predict the word
            p = predict(fm, WB, vDict)
            
            if tuple(p) != tuple(word):
                flag = False
                
                # Update parameters
                temp1 = np.zeros((labels, features+1))
                temp2 = np.zeros((labels, features+1))
                temp1[word, :] = 1
                temp2[p, :] = 1
                for char in range(fm.shape[1]):
                    temp1[word[char], :] = temp1[word[char], :]*fm[:, char]
                    temp2[p[char], :] = temp2[p[char], :]*fm[:, char]
                WB += (temp1 - temp2)
                vDict[fm.shape[1]][p] -= 0.1
                vDict[fm.shape[1]][word] += 0.1
        
        # If no more misclassifications      
        if flag:
            print("\nOptimum reached!")
            break
    
    return WB, vDict


# Classifier
def predict(fm, WB, vD):
    
    # Get sequences of that length
    SEQ = vD[fm.shape[1]]
    
    prediction = None
    maxP = -float('inf')
    for chars in SEQ:
        # Element-wise multiplication
        p = np.sum(WB[chars, :].T * fm + SEQ[chars])
        if p > maxP:
            # Better prediction
            prediction = chars
            maxP = p
    
    return prediction


# Testing Phase
def test(tst_X, WB, vD):
    
    # Add one column to each element of trn_X (for biases)
    X = [np.vstack([x, np.ones(x.shape[1])]) for x in tst_X]
    
    # Predict the label
    predictions = []
    for fm in X:
        predictions.append(predict(fm, WB, vD))
        
    return predictions
        
    
    

In [5]:
# load data

# load training examples
trn_X, trn_Y, trn_img = load_examples( 'ocr_names_images/trn' )

# load testing examples
tst_X, tst_Y, tst_img = load_examples( 'ocr_names_images/tst' )

In [6]:
print("Training")
print("...")
WB, vD = train(trn_X, trn_Y)
print()
print("Weights, biases and 'v' function learned!")
print()
print("Testing")
res = test(tst_X, WB, vD)
print("Predictions done")

Training
...
(8257, 4)
(8257, 4)
(8257, 6)
(8257, 5)
(8257, 5)
(8257, 5)
(8257, 5)
(8257, 4)
(8257, 3)
(8257, 3)
(8257, 2)
(8257, 6)
(8257, 4)
(8257, 5)
(8257, 5)
(8257, 2)
(8257, 5)
(8257, 3)
(8257, 4)
(8257, 3)
(8257, 3)
(8257, 4)
(8257, 3)
(8257, 5)
(8257, 6)
(8257, 2)
(8257, 2)
(8257, 2)
(8257, 5)
(8257, 5)
(8257, 6)
(8257, 5)
(8257, 4)
(8257, 2)
(8257, 2)
(8257, 5)
(8257, 5)
(8257, 4)
(8257, 5)
(8257, 2)
(8257, 2)
(8257, 5)
(8257, 6)
(8257, 4)
(8257, 2)
(8257, 6)
(8257, 5)
(8257, 4)
(8257, 5)
(8257, 5)
(8257, 3)
(8257, 2)
(8257, 2)
(8257, 4)
(8257, 5)
(8257, 5)
(8257, 5)
(8257, 4)
(8257, 5)
(8257, 5)
(8257, 5)
(8257, 4)
(8257, 4)
(8257, 4)
(8257, 5)
(8257, 5)
(8257, 5)
(8257, 5)
(8257, 5)
(8257, 6)
(8257, 5)
(8257, 4)
(8257, 6)
(8257, 2)
(8257, 5)
(8257, 4)
(8257, 4)
(8257, 4)
(8257, 5)
(8257, 4)
(8257, 5)
(8257, 3)
(8257, 2)
(8257, 4)
(8257, 4)
(8257, 4)
(8257, 3)
(8257, 4)
(8257, 4)
(8257, 2)
(8257, 5)
(8257, 2)
(8257, 4)
(8257, 4)
(8257, 5)
(8257, 2)
(8257, 2)
(8257, 4)
(8257, 

In [7]:
# Convert letters into numbers
Y = [np.array([letterDict.get(char) for char in word]) for word in tst_Y]

# Sequence Error
num = 0
for i in range(testing_size):
    if(tuple(res[i]) != tuple(Y[i])):
        num+=1
print("Sequence error: ", f"{num/500:.4f}")

# Charachter Error
num = 0
word = 0
for y in Y:
    y = list(y)
    for char in range(len(y)):
        if(y[char] != list(res[word])[char]):
            num+=1
    word+=1
print("Charachter error: ", f"{num/sum([len(l) for l in tst_Y]):.4f}")

Sequence error:  0.0320
Charachter error:  0.0217
