In [62]:
###Import required packages###
# Make sure you have all the packages listed below
import sys
sys.path.append("/Users/anant/caffe/python/") #path to caffe for python
sys.path.append("/usr/local/lib/python2.7/site-packages/") #path to OpenCV
import numpy as np
import matplotlib.pyplot as plt
import caffe
import cv2
import os
from PIL import ImageFont, ImageDraw
from PIL import Image
import random

In [None]:
### Generating Data  -  using Fonts ###
# Renders one 128x128 black-on-white PNG per (character, font) pair.
# Execute this block only if you want to generate character images from fonts;
# if you are using The Chars 74k dataset or any other data set you don't need to run it.
#
# Output layout: <data_gen_folder>Sample<ascii code>/img<font name>.png

#Font list taken from : http://web.mit.edu/jmorzins/www/fonts.html
#Only the uncommented fonts are available in OSX by default
font_list = [
    "Arial",
    #"Helvetica",
    #"Times",
    "Times New Roman",
    "Courier New",
    #"Courier",
    #"Palatino",
    #"Garamond",
    #"Bookman",
    #"Avant Garde",
    "Verdana",
    "Georgia",
    "Comic Sans MS",
    "Arial Black",
    "Impact",
    "Trebuchet MS",
    "Arial Narrow"
]
data_gen_folder = "data_gen_new/" #name of the folder in which the data will be generated
font_folder = "/Library/Fonts/"   #folder holding the <font name>.ttf files (OSX location)
image_size = 128                  #width and height of every generated image, in pixels
if(not os.path.exists(data_gen_folder)):
    os.mkdir(data_gen_folder)

#Loop over each character to be generated
#The current loop runs from A to Z; change the range to 0-9 or a-z to generate those characters.
#This only has to be done once for all experiments, so just edit the range and re-run.
for c in range( ord('A'), ord('Z')+1 ):
    char = chr(c)
    #one sub folder per character, named after its ASCII code
    image_file_path = data_gen_folder+"Sample"+str(c)
    if(not os.path.exists(image_file_path)):
        os.mkdir(image_file_path)
    #loop over all fonts
    for f in font_list:
        font_file = font_folder+f+".ttf"
        font_size = image_size
        font = ImageFont.truetype(font_file, font_size)
        img = Image.new("RGB", (image_size,image_size), (255,255,255))
        draw = ImageDraw.Draw(img)
        size = draw.textsize(char, font=font)
        #shrink the font one point at a time until the glyph fits strictly inside the image
        while( size[0] >= image_size or size[1] >= image_size ):
            font_size -= 1
            font = ImageFont.truetype(font_file, font_size)
            size = draw.textsize(char, font=font)
        #centre horizontally; "//" keeps the margin an integer even under true division
        left_margin = (image_size-size[0])//2
        #the glyph is anchored at the top edge (y = 0), it is not centred vertically
        draw.text((left_margin, 0), char, (0,0,0), font=font)
        img.save( image_file_path+"/img"+f+".png")

In [None]:
### Generating training_index.txt and test_index.txt ###
# These files are used by the caffe model for training and testing.
# Each line holds an image file path and the integer class label, separated by one space.
# This code block assumes that the data was generated using the data generation code above;
# if that's not the case it can easily be adapted to any other data source that follows this layout:
#   - the image data folder has one subfolder per class, named "Sample<label>"
#   - each subfolder "Sample<label>" holds the images of class <label>, with names starting with "img"

folder = "data_gen_new/" #the folder containing the images data
train_fraction = 0.9     #split the data in 90-10 fashion, 90% for training and 10% for validation
# Only the first 1/subsample_divisor of each class is indexed.
# NOTE(review): with the 10 images per class produced by the font generator this keeps a single
# image per class, which lands in the validation list and leaves the training index empty.
# Set this to 1 to index every image.
subsample_divisor = 10

# os.listdir() returns names in arbitrary order; sorting makes the split reproducible
all_folders = sorted(x for x in os.listdir(folder) if x.startswith("Sample"))

train_list = []
cv_list = []
for f in all_folders:
    label = int(f.replace("Sample", ""))
    if( folder == "data_gen_new/"):
        #generated data: the folder suffix is an ASCII code, map it to [0,61]
        if( 48 <= label <= 57 ):     # '0'-'9' -> 0-9
            label -= 48
        elif( 65 <= label <= 90 ):   # 'A'-'Z' -> 10-35
            label += -65 + 10
        elif( 97 <= label <= 122 ):  # 'a'-'z' -> 36-61
            label += -97 + 36
    else:
        #any other data source: folder suffixes are taken to be 1-based labels
        label -= 1
    image_names = sorted(x for x in os.listdir( folder+f ) if x.startswith("img")) #assumes image files start with img
    all_files = [ folder+f+"/"+x+" "+str(label) for x in image_names ]
    #"//" keeps the slice bound an integer even under true division
    number_of_files = len(all_files)//subsample_divisor
    all_files = all_files[:number_of_files]
    cv_index = int(train_fraction*number_of_files)
    cv_list += all_files[cv_index:]
    train_list += all_files[:cv_index]

#shuffle the entries so consecutive lines do not all belong to one class
random.shuffle(cv_list)
random.shuffle(train_list)

#generate files; "with" closes each handle even if the write fails
with open(folder+"training_index.txt", "w") as train_file_handle:
    train_file_handle.write("\n".join(train_list))
with open(folder+"test_index.txt", "w") as cv_file_handle:
    cv_file_handle.write("\n".join(cv_list))

In [63]:
## Load the trained caffe model
# Point the two paths below at your own model: the first one is the model
# configuration (.prototxt), the second one the trained model (.caffemodel).
# The resulting `net` is reused by every classification cell further down.

base_folder = ""
net = caffe.Net(
    base_folder+"alpha_deploy.prototxt",
    "data/with_shuffle/25_alpha_iter_1200.caffemodel",
    caffe.TEST)

In [None]:
## Convert The Chars 74k dataset's integer label to corresponding character.
def convert_74k_label(x):
    """Translate an integer class label into the character it stands for.

    Labels 0-9 map to '0'-'9', 10-35 to 'A'-'Z' and 36-61 to 'a'-'z'.
    Any label outside [0, 61] yields None.
    """
    # (first label, one past the last label, ASCII code of the first character)
    label_ranges = (
        (0, 10, 48),
        (10, 36, 65),
        (36, 62, 97),
    )
    for first_label, end_label, first_code in label_ranges:
        if first_label <= x < end_label:
            return chr(x - first_label + first_code)
    return None

In [77]:
## DEMO : 0 to 9, A to Z and a to z
# We take one image per class and try to classify it using the model trained by us using Caffe
# The images are taken from The Chars 74k dataset

base_folder = "temp/0-9A-Za-z/"
all_files = os.listdir(base_folder)
all_files = [x for x in all_files if x.startswith("img")]

correct_label = 0

MRR = 0.0 #Mean Reciprocal Rank
correct_label = 0
guess = []
guess.append(0)
guess.append(0)
guess.append(0)
#print "correct label\tfirst guess\tsecond guess\tthird guess"
for img_name in all_files:
    img = caffe.io.load_image(base_folder+img_name, False) #Load the image  
    img = img.transpose((2,0,1)) # Take care of the RGB convention
    img = img[None,:]  #Adds a new empty axis, as we have a batch size of 1 i.e. we are classifying 1 image at a time 
    out = net.forward_all(data = img) #Pass the image to the model
    prob = net.blobs['prob'].data[0] 
    index = np.argmax(prob)
    sorted_index = np.argsort(prob) #Sort the predictions according to the probabilities
    if sorted_index[-1] == correct_label: #Class with highest probability
        MRR += 1
        guess[0] += 1
        string = "first guess"
    elif sorted_index[-2] == correct_label : #Class with second highest probability
        MRR += (1/2)
        guess[1] += 1
        string = "second guess"
    elif sorted_index[-3] == correct_label : #Class with third highest probability
        MRR += (1/3)
        guess[2] += 1
        string = "third guess"
    else : 
        string = "\tincorrect"
    
    print convert_74k_label(correct_label),":\t",convert_74k_label(sorted_index[-1]),"\t",convert_74k_label(sorted_index[-2]),"\t", convert_74k_label(sorted_index[-3]),"\t",string
    correct_label += 1
print "Mean Reciprocal Rank:", MRR/62

0 :	0 	C 	O 	first guess
1 :	l 	1 	t 	second guess
2 :	2 	l 	7 	first guess
3 :	3 	Z 	7 	first guess
4 :	4 	A 	k 	first guess
5 :	5 	h 	6 	first guess
6 :	6 	C 	G 	first guess
7 :	7 	l 	J 	first guess
8 :	8 	Z 	3 	first guess
9 :	3 	9 	0 	second guess
A :	A 	t 	h 	first guess
B :	8 	3 	Z 		incorrect
C :	C 	r 	L 	first guess
D :	0 	O 	D 	third guess
E :	E 	L 	C 	first guess
F :	f 	F 	y 	second guess
G :	G 	C 	0 	first guess
H :	h 	H 	R 	second guess
I :	l 	1 	t 		incorrect
J :	d 	J 	4 	second guess
K :	K 	k 	t 	first guess
L :	L 	t 	r 	first guess
M :	M 	y 	V 	first guess
N :	N 	h 	R 	first guess
O :	0 	U 	O 	third guess
P :	p 	7 	P 	third guess
Q :	Q 	R 	G 	first guess
R :	R 	9 	7 	first guess
S :	C 	S 	G 	second guess
T :	l 	T 	I 	second guess
U :	U 	d 	h 	first guess
V :	V 	y 	M 	first guess
W :	W 	h 	N 	first guess
X :	x 	X 	1 	second guess
Y :	y 	1 	l 		incorrect
Z :	l 	I 	Z 	third guess
a :	a 	3 	B 	first guess
b :	b 	h 	A 	first guess
c :	c 	C 	r 	first guess
d :	d 	4 	L 	first g

In [None]:
## Test the model
# The caffe trained model has been already loaded in one of the code blocks above.
# This code block loads your test file, and gets the prediction using the model
# The prediction is matched with the known label

#path to test file:
test_file = open("data_gen_new/test_index.txt")

count = 0.0
total_count = 0.0
incorrect = 0
incorrect_class = dict()

for line in test_file:
    line_split = line.split(" ")
    img_name = " ".join(line_split[:-1])
    try:
        img_label = int(line_split[-1])
    except:
        print img_label+"broke"
    img = caffe.io.load_image(img_name,False)
    img = img.transpose((2,0,1))
    img = img[None,:]
    out = net.forward_all(data = img)
    prob = net.blobs['prob'].data[0]
    index = np.argmax(prob)
    print "input: ",convert_74k_label(img_label),"class: ",convert_74k_label(index)
    if(index == img_label ):
        count += 1
    else :
        print "incorrect classification", convert_74k_label(img_label), " classified as ", convert_74k_label(index)
        incorrect += 1
        incorrect_class[convert_74k_label(img_label)] = convert_74k_label(index)
    total_count +=1
test_file.close()
accuracy = count*100/total_count
print "accuracy = ",accuracy, "%"


In [None]:
## Classify images generated after Segmentation task
# The function is very similar to above, just that we are loading the images which are generated on running segmentation 
# and printing the top 3 predicted labels

base_folder = "temp/segmentation_pdfimage/" #contains the images generated as a result of segmentation.
all_files = os.listdir(base_folder)
all_files = [x for x in all_files if x.startswith("word")]

for img_name in all_files:
    img = caffe.io.load_image(base_folder+img_name, False)
    img = img.transpose((2,0,1))
    img = img[None,:]
    out = net.forward_all(data = img)
    prob = net.blobs['prob'].data[0]
    index = np.argmax(prob)
    sorted_index = np.argsort(prob)
    print convert_74k_label(sorted_index[-1]),"\t",convert_74k_label(sorted_index[-2]),"\t",convert_74k_label(sorted_index[-3])
    