## Part 1: Data Preprocessing

In [1]:
from PIL import Image
import numpy as np
import cv2
import os
import copy
from scipy import ndimage
import math


def to_gray(all_image_path):
    """Load every image file in a directory as a denoised gray-scale array.

    For each non-hidden regular file in ``all_image_path`` the image is read
    with OpenCV, converted to gray with the weighted average
    ``Y = 0.3R + 0.59G + 0.11B``, contrast-stretched with ``1.02 * Y + 8`` and
    smoothed with a 5x5 median filter.

    Args:
        all_image_path: Directory containing the images.

    Returns:
        A list of 2-D ``uint8`` arrays, one per image that could be read, in
        ``os.listdir`` order. Hidden entries, sub-directories and files that
        OpenCV cannot decode are skipped.
    """
    list_gray = []

    for img_name in os.listdir(all_image_path):
        # Skip hidden entries such as '.DS_Store'.
        if img_name.startswith('.'):
            continue

        full_path = os.path.join(all_image_path, img_name)
        if not os.path.isfile(full_path):
            # Sub-directories must not fall through to the processing below.
            continue

        img = cv2.imread(full_path)
        if img is None:
            # cv2.imread returns None when the file is not a decodable image.
            continue

        # Weighted average; index 2 carries the 0.3 (red) weight, index 1 the
        # 0.59 (green) weight and index 0 the 0.11 (blue) weight.
        blue, green, red = img[:, :, 0], img[:, :, 1], img[:, :, 2]
        gray = (0.3 * red + 0.11 * blue + 0.59 * green).astype(np.uint8)

        # Linear gray-scale transformation. 1.02 * 255 + 8 exceeds 255, so the
        # result is clipped before it is stored back into uint8.
        stretched = np.clip(1.02 * gray + 8, 0, 255).astype(np.uint8)

        # Median filter to remove noise.
        list_gray.append(cv2.medianBlur(stretched, 5))

    return list_gray

def OTSU_enhance(img_gray, th_begin=0, th_end=256, th_step=1):
    """Search for the Otsu threshold of a gray image.

    Every candidate in ``range(th_begin, th_end, th_step)`` splits the pixels
    into those strictly above the candidate and those at or below it. The
    candidate that maximises ``w0 * w1 * (u0 - u1) ** 2`` (the between-class
    variance, with ``w`` the class proportions and ``u`` the class means) wins.

    Args:
        img_gray: NumPy array of gray values.
        th_begin: First candidate threshold.
        th_end: Exclusive upper bound of the candidates.
        th_step: Step between candidates.

    Returns:
        The best candidate, or 0 when no candidate splits the pixels into two
        non-empty classes.
    """
    best_score = 0
    best_threshold = 0
    total_pixels = img_gray.size

    for candidate in range(th_begin, th_end, th_step):
        above = img_gray > candidate
        at_or_below = img_gray <= candidate
        n_above = np.sum(above)
        n_below = np.sum(at_or_below)

        # Nothing is brighter than this candidate, so nothing will be for any
        # larger one either: stop searching.
        if n_above == 0:
            break
        # Everything is brighter than this candidate: no split yet.
        if n_below == 0:
            continue

        # Proportion and mean of the pixels above the candidate.
        weight_above = float(n_above) / total_pixels
        mean_above = float(np.sum(img_gray * above)) / n_above
        # Proportion and mean of the pixels at or below the candidate.
        weight_below = float(n_below) / total_pixels
        mean_below = float(np.sum(img_gray * at_or_below)) / n_below

        # Between-class variance: the objective being maximised.
        gap = mean_above - mean_below
        score = weight_above * weight_below * gap * gap
        if score > best_score:
            best_score = score
            best_threshold = candidate

    return best_threshold

def to_bin(listG):
    """Binarise each gray image with an inverted threshold at its Otsu level.

    Args:
        listG: Iterable of gray images.

    Returns:
        A list with one inverted binary image per input, in input order. The
        index of each image is printed as it is processed.
    """
    binarized = []
    for position, gray_img in enumerate(listG):
        print(position)
        otsu_level = OTSU_enhance(gray_img)
        # THRESH_BINARY_INV: pixels above the level become 0, the rest 255.
        _, inverted = cv2.threshold(gray_img, otsu_level, 255, cv2.THRESH_BINARY_INV)
        binarized.append(inverted)

    return binarized

In [11]:
def rectify(img):
    """Rotate an image so that a line found by the Hough transform is axis-aligned.

    Canny edges are fed to ``cv2.HoughLines``; the ``(rho, theta)`` pair taken
    from the first entry of the result is turned into a slope angle, folded
    into the range [-45, 45] degrees and used to rotate the image.

    Args:
        img: Image accepted by ``cv2.Canny``.

    Returns:
        The rotated image from ``scipy.ndimage.rotate``. When no line is
        detected the input image is returned unchanged.
    """
    edges = cv2.Canny(img, 50, 150, apertureSize=3)

    # Hough transformation
    lines = cv2.HoughLines(edges, 1, np.pi / 180, 0)
    if lines is None or len(lines) == 0:
        # Nothing to align against (e.g. a blank image).
        return img

    # Same pair the original loop ended on: the last (rho, theta) of lines[0].
    rho, theta = lines[0][-1]
    a = np.cos(theta)
    b = np.sin(theta)
    x0 = a * rho
    y0 = b * rho
    # Two points far apart on the detected line.
    x1 = int(x0 + 1000 * (-b))
    y1 = int(y0 + 1000 * (a))
    x2 = int(x0 - 1000 * (-b))
    y2 = int(y0 - 1000 * (a))

    dx = x2 - x1
    dy = y2 - y1
    if dx == 0:
        # Vertical line: the slope is undefined (this used to raise
        # ZeroDivisionError). The limiting angle of +/-90 degrees folds to 0.
        rotate_angle = 0.0
    else:
        rotate_angle = math.degrees(math.atan(float(dy) / dx))
        # Fold so the image is never rotated by more than 45 degrees.
        if rotate_angle > 45:
            rotate_angle = -90 + rotate_angle
        elif rotate_angle < -45:
            rotate_angle = 90 + rotate_angle

    rotate_img = ndimage.rotate(img, rotate_angle)
    return rotate_img


def get_section(hor_list, th):
    """Group consecutive indices whose value is below a threshold.

    Args:
        hor_list: Sequence of per-row counts.
        th: Threshold; an index belongs to a section when its value is
            strictly less than ``th``.

    Returns:
        A list of sections, each a list of consecutive indices. A section
        that runs to the end of ``hor_list`` is included.
    """
    sections = []
    current = []  # indices of the run currently being collected

    for index, value in enumerate(hor_list):
        if value < th:
            current.append(index)
        elif current:
            sections.append(current)
            current = []

    # Flush a run that reaches the last element; without this it was lost.
    if current:
        sections.append(current)

    return sections

def hor_project(listG, out_dir='../data/hor_num2/'):
    """Crop a horizontal band out of each image and save it as a JPEG.

    For every image the number of zero-valued pixels per row is counted, rows
    whose count is below the 20th percentile are grouped into sections with
    ``get_section``, one section is picked with ``get_num2`` and the
    corresponding rows are written to ``<out_dir>/<index>.jpg``.

    Args:
        listG: Iterable of 2-D images.
        out_dir: Directory the crops are written to. The default fixes the
            previous '..data/hor_num2/' path, which lacked the '/' after '..'.

    Returns:
        None. Images for which no section is found, or whose crop would be
        empty, are skipped.
    """
    for i, img in enumerate(listG):
        print(i)

        # Horizontal projection: zero-valued pixels in each row.
        hor_list = np.count_nonzero(img == 0, axis=1).tolist()

        # Reading area position; according to experiment, th = 20%.
        th = np.percentile(hor_list, 20)
        vv_list = get_section(hor_list, th)
        if not vv_list:
            # get_num2 would return -1 and index an empty list.
            continue

        # get_num2 may merge two sections in place, so index vv_list only
        # after the call.
        num_in_list = get_num2(vv_list)
        band = vv_list[num_in_list]
        # NOTE(review): the end index is exclusive, so the last row of the
        # band is left out; kept as in the original.
        num = img[band[0]:band[-1], :]
        if num.size == 0:
            # A one-row section yields an empty crop that cannot be saved.
            continue

        cv2.imwrite(os.path.join(out_dir, str(i) + '.jpg'), num)

def get_num(vv_list):
    """Return the index of the longest section.

    Args:
        vv_list: Iterable of sized sections.

    Returns:
        Index of the first section with the greatest length, or -1 when
        ``vv_list`` is empty.
    """
    lengths = [len(section) for section in vv_list]
    if not lengths:
        return -1
    # list.index returns the first occurrence, so ties go to the earliest.
    return lengths.index(max(lengths))

def get_num2(vv_list):
    """Merge the first pair of close sections, then pick the widest-spread one.

    The first adjacent pair whose gap (start of the next section minus end of
    the current one) is at most 10 is merged in place; at most one merge is
    performed. ``vv_list`` is modified by that merge, and the returned index
    refers to the modified list.

    Args:
        vv_list: List of non-empty index lists, mutated in place.

    Returns:
        Index of the first section with the largest variance of its indices,
        or -1 when ``vv_list`` is empty.
    """
    # Step 1: merge the first adjacent pair that is close enough.
    for left in range(len(vv_list) - 1):
        gap = vv_list[left + 1][0] - vv_list[left][-1]
        if gap <= 10:
            absorbed = vv_list.pop(left + 1)
            vv_list[left].extend(absorbed)
            break

    # Step 2: choose the section whose indices have the largest variance.
    variances = [np.var(section) for section in vv_list]
    best_index = -1
    best_variance = -1
    for index, spread in enumerate(variances):
        if spread > best_variance:
            best_variance = spread
            best_index = index
    return best_index



In [9]:
hor_img_path = '../data/hor_num/'


def _nonzero_per_column(img):
    """Return, for each column of a 2-D image, the number of non-zero pixels."""
    return np.count_nonzero(img, axis=0).tolist()


def _write_sections(img, sections, prefix, out_dir):
    """Save one crop of ``img`` per column section as ``<prefix>_<k>.jpg``."""
    for ii, cols in enumerate(sections):
        # NOTE(review): the end index is exclusive, so the last column of the
        # section is left out; kept as in the original.
        crop = img[:, cols[0]:cols[-1]]
        if crop.size == 0:
            # A one-column section yields an empty crop that cannot be saved.
            continue
        cv2.imwrite(os.path.join(out_dir, prefix + '_' + str(ii) + '.jpg'), crop)


def ver_project(list_num, out_dir='../data/ver_num2/'):
    """Split each image into column sections and save every section as a JPEG.

    Sections are first located on the Canny edge map of the image. When the
    75th percentile of their widths is below 10, the image is dilated with a
    5x5 rectangular kernel, the sections are recomputed on the dilated image
    and the crops are taken from it; otherwise the crops are taken from the
    original image using the edge-based sections. Files are named
    ``<image index>_<section index>.jpg``.

    Args:
        list_num: Iterable of 2-D images.
        out_dir: Directory the crops are written to.

    Returns:
        None.
    """
    for i, img in enumerate(list_num):
        # Edge detection based on the Canny operator.
        edges = cv2.Canny(img, 50, 150, apertureSize=3)

        # Vertical projection of the edge map.
        edge_sections = get_section2(_nonzero_per_column(edges))
        widths = [cols[-1] - cols[0] for cols in edge_sections]

        # Narrow sections mean the edge map split the content too finely.
        # An image without any section has nothing to measure or write.
        if widths and np.percentile(widths, 75) < 10:
            kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
            dilated = cv2.dilate(img, kernel=kernel)

            # Project the dilated image itself; previously the projection was
            # computed and then ignored in favour of the edge projection.
            dilated_sections = get_section2(_nonzero_per_column(dilated))
            # Every section is written; previously only the last one was
            # because the write sat outside the loop.
            _write_sections(dilated, dilated_sections, str(i), out_dir)
        else:
            _write_sections(img, edge_sections, str(i), out_dir)
                
                
def get_section2(ver_list):
    """Group consecutive indices whose value is positive.

    Args:
        ver_list: Sequence of per-column counts.

    Returns:
        A list of sections, each a list of consecutive indices with a value
        greater than zero. A section that runs to the end of ``ver_list`` is
        included.
    """
    sections = []
    current = []  # indices of the run currently being collected

    for index, value in enumerate(ver_list):
        if value > 0:
            current.append(index)
        elif current:
            sections.append(current)
            current = []

    # Flush a run that reaches the last element; without this it was lost.
    if current:
        sections.append(current)

    return sections

In [82]:
def rename_data(read_path, out_path):
    """Copy images from per-class sub-directories into one flat directory.

    Every non-hidden sub-directory of ``read_path`` is treated as one class.
    Each readable image inside it is written to
    ``<out_path>/<class name>_<counter>.jpg``, with the counter starting at 0
    for every class.

    The sub-directories are discovered and sorted instead of being addressed
    through fixed positions 1..10 of ``os.listdir``, whose order is arbitrary
    and whose length need not be 11.

    Args:
        read_path: Directory containing one sub-directory per class.
        out_path: Directory the renamed images are written to.

    Returns:
        None.
    """
    class_dirs = sorted(
        name for name in os.listdir(read_path)
        if not name.startswith('.')
        and os.path.isdir(os.path.join(read_path, name))
    )
    print(class_dirs)

    for class_name in class_dirs:
        print(class_name)
        class_dir = os.path.join(read_path, class_name)
        n = 0
        for per_img in sorted(os.listdir(class_dir)):
            img = cv2.imread(os.path.join(class_dir, per_img))
            if img is None:
                # Not a decodable image (e.g. a hidden system file).
                continue
            cv2.imwrite(os.path.join(out_path, class_name + '_' + str(n) + '.jpg'), img)
            n += 1

## Part 2: Number Recognition

In [141]:
import random
import scipy.special as S
import matplotlib.pyplot as plt

# Location of the segmented digit images.
all_image_path = "../data/"
num_path = all_image_path + 'num/'

# Train/test split: 20% of the file names are drawn at random for testing,
# every remaining name is used for training.
list_num = os.listdir(num_path)
list_test = random.sample(list_num, int(len(list_num) * 0.2))
_held_out = set(list_test)
list_train = [name for name in list_num if name not in _held_out]

# Network dimensions and learning rate.
input_nodes = 1500
hidden_nodes = 200
output_nodes = 10
learning_rate = 0.05

In [102]:
# Report the dataset size, then the train and test sizes on a second line.
print(len(list_num), f"{len(list_train)} {len(list_test)}", sep="\n")

1215
972 243


In [103]:
class neuralNetwork:
    """Fully connected network with one hidden layer and sigmoid activations.

    Attributes:
        inodes, hnodes, onodes: Number of input, hidden and output nodes.
        wih: Input-to-hidden weights, shape ``(hnodes, inodes)``.
        who: Hidden-to-output weights, shape ``(onodes, hnodes)``.
        lr: Learning rate used by ``train``.
        activation_function: Element-wise activation (logistic sigmoid).
    """

    def __init__(self, inputnodes, hiddennodes, outputnodes, learningrate):
        """Store the layer sizes and draw the initial weights."""
        self.inodes, self.hnodes, self.onodes = inputnodes, hiddennodes, outputnodes

        # Weights are drawn from a normal distribution centred on 0 with a
        # standard deviation of 1/sqrt(number of incoming links). Row j of a
        # matrix holds the weights feeding node j of the next layer.
        self.wih = np.random.normal(0.0, pow(self.inodes, -0.5), (self.hnodes, self.inodes))
        self.who = np.random.normal(0.0, pow(self.hnodes, -0.5), (self.onodes, self.hnodes))

        self.lr = learningrate
        self.activation_function = S.expit

    def _forward(self, inputs):
        """Propagate a column vector; return (hidden_outputs, final_outputs)."""
        hidden_outputs = self.activation_function(np.dot(self.wih, inputs))
        final_outputs = self.activation_function(np.dot(self.who, hidden_outputs))
        return hidden_outputs, final_outputs

    def train(self, inputs_list, targets_list):
        """Update both weight matrices in place from one sample.

        Args:
            inputs_list: Sequence of ``inodes`` input values.
            targets_list: Sequence of ``onodes`` target values.
        """
        inputs = np.array(inputs_list, ndmin=2).T
        targets = np.array(targets_list, ndmin=2).T

        hidden_outputs, final_outputs = self._forward(inputs)

        # The hidden-layer error is back-propagated through the
        # hidden-to-output weights as they were before this update.
        output_errors = targets - final_outputs
        hidden_errors = np.dot(self.who.T, output_errors)

        # error * sigmoid derivative, where the derivative is out * (1 - out)
        output_delta = output_errors * final_outputs * (1.0 - final_outputs)
        hidden_delta = hidden_errors * hidden_outputs * (1.0 - hidden_outputs)

        self.who += self.lr * np.dot(output_delta, np.transpose(hidden_outputs))
        self.wih += self.lr * np.dot(hidden_delta, np.transpose(inputs))

    def query(self, inputs_list):
        """Return the output activations as a column of shape ``(onodes, 1)``.

        Args:
            inputs_list: Sequence of ``inodes`` input values.
        """
        inputs = np.array(inputs_list, ndmin=2).T
        _, final_outputs = self._forward(inputs)
        return final_outputs

In [131]:
# Build the network from the dimensions and learning rate configured above.
n = neuralNetwork(
    inputnodes=input_nodes,
    hiddennodes=hidden_nodes,
    outputnodes=output_nodes,
    learningrate=learning_rate,
)

In [None]:
# Training
import time
start = time.time()

# Decode and normalise every training image once. The samples do not change
# between epochs, so re-reading them from disk on each epoch is wasted work.
train_samples = []
for per in list_train:
    # Skip hidden files such as '.DS_Store'.
    if per[0] == '.':
        continue
    img_raw = cv2.imread(num_path + per, cv2.IMREAD_GRAYSCALE)
    if img_raw is None:
        # Not a decodable image.
        continue
    # cv2.resize takes (width, height): the result has 50 rows and 30
    # columns, i.e. 1500 values. np.asfarray no longer exists in NumPy 2.0,
    # so the conversion to float is done with astype.
    img_n = cv2.resize(img_raw, (30, 50)).astype(np.float64)
    # scale to [0.01, 1.0]
    inputs = ((img_n / 255.0 * 0.99) + 0.01).ravel()

    # The first character of the file name is the digit label.
    targets = np.zeros(output_nodes) + 0.01
    targets[int(per[0])] = 0.99

    train_samples.append((inputs, targets))

epochs = 500
for e in range(epochs):
    print(e)
    for inputs, targets in train_samples:
        n.train(inputs, targets)
end = time.time()
print('Running time: %s Seconds' % (end - start))

In [139]:
# test the neural network
scorecard = []

# go through all records in the test data set
for per in list_test:
    print(per)
    # Skip hidden files such as '.DS_Store'.
    if per[0] == '.':
        continue
    img_raw = cv2.imread(num_path + per, cv2.IMREAD_GRAYSCALE)
    if img_raw is None:
        # Not a decodable image; it cannot be scored.
        continue
    # Same preprocessing as in training. np.asfarray no longer exists in
    # NumPy 2.0, so the conversion to float is done with astype.
    img_n = cv2.resize(img_raw, (30, 50)).astype(np.float64)
    inputs = ((img_n / 255.0 * 0.99) + 0.01).ravel()

    # query the network
    outputs = n.query(inputs)

    # the index of the highest value corresponds to the label
    label = np.argmax(outputs)
    # The first character of the file name is the expected digit.
    correct_label = int(per[0])
    print("Answer label is:",correct_label," ; ",label," is network's answer")

    # 1 when the network's answer matches the expected label, otherwise 0
    scorecard.append(1 if label == correct_label else 0)
        




9_11.jpg
Answer label is: 9  ;  3  is network's answer
8_42.jpg
Answer label is: 8  ;  0  is network's answer
6_55.jpg
Answer label is: 6  ;  6  is network's answer
0_288.jpg
Answer label is: 0  ;  0  is network's answer
8_67.jpg
Answer label is: 8  ;  6  is network's answer
3_3.jpg
Answer label is: 3  ;  3  is network's answer
5_35.jpg
Answer label is: 5  ;  5  is network's answer
2_88.jpg
Answer label is: 2  ;  2  is network's answer
2_117.jpg
Answer label is: 2  ;  2  is network's answer
7_44.jpg
Answer label is: 7  ;  7  is network's answer
1_66.jpg
Answer label is: 1  ;  5  is network's answer
8_36.jpg
Answer label is: 8  ;  8  is network's answer
0_78.jpg
Answer label is: 0  ;  0  is network's answer
0_35.jpg
Answer label is: 0  ;  0  is network's answer
2_45.jpg
Answer label is: 2  ;  2  is network's answer
2_120.jpg
Answer label is: 2  ;  2  is network's answer
9_52.jpg
Answer label is: 9  ;  0  is network's answer
0_293.jpg
Answer label is: 0  ;  0  is network's answer
0_297.j

Answer label is: 9  ;  9  is network's answer
4_94.jpg
Answer label is: 4  ;  4  is network's answer
0_295.jpg
Answer label is: 0  ;  0  is network's answer
5_17.jpg
Answer label is: 5  ;  5  is network's answer
0_18.jpg
Answer label is: 0  ;  0  is network's answer
0_218.jpg
Answer label is: 0  ;  0  is network's answer
0_214.jpg
Answer label is: 0  ;  0  is network's answer
0_303.jpg
Answer label is: 0  ;  0  is network's answer
4_65.jpg
Answer label is: 4  ;  4  is network's answer
0_21.jpg
Answer label is: 0  ;  0  is network's answer
0_277.jpg
Answer label is: 0  ;  0  is network's answer
6_79.jpg
Answer label is: 6  ;  6  is network's answer


In [140]:
# Performance score: the fraction of test samples answered correctly.
scorecard_array = np.asarray(scorecard)
accuracy = scorecard_array.sum() / scorecard_array.size
print("performance = ", accuracy)

performance =  0.8806584362139918
