In [1]:
import os
import cv2
import time
import boto3
import shutil
import numpy as np

In [2]:
# --- Labelling configuration --------------------------------------------------
# Folders, relative to the notebook's working directory.
source_folder = "./input-images"         # scanned images waiting to be labelled
destination_folder = "./labeled-images"  # annotated copies and handled originals
data_folder = "./dataset"                # location of the training CSV

# Scale applied to the longer side of a bounding box when a region is cropped.
padding_factor = 1.5

# The CSV name embeds the padding factor, e.g. own_data_1.5.csv.
training_file = "%s/own_data_%s.csv" % (data_folder, padding_factor)

In [3]:
# Create every output folder up front. The annotated images are written to
# destination_folder and the training CSV lives in data_folder; without the
# latter the later open() on the training file fails on a fresh checkout.
for output_folder in (destination_folder, data_folder):
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

In [4]:
def extract_image(source_folder, input_file):
    """Load one scanned image and run the interactive labelling pipeline on it.

    Args:
        source_folder: Directory that contains the image.
        input_file: File name of the image inside ``source_folder``.

    Returns:
        dict: ``'data'`` holds the list of serialized pixel sequences returned
        by ``serialize_pixels`` and ``'handle'`` holds the image it returns.

    Raises:
        IOError: If OpenCV cannot read the file as an image.
    """
    image_path = source_folder + '/' + input_file

    # cv2.imread reports failure by returning None instead of raising; without
    # this check the problem only shows up later as an opaque OpenCV error.
    image = cv2.imread(image_path)
    if image is None:
        raise IOError("Could not read image file: {}".format(image_path))

    # Obtain rectangular regions containing digits
    im_th, roi = extract_roi(image)
    # Sort rectangular regions by lines of text
    sorted_rects = sort_roi(roi)
    input_sequences, labeled_image = serialize_pixels(sorted_rects, im_th, image)

    return {'data': input_sequences, 'handle': labeled_image}

In [5]:
def extract_roi(image):
    """Find the bounding boxes of the external contours in a scanned image.

    Args:
        image: BGR image (it is converted with ``cv2.COLOR_BGR2GRAY``).

    Returns:
        tuple: ``(im_th, rects)`` where ``im_th`` is the inverted binary
        image and ``rects`` is the list of bounding rectangles returned by
        ``cv2.boundingRect``, sorted by their second component (top edge ``y``).
    """
    # Convert to grayscale and smooth with a 5x5 Gaussian before thresholding
    im_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    im_gray = cv2.GaussianBlur(im_gray, (5, 5), 0)

    # Inverted binary threshold: pixels above 127 become 0, all others 255
    _, im_th = cv2.threshold(im_gray, 127, 255, cv2.THRESH_BINARY_INV)

    # findContours returns (image, contours, hierarchy) in OpenCV 3.x but
    # (contours, hierarchy) in 2.x and 4.x. The contours are always the
    # second-to-last element, so index from the end instead of unpacking a
    # fixed number of values.
    ctrs = cv2.findContours(im_th.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # One bounding rectangle per contour, ordered top to bottom
    rects = [cv2.boundingRect(ctr) for ctr in ctrs]
    return im_th, sorted(rects, key=lambda rect: rect[1])

In [6]:
def sort_roi(rects):
    """Arrange bounding rectangles in reading order.

    ``rects`` is walked in the order given. A rectangle starts a new text
    line when its top edge (index 1) lies strictly below the bottom edge
    (index 1 + index 3) of the rectangle added just before it; otherwise it
    joins the current line. Each line is then ordered left to right by its
    first component and the lines are concatenated.

    Args:
        rects: Iterable of rectangles indexable as ``(x, y, w, h)``.

    Returns:
        list: The same rectangles, line by line, left to right.
    """
    lines = [[]]
    for box in rects:
        line = lines[-1]
        if line:
            previous = line[-1]
            if box[1] > previous[1] + previous[3]:
                # Starts below the previous rectangle: open a new line.
                line = []
                lines.append(line)
        line.append(box)

    return [box for line in lines for box in sorted(line, key=lambda r: r[0])]

In [7]:
_MIN_ROI_SIDE = 50      # padded windows must be larger than this (pixels) to be labelled
_DIGIT_SIZE = (28, 28)  # size every window is scaled to before it is serialized


def _crop_square(binary, left, top, side):
    """Cut a side x side window out of ``binary``, zero-filling whatever lies outside it."""
    window = np.zeros((side, side), dtype=binary.dtype)
    height, width = binary.shape[:2]
    row_from, row_to = max(top, 0), min(top + side, height)
    col_from, col_to = max(left, 0), min(left + side, width)
    if row_from < row_to and col_from < col_to:
        window[row_from - top:row_to - top, col_from - left:col_to - left] = \
            binary[row_from:row_to, col_from:col_to]
    return window


def serialize_pixels(sorted_rects, im_th, image):
    """Show each candidate region to a human labeller and serialize the labelled pixels.

    For every rectangle a square window is cut from ``im_th``. Its side is the
    longer rectangle side times the module-level ``padding_factor`` and it is
    centred on the rectangle. Windows whose side is not larger than 50 pixels
    are skipped. The window is resized to 28x28, dilated, shown in an OpenCV
    window, and the function blocks until a key is pressed:

    * ``0``-``9``: the digit becomes the label and the window outline plus the
      digit are drawn onto ``image``;
    * ``q`` / ``Q``: labelling stops and the current region is discarded;
    * any other key: the region is recorded with the label ``x``.

    Args:
        sorted_rects: Rectangles indexable as ``(x, y, w, h)``.
        im_th: Two-dimensional thresholded image the windows are cut from.
        image: Image that is annotated in place.

    Returns:
        tuple: ``(input_sequences, image)`` where each sequence is a string
        ``"<label>,<p0>,<p1>,..."`` holding the flattened pixels of one window.
    """
    input_sequences = []

    try:
        for rect in sorted_rects:
            prec_dim = int(max(rect[2], rect[3]) * padding_factor)
            if prec_dim <= _MIN_ROI_SIDE:
                continue

            prec_x = int(rect[0] + (rect[2] - prec_dim) // 2)
            prec_y = int(rect[1] + (rect[3] - prec_dim) // 2)

            # prec_x / prec_y go negative for rectangles near the top/left
            # border. Used directly as slice starts they would wrap around to
            # the far side of the image, so the window is cut with explicit
            # zero padding instead (assumes 0 is the background value of
            # im_th -- TODO confirm against the thresholding step).
            roi = _crop_square(im_th, prec_x, prec_y, prec_dim)

            roi = cv2.resize(roi, _DIGIT_SIZE, interpolation=cv2.INTER_AREA)
            # NOTE(review): (3, 3) sits in the kernel position of cv2.dilate,
            # it is not a kernel size. Left unchanged so the pixel output does
            # not change -- confirm whether a 3x3 kernel was intended.
            roi = cv2.dilate(roi, (3, 3))

            # Flatten to a 1-D, comma-separated sequence of pixel values
            input_pixels = ','.join(str(pixel) for pixel in roi.ravel())

            # Display the region and wait for the human labeller's key press
            cv2.imshow("Resulting Image with Rectangular ROIs", roi)
            # Mask to the low byte: some OpenCV builds return the key code
            # with modifier bits set in the upper bytes.
            keypressed = cv2.waitKey(0) & 0xFF

            if ord('0') <= keypressed <= ord('9'):
                digit = str(keypressed - ord('0'))
                cv2.rectangle(image, (prec_x, prec_y),
                              (prec_x + prec_dim, prec_y + prec_dim), (0, 255, 0), 1)
                cv2.putText(image, digit, (prec_x, prec_y),
                            cv2.FONT_HERSHEY_DUPLEX, 1, (0, 0, 255), 1)
                input_pixels = digit + ',' + input_pixels
            elif keypressed in (ord('Q'), ord('q')):
                break
            else:
                input_pixels = 'x,' + input_pixels

            input_sequences.append(input_pixels)
    finally:
        # Close the preview window even if labelling is interrupted by an error.
        cv2.destroyAllWindows()

    return input_sequences, image

In [8]:
# Names of the entries os.walk reports as files directly inside source_folder
# (sub-folders are not descended into).
files = []
if os.path.exists(source_folder):
    # The first tuple produced by os.walk describes source_folder itself;
    # the fallback covers the case where the walk yields nothing.
    files.extend(next(os.walk(source_folder), (source_folder, [], []))[2])

In [9]:
# Label every collected image, append its labelled samples to the training
# CSV and move the original out of the input folder once it has been handled.
for input_file in files:
    processed_output = extract_image(source_folder, input_file)

    # Save the annotated image under "<name>_processed.<ext>".
    processed_name = "_processed.".join(input_file.rsplit(".", 1))
    cv2.imwrite(destination_folder + "/" + processed_name, processed_output['handle'])

    # Append mode creates the file on first use, and the with-block closes
    # the handle even when a write fails part-way through.
    with open(training_file, "a") as training_file_handle:
        for input_pixel in processed_output['data']:
            # Sequences labelled 'x' were not given a digit and are left out.
            if len(input_pixel) > 0 and input_pixel[0] != 'x':
                training_file_handle.write(input_pixel + "\n")

    # Move the original so it is not offered for labelling again on the next run.
    shutil.move(source_folder + "/" + input_file, destination_folder + "/" + input_file)