In [1]:
#GUI
from tkinter import *
from tkinter.filedialog import askopenfilename
import tkinter.messagebox
import PIL
import os
from PIL import ImageTk, Image

#Model
import cv2
import numpy as np
import matplotlib.pyplot as plt
import skimage.filters as filters
import torch
import torch.nn as nn
import torch.nn.functional as F
import os

# Device handle used when the network is instantiated (CPU only).
device = torch.device("cpu")

In [2]:
class Model(nn.Module):
    """Convolutional classifier producing log-probabilities over 24 classes.

    Architecture: three blocks of (5x5 convolution with padding 3 ->
    LeakyReLU -> 2x2 max-pool), dropout, then three linear layers applied
    back to back (no non-linearity between them).

    The first convolution has 45 input channels and ``ffnn1`` expects
    ``128 * 7 = 896`` flattened features, which is what an input tensor of
    shape ``(N, 45, 45, 1)`` yields after the three blocks.

    Attribute names and the order of modules inside each ``nn.Sequential``
    define the ``state_dict`` keys, so they must stay as they are for
    previously saved weights to load.
    """

    def __init__(self):
        super().__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(45, 32, kernel_size=5, stride=1, padding=3),
            nn.LeakyReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=5, stride=1, padding=3),
            nn.LeakyReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer3 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=5, stride=1, padding=3),
            nn.LeakyReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.drop_out = nn.Dropout()
        self.ffnn1 = nn.Linear(128*7, 500)
        self.ffnn2 = nn.Linear(500, 250)
        self.ffnn3 = nn.Linear(250, 24)

    def forward(self, x):
        """Return per-class log-probabilities of shape ``(N, 24)`` for batch ``x``."""
        output = self.layer1(x)
        output = self.layer2(output)
        output = self.layer3(output)
        # Flatten everything except the batch dimension.
        output = output.reshape(output.size(0), -1)
        output = self.drop_out(output)
        output = self.ffnn1(output)
        output = self.ffnn2(output)
        output = self.ffnn3(output)
        # Explicit dim: the implicit choice is deprecated and warns on every
        # call. For this 2-D (batch, classes) tensor the implicit choice was
        # already dim=1, so the result is unchanged.
        return F.log_softmax(output, dim=1)
    
# Build the network, place it on the configured device and print its layers.
net = Model()
net = net.to(device)
print(net)
def imshow(image, nrows=1, ncols=1, cmap='gray'):
    """Create an 8x8 inch figure and draw ``image`` on its first subplot.

    Parameters
    ----------
    image : array-like accepted by ``Axes.imshow``.
    nrows, ncols : subplot grid passed to ``plt.subplots``.
    cmap : colormap name forwarded to ``Axes.imshow``.

    Returns
    -------
    (fig, ax) exactly as returned by ``plt.subplots``: ``ax`` is a single
    Axes for a 1x1 grid and an array of Axes otherwise.
    """
    fig, ax = plt.subplots(nrows=nrows, ncols=ncols, figsize=(8, 8))
    # For grids larger than 1x1 `ax` is an array of Axes and has no
    # `.imshow`; `fig.axes[0]` is the first subplot in every case.
    fig.axes[0].imshow(image, cmap=cmap)
    return fig, ax

def define_kernel(kernel_size, std, size):
    """Build a normalised, anisotropic ``kernel_size`` x ``kernel_size`` filter.

    Parameters
    ----------
    kernel_size : int
        Side length of the square kernel.
    std : number
        Scale used for the row offset (``std_X``).
    size : number
        Multiplier giving the column-offset scale, ``std_Y = std * size``.

    Returns
    -------
    numpy.ndarray of shape ``(kernel_size, kernel_size)`` whose entries sum
    to 1.

    Notes
    -----
    The previous implementation derived both offsets from the row index
    (``y = i - half_size``), so the kernel was constant along every row and
    ``std_Y`` was effectively applied to the row offset. ``y`` now follows
    the column index.

    NOTE(review): the coefficient terms match a Laplacian-of-Gaussian, but
    the exponential is *divided* out and uses ``2*std`` rather than
    ``2*std**2`` in its denominators. That part is deliberately left as it
    was so existing results are not disturbed further; confirm whether it
    is intended.
    """
    half_size = kernel_size // 2
    std_X = std
    std_Y = std * size

    offsets = np.arange(kernel_size) - half_size
    x = offsets[:, np.newaxis]  # row offset, varies with i
    y = offsets[np.newaxis, :]  # column offset, varies with j

    exponent = np.exp(-x**2/(std_X*2) - y**2/(std_Y*2))
    x_ = (x**2 - std_X**2)/(2*np.pi*std_X**5*std_Y)
    y_ = (y**2 - std_Y**2)/(2*np.pi*std_Y**5*std_X)
    kernel = (x_ + y_)/exponent
    return kernel/np.sum(kernel)

def get_category(x):
    """Return the entry (or entries) at index ``x`` of ``Labels_her.npy``.

    The label array is read from the current working directory on every
    call; ``x`` may be anything NumPy accepts as an index.
    """
    return np.load('Labels_her.npy')[x]

def predict_value(ip, model):
    """Classify ``ip`` with ``model`` and map the winning index to its label.

    The model is switched to evaluation mode, run without gradient
    tracking, and the index of the largest score along dimension 1 is
    passed to ``get_category``.
    """
    model.eval()
    with torch.no_grad():
        scores = model(ip)
        best_index = torch.max(scores.data, 1)[1]
    return get_category(best_index)

def recognize_image(image_read):
    """Segment the image at path ``image_read`` and classify each segment.

    Steps: load the saved weights from ``her_model``; read the image as
    grayscale and resize it to 400x224; binarise it with a local threshold;
    smooth the binary image with ``define_kernel(5, 11, 7)`` and apply an
    inverted Otsu threshold; take the external contours with area >= 100;
    then, in left-to-right order of their bounding boxes, crop each region
    from the binarised image, resize it to 35x35, pad it to 45x45 and
    classify it.

    Parameters
    ----------
    image_read : str
        Path of the image file.

    Returns
    -------
    list
        One ``predict_value`` result per retained region, ordered by the
        x coordinate of its bounding box.

    Raises
    ------
    ValueError
        If OpenCV cannot read ``image_read`` (``cv2.imread`` returned None).
    """
    net = Model()
    net.load_state_dict(torch.load('her_model', map_location='cpu'))
    print("path=",image_read)

    # cv2.imread signals failure by returning None instead of raising.
    raw = cv2.imread(image_read, cv2.IMREAD_GRAYSCALE)
    if raw is None:
        raise ValueError("Could not read image file: {!r}".format(image_read))
    image = cv2.resize(raw, (400, 224))

    threshold = filters.threshold_local(image, block_size=195, offset=30)
    img = (image > threshold).astype(np.uint8)*255.
    kernel = define_kernel(5, 11, 7)
    img_filtered = cv2.filter2D(img, -1, kernel, borderType = cv2.BORDER_REPLICATE).astype(np.uint8)
    _, img_threshold = cv2.threshold(img_filtered, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)
    img_threshold = 255-img_threshold

    # findContours returns (image, contours, hierarchy) on OpenCV 3.x but
    # (contours, hierarchy) on 2.x/4.x; the contours are always second to last.
    components = cv2.findContours(img_threshold, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # Drop small regions (area below 100), then order the rest left to right.
    boxes = [cv2.boundingRect(c) for c in components if cv2.contourArea(c) >= 100]
    boxes.sort(key=lambda box: box[0])

    predicted = []
    for x, y, w, h in boxes:
        temp_img = img[y:y+h, x:x+w]/255.
        temp_img = cv2.resize(temp_img, (35, 35))
        # Pad 5 px on each side with 1.0 (the value of above-threshold pixels) -> 45x45.
        temp_img = cv2.copyMakeBorder(temp_img, 5, 5, 5, 5, cv2.BORDER_CONSTANT, value= [1.])
        temp_img = torch.from_numpy(temp_img).float()
        temp_img = temp_img.view(-1, 45, 45, 1)
        predicted.append(predict_value(temp_img, net))

    return predicted

Model(
  (layer1): Sequential(
    (0): Conv2d(45, 32, kernel_size=(5, 5), stride=(1, 1), padding=(3, 3))
    (1): LeakyReLU(negative_slope=0.01)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1), padding=(3, 3))
    (1): LeakyReLU(negative_slope=0.01)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer3): Sequential(
    (0): Conv2d(64, 128, kernel_size=(5, 5), stride=(1, 1), padding=(3, 3))
    (1): LeakyReLU(negative_slope=0.01)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (drop_out): Dropout(p=0.5)
  (ffnn1): Linear(in_features=896, out_features=500, bias=True)
  (ffnn2): Linear(in_features=500, out_features=250, bias=True)
  (ffnn3): Linear(in_features=250, out_features=24, bias=True)
)


In [3]:
# Main application window: white background, fixed title and initial size.
win = Tk()
win.title("Handwritten Equation Recognizer")
win.geometry('800x600')
win.configure(background = "WHITE")

# Banner image expected in the current working directory.
path = os.path.join(os.getcwd(), "her_banner.png")

def show_image(lbl, lbl2):
    """Ask the user for an image, preview it in ``lbl`` and show the result in ``lbl2``.

    Parameters
    ----------
    lbl : widget whose ``image`` option receives the 400x224 preview.
    lbl2 : widget whose ``text`` option receives the recognised labels,
        joined with single spaces.

    If the file dialog is cancelled, nothing is changed.
    """
    image_file = askopenfilename(initialdir = os.getcwd(),
                            filetypes = (("Image File", "*.png *.jpg"),
                                        ("Others", "*.*")
                                        ),
                            title = "Select an image"
                           )
    # A cancelled dialog yields an empty value; opening it would raise.
    if not image_file:
        return

    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
    image = ImageTk.PhotoImage(Image.open(image_file).resize((400, 224), Image.LANCZOS))
    lbl.config(image = image)
    # Keep a reference on the widget so the PhotoImage is not garbage-collected.
    lbl.image = image

    # recognize_image returns one array-like per segment; flatten them into a
    # plain string, otherwise Tk renders the list with braces and brackets.
    predictions = recognize_image(image_file)
    lbl2.config(text = " ".join(str(item) for pred in predictions for item in np.ravel(pred)))
    
    

# Containers and static captions.
frame = Frame(win, background = "WHITE")
frame2 = Frame(win, background = "WHITE")
label_inputtext = Label(frame, text = "Opened Image : ", font = ("Google Sans", 12),background = "WHITE")
label_outputtext = Label(frame, text = "Recognized Characters : ", font = ("Google Sans", 12), background = "WHITE")
label_pred = Label(frame2, text = " ", font = ("Google Sans", 20), background = "WHITE")

# Load the banner. Only the image loading is guarded: previously a bare
# `except:` wrapped the whole layout and its body (`lambda: exit()`) did
# nothing, so a missing banner left `label` undefined and no widget gridded.
try:
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
    image = ImageTk.PhotoImage(Image.open(path).resize((400, 224), Image.LANCZOS))
except OSError as err:
    print("Could not load banner image:", err)
    image = None
    # Empty placeholder so "Load an image" still has a widget to update.
    label = Label(frame, background = "WHITE")
else:
    # `image` stays referenced at module level so Tk keeps displaying it.
    label = Label(frame, image=image)

label_inputtext.grid(row = 0, column = 0, rowspan = 2, columnspan = 2, sticky = W)
label_outputtext.grid(row = 6, column = 0, rowspan = 2, columnspan=2, sticky = W)
label_pred.grid(row = 10, column = 0, sticky=S)

label.grid(row = 3, column = 3)
frame.grid()
frame2.grid()

# Menu bar: File (load / exit) and Help (about).
menu = Menu(win)
win.config(menu=menu)
file = Menu(menu, tearoff = False)
help_ = Menu(menu, tearoff = False)

file.add_command(label = "Load an image", command = lambda: show_image(label, label_pred))
# Destroying the root window ends mainloop cleanly; calling exit() from a Tk
# callback instead tears down the whole interpreter or notebook kernel.
file.add_command(label = "Exit Application", command = win.destroy)

help_.add_command(label = "About Application", command = lambda: tkinter.messagebox.showinfo("About", "Handwritten Equation Recognizer Version 0.6-Alpha (2019)"))

menu.add_cascade(label="File", menu=file)
menu.add_cascade(label="Help", menu=help_)

win.mainloop()