# Handwritten Data Recognition 

#### This script is a small demo demonstrating how CNN can be used for processing Handwritten Text in images. The script will demonstrate how to extract the text of an image with handwritten text by me. Unfortunately due to time constraints it will only be able to print the text of one image to the console. Of course the image can be changed and the application can be tested with another one. The script is split into 5 different sections as follows:
    1. Preparation of the train/test datasets and images - where the Handwritten A-Z CSV Dataset will be transformed to images and the images of the letters will be split to train and test datasets.
    2. Definition of the CNN model.
    3. Training of the model - where the model will be trained using the train and test letters. Since it could take a lot of time (~3hours), there is an option to load and use an already trained model. 
    4. Preparation of the handwritten image - this includes transformations and processes such as tresholding the image, line detections and segmentations of the different characters. So that the whole string can be split up to different characters.
    5. The output - through the use of the model, the different characters will be recognized and the whole sentence of the image will be printed to the console.

#### Installations

In [1]:
# For output cleaning
from IPython.display import clear_output

# Remove the comments in order to perform the necessary installations!

#!pip install pandas
#!pip install ipynb
#!pip install numpy
#!pip install torch
#!pip install pillow
#!pip install matplotlib
#!pip install torchvision
#!pip install opencv-python

# Clear the output
clear_output()

#### Imports

In [2]:
# Import the necessary libraries.

import cv2
import csv
import torch
import ipynb
import string
import numpy as np
import os, os.path
import pandas as pd
from PIL import Image
from torch import nn, optim
import matplotlib.pyplot as plt
from torchvision import datasets, transforms

# Import the needed functions from functions.ipynb.
from ipynb.fs.full.functions import ref_arr, w_letter, l_end_arr, ref_end_word, l_arr, segmentation

# Adjustments to matplot.
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

# Clear the output
clear_output()

NameError: name 'clear_output' is not defined

### 1. Preparation of the train/test datasets and images.

#### A_Z-Handwritten Dataset will be transformed from CSV  to images.
The dataset will be transformed from csv to images
and they will be saved to the train/test folders.
NOTE - This process takes a bit of time, therefore the already transformed images will be added. Meaning that the following code can be skipped. Otherwise the images from each folder must be deleted and the code must be run.

In [3]:
# Path to csv dataset (change to your location).
csvpath = 'D:\A_Z-Handwritten-Data.csv'

# Open the csv file.
with open(csvpath, newline='') as file:
    # Init variables.
    num = 0
    prev_digit =  None
    letters_list = list(string.ascii_uppercase)
    data = csv.reader(file, delimiter=',', quotechar='|')
    
    # Loop through each row.
    for r in data:
        # Get and remove the first digit, then make the row an array, then an image.
        digit = r.pop(0)
        img_arr = (np.asarray(r)).reshape(28, 28)
        img = Image.fromarray(img_arr.astype('uint8'))
        
        # Check if a new letter has started to be processed, if so reset the counter.
        if str(letters_list[(int)(digit)]) != prev_digit:
            prev_digit = str(letters_list[(int)(digit)])
            num = 0
        
        # Randomly split the images to the training and test folders.
        if np.random.randint(6) != 0:
          path = 'D:/Letters/train/{0}/{1}_{2}.png'.format(prev_digit, str(prev_digit), str(num))
        else :
          path = 'D:/Letters/test/{0}/{1}_{2}.png'.format(prev_digit, str(prev_digit), str(num))
        
        # Increment counter and save the image to the correct directory. 
        num+=1
        img.save(path)

KeyboardInterrupt: 

#### Necessary transformations will be applied to the letter images through torch.

In [4]:
# Create a transform which will be applied to all images.
transform = transforms.Compose([
    # Crop images at center.
    transforms.CenterCrop(21),
    # Resize images to 28px.
    transforms.Resize(28), 
    # Convert image to floattensor shape.
    transforms.ToTensor(),
    # Normalize with mean and standard deviation.
    transforms.Normalize((0.5,0.5,0.5), (0.5,0.5,0.5))
])

# Apply transforms to both training and test images.
train = datasets.ImageFolder('D:/Letters/train', transform=transform)
test = datasets.ImageFolder('D:/Letters/test', transform=transform)

# Init data loaders.
train_loader = torch.utils.data.DataLoader(train, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(test, batch_size=32, shuffle=True)

### 2. Defining a Convolutional Neural Network Model.

In [4]:
# Defining the CNN model.
class Model(nn.Module):
  def __init__(self):
    super().__init__()
    self.layer1 = nn.Linear(2352,784)
    self.layer2 = nn.Linear(784,256)
    self.layer3 = nn.Linear(256,256)
    self.layer4 = nn.Linear(256,128)
    self.layer5 = nn.Linear(128,64)
    self.layer6 = nn.Linear(64,26)
    self.relu = nn.ReLU()
    self.ls = nn.LogSoftmax( dim=1)
  def forward(self, img):
        flattened = img.view(img.shape[0], -1) 
        activation1 = self.relu(self.layer1(flattened))
        activation2 = self.relu(self.layer2(activation1))
        activation3 = self.relu(self.layer3(activation2))
        activation4 = self.relu(self.layer4(activation3))
        activation5 = self.relu(self.layer5(activation4))
        output = self.ls(self.layer6(activation5))
        return output

In [5]:
# Assign the classifier.
model = Model()
# Using the negative log likelihood loss. 
criterion = nn.NLLLoss()
# Define stochastic gradient descen with learning rate and parameters. 
optimizer = optim.SGD(model.parameters(), lr = 0.005)

### 3. Training the model.

NOTE - This process could take around 3 hours due to the large amount of images. For example, 17:15 started, 19:06 first epoch completed, 19:30 second epoch completed, 20:02 third epoch completed.
NOTE-2 - This process may be skipped and you can load the already created, trained model "my_model.pth" to save time. The code for loading the model is bellow.

In [7]:
# Loop three times over the dataset for better accuracy. 
for epoch in range(3):
    running_loss = 0
    for img, lbl in train_loader:
        # Make parameter gradients zero. 
        optimizer.zero_grad()
        outputs = model(img)
        loss = criterion(outputs, lbl)
        # Computer gradient of loss. 
        loss.backward()
        # Iter over all param (tensors) and update their values.
        optimizer.step()
        running_loss += loss.item()
    #print(epoch) for checking progress

0
1
2


#### Load the model.

In [6]:
# Load the model.
model = Model()
model.load_state_dict(torch.load("./my_model.pth"))  

<All keys matched successfully>

### 4. Preparation of the images.

#### Images Thresholding

In [13]:
# Read image, make a copy and get width/height.
image = cv2.imread('D:/test-img.jpg', 1)
image_copy = image.copy()
height = image.shape[0]
width = image.shape[1]

# Resize image.
rsz = int(1320 * height / width)
image = cv2.resize(image_copy, dsize =(1320, rsz), interpolation = cv2.INTER_AREA)

# Reasigning width and height.
height = image.shape[0]
width = image.shape[1]

# Make image gray.
image_grey = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Apply adaptive threshold and make 2 copies.
image_thr = cv2.adaptiveThreshold(image_grey, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 21, 20)
image_cp1 = image_thr.copy()
image_cp2 = image_thr.copy()

# Get the kernel as morph shape.
k = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(3,3))
k1 = np.array([[1,0,1],[0,1,0],[1,0,1]], dtype = np.uint8)

# Remove noise from the image.
final = cv2.morphologyEx(image_thr, cv2.MORPH_CLOSE, k)
final_copy = final.copy()

#### Line Detection

In [14]:
x_count = np.zeros(shape= (height))
for h in range(height):
    for w in range(width):
        if image_thr[h][w] == 255 :
            x_count[h] += 1


# Call line array function to get the higher and lower lines.
higher, lower = l_arr(x_count)

# Call the refine array function to refine the higher and lower lines.
lines_higher, lines_lower = ref_arr(higher, lower)

len_lhigher = len(lines_higher)
len_llower = len(lines_lower)
all_lines = []

# Fill all lines array.
if len_lhigher == len_llower:
    for i in lines_higher:
        final[i][:] = 255
    for i in lines_lower:
        final[i][:] = 255
    for i in range(len_lhigher):
        all_lines.append((lines_higher[i], lines_lower[i]))
else:
    print("Error! Try with another image!")
    
all_lines = np.array(all_lines)
lines_num = len(all_lines)
lines_image = []

# Set the image lines.
for i in range(lines_num):
    lines_image.append(image_cp2[all_lines[i][0]:all_lines[i][1], :])
    
# Letter width detection.
cont, hier = cv2.findContours(final_copy,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
final_copy = np.zeros((final.shape[0], final.shape[1],3), dtype = np.uint8)

# Draw the contours.
cv2.drawContours(image, cont, -1, (0,255,0), 1)

# Set mean width of each letter.
mean_w = w_letter(cont)

# Function to detect new lines.
def nl_detect(all_lines, index, image_thr, mean_w):
    # Arr of zeros.
    count = np.zeros(shape = width)
    for i in range(width):
        for j in range(all_lines[index][0],all_lines[index][1]):
            if image_thr[j][i] == 255:
                # Increment.
                count[i] = count[i] + 1
    # Call function to get array of end lines.
    endlines = l_end_arr(count, int(mean_w))
    # Call function to refine the end lines. 
    end_lines = ref_end_word(endlines)
    for i in end_lines:
        final[all_lines[index][0]:all_lines[index][1], i] = 255
    return end_lines

lines_new = []
r1 = len(lines_image)

# Detect the new lines.
for i in range(r1):
    lines_new.append(nl_detect(all_lines, i, image_thr, mean_w))

# Get the number of new lines.
r2 = len(lines_new)

for i in range(r2):
    lines_new[i].append(width)

####  Segmentations

In [15]:
range1 = len(all_lines)

# Call the segmentation for each line of the image. 
for i in range(range1):
    segmentation(lines_image, lines_new, i)
    
# Segmentation of chars.
char_image = image_cp1.copy()

# Find the contours.
cont, hier = cv2.findContours(char_image,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
for c in cont:
    if cv2.contourArea(c) >= 21:
        x, y, width, height = cv2.boundingRect(c)
        cv2.rectangle(image,(x, y),(x + width, y + height),(0, 255, 0), 2)

#### Get files and paths of the segmented image

In [16]:
# Get file names in the folder and their number.
path, dirs, files = next(os.walk("D:/segmented_img/img1"))

### 5. Printing the handwritten sentence  

In [17]:
# Create transform with normalization.
transform = transforms.Compose([transforms.ToTensor(),
  transforms.Normalize((0.5,0.5,0.5), (0.5,0.5,0.5))
])

data = datasets.ImageFolder('D:/segmented_img/', transform=transform)
# Create loader with length of character files. 
l = torch.utils.data.DataLoader(data, batch_size=len(files))
images, lbls = next(iter(l))
img = images[2].view(1, 2352)

# Init variables.
sentence = ''
c = 1
word_num = []
line_num =[]

# Add the indexex of lines and words.
for f in files:
  word_num.append(f[2])
  line_num.append(f[0])

# Add -2 to check for end of word/line.
word_num.append(-2)
line_num.append(-2)

# Loop through all images.
for i in images:
  img = i.view(1, 2352)
  # Model outputs log probabilities.
  with torch.no_grad():
      out = model(img)
  ps = torch.exp(out)
  amax = ps.argmax(1)
  # Combine the new letter to the sentence. 
  sentence = sentence + str(chr(amax.item()+65))
  # Add space if needed.
  if word_num[c] != word_num[c-1]:
    sentence = sentence + " "
  # Add new line if needed.
  if line_num[c] != line_num[c-1]:
    sentece = sentence + "\n"
  # Increment counter.
  c = c + 1

# Print the handwritten sentence. 
print(sentece)

HEY PIDAJ 



#### Save the model

In [15]:
# Save the model.
torch.save(model.state_dict(), 'new_model.pth')