# Functions File

#### This script contains all of the functions that are used in the "Handwritten Data Recognition" file.

#### Installations

In [None]:
# For output cleaning
from IPython.display import clear_output

# Uncomment the lines below to install the required packages.

#!pip install pandas
#!pip install numpy
#!pip install torch
#!pip install matplotlib
#!pip install opencv-python

# Clear the output
clear_output()

#### Imports

In [1]:
# Import the necessary libraries.

import os
import cv2 
import numpy as np
import matplotlib.pyplot as plt
np.set_printoptions()

# Clear the output
clear_output()

#### Functions 

In [12]:
# End line function.
def l_end(n, arr):
    """Count the evidence that a run of non-zero values ends at index ``n``.

    Args:
        n: Index into ``arr`` being examined.
        arr: One-dimensional sequence of numbers (list or NumPy array).
            NOTE(review): presumably a per-row pixel profile of the page --
            confirm against the caller.

    Returns:
        A tuple ``(following_count, previous_count)`` where
        ``following_count`` is the number of values equal to 0 in
        ``arr[n:n + 10]`` and ``previous_count`` is the number of values
        ``>= 4`` among the (up to) 10 values directly before ``n``.
    """
    # For 0 <= n < 10 a negative slice start would wrap around to the end of
    # the sequence, so clamp it to 0 and look at arr[0:n] instead.
    window_start = max(n - 10, 0) if n >= 0 else n - 10

    following_count = sum(1 for value in arr[n:n + 10] if value == 0)
    previous_count = sum(1 for value in arr[window_start:n] if value >= 4)

    return following_count, previous_count

# Line array function.
def l_arr(arr):
    """Collect the indices of ``arr`` that look like line starts and line ends.

    Every index in ``range(5, len(arr) - 5)`` is scored with ``l_start`` and
    ``l_end``; both return a ``(following_count, previous_count)`` pair.

    Args:
        arr: One-dimensional sequence of numbers passed on to ``l_start`` and
            ``l_end``.

    Returns:
        A tuple ``(higher_list, lower_list)``: the indices whose ``l_start``
        counts are ``> 6`` (following) and ``> 4`` (previous), and the indices
        whose ``l_end`` counts are ``> 4`` (following) and ``> 6`` (previous).
    """
    start_candidates = []
    end_candidates = []

    # The first and last 5 positions are never examined.
    for position in range(5, len(arr) - 5):
        s_after, s_before = l_start(position, arr)
        e_after, e_before = l_end(position, arr)

        if s_after > 6 and s_before > 4:
            start_candidates.append(position)

        if e_after > 4 and e_before > 6:
            end_candidates.append(position)

    return start_candidates, end_candidates

# Start line function.
def l_start(n, arr):
    """Count the evidence that a run of non-zero values starts at index ``n``.

    Args:
        n: Index into ``arr`` being examined.
        arr: One-dimensional sequence of numbers (list or NumPy array).
            NOTE(review): presumably a per-row pixel profile of the page --
            confirm against the caller.

    Returns:
        A tuple ``(following_count, previous_count)`` where
        ``following_count`` is the number of values ``>= 4`` in
        ``arr[n:n + 107]`` and ``previous_count`` is the number of values
        equal to 0 among the (up to) 10 values directly before ``n``.
    """
    # NOTE(review): the look-ahead window is 107 samples while l_end uses 10;
    # kept unchanged here -- confirm that the asymmetry is intentional.
    look_ahead = 107

    # For 0 <= n < 10 a negative slice start would wrap around to the end of
    # the sequence, so clamp it to 0 and look at arr[0:n] instead.
    window_start = max(n - 10, 0) if n >= 0 else n - 10

    following_count = sum(1 for value in arr[n:n + look_ahead] if value >= 4)
    previous_count = sum(1 for value in arr[window_start:n] if value == 0)

    return following_count, previous_count

# End line array function.
def l_end_arr(arr, a):
    """List every index of ``arr`` that ``l_end_word`` scores as a word end.

    Args:
        arr: One-dimensional sequence of numbers passed on to ``l_end_word``.
        a: Window size handed to ``l_end_word``; the acceptance thresholds are
            derived from it.

    Returns:
        The indices ``n`` for which the following-count returned by
        ``l_end_word`` is at least ``int(1.5 * a)`` and the previous-count is
        at least ``int(0.7 * a)``, in ascending order.
    """
    def is_word_end(position):
        # l_end_word returns (previous_count, following_count), in that order.
        before, after = l_end_word(position, arr, a)
        return after >= int(1.5 * a) and before >= int(0.7 * a)

    return [position for position in range(len(arr)) if is_word_end(position)]

# End line word function.
def l_end_word(n, arr, a):
    """Count the evidence that a word ends at index ``n``.

    Args:
        n: Index into ``arr`` being examined.
        arr: One-dimensional sequence of numbers (list or NumPy array).
        a: Integer window size; ``a`` values are inspected before ``n`` and
            ``2 * a`` values from ``n`` onwards.

    Returns:
        A tuple ``(previous_count, following_count)`` -- note the order --
        where ``previous_count`` is the number of values ``>= 3`` among the
        (up to) ``a`` values directly before ``n`` and ``following_count`` is
        the number of values ``<= 1`` in ``arr[n:n + 2 * a]``.
    """
    # For 0 <= n < a a negative slice start would wrap around to the end of
    # the sequence, so clamp it to 0 and look at arr[0:n] instead.
    window_start = max(n - a, 0) if n >= 0 else n - a

    following_count = sum(1 for value in arr[n:n + 2 * a] if value <= 1)
    previous_count = sum(1 for value in arr[window_start:n] if value >= 3)

    return previous_count, following_count

# Letter width function.
def w_letter(cnt):
    """Return the mean bounding-box width of the sufficiently large contours.

    Args:
        cnt: Iterable of contours accepted by ``cv2.contourArea`` and
            ``cv2.boundingRect``.

    Returns:
        The sum of the bounding-rectangle widths of all contours whose area
        is at least 21, divided by the number of such contours.

    Raises:
        ZeroDivisionError: If no contour has an area of at least 21.
    """
    # boundingRect yields (x, y, width, height); only the width is needed.
    widths = [
        cv2.boundingRect(contour)[2]
        for contour in cnt
        if cv2.contourArea(contour) >= 21
    ]

    return sum(widths) / len(widths)

# Function to refine the array.
def ref_arr(arr_higher, arr_lower):
    """Reduce runs of nearby indices to one padded boundary per run.

    Within each input, an index is kept when its successor is more than 5
    larger; the last index is always kept. Kept indices of ``arr_higher``
    are shifted by -10 and kept indices of ``arr_lower`` by +10.

    Args:
        arr_higher: Ascending sequence of indices (e.g. the first list
            returned by ``l_arr``).
        arr_lower: Ascending sequence of indices (e.g. the second list
            returned by ``l_arr``).

    Returns:
        A tuple ``(l_higher, l_lower)`` of lists. An empty input yields an
        empty list instead of raising ``IndexError``.
    """
    def _last_of_each_run(values, offset):
        # Nothing detected: there is no final element to keep.
        if len(values) == 0:
            return []
        kept = [
            current + offset
            for current, following in zip(values[:-1], values[1:])
            if following > current + 5
        ]
        kept.append(values[-1] + offset)
        return kept

    return _last_of_each_run(arr_higher, -10), _last_of_each_run(arr_lower, 10)

# Function to refine end word.
def ref_end_word(arr):
    """Keep only the last index of every run of consecutive indices.

    An index is kept when its successor is more than 1 larger; the final
    index is always kept.

    Args:
        arr: Ascending sequence of indices (e.g. the output of
            ``l_end_arr``).

    Returns:
        A list with the kept indices. An empty input yields ``[]`` instead
        of raising ``IndexError``.
    """
    # Nothing detected: there is no final element to keep.
    if len(arr) == 0:
        return []

    refined = [
        current
        for current, following in zip(arr[:-1], arr[1:])
        if following > current + 1
    ]
    refined.append(arr[-1])

    return refined

# Letter segmentation function.
def segmentation(image_lines, lines, index, out_dir='D:/segmented_img/img1'):
    """Cut one text-line image into letter crops and save them as 28x28 JPEGs.

    External contours with an area of at least 51 are taken as letters and
    processed from left to right. A letter whose left edge lies before the
    current word boundary continues the current word; otherwise it becomes
    the first letter of the next word and the boundary advances by one.
    Each crop is written to ``{out_dir}/{line}-{word}-{letter}.jpg`` with
    1-based line, word and letter numbers.

    Args:
        image_lines: Indexable collection of single-line images (NumPy
            arrays accepted by ``cv2.findContours``).
        lines: Indexable collection where ``lines[index]`` holds the
            ascending word-end x positions of that line.
        index: Position of the line to process in both collections.
        out_dir: Directory the crops are written to. The default keeps the
            original hard-coded location.

    Returns:
        None. The crops are written with ``cv2.imwrite``.
        NOTE(review): the return value of ``cv2.imwrite`` is not checked, so
        a failed write (e.g. missing directory) goes unnoticed -- confirm
        this is acceptable.
    """
    line_image = image_lines[index]
    word_ends = lines[index]

    # Contours are searched on a copy; the crops below come from the original.
    cont, _hierarchy = cv2.findContours(
        line_image.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Bounding boxes (x, y, width, height) of the letters, left to right.
    boxes = sorted(
        (cv2.boundingRect(c) for c in cont if cv2.contourArea(c) >= 51),
        key=lambda box: box[0],
    )
    kernel = np.ones((2, 2), np.uint8)

    word_count = 1
    let_index = 0
    boundary = 0  # position in word_ends of the current word's end

    for x, y, width, height in boxes:
        # Once every boundary is used up, remaining letters stay in the
        # current word instead of indexing past the end of word_ends.
        if boundary >= len(word_ends) or x < word_ends[boundary]:
            let_index += 1
        else:
            let_index = 1
            word_count += 1
            boundary += 1

        # 5 px of padding around the letter; the start indices are clamped
        # because a negative slice start would wrap and give an empty crop.
        top = max(y - 5, 0)
        left = max(x - 5, 0)
        crop = line_image[top:y + height + 5, left:x + width + 5]

        # Resize to 28px.
        letter = cv2.resize(crop, dsize=(28, 28), interpolation=cv2.INTER_AREA)
        # Invert, then invert again via 255 - x before dilating. For 8-bit
        # images the two inversions cancel; kept to preserve the output.
        letter = cv2.bitwise_not(letter)
        letter = cv2.dilate(255 - letter, kernel, iterations=1)

        path = '{0}/{1}-{2}-{3}.jpg'.format(
            out_dir, index + 1, word_count, let_index)
        cv2.imwrite(path, letter)