In [120]:
import cv2
import numpy as np
import pytesseract
import tempfile
import pytesseract
import jellyfish
from PIL import Image
from matplotlib import pyplot as plt
from difflib import SequenceMatcher

In [131]:
# constants and original text
# `ori` is the reference text that compute_accuracy compares every OCR result against.
# Line breaks and runs of spaces are part of the reference, so they count towards the distance.
# NOTE(review): the recorded OCR output of the thresholded image reads "signs prohibiting" and
# "otherwise", while this string has "signs of prohibiting" and "other wise" - confirm that the
# reference really matches the text in the image.
ori = "Parking: You may park anywhere on the campus where there are no signs of prohibiting par-\nking. Keep in mind the carpool hours and park accordingly so you do not get blocked in the\nafternoon\n\nUnder School Age Children:While we love the younger children, it can be disruptive and \ninappropriate to have them on campus during school hours. There may be special times\nthat they may be invited or can accompany a parent volunteer, but other wise we ask that\nyou adhere to our    policy for the benefit of the students and staff."
print(ori)

Parking: You may park anywhere on the campus where there are no signs of prohibiting par-
king. Keep in mind the carpool hours and park accordingly so you do not get blocked in the
afternoon

Under School Age Children:While we love the younger children, it can be disruptive and 
inappropriate to have them on campus during school hours. There may be special times
that they may be invited or can accompany a parent volunteer, but other wise we ask that
you adhere to our    policy for the benefit of the students and staff.


In [197]:
# write a copy of the image tagged with the requested DPI (default 300 dpi)
def set_image_dpi(path, filename, dpi=(300, 300)):
    """Save a copy of the image at ``path`` to ``filename`` with DPI metadata.

    Only the DPI value handed to ``Image.save`` is set; no resize or resample
    call is made here.

    Args:
        path: Location of the source image, opened with ``Image.open``.
        filename: Destination path for the re-saved image.
        dpi: ``(horizontal, vertical)`` DPI pair forwarded to ``Image.save``.
            Defaults to ``(300, 300)``.

    Returns:
        None. The result is the file written to ``filename``.
    """
    # The context manager closes the source file handle once the copy is saved.
    with Image.open(path) as img:
        # Forward the caller's dpi instead of a hard-coded (300, 300).
        img.save(filename, dpi=dpi)

In [198]:
# get grayscale image
def get_grayscale(img):
    """Convert ``img`` with OpenCV's ``COLOR_BGR2GRAY`` code and return the result."""
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    return gray

In [199]:
# implementing accuracy
def similar(a, b):
    """Return difflib's similarity ratio between sequences ``a`` and ``b``.

    The value comes from ``SequenceMatcher.ratio()`` with no junk filter:
    1.0 for identical sequences, 0.0 when nothing matches.
    """
    matcher = SequenceMatcher(None, a, b)
    return matcher.ratio()

In [200]:
def canny(image):
    """Run ``cv2.Canny`` on ``image`` with thresholds 100 and 200 and return its output."""
    low_threshold, high_threshold = 100, 200
    return cv2.Canny(image, low_threshold, high_threshold)

In [201]:
def compute_accuracy(img):
    """OCR ``img`` and score the text against the module-level reference ``ori``.

    Despite the name, the score is the Levenshtein edit distance between
    ``ori`` and the OCR text, so lower values mean a closer match.

    Prints the recognised text followed by the distance.

    Returns:
        The Levenshtein distance computed by ``jellyfish``.
    """
    ocr_text = pytesseract.image_to_string(img)
    print("The converted string is: \n")
    print(ocr_text)

    distance = jellyfish.levenshtein_distance(ori, ocr_text)
    print(distance)
    return distance

In [208]:
# This section contains all helper functions
# Calculate skew angle of an image
def get_skew_angle(cvImage):
    """Estimate the skew angle of the largest text block in ``cvImage``.

    The image is blurred, inverse-Otsu thresholded and dilated with a wide
    kernel so that characters merge into line/paragraph blobs. The angle of
    the minimum-area rectangle around the largest blob is then normalised
    and negated.

    Args:
        cvImage: Image array. Single-channel input is used as is; 3- or
            4-channel input is converted to grayscale first because Otsu
            thresholding needs a single channel.

    Returns:
        The negated, normalised ``minAreaRect`` angle as a float, or ``0.0``
        when no contour is found (for example on a blank image).
    """
    # Work on a copy so the caller's array is never modified.
    newImage = cvImage.copy()

    # Collapse colour input to one channel; cv2.threshold with THRESH_OTSU rejects multi-channel data.
    if newImage.ndim == 3 and newImage.shape[2] == 3:
        newImage = cv2.cvtColor(newImage, cv2.COLOR_BGR2GRAY)
    elif newImage.ndim == 3 and newImage.shape[2] == 4:
        newImage = cv2.cvtColor(newImage, cv2.COLOR_BGRA2GRAY)

    blur = cv2.GaussianBlur(newImage, (9, 9), 0)
    thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

    # Apply dilate to merge text into meaningful lines/paragraphs.
    # Use larger kernel on X axis to merge characters into single line, cancelling out any spaces.
    # But use smaller kernel on Y axis to separate between different blocks of text
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (30, 5))
    dilate = cv2.dilate(thresh, kernel, iterations=5)

    # findContours returns (contours, hierarchy) in OpenCV 4.x and
    # (image, contours, hierarchy) in 3.x; contours is second from the end in both.
    contours = cv2.findContours(dilate, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # Nothing to measure: report "no skew" instead of failing on an empty list.
    if len(contours) == 0:
        return 0.0

    # Only the largest contour is needed, so take the maximum rather than sorting everything.
    largestContour = max(contours, key=cv2.contourArea)
    minAreaRect = cv2.minAreaRect(largestContour)

    # Determine the angle. Convert it to the value that was originally used to obtain skewed image
    # NOTE(review): this normalisation assumes minAreaRect reports angles in [-90, 0);
    # newer OpenCV releases use a different range - confirm against the installed version.
    angle = minAreaRect[-1]
    if angle < -45:
        angle = 90 + angle
    return -1.0 * angle

In [209]:
def rotate_image(cvImage, angle):
    """Return a copy of ``cvImage`` rotated by ``angle`` about its centre.

    The output keeps the input's width and height. Pixels are interpolated
    with ``INTER_CUBIC`` and the border is filled with ``BORDER_REPLICATE``.
    ``angle`` is passed unchanged to ``cv2.getRotationMatrix2D`` with scale 1.0.
    """
    working = cvImage.copy()
    height, width = working.shape[:2]
    pivot = (width // 2, height // 2)
    rotation = cv2.getRotationMatrix2D(pivot, angle, 1.0)
    rotated = cv2.warpAffine(
        working,
        rotation,
        (width, height),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_REPLICATE,
    )
    return rotated

In [241]:
# Baseline pipeline. Each stage overwrites `img`, writes the intermediate image to disk and
# calls compute_accuracy, which prints the OCR text and its Levenshtein distance to `ori`
# (lower is better). The order of the stages matters because every stage consumes the previous `img`.
# Save a copy of the sample tagged with 300 dpi metadata; the new file name is resized.png
set_image_dpi('sample01.png', 'resized.png')
img = cv2.imread('resized.png')
compute_accuracy(img)
# get greyscale of image
img = get_grayscale(img)
cv2.imwrite('greyscale.png', img)
compute_accuracy(img)
# apply median blur (aperture size 3) to the image
img = cv2.medianBlur(img,3)
cv2.imwrite('median_blur.png', img)
# Last expression of the cell, so its return value is also echoed as the cell output.
compute_accuracy(img)

The converted string is: 

Parking: You may park anywhere on the ce
king. Keep in mind the carpool hours and park
afternoon

Under School Age Children:While we love
inappropriate to have them on campus @ )
that they may be invited or can accompany :
you adhere to our _ policy for the benefit of

 

259
The converted string is: 

Parking: You may park anywhere on the ce
king. Keep in mind the carpool hours and park
afternoon

Under School Age Children:While we love
inappropriate to have them on campus @ )
that they may be invited or can accompany :
you adhere to our _ policy for the benefit of

 

259
The converted string is: 

Parking You may park anywhere on the ¢
king Keep in mind the carpool hours and
afternoon

Under Schoo! Age Children:While we love
inappropriate to have them on campus durin
that they may be invited or can accompany y
you adhere to our _ policy for the benefit of t

 

263


263

In [242]:
# Disabled experiment: the grid search is wrapped in a string literal, so this cell only
# evaluates (and echoes) the string and none of the code inside runs.
# The code inside loops over kernel sizes and constants for cv2.adaptiveThreshold and
# calls compute_accuracy for each pair.
'''
# Grid Search to find optimal pair of params
kernel_size = [3, 5, 7, 9, 11]
constants = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
for kernel in kernel_size:
    for constant in constants:
        temp = img.copy()
        temp = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, kernel, constant)
        print('Accuracy with kernel size ' + str(kernel) + ' constant ' + str(constant))
        compute_accuracy(temp)
'''

"\n# Grid Search to find optimal pair of params\nkernel_size = [3, 5, 7, 9, 11]\nconstants = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]\nfor kernel in kernel_size:\n    for constant in constants:\n        temp = img.copy()\n        temp = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, kernel, constant)\n        print('Accuracy with kernel size ' + str(kernel) + ' constant ' + str(constant))\n        compute_accuracy(temp)\n"

In [243]:
# optimal params of (7,4): the last two arguments to adaptiveThreshold below are 7 and 4
# (presumably picked with the disabled grid search cell - confirm).
# `img` is overwritten with the thresholded result, so re-running this cell without
# re-running the earlier cells thresholds an already thresholded image.
img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 7, 4)
cv2.imwrite('adaptive_7_4.png', img)
# Prints the OCR text and its Levenshtein distance to `ori`; the value is also the cell output.
compute_accuracy(img)

The converted string is: 

Parking: You may park anywhere on the campus where there are no signs prohibiting par-
king. Keep in mind the carpool hours and park accordingly so you do not get blocked in the
afternoon

Under School Age Children:While we love the younger children, it can be disruptive and
inappropriate to have them on campus during school hours. There may be special times
that they may be inviled or can accompany a parent volunteer, but otherwise we ask that
you adhere to our _ policy for the benefit of the students and staff.

10


10

In [244]:
# Disabled step: the morphological opening below is wrapped in a string literal, so the cell
# only evaluates (and echoes) the string and `img` is left unchanged.
# The note inside the string says the step was skipped after finding that opening reduced performance.
'''
# skipped after findingout openning reduce performance
# apply opening to the image
kernel = np.ones((3,3),np.uint8)
img = cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel)
cv2.imwrite('opening.png', img)
compute_accuracy(img)'''

"\n# skipped after findingout openning reduce performance\n# apply opening to the image\nkernel = np.ones((3,3),np.uint8)\nimg = cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel)\ncv2.imwrite('opening.png', img)\ncompute_accuracy(img)"

In [245]:
# Deskew: estimate the skew angle of the current `img`, then rotate `img` by the negated estimate.
angle = get_skew_angle(img)
print(angle)
# get_skew_angle already returns the minAreaRect angle multiplied by -1.0, so the value is
# negated a second time here before it reaches rotate_image.
# NOTE(review): the recorded Levenshtein distance is 10 before this step and 11 after it -
# confirm the rotation direction and whether deskewing helps on this sample.
img = rotate_image(img, -1.0 * angle)
cv2.imwrite('deskew.png', img)
# Prints the OCR text and its Levenshtein distance to `ori`; the value is also the cell output.
compute_accuracy(img)

0.9296673536300659
The converted string is: 

Parking: You may park anywhere on the campus where there are no signs prohibiting par-
king. Keep in mind the carpool hours and park accordingly so you do not get blocked in the
afternoon

Under Schoo! Age Children:While we love the younger children, it can be disruptive and
inappropriate to have them on campus during school hours. There may be specia! times
that they may be invited or can accompany a parent volunteer, but otherwise we ask that
you adhere to our _ policy for the benefit of the students and staff.

11


11