# OCR on squared paper

In [None]:
import cv2
import numpy as np
from matplotlib import pyplot as plt
import pytesseract
import os
import subprocess

# Make sure pytesseract can locate the Tesseract executable.
# Probe the PATH by asking Tesseract for its version; stdout/stderr are discarded
# so the probe does not dump Tesseract's banner/usage text into the cell output.
try:
    subprocess.call(
        ["tesseract", "--version"],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
except FileNotFoundError:
    # Not on PATH: fall back to a hard-coded Windows install location
    pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

# "FILEPATH" is resolved against the current working directory, so PARENT_DIR
# ends up being the parent of the working directory.
PARENT_DIR = os.path.dirname(os.path.dirname(os.path.realpath("FILEPATH")))
image_name = "012.jpg"
image_path = os.path.join(PARENT_DIR, "images", image_name)
image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
# cv2.imread does not raise on a missing/unreadable file, it returns None;
# fail here with a clear message instead of deep inside OpenCV or Tesseract.
if image is None:
    raise FileNotFoundError("Could not read image: " + image_path)

# Tesseract OCR before processing
print("Before processing:")
print("\"\n" + pytesseract.image_to_string(image) + "\n\"")
# The image was loaded as single-channel grayscale, so it is shown with a gray
# colormap; a BGR->RGB conversion needs a 3- or 4-channel input and would fail.
plt.imshow(image, cmap='gray')

In [None]:
# Binarise the page: image 011.jpg gets a global Otsu threshold, every other
# image gets a local (adaptive mean) threshold.
if image_name == "011.jpg":
    thresh = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
else:
    thresh = cv2.adaptiveThreshold(image, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 11, 2)

# Median blur (aperture 7) followed by a morphological opening with a 5x5 kernel
blurred = cv2.medianBlur(thresh, 7)
opening_kernel = np.ones((5, 5), np.uint8)
opening = cv2.morphologyEx(blurred, cv2.MORPH_OPEN, opening_kernel)
# Optional extra erosion, currently disabled:
# opening = cv2.erode(opening, np.ones((5,5),np.uint8), iterations=1)

# Run Tesseract on the pre-processed image
print("After processing:")
ocr_text = pytesseract.image_to_string(opening)
print("\"\n" + ocr_text + "\n\"")

# Inverted copy of the pre-processed image, shown below
bitwise_not = cv2.bitwise_not(opening)

plt.imshow(bitwise_not, cmap='gray')

In [None]:
# Cut the template out of the image itself: the top-left corner covering one
# fifth of the height and one fifth of the width, binarised with Otsu's method.
page_height, page_width = image.shape[:2]
top_left = image[:page_height // 5, :page_width // 5]
otsu_level, template = cv2.threshold(top_left, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
plt.imshow(template, cmap='gray')

In [None]:
# Estimate the line thickness from the template: count the black pixels and the
# black strings (horizontal runs of consecutive black pixels), then average.
is_black = template == 0
black_pixels = int(is_black.sum())

# A run starts at every black pixel whose left neighbour is not black. Pixels in
# the first column always start a new run, so a run never continues from the end
# of one row into the beginning of the next.
run_starts = is_black.copy()
run_starts[:, 1:] &= ~is_black[:, :-1]
black_strings = int(run_starts.sum())
print("Black strings: ", black_strings)
print("Black pixels: ", black_pixels)

if black_strings == 0:
    # Without any black pixel there is nothing to average over
    raise ValueError("No black pixels found in the template; cannot estimate the line width")

scaling = 0.8  # The line width might be less thick than the value found
# Clamp to 1 so the result is always usable as a structuring-element size
line_width = max(1, int((black_pixels / black_strings) * scaling))
print("Average black string length: ", line_width)

In [None]:
# Structuring element for horizontal strokes: a rectangle line_width pixels wide
# and a single pixel high
horizontal_size = (line_width, 1)
horizontal_structure = cv2.getStructuringElement(cv2.MORPH_RECT, horizontal_size)

# Open the inverted image twice with that element to pull out the horizontal lines
horizontal = cv2.morphologyEx(
    bitwise_not,
    cv2.MORPH_OPEN,
    horizontal_structure,
    iterations=2,
)
plt.imshow(horizontal, cmap='gray')

In [None]:
# Erode with a one-row kernel spanning 8% of the image width, so that only
# horizontal lines longer than that survive
min_horizontal_run = int(image.shape[1] * 0.08)
horizontal_run_kernel = np.ones((1, min_horizontal_run), np.uint8)
horizontal = cv2.erode(horizontal, horizontal_run_kernel, iterations=1)
plt.imshow(horizontal, cmap='gray')

In [None]:
# Structuring element for vertical strokes: a rectangle a single pixel wide and
# line_width pixels high
vertical_size = (1, line_width)
vertical_structure = cv2.getStructuringElement(cv2.MORPH_RECT, vertical_size)

# Open the inverted image twice with that element to pull out the vertical lines
vertical = cv2.morphologyEx(
    bitwise_not,
    cv2.MORPH_OPEN,
    vertical_structure,
    iterations=2,
)
plt.imshow(vertical, cmap='gray')

In [None]:
# Erode with a one-column kernel spanning 8% of the image height, so that only
# vertical lines longer than that survive
min_vertical_run = int(image.shape[0] * 0.08)
vertical_run_kernel = np.ones((min_vertical_run, 1), np.uint8)
vertical = cv2.erode(vertical, vertical_run_kernel, iterations=1)
plt.imshow(vertical, cmap='gray')

In [None]:
# Merge the horizontal and vertical line images into one grid image
lines_mask = cv2.add(horizontal, vertical)
plt.imshow(lines_mask, cmap='gray')

# Invert it, then erode with a 3x3 kernel (two passes): erosion shrinks the
# bright areas, which makes the grid lines thicker
inverted_mask = cv2.bitwise_not(lines_mask)
thicken_kernel = np.ones((3, 3), np.uint8)
grid = cv2.erode(inverted_mask, thicken_kernel, iterations=2)

plt.imshow(grid, cmap='gray')

In [None]:
# Remove grid lines from the image by masking the inverted image with the grid mask
cleaned = cv2.bitwise_and(bitwise_not, grid)

# Smooth, then open with a small kernel to remove noise
cleaned = cv2.GaussianBlur(cleaned, (5, 5), 0)
cleaned = cv2.morphologyEx(cleaned, cv2.MORPH_OPEN, np.ones((2, 2), np.uint8), iterations=2)

# Dilate with a 3x3 rectangle anchored at its centre
dilatation_size = 1
dilation_shape = cv2.MORPH_RECT
element = cv2.getStructuringElement(dilation_shape, (2 * dilatation_size + 1, 2 * dilatation_size + 1),
                                    (dilatation_size, dilatation_size))
dilated = cv2.dilate(cleaned, element)

# Tesseract OCR after processing
# `dilated` descends from the inverted image, so (assuming dark ink on light
# paper) its text is light on a dark background. Tesseract 4+ expects dark text
# on a light background, so OCR runs on a re-inverted copy; the morphology
# above is left untouched.
ocr_input = cv2.bitwise_not(dilated)
print("After processing:")
print("\"\n" + pytesseract.image_to_string(ocr_input) + "\n\"")

plt.imshow(dilated, cmap='gray')