In [None]:
import numpy as np
from matplotlib import pyplot as plt
import cv2 as cv
import pytesseract
import subprocess

# Point pytesseract at the Tesseract binary when it is not reachable via PATH.
# The probe only checks that the executable can be launched; its output is
# discarded so the cell is not flooded with Tesseract's console text.
try:
    subprocess.call(
        ["tesseract", "--version"],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
except FileNotFoundError:
    # Fallback to a hard-coded Windows install path; adjust it if Tesseract
    # lives somewhere else on your machine.
    pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

# Load the input image as a 3-channel colour array.
IMAGE_PATH = "../images/014.jpg"
src = cv.imread(IMAGE_PATH, cv.IMREAD_COLOR)

# cv.imread does not raise when the file is missing or unreadable; it returns
# None. Stop here with a clear message instead of failing later inside the
# OpenCV calls that consume the image.
if src is None:
    raise FileNotFoundError('Error opening image: {}'.format(IMAGE_PATH))

# Work on a copy so `src` keeps the untouched original.
img = np.copy(src)

# Shadow removal, applied to every colour plane of `img` independently:
#   1. dilate with a 7x7 kernel, then median-blur (aperture 21) to obtain a
#      background estimate (`bg_img`),
#   2. take 255 - |plane - background| (`diff_img`),
#   3. min-max normalise that difference to the 0..255 range (`norm_img`).
# NOTE: cv.imread loads images in BGR order, so despite its name `rgb_planes`
# holds the B, G and R planes in that order.
rgb_planes = cv.split(img)

# The dilation kernel is the same for every plane, so build it once.
shadow_kernel = np.ones((7, 7), np.uint8)

result_planes = []
result_norm_planes = []
for plane in rgb_planes:
    dilated_img = cv.dilate(plane, shadow_kernel)
    bg_img = cv.medianBlur(dilated_img, 21)
    diff_img = 255 - cv.absdiff(plane, bg_img)
    norm_img = cv.normalize(diff_img, None, alpha=0, beta=255, norm_type=cv.NORM_MINMAX, dtype=cv.CV_8UC1)
    result_planes.append(diff_img)
    result_norm_planes.append(norm_img)

result = cv.merge(result_planes)            # un-normalised variant
result_norm = cv.merge(result_norm_planes)  # normalised variant

# Show the shadow-corrected image. `result_norm` is a 3-channel BGR array:
# matplotlib ignores `cmap` for such input and interprets the channels as RGB,
# so convert explicitly to display the colours correctly.
plt.imshow(cv.cvtColor(result_norm, cv.COLOR_BGR2RGB))
plt.title('Shadow removed (normalised)')
plt.show()

In [None]:
# Threshold the shadow-corrected image.
# The grayscale conversion is guarded so this cell can be re-run: once
# `result_norm` has been collapsed to a single channel, calling cvtColor with
# COLOR_BGR2GRAY on it a second time would raise.
if result_norm.ndim == 3:
    result_norm = cv.cvtColor(result_norm, cv.COLOR_BGR2GRAY)

# Adaptive (local mean) threshold; computed here only for the visual comparison below.
adaptive = cv.adaptiveThreshold(result_norm, 255, cv.ADAPTIVE_THRESH_MEAN_C, cv.THRESH_BINARY, 11, 2)
# Global Otsu threshold. cv.threshold returns (threshold_value, image); keep the image.
otsu = cv.threshold(result_norm, 0, 255, cv.THRESH_BINARY + cv.THRESH_OTSU)[1]

# Working mask `bw`: the Otsu result opened with a 3x3 kernel, then inverted.
# `otsu` itself is left untouched so the plot shows the raw threshold output.
bw = cv.morphologyEx(otsu, cv.MORPH_OPEN, np.ones((3, 3), np.uint8))
bw = cv.bitwise_not(bw)

# Show difference between adaptive and otsu thresholding
fig, (ax_adaptive, ax_otsu) = plt.subplots(1, 2)
for ax, image, title in ((ax_adaptive, adaptive, 'adaptive'), (ax_otsu, otsu, 'otsu')):
    ax.imshow(image, cmap='gray')
    ax.set_title(title)
    ax.set_xticks([])
    ax.set_yticks([])
plt.show()

In [None]:
# Extract the table grid from the binary mask `bw` using morphological opening
# with long, one-pixel-thick structuring elements.
rows, cols = bw.shape[:2]

# Element length is 1/30 of the matching image dimension. Clamp to at least 1:
# the integer division yields 0 for images smaller than 30 px, and
# cv.getStructuringElement rejects a zero-sized kernel.
horizontal_size = max(1, cols // 30)
verticalsize = max(1, rows // 30)

# Create structure elements for extracting horizontal and vertical lines
horizontalStructure = cv.getStructuringElement(cv.MORPH_RECT, (horizontal_size, 1))
verticalStructure = cv.getStructuringElement(cv.MORPH_RECT, (1, verticalsize))

# Opening keeps only the structures that can contain the whole element,
# i.e. long horizontal / vertical runs.
horizontal = cv.morphologyEx(bw, cv.MORPH_OPEN, horizontalStructure, iterations=2)
vertical = cv.morphologyEx(bw, cv.MORPH_OPEN, verticalStructure, iterations=3)

# Only the vertical lines get an extra dilation along their own axis.
vertical = cv.dilate(vertical, verticalStructure)

# Merge both line sets into one grid mask and thicken it by a 3x3 dilation.
grid = cv.add(vertical, horizontal)
grid = cv.dilate(grid, np.ones((3, 3), np.uint8))

# The inverted grid is published under the name `horizontal` because that is
# the variable the grid-removal step reads.
horizontal = cv.bitwise_not(grid)

# Show extracted grid
plt.imshow(horizontal, cmap='gray')
plt.title('Extracted grid (inverted)')
plt.show()

In [None]:
# Remove the grid: AND the binary mask with the inverted grid mask held in
# `horizontal`, then invert the result.
masked = cv.bitwise_and(bw, horizontal)
remove_grid = cv.bitwise_not(masked)

# Doing some opening/closing to remove some major noise
open_kernel = np.ones((5, 5))
close_kernel = np.ones((3, 3))
opening = cv.morphologyEx(remove_grid, cv.MORPH_OPEN, open_kernel, iterations=2)
closing = cv.morphologyEx(opening, cv.MORPH_CLOSE, close_kernel)

plt.rcParams['figure.figsize'] = [16, 10]

# Each stage is OCR'd, its text printed in green (ANSI escape codes), and the
# image drawn in its own subplot so both variants can be compared side by side.
stages = (
    (remove_grid, 'Without opening\n', 'Only removed grid'),
    (closing, 'With opening and closing\n', 'With opening/closing added'),
)
for position, (image, ocr_label, plot_title) in enumerate(stages, start=1):
    text = pytesseract.image_to_string(image)
    print("\033[92m{}\033[00m".format(ocr_label + text))

    plt.subplot(1, 2, position)
    plt.imshow(image, cmap='gray')
    plt.title(plot_title)
    plt.xticks([])
    plt.yticks([])
plt.show()