# Methods to detect lined/squared paper

In [None]:
import cv2
import numpy as np
from matplotlib import pyplot as plt
import os

PARENT_DIR = os.path.dirname(os.path.dirname(os.path.realpath("FILEPATH")))
image_name = "018.jpg"
image_path = os.path.join(PARENT_DIR, "images", image_name)
image = cv2.imread(image_path, cv2.IMREAD_COLOR)

# cv2.imread signals failure by returning None instead of raising, so stop
# here with a clear message rather than crashing later inside cv2.split.
if image is None:
    raise FileNotFoundError("Error opening image: " + image_path)

# SHADOW REMOVAL
# For every colour plane: estimate the background (dilate to wipe out thin
# dark strokes, then median-blur), subtract it from the plane, and invert so
# the result is dark content on a light, evenly lit background.
img = np.copy(image)
rgb_planes = cv2.split(img)

result_planes = []
result_norm_planes = []
for plane in rgb_planes:
    dilated_img = cv2.dilate(plane, np.ones((7, 7), np.uint8))
    bg_img = cv2.medianBlur(dilated_img, 21)
    diff_img = 255 - cv2.absdiff(plane, bg_img)
    # Stretch the plane to the full 0..255 range.
    norm_img = cv2.normalize(
        diff_img, None, alpha=0, beta=255,
        norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8UC1,
    )
    result_planes.append(diff_img)
    result_norm_planes.append(norm_img)

result = cv2.merge(result_planes)            # background removed, not normalised
result_norm = cv2.merge(result_norm_planes)  # background removed and normalised

# Show the shadow-free image. OpenCV stores channels as BGR while matplotlib
# expects RGB (and ignores `cmap` for 3-channel data), so convert for display.
plt.imshow(cv2.cvtColor(result_norm, cv2.COLOR_BGR2RGB))

## Hough Transform

In [None]:
# Detect ruled (horizontal) lines on the shadow-free image `result_norm`
# produced by the previous cell and report whether the page looks lined.

# Thresholds for the successive filters below.
MIN_CANDIDATE_LINES = 5   # a filter stage must leave more than this many segments
MIN_LINE_LENGTH_PX = 300  # absolute minimum segment length, in pixels
MIN_LINE_GAP_PX = 25      # minimum vertical spacing between distinct ruled lines
MIN_RULED_LINES = 3       # more than this many separated lines => lined paper

edges = cv2.Canny(result_norm, 50, 150, apertureSize=3)
lines_on_image = result_norm.copy()

lines = cv2.HoughLinesP(edges, 1, np.pi / 180, 100, minLineLength=100, maxLineGap=10)
if lines is None:
    print("No lines detected")
else:
    print("Lines detected:", len(lines))

    # HoughLinesP yields an (N, 1, 4) array of x1, y1, x2, y2. reshape (unlike
    # np.squeeze) keeps the array 2-D even when only one segment was found.
    segments = lines.reshape(-1, 4)

    # Keep near-horizontal segments only: |slope| < 0.1, written as an integer
    # comparison so vertical segments (dx == 0) are rejected without a
    # division by zero.
    dx = segments[:, 2] - segments[:, 0]
    dy = segments[:, 3] - segments[:, 1]
    near_horizontal = 10 * np.abs(dy) < np.abs(dx)
    segments = segments[near_horizontal]

    if len(segments) > MIN_CANDIDATE_LINES:
        # Drop short segments: a ruled line must be longer than a quarter of
        # the image width and longer than MIN_LINE_LENGTH_PX.
        lengths = np.hypot(dx[near_horizontal], dy[near_horizontal])
        min_length = max(image.shape[1] / 4, MIN_LINE_LENGTH_PX)
        segments = segments[lengths > min_length]

        if len(segments) <= MIN_CANDIDATE_LINES:
            # Fall through to the plot below instead of calling exit(), which
            # would shut down the notebook kernel.
            print("No lines detected")
        else:
            # Collapse segments that lie on the same ruled line: sort by y1
            # and keep a segment only when the next one is more than
            # MIN_LINE_GAP_PX below it. The last segment has no successor and
            # is always kept, so the bottom-most line is not lost.
            segments = segments[segments[:, 1].argsort()]
            gaps = np.diff(segments[:, 1])
            segments = segments[np.append(gaps > MIN_LINE_GAP_PX, True)]
            print("Lines kept:", len(segments))

            for x1, y1, x2, y2 in segments:
                # Plain ints keep cv2.line independent of the NumPy scalar type.
                cv2.line(lines_on_image, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 2)

            if len(segments) > MIN_RULED_LINES:
                print("\033[92mThe text is on lined paper\033[0m")

    # Leave `lines` in the (N, 1, 4) layout HoughLinesP uses.
    lines = segments[:, np.newaxis, :]

# Show the image with the detected lines. OpenCV images are BGR while
# matplotlib expects RGB, so convert for display (lines then appear red).
plt.imshow(cv2.cvtColor(lines_on_image, cv2.COLOR_BGR2RGB))

## Evaluation (with Probabilistic Hough Transform)

In [None]:
import cv2
import numpy as np
from matplotlib import pyplot as plt
import os

# Images for squared paper OCR are from 001.jpg to 020.jpg
image_names = [str(i).zfill(3) + ".jpg" for i in range(1, 21)]

PARENT_DIR = os.path.dirname(os.path.dirname(os.path.realpath("FILEPATH")))

# Thresholds for the successive line filters.
MIN_CANDIDATE_LINES = 5   # a filter stage must leave more than this many segments
MIN_LINE_LENGTH_PX = 300  # absolute minimum segment length, in pixels
MIN_LINE_GAP_PX = 25      # minimum vertical spacing between distinct ruled lines
MIN_RULED_LINES = 3       # more than this many separated lines => lined paper


def remove_shadows(image):
    """Return a copy of *image* with uneven lighting removed.

    For every colour plane the background is estimated (dilate, then median
    blur), subtracted from the plane, inverted, and stretched to 0..255.
    """
    norm_planes = []
    for plane in cv2.split(image):
        dilated = cv2.dilate(plane, np.ones((7, 7), np.uint8))
        background = cv2.medianBlur(dilated, 21)
        diff = 255 - cv2.absdiff(plane, background)
        norm_planes.append(
            cv2.normalize(
                diff, None, alpha=0, beta=255,
                norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8UC1,
            )
        )
    return cv2.merge(norm_planes)


def detect_ruled_lines(image):
    """Look for ruled lines in *image* and return an annotated copy.

    Prints the same progress messages as the step-by-step cell. The returned
    image is the shadow-free version of *image*; the kept segments are drawn
    on it only when enough candidates survive the slope and length filters.
    """
    result_norm = remove_shadows(image)
    edges = cv2.Canny(result_norm, 50, 150, apertureSize=3)
    lines_on_image = result_norm.copy()

    lines = cv2.HoughLinesP(edges, 1, np.pi / 180, 100, minLineLength=100, maxLineGap=10)
    if lines is None:
        print("No lines detected")
        return lines_on_image
    print("Lines detected:", len(lines))

    # HoughLinesP yields an (N, 1, 4) array of x1, y1, x2, y2. reshape (unlike
    # np.squeeze) keeps the array 2-D even when only one segment was found.
    segments = lines.reshape(-1, 4)

    # Keep near-horizontal segments only: |slope| < 0.1, written as an integer
    # comparison so vertical segments (dx == 0) never cause a division by zero.
    dx = segments[:, 2] - segments[:, 0]
    dy = segments[:, 3] - segments[:, 1]
    near_horizontal = 10 * np.abs(dy) < np.abs(dx)
    segments = segments[near_horizontal]
    if len(segments) <= MIN_CANDIDATE_LINES:
        return lines_on_image

    # Drop short segments: a ruled line must be longer than a quarter of the
    # image width and longer than MIN_LINE_LENGTH_PX.
    lengths = np.hypot(dx[near_horizontal], dy[near_horizontal])
    segments = segments[lengths > max(image.shape[1] / 4, MIN_LINE_LENGTH_PX)]
    if len(segments) <= MIN_CANDIDATE_LINES:
        return lines_on_image

    # Collapse segments that lie on the same ruled line: sort by y1 and keep a
    # segment only when the next one is more than MIN_LINE_GAP_PX below it.
    # The last segment has no successor and is always kept.
    segments = segments[segments[:, 1].argsort()]
    gaps = np.diff(segments[:, 1])
    segments = segments[np.append(gaps > MIN_LINE_GAP_PX, True)]
    print("Lines kept:", len(segments))

    for x1, y1, x2, y2 in segments:
        # Plain ints keep cv2.line independent of the NumPy scalar type.
        cv2.line(lines_on_image, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 2)

    if len(segments) > MIN_RULED_LINES:
        print("\033[92mThe text is on lined paper\033[0m")

    return lines_on_image


for image_name in image_names:
    print("\nImage:", image_name)
    image = cv2.imread(os.path.join(PARENT_DIR, "images", image_name), cv2.IMREAD_COLOR)

    # cv2.imread returns None on failure; skip the file instead of crashing
    # inside the shadow-removal step.
    if image is None:
        print('Error opening image')
        continue

    lines_on_image = detect_ruled_lines(image)

    # Show the original image next to the annotated one. This now happens for
    # every readable image, including those where too few lines were found.
    # OpenCV images are BGR while matplotlib expects RGB, so convert first.
    plt.subplot(121)
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    plt.title('Original Image')
    plt.xticks([])
    plt.yticks([])
    plt.subplot(122)
    plt.imshow(cv2.cvtColor(lines_on_image, cv2.COLOR_BGR2RGB))
    plt.title('Image with lines')
    plt.xticks([])
    plt.yticks([])
    plt.show()