# Automatic image filtering

In [None]:
import cv2
import numpy as np
from matplotlib import pyplot as plt
import pytesseract
import subprocess

# Path to tesseract executable (in case it isn't in your PATH).
# Probe the PATH by launching tesseract once; its output is discarded so the
# usage banner does not clutter the console. If the executable cannot be
# found, point pytesseract at the default Windows install location instead.
try:
    subprocess.call(["tesseract"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
except FileNotFoundError:
    pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

In [None]:
def comparison_plot(image1, image2, operation, title1="Source Image"):
    """Display two images side by side with the axis ticks removed.

    ``image1`` goes in the left panel under the title ``title1`` and
    ``image2`` goes in the right panel under the title ``operation``. Both
    are drawn with the 'gray' colormap, then the figure is shown.
    """
    # (subplot position, image, title) for the left and right panels.
    panels = (
        (121, image1, title1),
        (122, image2, operation),
    )
    for position, picture, caption in panels:
        plt.subplot(position)
        plt.imshow(picture, 'gray')
        plt.title(caption)
        # Empty tick lists hide the axis ticks on both axes.
        plt.xticks([])
        plt.yticks([])
    plt.show()

In [None]:
def tesseract_text(image, operation):
    """Run pytesseract on ``image`` and print the recognised text.

    The printed message is the ``operation`` label, a newline, and the text
    returned by ``pytesseract.image_to_string``, wrapped in ANSI colour
    escape sequences.
    """
    recognised = pytesseract.image_to_string(image)
    report = "\n".join((operation, recognised))
    print("\033[92m{}\033[00m".format(report))

In [None]:
def canny_edge_detection(image):
    """Apply Canny edge detection to ``image``, then plot and OCR the result.

    Uses thresholds 80 and 100 with a 3x3 aperture. The edge map is plotted
    next to the input, passed to ``tesseract_text``, and returned.
    """
    label = "Canny edge detection"
    edge_map = cv2.Canny(image, 80, 100, apertureSize=3)
    comparison_plot(image, edge_map, label, "Black and White image")
    tesseract_text(edge_map, label)
    return edge_map

In [None]:
def denoising(image):
    """Denoise ``image`` with ``cv2.fastNlMeansDenoising``, then plot and OCR it.

    The call uses the fixed arguments 30.0, 7 and 21 after the (unused)
    destination argument. The denoised image is plotted next to the input,
    passed to ``tesseract_text``, and returned.
    """
    cleaned = cv2.fastNlMeansDenoising(image, None, 30.0, 7, 21)
    comparison_plot(image, cleaned, "Denoised")
    tesseract_text(cleaned, "Denoising")
    return cleaned

In [None]:
def gaussian_blur(image):
    """Blur ``image`` with a 7x7 Gaussian kernel, then plot and OCR the result.

    The blurred image is plotted next to the input, passed to
    ``tesseract_text``, and returned.
    """
    label = "Blurred"
    kernel_size = (7, 7)
    smoothed = cv2.GaussianBlur(image, kernel_size, 0)
    comparison_plot(image, smoothed, label)
    tesseract_text(smoothed, label)
    return smoothed

In [None]:
def thresholding(image):
    """Return ``image`` thresholded with the BINARY + OTSU flags.

    Calls ``cv2.threshold`` with threshold 0 and maximum value 255. Only the
    thresholded image is returned; the threshold value that ``cv2.threshold``
    also reports is discarded. Unlike the other filters in this file, this
    one neither plots nor runs OCR.
    """
    mode = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    _, binary = cv2.threshold(image, 0, 255, mode)
    return binary

In [None]:
def erosion(image, erosion_size=5):
    """Erode ``image`` with a rectangular kernel, then plot and OCR the result.

    The kernel is a square of side ``2 * erosion_size + 1`` anchored at
    ``(erosion_size, erosion_size)``. The eroded image is plotted next to
    the input, passed to ``tesseract_text``, and returned.
    """
    side = 2 * erosion_size + 1
    anchor = (erosion_size, erosion_size)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (side, side), anchor)

    eroded = cv2.erode(image, kernel)
    comparison_plot(image, eroded, "Eroded")
    tesseract_text(eroded, "Eroded")
    return eroded

In [None]:
def dilation(image, dilation_size=5):
    """Dilate ``image`` with a rectangular kernel, then plot and OCR the result.

    The kernel is a square of side ``2 * dilation_size + 1`` anchored at
    ``(dilation_size, dilation_size)``. The dilated image is plotted next to
    the input, passed to ``tesseract_text``, and returned.
    """
    side = 2 * dilation_size + 1
    anchor = (dilation_size, dilation_size)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (side, side), anchor)

    dilated = cv2.dilate(image, kernel)
    comparison_plot(image, dilated, "Dilation")
    tesseract_text(dilated, "Dilation")
    return dilated

In [None]:
def opening(image, dilation_size=5, iteration=1):
    """Apply a morphological opening to ``image``, then plot and OCR the result.

    The rectangular kernel is a square of side ``2 * dilation_size + 1``
    anchored at ``(dilation_size, dilation_size)``; ``iteration`` is passed
    to ``cv2.morphologyEx`` as ``iterations``. The opened image is plotted
    next to the input, passed to ``tesseract_text``, and returned.
    """
    side = 2 * dilation_size + 1
    anchor = (dilation_size, dilation_size)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (side, side), anchor)

    opened = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel, iterations=iteration)
    comparison_plot(image, opened, "Opening")
    tesseract_text(opened, "Opening")
    return opened

In [None]:
def closing(image, dilation_size=1, iteration=1):
    """Apply a morphological closing to ``image``, then plot and OCR the result.

    The rectangular kernel is a square of side ``2 * dilation_size + 1``
    anchored at ``(dilation_size, dilation_size)``; ``iteration`` is passed
    to ``cv2.morphologyEx`` as ``iterations``. The closed image is plotted
    next to the input, passed to ``tesseract_text``, and returned.
    """
    side = 2 * dilation_size + 1
    anchor = (dilation_size, dilation_size)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (side, side), anchor)

    closed = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel, iterations=iteration)
    comparison_plot(image, closed, "Closing")
    tesseract_text(closed, "Closing")
    return closed

In [None]:
if __name__ == "__main__":
    # Demo pipeline: OCR the original image, then run it through the filters
    # defined above, plotting and OCR-ing each intermediate result.
    plt.rcParams['figure.figsize'] = [15, 10]
    # Alternative sample image: '../images/001.png'
    image_path = '../images/005.jpg'
    img = cv2.imread(image_path)
    # Validate with an explicit raise rather than ``assert``: assertions are
    # stripped when Python runs with -O, which would let a None image reach
    # the calls below.
    if img is None:
        raise OSError(
            "file could not be read, check with os.path.exists(): {!r}".format(image_path)
        )
    tesseract_text(img, "Original")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    canny_res = canny_edge_detection(img)
    denois_res = denoising(canny_res)
    gaussian_res = gaussian_blur(denois_res)
    thresh_res = thresholding(gaussian_res)

    # Two independent morphological branches, both starting from the
    # thresholded image: erosion followed by dilation, and closing followed
    # by opening.
    eroded_res = erosion(thresh_res, 1)
    dilated_res = dilation(eroded_res, 1)
    closing_res = closing(thresh_res, 2, 1)
    opening(closing_res, 2, 1)