In [1]:
import copy
import cv2 as cv
import math
from matplotlib import pyplot as plt
import numpy as np
from PIL import Image
import pytesseract
import os

In [12]:
def show_wait_destroy(winname, img):
    """
    Show `img` in a window titled `winname`, block until a key is pressed,
    then close that window.
    """
    window_x, window_y = 500, 0  # screen position the window is moved to
    cv.imshow(winname, img)
    cv.moveWindow(winname, window_x, window_y)
    cv.waitKey(0)  # a delay of 0 waits indefinitely for a key press
    cv.destroyWindow(winname)

In [13]:
def getHorizontalLines(image):
    """
    Return an image containing only the long horizontal lines of `image`.

    The input is eroded and then dilated with a 1-pixel-high rectangular
    structuring element whose width is one fifth of the image width. The
    result is also displayed in a blocking window before being returned.

    Parameters:
        image: 2-D array-like with a `.shape`; presumably the binarised
               page (white lines on black) -- confirm against the caller.

    Returns:
        The image produced by the erode/dilate pass.
    """
    cols = image.shape[1]
    # Clamp to 1: an image narrower than 5 px would otherwise request a
    # zero-width structuring element.
    horizontal_size = max(1, cols // 5)
    # Create structure element for extracting horizontal lines through morphology operations
    horizontalStructure = cv.getStructuringElement(cv.MORPH_RECT, (horizontal_size, 1))

    # Apply morphology operations (erode followed by dilate)
    horizontal = cv.erode(image, horizontalStructure)
    horizontal = cv.dilate(horizontal, horizontalStructure)

    # Show extracted horizontal lines
    show_wait_destroy("horizontal", horizontal)
    return horizontal

In [16]:
def getVerticalLines(image):
    """
    Return an image containing only the long vertical lines of `image`.

    The input is eroded and then dilated with a 1-pixel-wide rectangular
    structuring element whose height is one fifth of the image height. The
    result is also displayed in a blocking window before being returned.

    Parameters:
        image: 2-D array-like with a `.shape`; presumably the binarised
               page (white lines on black) -- confirm against the caller.

    Returns:
        The image produced by the erode/dilate pass.
    """
    rows = image.shape[0]
    # Clamp to 1: an image shorter than 5 px would otherwise request a
    # zero-height structuring element.
    verticalsize = max(1, rows // 5)
    # Create structure element for extracting vertical lines through morphology operations
    verticalStructure = cv.getStructuringElement(cv.MORPH_RECT, (1, verticalsize))

    # Apply morphology operations (erode followed by dilate)
    vertical = cv.erode(image, verticalStructure)
    vertical = cv.dilate(vertical, verticalStructure)

    # Show extracted vertical lines
    show_wait_destroy("vertical", vertical)
    return vertical

In [27]:
def getFirstRow(image):
    """
    Scan downwards along the column located a quarter of the way across the
    image and return the index of the first row whose pixel there equals 255.

    Returns None when no pixel in that column equals 255.
    """
    height, width = image.shape[0], image.shape[1]
    probe_col = int(width / 4)
    # Lazily yields matching row indices from top to bottom; only the first is used.
    matches = (y for y in range(height) if image[y, probe_col] == 255)
    return next(matches, None)

In [28]:
def getFirstCol(image):
    """
    Scan rightwards along the row located a quarter of the way down the
    image and return the index of the first column whose pixel there equals 255.

    Returns None when no pixel in that row equals 255.
    """
    height, width = image.shape[0], image.shape[1]
    probe_row = int(height / 4)
    # Lazily yields matching column indices from left to right; only the first is used.
    matches = (x for x in range(width) if image[probe_row, x] == 255)
    return next(matches, None)

In [59]:
def readData(row,col,hlineimage,vlineimage,image):
    """
    Walk the table grid cell by cell, OCR each cell and collect the text.

    The function scans downwards for horizontal lines (pixels equal to 255 in
    `hlineimage`, sampled in the column a quarter of the way across). For each
    horizontal line found it scans rightwards for vertical lines in
    `vlineimage`, sampled 10 rows above that horizontal line. Every region
    between the previous and current line pair is cropped from `image`,
    displayed in a blocking window, and passed to pytesseract.

    Parameters:
        row: index of the first horizontal line (scan starts 8 px below it).
        col: index of the first vertical line (scan starts 8 px right of it).
        hlineimage: image holding only horizontal lines.
        vlineimage: image holding only vertical lines.
        image: image the cell regions are cropped from.

    Returns:
        List of OCR strings, one per cell, in row-major order. An empty list
        is returned when `row` or `col` is None (no grid line was located).
    """
    # The line-finding helpers return None when nothing is found; bail out
    # instead of failing on `None + 8` below.
    if row is None or col is None:
        return []

    data = []
    col_ref = int(hlineimage.shape[1]/4)
    no_rows = hlineimage.shape[0]
    no_cols = vlineimage.shape[1]
    i = row + 8
    j = col + 8
    prev_row = row
    prev_col = col
    while i < no_rows:
        if hlineimage[i,col_ref]==255:
            # Sample vertical lines a few pixels above the horizontal line.
            # Clamp at 0 so a negative index cannot wrap to the bottom rows.
            probe_row = max(i - 10, 0)
            while j < no_cols:
                if vlineimage[probe_row,j]==255:
                    roi = image[prev_row:i+2,prev_col:j+2]
                    show_wait_destroy("cropped images", roi)
                    text = pytesseract.image_to_string(roi)
                    data.append(text)
                    prev_col = j
                    # Jump ahead; presumably to step past the line's thickness.
                    j = j + 7
                else:
                    j = j + 1
            prev_row = i
            # Jump ahead; presumably to step past the line's thickness.
            i = i + 7
            # Rewind the column scan for the next table row.
            j = col + 8
            prev_col = col
        else:
            i = i + 1

    return data

In [113]:
# Ask for the image path, then build the binarised image used for line detection.
img_loc = input("Enter image name or location Ex: imgs/crop.jpg")
image = cv.imread(img_loc)
# cv.imread signals an unreadable or missing file by returning None rather
# than raising, so fail here with a clear message.
if image is None:
    raise FileNotFoundError("Could not read image: {!r}".format(img_loc))
show_wait_destroy("image",image)
gray_image = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
show_wait_destroy("gray", gray_image)
# Invert the grayscale image before thresholding.
gray_invert = cv.bitwise_not(gray_image)
show_wait_destroy("gray_invert", gray_invert)
# Mean adaptive threshold over 15x15 neighbourhoods with constant C = -2.
image_binary = cv.adaptiveThreshold(gray_invert, 255, cv.ADAPTIVE_THRESH_MEAN_C, cv.THRESH_BINARY, 15, -2)
show_wait_destroy("binary", image_binary)

Enter image name or location Ex: imgs/crop.jpgimgs/crop.jpg


In [114]:
# Split the binarised image into two line masks: one holding only the
# horizontal lines and one holding only the vertical lines. Each call also
# opens a blocking preview window.
horizontalLinesImage = getHorizontalLines(image_binary)
verticalLinesImage = getVerticalLines(image_binary)

In [115]:
# Locate the first horizontal and first vertical line; these are the starting
# coordinates for the cell scan. Either value is None when no pixel equal to
# 255 is found along the probed column/row.
row_pixel_ref = getFirstRow(horizontalLinesImage)
col_pixel_ref = getFirstCol(verticalLinesImage)

In [116]:
# OCR every cell of the grid, cropping from the original (colour) image.
# Each cell is shown in a blocking window before it is read.
data = readData(row_pixel_ref,col_pixel_ref,horizontalLinesImage,verticalLinesImage,image)

In [117]:
# Report how many cells were read, followed by the raw OCR strings.
print(len(data))
print(data)

16
['', '‘ 10.10.1996', '', '', '', '0 10 #121549', '', '', '', '', '', 'WVlZZZ7OZVI-I058554 ,', '', '', '', '']


In [118]:
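# Disabled experiment: denoise the colour image before thresholding.
# Note: OpenCV is imported as `cv` in this notebook (not `cv2`), and `image_copy`
# is not defined in the cells shown here.
# denoised_image = cv.fastNlMeansDenoisingColored(image_copy, None, 10, 10, 7, 21)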