In [1]:
import os
import re
from pdf2image import convert_from_path
from tqdm import tqdm
import cv2
import numpy as np
import PIL
from PIL import Image, ImageDraw
from scipy.ndimage.filters import rank_filter
PIL.Image.MAX_IMAGE_PIXELS = None
import pytesseract
from pytesseract import Output

In [2]:
# Source PDF folders (train split, test split).
train_pdfs, test_pdfs = "Dataset/Train", "Dataset/Test"

# Folders that receive the JPEG renderings of those PDFs.
train_imgs, test_imgs = "Dataset/Train_Images", "Dataset/Test_Images"

In [28]:
def pdf2image(pdf_path, img_path):
    """Render every PDF found in ``pdf_path`` to JPEG files in ``img_path``.

    Files are processed in natural order of the digits in their names
    (``TR_2.pdf`` before ``TR_10.pdf``); names without digits come last.
    Directory entries that do not end in ``.pdf`` are ignored.

    The first page of ``<name>.pdf`` is written to ``<name>.jpg``. Any further
    pages are written to ``<name>_p2.jpg``, ``<name>_p3.jpg``, ... so that no
    page overwrites another.

    Args:
        pdf_path: Directory containing the source PDF files.
        img_path: Output directory; created (with parents) if missing.
    """
    os.makedirs(img_path, exist_ok=True)

    def _natural_key(name):
        # Same ordering idea as before (all digits of the name read as one int),
        # but a name without digits no longer crashes on int('').
        digits = re.sub(r'\D', '', name)
        return (0, int(digits), name) if digits else (1, 0, name)

    pdf_list = sorted(
        (entry for entry in os.listdir(pdf_path) if entry.lower().endswith('.pdf')),
        key=_natural_key,
    )

    for pdf in tqdm(pdf_list):
        src_file = os.path.join(pdf_path, pdf)
        # splitext keeps dots inside the stem ("a.b.pdf" -> "a.b").
        stem = os.path.splitext(pdf)[0]

        pages = convert_from_path(src_file)
        for page_no, page in enumerate(pages, start=1):
            # Page 1 keeps the plain name so single-page PDFs map to <name>.jpg.
            suffix = "" if page_no == 1 else "_p{}".format(page_no)
            tgt_file = os.path.join(img_path, stem + suffix + ".jpg")
            page.save(tgt_file, 'JPEG')

In [29]:
# Render the training PDFs to JPEG.
pdf2image(pdf_path=train_pdfs, img_path=train_imgs)

100%|██████████| 190/190 [03:26<00:00,  1.09s/it]


In [30]:
# Render the test PDFs to JPEG.
pdf2image(pdf_path=test_pdfs, img_path=test_imgs)

100%|██████████| 70/70 [01:00<00:00,  1.16it/s]


In [34]:
# Load one training page and preview it raw, in grayscale, and blurred.
sample_path = os.path.join(train_imgs, "TR_1.jpg")
image = cv2.imread(sample_path)
if image is None:
    # cv2.imread reports a missing/unreadable file by returning None instead of
    # raising, which would otherwise surface as an opaque error in cv2.resize.
    raise FileNotFoundError("Could not read image: " + sample_path)

# Fixed 700x700 working size; the page's aspect ratio is not preserved.
image = cv2.resize(image, (700, 700))
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blurred = cv2.GaussianBlur(gray, (3, 3), 0)

# Each window stays up until a key is pressed.
for title, frame in (("image", image), ("gray", gray), ("blurred", blurred)):
    cv2.imshow(title, frame)
    cv2.waitKey(0)
cv2.destroyAllWindows()

In [55]:
# Canny with thresholds derived from the median intensity of the blurred page:
# the band is [(1 - sigma) * median, (1 + sigma) * median], clamped to 0..255.
sigma = 0.75
v = np.median(blurred)
lower = int(max(0, (1.0 - sigma) * v))
upper = int(min(255, (1.0 + sigma) * v))

edged = cv2.Canny(blurred, lower, upper)

# Show the edge map until a key is pressed.
cv2.imshow("edged", edged)
cv2.waitKey(0)
cv2.destroyAllWindows()

## Playground

In [3]:
# grayscale conversion
def get_grayscale(image):
    """Return the single-channel grayscale version of a BGR ``image``."""
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return grayscale
    
# noise removal
def remove_noise(image):
    """Return ``image`` smoothed by a median blur with aperture size 5."""
    denoised = cv2.medianBlur(image, 5)
    return denoised
 
# thresholding
def thresholding(image):
    """Binarise ``image`` using Otsu's automatically selected threshold.

    With ``THRESH_BINARY``, pixels above the chosen threshold become 255 and
    all other pixels become 0.

    Args:
        image: Single-channel image to threshold.

    Returns:
        The binary image (second element of ``cv2.threshold``'s result).
    """
    # Threshold the argument itself, not whatever global `gray` happens to hold.
    # The 0 is a placeholder: THRESH_OTSU makes OpenCV choose the cut-off.
    _, binary = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    return binary

# dilation
def dilate(image):
    """Return ``image`` dilated once with a 5x5 all-ones kernel."""
    square = np.ones((5, 5), dtype=np.uint8)
    dilated = cv2.dilate(image, square, iterations=1)
    return dilated
    
# erosion
def erode(image):
    """Return ``image`` eroded once with a 5x5 all-ones kernel."""
    square = np.ones((5, 5), dtype=np.uint8)
    eroded = cv2.erode(image, square, iterations=1)
    return eroded

# opening - erosion followed by dilation
def opening(image):
    """Return the morphological opening of ``image`` with a 5x5 all-ones kernel."""
    square = np.ones((5, 5), dtype=np.uint8)
    opened = cv2.morphologyEx(image, cv2.MORPH_OPEN, square)
    return opened

# canny edge detection
def canny(image):
    """Return the Canny edge map of ``image`` using thresholds 100 and 200."""
    edge_map = cv2.Canny(image, 100, 200)
    return edge_map

# skew correction
def deskew(image):
    """Rotate a BGR ``image`` by the skew angle estimated from its content.

    The image is converted to grayscale, inverted and Otsu-thresholded; the
    minimum-area rectangle around all non-zero pixels supplies the angle. The
    input is then rotated about its centre at scale 1.0, keeping its size,
    with cubic interpolation and replicated borders.

    Returns:
        The rotated image, same width and height as ``image``.
    """
    inverted = cv2.bitwise_not(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY))
    binary = cv2.threshold(
        inverted, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU
    )[1]

    # np.where yields indices axis by axis, so each point is (row, col).
    points = np.column_stack(np.where(binary > 0))
    rect_angle = cv2.minAreaRect(points)[-1]

    # NOTE(review): the -45 split presumably assumes minAreaRect reports angles
    # in [-90, 0) — confirm against the installed OpenCV version.
    angle = -(90 + rect_angle) if rect_angle < -45 else -rect_angle

    height, width = image.shape[:2]
    pivot = (width // 2, height // 2)
    rotation = cv2.getRotationMatrix2D(pivot, angle, 1.0)
    return cv2.warpAffine(
        image,
        rotation,
        (width, height),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_REPLICATE,
    )

# template matching
def match_template(image, template):
    """Return ``cv2.matchTemplate``'s result for ``template`` in ``image`` (TM_CCOEFF_NORMED)."""
    scores = cv2.matchTemplate(image, template, cv2.TM_CCOEFF_NORMED)
    return scores

In [15]:
# Reload the sample training page so the playground starts from an unmodified image.
sample_path = os.path.join(train_imgs, "TR_1.jpg")
image = cv2.imread(sample_path)
if image is None:
    # cv2.imread returns None (no exception) when the file is missing or unreadable.
    raise FileNotFoundError("Could not read image: " + sample_path)

# Fixed 700x700 working size; the page's aspect ratio is not preserved.
image = cv2.resize(image, (700, 700))
cv2.imshow("image", image)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [16]:
# --- Preview every preprocessing helper on the sample page -------------------
# Results are stored under names that differ from the helper functions
# (`deskewed`, not `deskew`, ...). Rebinding a helper's name to its own output
# replaces the function with an array, and the next run of this code then fails
# with "TypeError: 'numpy.ndarray' object is not callable".
deskewed = deskew(image)
gray = get_grayscale(deskewed)
thresh = thresholding(gray)
rnoise = remove_noise(gray)
dilated_gray = dilate(gray)
eroded = erode(gray)
opened = opening(gray)
edges = canny(gray)

# Window titles are unchanged; each window stays up until a key is pressed.
previews = (
    ("deskew", deskewed),
    ("gray", gray),
    ("thresh", thresh),
    ("rnoise", rnoise),
    ("dilate", dilated_gray),
    ("erode", eroded),
    ("opening", opened),
    ("canny", edges),
)
for title, frame in previews:
    cv2.imshow(title, frame)
    cv2.waitKey(0)
cv2.destroyAllWindows()

# --- Contours of the dilated edge map ----------------------------------------
# Dilating the edges with a 7x4 rectangle for 9 iterations merges nearby edge
# fragments before the outer contours are extracted.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (7, 4))
dilated = cv2.dilate(edges, kernel, iterations=9)

# [-2] picks the contour list whether findContours returns two values
# (contours, hierarchy) or three (image, contours, hierarchy).
contours = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2]

# Draws directly onto `image` (green, thickness 3).
cv2.drawContours(image, contours, -1, (0, 255, 0), 3)
cv2.imshow('Contours', image)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [13]:
ROI_number = 0
for c in contours:
    area = cv2.contourArea(c)
    # Only contours whose area exceeds 1000 get a bounding box.
    if area <= 1000:
        continue
    x, y, w, h = cv2.boundingRect(c)
    top_left = (x, y)
    bottom_right = (x + w, y + h)
    # Boxes accumulate on `image`; the window refreshes and waits for a key after each one.
    cv2.rectangle(image, top_left, bottom_right, (255, 0, 0), 3)
    cv2.imshow('image', image)
    cv2.waitKey()
cv2.destroyAllWindows()