In [2]:
import os
import shutil

import cv2
import imutils
import numpy as np
import pytesseract

Import image.

Take the ratio of original height to new height. Maintain a copy of original image and resize the image.

In [3]:
# Load the input photo. cv2.imread does not raise on a missing or undecodable
# file -- it returns None -- so fail here with a clear message instead of an
# AttributeError on the next line.
image = cv2.imread('test_img_3.jpg')
if image is None:
    raise FileNotFoundError("Could not read image 'test_img_3.jpg'")
# Scale factor between the original image and the 500 px-tall working copy;
# used later to map detected corners back onto the original.
ratio = image.shape[0] / 500.0
# Keep the full-resolution image for the final warp; detect on the small copy.
orig = image.copy()
image = imutils.resize(image, height = 500)

Convert the image to grayscale and blur it to suppress noise. Then detect edges.

In [4]:
# Grayscale conversion followed by a 5x5 Gaussian blur; `gray` holds the
# blurred result.
gray = cv2.GaussianBlur(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY), (5, 5), 0)
# Canny edge map computed from the blurred image (thresholds 15 and 120).
edged = cv2.Canny(gray, 15, 120)

In [5]:
# Preview the blurred grayscale image and its edge map; waitKey(0) blocks
# until a key is pressed, then all windows are closed.
for window_title, frame in (("Image", gray), ("Edged", edged)):
    cv2.imshow(window_title, frame)
cv2.waitKey(0)
cv2.destroyAllWindows()

Find Contours. Keep the 5 largest contours.

In [6]:
# Extract every contour from a copy of the edge map, then keep only the five
# with the largest area (largest first).
contours = cv2.findContours(
    edged.copy(),
    cv2.RETR_LIST,
    cv2.CHAIN_APPROX_SIMPLE,
)
cnts = list(imutils.grab_contours(contours))
cnts.sort(key=cv2.contourArea, reverse=True)
cnts = cnts[:5]

Loop over the contours and approximate each one as a polygon. The first contour whose approximation has 4 vertices is taken as the outline of the document.

In [7]:
# Look for the document outline: the first (i.e. largest) candidate contour
# whose polygonal approximation has exactly four vertices.
# Start from None so a failed search cannot silently reuse a value left over
# from an earlier run of this cell.
screenCnt = None
for c in cnts:
    peri = cv2.arcLength(c, True)
    # Approximation tolerance is 2% of the contour perimeter.
    approx = cv2.approxPolyDP(c, 0.02 * peri, True)
    if len(approx) == 4:
        screenCnt = approx
        break

# Debug view: draw all candidate contours on a copy of the resized image.
img2 = image.copy()
cv2.drawContours(img2, cnts, -1, (255, 0, 0), 2)
cv2.imshow("Test", img2)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Raise only after the debug view so the candidates can still be inspected.
if screenCnt is None:
    raise RuntimeError(
        "No 4-vertex contour found among the largest contours; "
        "cannot locate the document outline"
    )

Display the contours.

In [8]:
# Draw the detected four-point outline onto the resized image (in place) and
# display it until a key is pressed.
outline_color = (0, 255, 0)
outline_thickness = 2
cv2.drawContours(image, [screenCnt], -1, outline_color, outline_thickness)
cv2.imshow("Outlined", image)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Perspective Transform

Re-order the points as: top-left -> top-right -> bottom-right -> bottom-left.

In [9]:
# Flatten the detected outline to a 4x2 array of (x, y) points and scale it by
# `ratio` so the coordinates refer to the original, un-resized image.
pts = ratio * screenCnt.reshape((4, 2))
# Placeholder for the same four points in a fixed order, filled in below.
rect = np.zeros(shape=(4, 2), dtype=np.float32)

Top left point has smallest sum of co-ordinates and highest for bot-right.

In [10]:
# x + y for each point: the minimum goes to slot 0 (top-left) and the maximum
# to slot 2 (bottom-right).
s = pts.sum(axis=1)
rect[0], rect[2] = pts[s.argmin()], pts[s.argmax()]

`np.diff` along axis 1 gives y - x for each point. The top-right point has the smallest difference (large x, small y) and the bottom-left point has the largest difference (small x, large y).

In [11]:
# y - x for each point, kept as a (4, 1) column: the minimum goes to slot 1
# (top-right) and the maximum to slot 3 (bottom-left).
diff = pts[:, 1:] - pts[:, :1]
rect[1], rect[3] = pts[diff.argmin()], pts[diff.argmax()]

The corner points were already multiplied by the original ratio when `pts` was computed, so `rect` is in the coordinates of the full-size image.

Find the width and height of the portion of the doc required

In [12]:
def _edge_length(p, q):
    """Return the Euclidean distance between two (x, y) points."""
    return np.sqrt((p[0] - q[0]) ** 2 + (p[1] - q[1]) ** 2)


# Corners in the order established above.
tl, tr, br, bl = rect

# Bottom and top edge lengths.
widthA = _edge_length(bl, br)
widthB = _edge_length(tl, tr)

# Left and right edge lengths.
heightA = _edge_length(bl, tl)
heightB = _edge_length(br, tr)

# The output size is the longer of each pair of edges, truncated to whole pixels.
maxWidth = int(max(widthA, widthB))
maxHeight = int(max(heightA, heightB))

Construct the destination points

In [13]:
# Corners of the output image, in the same order as `rect`:
# top-left, top-right, bottom-right, bottom-left.
right_edge = maxWidth - 1
bottom_edge = maxHeight - 1
dst = np.array(
    [[0, 0], [right_edge, 0], [right_edge, bottom_edge], [0, bottom_edge]],
    dtype=np.float32,
)

Calculate the perspective transform matrix and warp the original image with it to get the final top-down view of the doc

In [14]:
# Matrix mapping the four ordered source corners onto the output corners,
# then applied to the full-resolution original.
M = cv2.getPerspectiveTransform(rect, dst)
output_size = (maxWidth, maxHeight)
warped = cv2.warpPerspective(orig, M, output_size)

Cleaning the final image a bit

In [15]:
# Convert the warped scan to grayscale and binarize it.
# This cell overwrites `warped`, so guard the colour conversion: re-running
# the cell on an already single-channel image would otherwise make
# cvtColor(BGR2GRAY) raise.
if warped.ndim == 3:
    warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
# Pixels above 80 become 255, the rest 0. `ret` is the threshold value that
# cv2.threshold reports back and is not used further here.
ret, warped = cv2.threshold(warped, 80, 255, cv2.THRESH_BINARY)

In [16]:
# Show the original photo next to the binarized scan, both resized to a
# height of 650 px; blocks until a key is pressed.
for window_title, picture in (("Original", orig), ("Scanned", warped)):
    cv2.imshow(window_title, imutils.resize(picture, height=650))
cv2.waitKey(0)
cv2.destroyAllWindows()

# OCR

Specify the path to tesseract.exe file on the system

In [17]:
# Locate the Tesseract executable without having to edit the notebook:
# an explicit TESSERACT_CMD environment variable wins, then whatever
# `tesseract` is found on PATH, and finally the original hard-coded
# Windows install location.
pytesseract.pytesseract.tesseract_cmd = (
    os.environ.get("TESSERACT_CMD")
    or shutil.which("tesseract")
    or r"D:\Tesseract_OCR\tesseract.exe"
)

Specify the kernel size and structuring-element shape. The kernel size determines the area of the rectangle to be detected: a small value such as (10,10) will lead to small rectangles that pick out individual words instead of complete sentences.

In [18]:
# Rectangular 18x18 structuring element used for the dilation step.
kernel_size = (18, 18)
rect_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, kernel_size)

Applying Dilation on thresholded image and detecting contours.

In [21]:
# Dilate the binarized scan once with the rectangular kernel, then take the
# outer contours of the dilated image as candidate text regions.
# NOTE(review): `warped` was thresholded with THRESH_BINARY, so dilation grows
# the white (non-zero) areas. If the text is dark on a light page this may
# merge background rather than text -- confirm whether an inverted threshold
# was intended.
dilation = cv2.dilate(warped, rect_kernel, iterations=1)
contours = imutils.grab_contours(
    cv2.findContours(dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
)
# Separate copy for drawing boxes on.
im2 = warped.copy()
# Optional debug preview (disabled):
#cv2.imshow("Scanned",imutils.resize(warped,height=650))
#cv2.imshow("Dilated",imutils.resize(dilation,height=650))
#cv2.waitKey(0)
#cv2.destroyAllWindows()

Create a new txt file where our detected text will be stored.

In [22]:
# Create (or truncate) the output text file so each run starts empty.
# The context manager guarantees the handle is closed even on error.
with open("recognized.txt", "w") as file:
    file.write("")

Loop through the identified contours and crop each rectangular part and pass it to pytesseract for generating the text and writing it to the txt file

In [23]:
# OCR every detected region and append its text to recognized.txt.
# - The file is opened once (UTF-8, so non-ASCII OCR output cannot fail on a
#   platform-specific default encoding) and closed by the context manager.
# - Crops are taken from the clean `warped` scan, not from `im2`, so the boxes
#   drawn for the preview never end up in the pixels handed to Tesseract.
# - The return value of cv2.rectangle is not stored, so the `rect` array of
#   ordered document corners is no longer overwritten.
with open("recognized.txt", "a", encoding="utf-8") as file:
    for c in contours:
        x, y, w, h = cv2.boundingRect(c)
        cropped = warped[y:y + h, x:x + w]
        text = pytesseract.image_to_string(cropped)
        file.write(text)
        file.write("\n")
        # Mark the region on the preview copy only.
        cv2.rectangle(im2, (x, y), (x + w, y + h), (255, 0, 0), 2)

In [24]:
# Show the scan with the detected region boxes, resized to a height of 650 px;
# blocks until a key is pressed.
boxes_preview = imutils.resize(im2, height=650)
cv2.imshow("Text", boxes_preview)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [25]:
# Write the binarized scan to disk; the cell's value is imwrite's return value.
output_path = "Scanned_test_img_3.jpg"
cv2.imwrite(output_path, warped)

True