#### For Cropping

In [None]:
import cv2
import numpy as np

# load image
img = cv2.imread('Scan.pdf2-1.jpg') 
rsz_img = cv2.resize(img, None, fx=0.25, fy=0.25) # resize since image is huge
gray = cv2.cvtColor(rsz_img, cv2.COLOR_BGR2GRAY) # convert to grayscale

# threshold to get just the signature
retval, thresh_gray = cv2.threshold(gray, thresh=100, maxval=255, type=cv2.THRESH_BINARY)

# find where the signature is and make a cropped region
points = np.argwhere(thresh_gray==0) # find where the black pixels are
points = np.fliplr(points) # store them in x,y coordinates instead of row,col indices
x, y, w, h = cv2.boundingRect(points) # create a rectangle around those points
x, y, w, h = x-10, y-10, w+20, h+20 # make the box a little bigger
crop = gray[y:y+h, x:x+w] # create a cropped region of the gray image

# get the thresholded crop
retval, thresh_crop = cv2.threshold(crop, thresh=200, maxval=255, type=cv2.THRESH_BINARY)

# display
cv2.imshow("Cropped and thresholded image", thresh_crop) 
cv2.waitKey(0)

In [None]:
%matplotlib inline

import numpy as np
import matplotlib.pyplot as plt

#define the vertical filter
vertical_filter = [[-1,-2,-1], [0,0,0], [1,2,1]]

#define the horizontal filter
horizontal_filter = [[-1,0,1], [-2,0,2], [-1,0,1]]

#read in the pinwheel image
img = plt.imread('image.jpg')

#get the dimensions of the image
n,m,d = img.shape

#initialize the edges image
edges_img = img.copy()

#loop over all pixels in the image
for row in range(3, n-2):
    for col in range(3, m-2):
        
        #create little local 3x3 box
        local_pixels = img[row-1:row+2, col-1:col+2, 0]
        
        #apply the vertical filter
        vertical_transformed_pixels = vertical_filter*local_pixels
        #remap the vertical score
        vertical_score = vertical_transformed_pixels.sum()/4
        
        #apply the horizontal filter
        horizontal_transformed_pixels = horizontal_filter*local_pixels
        #remap the horizontal score
        horizontal_score = horizontal_transformed_pixels.sum()/4
        
        #combine the horizontal and vertical scores into a total edge score
        edge_score = (vertical_score**2 + horizontal_score**2)**.5
        
        #insert this edge score into the edges image
        edges_img[row, col] = [edge_score]*3

#remap the values in the 0-1 range in case they went out of bounds
edges_img = edges_img/edges_img.max()

In [None]:
# import the necessary packages
import numpy as np
import argparse
import imutils
import cv2
 
def sort_contours(cnts, method="left-to-right"):
    # initialize the reverse flag and sort index
    reverse = False
    i = 0

    # handle if we need to sort in reverse
    if method == "right-to-left" or method == "bottom-to-top":
        reverse = True

    # handle if we are sorting against the y-coordinate rather than
    # the x-coordinate of the bounding box
    if method == "top-to-bottom" or method == "bottom-to-top":
        i = 1

    # construct the list of bounding boxes and sort them from top to
    # bottom
    boundingBoxes = [cv2.boundingRect(c) for c in cnts]
    (cnts, boundingBoxes) = zip(*sorted(zip(cnts, boundingBoxes),
        key=lambda b:b[1][i], reverse=reverse))

    # return the list of sorted contours and bounding boxes
    return (cnts, boundingBoxes)

In [None]:
import cv2
import numpy as np

In [None]:
# Read the image
img = cv2.imread('image.jpg', 0)
 
# Thresholding the image
(thresh, img_bin) = cv2.threshold(img, 128, 255,cv2.THRESH_BINARY| cv2.THRESH_OTSU)# Invert the image
img_bin = 255-img_bin 
cv2.imwrite("Image_bin2.jpg",img_bin)

In [None]:
cv2.imshow(img_bin,)

In [None]:
# Defining a kernel length
kernel_length = np.array(img).shape[1]//80
 
# A verticle kernel of (1 X kernel_length), which will detect all the verticle lines from the image.
verticle_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, kernel_length))# A horizontal kernel of (kernel_length X 1), which will help to detect all the horizontal line from the image.
hori_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_length, 1))# A kernel of (3 X 3) ones.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))

In [None]:
# Morphological operation to detect vertical lines from an image
img_temp1 = cv2.erode(img_bin, verticle_kernel, iterations=3)
verticle_lines_img = cv2.dilate(img_temp1, verticle_kernel, iterations=3)
cv2.imwrite("verticle_lines.jpg",verticle_lines_img)# operation to detect horizontal lines from an image
img_temp2 = cv2.erode(img_bin, hori_kernel, iterations=3)
horizontal_lines_img = cv2.dilate(img_temp2, hori_kernel, iterations=3)
cv2.imwrite("horizontal_lines.jpg",horizontal_lines_img)

In [None]:
# Weighting parameters, this will decide the quantity of an image to be added to make a new image.
alpha = 0.5
beta = 1.0 - alpha# This function helps to add two image with specific weight parameter to get a third image as summation of two image.
img_final_bin = cv2.addWeighted(verticle_lines_img, alpha, horizontal_lines_img, beta, 0.0)
img_final_bin = cv2.erode(~img_final_bin, kernel, iterations=2)
(thresh, img_final_bin) = cv2.threshold(img_final_bin, 128,255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
cv2.imwrite("img_final_bin.jpg",img_final_bin)

In [None]:
# Find contours for image, which will detect all the boxes
im2, contours, hierarchy = cv2.findContours(img_final_bin, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# Sort all the contours by top to bottom.
(contours, boundingBoxes) = sort_contours(contours, method="top-to-bottom")

In [None]:
import cv2
import numpy as np
def box_extraction(img_for_box_extraction_path, cropped_dir_path):
    img = cv2.imread(img_for_box_extraction_path, 0)  # Read the image
    (thresh, img_bin) = cv2.threshold(img, 128, 255,
                                      cv2.THRESH_BINARY | cv2.THRESH_OTSU)  # Thresholding the image
    img_bin = 255-img_bin  # Invert the imagecv2.imwrite("Image_bin.jpg",img_bin)

    # Defining a kernel length
    kernel_length = np.array(img).shape[1]//40

    # A verticle kernel of (1 X kernel_length), which will detect all the verticle lines from the image.
    verticle_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, kernel_length))
    
    # A horizontal kernel of (kernel_length X 1), which will help to detect all the horizontal line from the image.
    hori_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_length, 1))
    
    # A kernel of (3 X 3) ones.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))# Morphological operation to detect verticle lines from an image
    img_temp1 = cv2.erode(img_bin, verticle_kernel, iterations=3)
    verticle_lines_img = cv2.dilate(img_temp1, verticle_kernel, iterations=3)
    cv2.imwrite("verticle_lines.jpg",verticle_lines_img)# Morphological operation to detect horizontal lines from an image
    img_temp2 = cv2.erode(img_bin, hori_kernel, iterations=3)
    horizontal_lines_img = cv2.dilate(img_temp2, hori_kernel, iterations=3)
    cv2.imwrite("horizontal_lines.jpg",horizontal_lines_img)# Weighting parameters, this will decide the quantity of an image to be added to make a new image.
    alpha = 0.5
    beta = 1.0 - alpha
    
    # This function helps to add two image with specific weight parameter to get a third image as summation of two image.
    img_final_bin = cv2.addWeighted(verticle_lines_img, alpha, horizontal_lines_img, beta, 0.0)
    img_final_bin = cv2.erode(~img_final_bin, kernel, iterations=2)
    (thresh, img_final_bin) = cv2.threshold(img_final_bin, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)# For Debugging
    
    # Enable this line to see verticle and horizontal lines in the image which is used to find boxes
    cv2.imwrite("img_final_bin.jpg",img_final_bin)
    
    # Find contours for image, which will detect all the boxes
    im2, contours, hierarchy = cv2.findContours(img_final_bin, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    
    # Sort all the contours by top to bottom.
    (contours, boundingBoxes) = sort_contours(contours, method="top-to-bottom")
    idx = 0
    for c in contours:
        # Returns the location and width,height for every contour
        x, y, w, h = cv2.boundingRect(c)# If the box height is greater then 20, widht is >80, then only save it as a box in "cropped/" folder.
        if (w > 80 and h > 20) and w > 3*h:
            idx += 1
            new_img = img[y:y+h, x:x+w]
            cv2.imwrite(cropped_dir_path+str(idx) + '.png', new_img)
    box_extraction("41.jpg", "./Cropped/")

In [None]:
box_extraction('image.jpg','image2.jpg')

In [None]:
import cv2
import numpy as np

img = cv2.imread('image.jpg')
img_gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(img_gray, 127, 255,0)
contours,hierarchy = cv2.findContours(thresh,2,1)
cnt = contours[0]

hull = cv2.convexHull(cnt,returnPoints = False)
defects = cv2.convexityDefects(cnt,hull)

for i in range(defects.shape[0]):
    s,e,f,d = defects[i,0]
    start = tuple(cnt[s][0])
    end = tuple(cnt[e][0])
    far = tuple(cnt[f][0])
    cv2.line(img,start,end,[0,255,0],2)
    cv2.circle(img,far,5,[0,0,255],-1)

cv2.imshow('img',img)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [70]:
import cv2
%matplotlib inline

import numpy as np
import matplotlib.pyplot as plt

#Load the image in black and white (0 - b/w, 1 - color).
img = cv2.imread('image2.jpg', 0)

#Get the height and width of the image.
h, w = img.shape[:2]
imgray = 255 - img
#Invert the image to be white on black for compatibility with findContours function.
#imgray = 255 - img
#Binarize the image and call it thresh.
ret, thresh = cv2.threshold(imgray, 127, 255, cv2.THRESH_BINARY)


# kernel = cv2.getStructuringElement(cv2.RETR_LIST, (3, 3))
# dilated = cv2.dilate(thresh, kernel, iterations=2)
contours, hierarchy = cv2.findContours(imgray, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)


#Find all the contours in thresh. 
#contours, hierarchy = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
#Calculate bounding rectangles for each contour.
rects = [cv2.boundingRect(cnt) for cnt in contours]

#Calculate the combined bounding rectangle points.
# top_x = min([x for (x, y, w, h) in rects])
# top_y = min([y for (x, y, w, h) in rects])
# bottom_x = max([x+w for (x, y, w, h) in rects])
# bottom_y = max([y+h for (x, y, w, h) in rects])

# #Draw the rectangle on the image
# out = cv2.rectangle(img, (top_x, top_y), (bottom_x, bottom_y), (0, 255, 0), 2)
# #Save it as out.jpg
# cv2.imwrite('out.jpg', out)

In [76]:
import cv2

#Load the image in black and white (0 - b/w, 1 - color).
img = cv2.imread('tilted_filled_temp2.jpg', 0)

#Get the height and width of the image.
h, w = img.shape[:2]

#Invert the image to be white on black for compatibility with findContours function.
imgray = 255 - img
#Binarize the image and call it thresh.
ret, thresh = cv2.threshold(imgray, 127, 255, cv2.THRESH_BINARY)

#Find all the contours in thresh. 
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
#Calculate bounding rectangles for each contour.
rects = [cv2.boundingRect(cnt) for cnt in contours]

In [43]:
def sort_contours(cnts, method="left-to-right"):
    # initialize the reverse flag and sort index
    reverse = False
    i = 0

    # handle if we need to sort in reverse
    if method == "right-to-left" or method == "bottom-to-top":
        reverse = True

    # handle if we are sorting against the y-coordinate rather than
    # the x-coordinate of the bounding box
    if method == "top-to-bottom" or method == "bottom-to-top":
        i = 1

    # construct the list of bounding boxes and sort them from top to
    # bottom
    boundingBoxes = [cv2.boundingRect(c) for c in cnts]
    (cnts, boundingBoxes) = zip(*sorted(zip(cnts, boundingBoxes),
        key=lambda b:b[1][i], reverse=reverse))

    # return the list of sorted contours and bounding boxes
    return (cnts, boundingBoxes)

In [77]:
# sort all rect by their y
rects.sort(key=lambda b: b[1])
# initially the line bottom is set to be the bottom of the first rect
line_bottom = rects[0][1]+rects[0][3]-1
line_begin_idx = 0
for i in range(len(rects)):
    # when a new box's top is below current line's bottom
    # it's a new line
    if rects[i][1] > line_bottom:
        # sort the previous line by their x
        rects[line_begin_idx:i] = sorted(rects[line_begin_idx:i], key=lambda b: b[0])
        line_begin_idx = i
    # regardless if it's a new line or not
    # always update the line bottom
    line_bottom = max(rects[i][1]+rects[i][3]-1, line_bottom)
# sort the last line
rects[line_begin_idx:] = sorted(rects[line_begin_idx:], key=lambda b: b[0])

In [63]:
contours2,boundingBoxes2=sort_contours(contours,'right-to-left')

In [64]:
rects = [cv2.boundingRect(cnt) for cnt in contours2]

In [2]:
cv2.imshow("sub",img)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [79]:
len(rects)

1935

In [78]:
rects

[(45, 285, 524, 207),
 (46, 287, 24, 19),
 (51, 292, 3, 5),
 (53, 263, 525, 207),
 (54, 265, 24, 19),
 (56, 297, 2, 2),
 (57, 293, 2, 3),
 (58, 273, 2, 2),
 (60, 269, 2, 1),
 (60, 271, 2, 2),
 (60, 295, 4, 6),
 (61, 241, 525, 207),
 (62, 243, 24, 19),
 (63, 271, 5, 6),
 (66, 247, 5, 6),
 (66, 294, 24, 20),
 (69, 219, 525, 207),
 (69, 273, 4, 5),
 (70, 299, 4, 6),
 (71, 221, 23, 19),
 (71, 250, 2, 4),
 (74, 228, 3, 3),
 (74, 252, 1, 3),
 (74, 272, 24, 20),
 (76, 305, 1, 2),
 (77, 197, 525, 207),
 (77, 225, 2, 3),
 (77, 254, 2, 3),
 (77, 301, 2, 3),
 (78, 199, 24, 19),
 (78, 251, 2, 2),
 (78, 281, 2, 1),
 (80, 305, 3, 3),
 (81, 227, 3, 6),
 (81, 277, 1, 3),
 (82, 203, 5, 6),
 (82, 250, 24, 20),
 (82, 303, 2, 2),
 (83, 279, 5, 4),
 (85, 175, 525, 207),
 (85, 230, 1, 1),
 (86, 228, 1, 1),
 (86, 255, 4, 6),
 (86, 280, 7, 6),
 (86, 301, 24, 20),
 (87, 177, 24, 19),
 (87, 208, 2, 3),
 (87, 232, 1, 2),
 (90, 207, 4, 4),
 (90, 228, 24, 20),
 (90, 285, 2, 1),
 (90, 306, 4, 6),
 (91, 257, 2, 5),


In [31]:
# rects=sorted(rects, key=lambda x: x[1])
# rects

In [80]:
count=0
for i in rects:
    try:
        x=i[0]
        y=i[1]
        w=i[2]
        h=i[3]
        if ((w>19 and w<27) and (h>13 and h <22)):# and hierarchy[0][count][2]==-1):
        #if (hierarchy[0][count][2]==-1):
            print(i)
            cv2.imwrite(r"C:\Users\Rahul\Desktop\Capstone\OCR\result\img"+str(count)+".jpg",imgray[y:y+h,x:x+w])
        count+=1
    except:
        continue

(46, 287, 24, 19)
(54, 265, 24, 19)
(62, 243, 24, 19)
(66, 294, 24, 20)
(71, 221, 23, 19)
(74, 272, 24, 20)
(78, 199, 24, 19)
(82, 250, 24, 20)
(86, 301, 24, 20)
(87, 177, 24, 19)
(90, 228, 24, 20)
(94, 279, 24, 20)
(95, 155, 24, 19)
(99, 206, 23, 20)
(102, 257, 24, 20)
(103, 133, 24, 20)
(106, 309, 24, 19)
(107, 184, 23, 20)
(110, 235, 24, 20)
(111, 111, 24, 19)
(114, 287, 24, 19)
(115, 162, 24, 20)
(118, 214, 24, 19)
(119, 89, 24, 20)
(122, 265, 24, 19)
(123, 140, 24, 20)
(126, 316, 24, 20)
(127, 67, 24, 20)
(127, 191, 23, 20)
(130, 243, 24, 19)
(131, 118, 24, 20)
(134, 294, 24, 20)
(135, 45, 24, 20)
(135, 169, 24, 20)
(138, 221, 24, 19)
(139, 96, 24, 20)
(142, 272, 24, 20)
(143, 147, 24, 20)
(144, 23, 23, 20)
(146, 199, 24, 20)
(146, 323, 24, 20)
(147, 74, 24, 20)
(150, 250, 24, 20)
(151, 125, 24, 20)
(154, 301, 24, 20)
(155, 53, 24, 19)
(155, 177, 23, 20)
(158, 228, 24, 20)
(159, 104, 24, 19)
(162, 279, 24, 20)
(163, 155, 23, 20)
(164, 31, 23, 19)
(166, 206, 24, 20)
(166, 331, 24, 

In [12]:
cv2.imshow("sub",imgray[752:752+36,1551:1551+41])
cv2.waitKey(0)
cv2.destroyAllWindows()