In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# Load the sheet as a single-channel grayscale image (flag 0).
img = cv2.imread('images/corect.png', 0)
if img is None:
    # cv2.imread does not raise on a missing/unreadable file, it returns None;
    # fail here with a clear message instead of inside a later OpenCV call.
    raise FileNotFoundError("Could not read image 'images/corect.png'")

# With THRESH_OTSU set, the threshold is picked by Otsu's method and returned
# as `thresh`; the 128 passed here is only a placeholder.
(thresh, img_bin) = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

# Invert the binary image: 0 <-> 255.
img_bin = 255 - img_bin
# plt.imshow(img_bin)

In [None]:
# Isolate the box borders with rectangular structuring elements.
# getStructuringElement takes its size as (columns, rows).
kernel_length = img.shape[1] // 80  # 1/80 of the image width
verticle_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, kernel_length))  # 1 px wide, kernel_length tall
hori_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_length, 1))  # kernel_length wide, 1 px tall
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))  # small square kernel for the final erosion

# Vertical lines: erode, then dilate, both with the tall kernel.
img_temp1 = cv2.erode(img_bin, verticle_kernel, iterations=3)
verticle_lines_img = cv2.dilate(img_temp1, verticle_kernel, iterations=3)
# plt.imshow(verticle_lines_img)

# Horizontal lines: the same two steps with the wide kernel.
img_temp2 = cv2.erode(img_bin, hori_kernel, iterations=3)
horizontal_lines_img = cv2.dilate(img_temp2, hori_kernel, iterations=3)
# plt.imshow(horizontal_lines_img)

# Blend both line images with equal weights.
alpha = 0.5
beta = 1.0 - alpha
img_final_bin = cv2.addWeighted(verticle_lines_img, alpha, horizontal_lines_img, beta, 0.0)

# Invert the blend, erode it with the 3x3 kernel, then binarise it again.
img_final_bin = cv2.erode(cv2.bitwise_not(img_final_bin), kernel, iterations=2)
thresh, img_final_bin = cv2.threshold(img_final_bin, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
# plt.imshow(img_final_bin)


In [None]:
def sort_contours(cnts, method="left-to-right"):
    """Sort contours by the position of their bounding boxes.

    Args:
        cnts: Iterable of contours, each accepted by ``cv2.boundingRect``.
        method: "left-to-right", "right-to-left", "top-to-bottom" or
            "bottom-to-top". Any other value behaves like "left-to-right".

    Returns:
        A pair ``(cnts, boundingBoxes)`` of equal-length tuples: the contours
        in sorted order and their ``(x, y, w, h)`` bounding boxes in the same
        order. Both tuples are empty when ``cnts`` is empty.
    """
    # Reverse the order for the two "backwards" directions.
    reverse = method in ("right-to-left", "bottom-to-top")
    # Bounding boxes are (x, y, w, h): index 0 sorts on x, index 1 on y.
    axis = 1 if method in ("top-to-bottom", "bottom-to-top") else 0

    boundingBoxes = [cv2.boundingRect(c) for c in cnts]
    if not boundingBoxes:
        # zip(*[]) yields nothing, so the two-name unpacking below would raise.
        return ((), ())

    paired = sorted(zip(cnts, boundingBoxes),
                    key=lambda pair: pair[1][axis], reverse=reverse)
    cnts, boundingBoxes = zip(*paired)
    return (cnts, boundingBoxes)

In [None]:
# cv2.findContours returns (image, contours, hierarchy) on OpenCV 3.x and
# (contours, hierarchy) on other versions; the last two items are always
# the contours and the hierarchy.
contours, hierarchy = cv2.findContours(img_final_bin, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:]
# sort_contours relies on sorted(), which is stable: sorting by x first and by y
# second gives top-to-bottom order with equal-y boxes ordered left-to-right.
(contours, boundingBoxes) = sort_contours(contours, method="left-to-right")
(contours, boundingBoxes) = sort_contours(contours, method="top-to-bottom")

# print(boundingBoxes)

In [None]:
def get_closest_63_indices(lst, count=63):
    """Return the indices of the ``count`` values of ``lst`` that lie closest together.

    The values are sorted, and every window of ``count`` consecutive sorted
    values is scored by the difference between its largest and smallest value.
    The window with the smallest difference wins; on ties the window with the
    smallest values is kept.

    Args:
        lst: Sequence of mutually comparable numbers.
        count: Number of values to select (defaults to 63).

    Returns:
        The original positions of the selected values, in ascending order, or
        an empty list when ``lst`` holds fewer than ``count`` values.

    Raises:
        ValueError: If ``count`` is smaller than 1.
    """
    if count < 1:
        raise ValueError("count must be a positive integer")

    # Keep each value paired with its original position while sorting by value.
    ranked = sorted(enumerate(lst), key=lambda pair: pair[1])

    best_diff = float('inf')
    best_slice = []
    for start in range(len(ranked) - count + 1):
        spread = ranked[start + count - 1][1] - ranked[start][1]
        if spread < best_diff:  # strict '<' keeps the first window on ties
            best_diff = spread
            best_slice = ranked[start:start + count]
    return sorted(index for index, _ in best_slice)

def get_area(contours):
    """Return the bounding box and the integer area of one contour.

    Args:
        contours: A single contour, passed to ``cv2.boundingRect`` and
            ``cv2.contourArea`` (despite the plural parameter name).

    Returns:
        Tuple ``(x, y, w, h, area)``: the bounding rectangle followed by the
        contour area truncated to an int.
    """
    x, y, w, h = cv2.boundingRect(contours)
    area = int(cv2.contourArea(contours))
    # The first element used to be `w`, so callers never received the x origin.
    return x, y, w, h, area

def get_bounding_rects(indices, bounding_rects):
    """Collect the entries of ``bounding_rects`` found at ``indices``.

    The result is a new list that follows the order of ``indices``.
    """
    selected = []
    for position in indices:
        selected.append(bounding_rects[position])
    return selected

In [None]:
boxes = []

# One area per contour, in the same order as `contours` / `boundingBoxes`.
areaList = [get_area(c)[4] for c in contours]

# Pick the similar-area group once, after every area is known. Doing this
# inside the loop repeated the whole sort per contour and left `dList`
# undefined when there were no contours.
dList = get_closest_63_indices(areaList)
ggList = get_bounding_rects(dList, boundingBoxes)
# print(ggList)

In [None]:
# Draw on an RGB copy: cv2.rectangle writes into the array it is given, so
# drawing on `img` itself would leave the rectangles in the source image used
# by other cells. A 3-channel copy also lets the (0, 255, 0) outline show as green.
image = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)

# Rebuilt from scratch so re-running this cell cannot append duplicates.
boxes = []
for x, y, w, h in ggList:
    # print("x:", x, "y:", y, "w:", w, "h:", h)
    cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
    boxes.append([x, y, w, h])

# `image` is created before the loop, so this also works when ggList is empty.
plotting = plt.imshow(image)
plt.title("Identified contours")
plt.show()

In [None]:
# idx =0
# for x, y, w, h in ggList:
#     idx += 1
#     new_img = img[y:y+h, x:x+w]
#     cv2.imwrite(str(idx) + '.png', new_img)