In [2]:
from skimage.segmentation import clear_border
from skimage.measure import label, regionprops
from skimage.color import rgb2gray
from skimage.filters import threshold_otsu
from skimage.morphology import closing, square
from skimage.util import invert

import numpy as np

In [3]:
def get_sorted_bboxes(image):
    ''' Generate the sorted bounding boxes

    The image is segmented with `_get_background_filter`, components touching
    the border are removed with `clear_border`, and the remaining connected
    components are labelled. A region is kept only when its area lies in
    [1000, 100000] and its shortest/longest axis ratio is at least 0.2.

    Returns the list of (minr, minc, maxr, maxc) bounding boxes of the kept
    regions, in the order produced by `_sort_bbxs`.
    '''
    mask = _get_background_filter(image)
    cleared = clear_border(mask)
    label_image = label(cleared)
    coords = []
    # NOTE(review): the `coordinates` keyword has been deprecated in
    # scikit-image -- confirm it is still accepted by the installed version.
    for region in regionprops(label_image, coordinates='rc'):
        # Discard specks and very large blobs.
        if region.area < 1000 or region.area > 100000:
            continue

        major = region.major_axis_length
        minor = region.minor_axis_length
        longest = max(major, minor)
        shortest = min(major, minor)
        # Discard very elongated regions. A region whose axis length is zero
        # is treated as elongated instead of triggering a division by zero.
        if longest <= 0 or (shortest / longest) < 0.2:
            continue

        coords.append(region.bbox)  # minr, minc, maxr, maxc

    return _sort_bbxs(coords, image.shape[0])


def _sort_bbxs(regions, num_rows):
    ''' Sort bboxes left to right, top to bottom '''

    def overlap(el1, el2):
        ''' determine if bounding boxes overlap along a row '''
        upper_max = max(el1[0], el2[0])
        lower_min = min(el1[2], el2[2])
        return (upper_max < lower_min)

    rows = []

    while(len(regions)):
        sorted_by_y = sorted(regions, key=lambda x: x[0])
        first_el = sorted_by_y[0]
        rows.append([first_el])
        regions.remove(first_el)
        sorted_by_y.pop(0)
        for el in sorted_by_y:
            if overlap(el, first_el) or overlap(el, rows[-1][-1]):
                rows[-1].append(el)
                regions.remove(el)

    sorted_bbxs = []
    for row in rows:
        sorted_bbxs += sorted(row, key=lambda x: x[1])
    return sorted_bbxs


def _get_background_filter(image):
    ''' Gets the binary filter of the segmented image

    An image that is not 2-D is first converted with `rgb2gray`. The result is
    thresholded at its Otsu level, closed with a 3x3 square, and the inverted
    mask is returned.
    '''
    gray = image if image.ndim == 2 else rgb2gray(image)
    otsu_level = threshold_otsu(gray)
    above_threshold = gray > otsu_level
    closed_mask = closing(above_threshold, square(3))
    return invert(closed_mask)