In [1]:
%matplotlib qt

In [2]:
from glob import glob
import time
import numpy as np
import matplotlib.pyplot as plt
import cv2
from imutils.object_detection import non_max_suppression


import pytesseract
import keras_ocr

In [3]:
def angle_cos(p0, p1, p2):
    d1, d2 = (p0-p1).astype('float'), (p2-p1).astype('float')
    return abs( np.dot(d1, d2) / np.sqrt( np.dot(d1, d1)*np.dot(d2, d2) ) )

def reorder_vertices(vertices):
    x_min = np.min(vertices[:,0])
    x_max = np.max(vertices[:,0])
    y_min = np.min(vertices[:,1])
    y_max = np.max(vertices[:,1])
    new_vertices = np.zeros((4,2), dtype=np.int32)
    new_vertices[0,0], new_vertices[0,1] = x_min, y_min
    new_vertices[1,0], new_vertices[1,1] = x_max, y_min
    new_vertices[2,0], new_vertices[2,1] = x_max, y_max
    new_vertices[3,0], new_vertices[3,1] = x_min, y_max
    return new_vertices

def find_all_contours(img):
    # get image area
    img_area = np.prod(img.shape[:2])
    
    img = cv2.GaussianBlur(img, (5, 5), 0)
    buttons = []
    for gray in cv2.split(img):
        for thrs in range(0, 255, 26):
            if thrs == 0:
                bin = cv2.Canny(gray, 0, 50, apertureSize=5)
                bin = cv2.dilate(bin, None)
            else:
                _retval, bin = cv2.threshold(gray, thrs, 255, cv2.THRESH_BINARY)
            contours, _hierarchy = cv2.findContours(bin, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
            for cnt in contours:
                cnt_len = cv2.arcLength(cnt, True)
                cnt = cv2.approxPolyDP(cnt, 0.02*cnt_len, True)
                if len(cnt) == 4 and cv2.isContourConvex(cnt):
                    cnt_area = cv2.contourArea(cnt)
                    if cnt_area/img_area > 0.02 and cnt_area/img_area < 0.10:
                        cnt = cnt.reshape(-1, 2)
                        max_cos = np.max([angle_cos( cnt[i], cnt[(i+1) % 4], cnt[(i+2) % 4] ) for i in range(4)])
                        if max_cos < 0.1:
                            cnt = reorder_vertices(cnt)
                            buttons.append(cnt)
    return buttons

def remove_duplicate_contours(buttons):
    threshold = 50
    pos_y = [buttons[i][0][1] for i in range(len(buttons))]
    idx_y = np.argsort(pos_y)
    new_idx = []
    tmp_idx = []

    for i in range(len(idx_y)):
        if len(tmp_idx) == 0:
            tmp_idx.append(idx_y[i])
        if abs(pos_y[idx_y[i]] - pos_y[tmp_idx[-1]]) < threshold:
            if tmp_idx[-1] != idx_y[i]:
                tmp_idx.append(idx_y[i])
        else:
            pos_x = [buttons[j][0][0] for j in tmp_idx]
            idx_x = np.argsort(pos_x)
            tmp2_idx = []

            for j in range(len(idx_x)):
                if len(tmp2_idx) == 0:
                    tmp2_idx.append(tmp_idx[idx_x[j]])
                if abs(buttons[tmp_idx[idx_x[j]]][0][0] - buttons[tmp2_idx[-1]][0][0] ) > threshold:
                    tmp2_idx.append(tmp_idx[idx_x[j]])
            new_idx.extend(tmp2_idx)
            tmp_idx = []
    pos_x = [buttons[j][0][0] for j in tmp_idx]
    idx_x = np.argsort(pos_x)
    tmp2_idx = []

    for j in range(len(idx_x)):
        if len(tmp2_idx) == 0:
            tmp2_idx.append(tmp_idx[idx_x[j]])
        if abs(buttons[tmp_idx[idx_x[j]]][0][0] - buttons[tmp2_idx[-1]][0][0] ) > threshold:
            tmp2_idx.append(tmp_idx[idx_x[j]])
    new_idx.extend(tmp2_idx)
    new_buttons = [buttons[i] for i in new_idx]
    return new_buttons

def find_text(image, net):
    args = dict(width=320, height=320)
    (H, W) = image.shape[:2]

    # set the new width and height and then determine the ratio in change
    # for both the width and height
    (newW, newH) = (args["width"], args["height"])
    rW = W / float(newW)
    rH = H / float(newH)

    # resize the image and grab the new image dimensions
    image = cv2.resize(image, (newW, newH))
    (H, W) = image.shape[:2]

    # define the two output layer names for the EAST detector model that
    # we are interested -- the first is the output probabilities and the
    # second can be used to derive the bounding box coordinates of text
    layerNames = [
        "feature_fusion/Conv_7/Sigmoid",
        "feature_fusion/concat_3"]

    # load the pre-trained EAST text detector
#     print("[INFO] loading EAST text detector...")
#     net = cv2.dnn.readNet(args["east"])

    # construct a blob from the image and then perform a forward pass of
    # the model to obtain the two output layer sets
    blob = cv2.dnn.blobFromImage(image, 1.0, (W, H),
        (123.68, 116.78, 103.94), swapRB=True, crop=False)
#     start = time.time()
    net.setInput(blob)
    (scores, geometry) = net.forward(layerNames)
#     end = time.time()

    # show timing information on text prediction
#     print("[INFO] text detection took {:.6f} seconds".format(end - start))

    # grab the number of rows and columns from the scores volume, then
    # initialize our set of bounding box rectangles and corresponding
    # confidence scores
    (numRows, numCols) = scores.shape[2:4]
    rects = []
    confidences = []

    # loop over the number of rows
    for y in range(0, numRows):
        # extract the scores (probabilities), followed by the geometrical
        # data used to derive potential bounding box coordinates that
        # surround text
        scoresData = scores[0, 0, y]
        xData0 = geometry[0, 0, y]
        xData1 = geometry[0, 1, y]
        xData2 = geometry[0, 2, y]
        xData3 = geometry[0, 3, y]
        anglesData = geometry[0, 4, y]

        # loop over the number of columns
        for x in range(0, numCols):
            # if our score does not have sufficient probability, ignore it
            #if scoresData[x] < args["min_confidence"]:
            if scoresData[x] < 0.3:
                continue

            # compute the offset factor as our resulting feature maps will
            # be 4x smaller than the input image
            (offsetX, offsetY) = (x * 4.0, y * 4.0)

            # extract the rotation angle for the prediction and then
            # compute the sin and cosine
            angle = anglesData[x]
            cos = np.cos(angle)
            sin = np.sin(angle)

            # use the geometry volume to derive the width and height of
            # the bounding box
            h = xData0[x] + xData2[x]
            w = xData1[x] + xData3[x]

            # compute both the starting and ending (x, y)-coordinates for
            # the text prediction bounding box
            endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
            endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
            startX = int(endX - w)
            startY = int(endY - h)

            # add the bounding box coordinates and probability score to
            # our respective lists
            rects.append((startX, startY, endX, endY))
            confidences.append(scoresData[x])

    # apply non-maxima suppression to suppress weak, overlapping bounding
    # boxes
    boxes = non_max_suppression(np.array(rects), probs=confidences)
    
    return boxes, rW, rH

def get_dominant_color(img):
    px = cv2.resize(img, (1,1))[0,0]
    return px

def get_text(image):
    text = pytesseract.image_to_string(image).rstrip()
    return text

def detect_buttons(img):
    # setup
    pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
    net = cv2.dnn.readNet('frozen_east_text_detection.pb')
#     api = tesserocr.PyTessBaseAPI()
    
    # find contours
    buttons = find_all_contours(img)
    buttons = remove_duplicate_contours(buttons)
    
    # Get button statistics
    img_area = np.prod(img.shape[:2])
    area = []
    for i in range(len(buttons)):
        area.append(cv2.contourArea(buttons[i]))
    median_area = np.median(area)

    # validate buttons
    # if normal size and not all whitespace --> valid
    # if normal size and all whitespace and no text --> invalid
    # if bigger size --> valid
    valid_buttons, text_boxes, valid_text = [], [], []
    for i in range(len(buttons)):
        print('.',end='')
        
        # crop to button
        x,y,w,h = cv2.boundingRect(buttons[i])
        crop = img[y:y+h,x:x+w]
        
        # save button
        cv2.imwrite('output/btn_%d.png' % i, crop)

        # find text boxes
#         tb, rW, rH = [], 1., 1.
        tb, rW, rH = find_text(crop, net)
        text_boxes.append(tb)

        # check if button is of normal size
        if abs(area[i] - median_area) <= median_area * 0.25:
            # check amount of white space in button
            common_pix = get_dominant_color(crop)
            num_background = np.count_nonzero((crop == common_pix).all(axis = 2))
            num_pixels = np.prod(crop.shape[:2])
#             print('%d: %0.2f' % (i, num_background / num_pixels))
            if num_background / num_pixels > 0.9:
                # skip button if there is no text
                if len(text_boxes[i]) == 0:
                    continue

        # button is valid, add to list
        valid_buttons.append(buttons[i])

        # extract text
        if len(text_boxes[i]) == 0:
            valid_text.append('')
        else:
            # crop image to text only
            startX = int(text_boxes[i][0][0] * rW)
            startY = int(text_boxes[i][0][1] * rH)
            endX = int(text_boxes[i][0][2] * rW)
            endY = int(text_boxes[i][0][3] * rH)+10
            cropt = crop[startY:endY,startX:endX]
            valid_text.append(get_text(cropt))

    #         cv2.imshow('cropt', cropt)
    #         ch = cv2.waitKey()
    #         cv2.destroyAllWindows()
    
    print('')
    return valid_buttons, valid_text, text_boxes

In [10]:
# DETECT BUTTONS

# Read image
fn = 'data/_comm5_home.png'
# fn = 'data/_comm5_things.png'
img = cv2.imread(fn)
# img = cv2.resize(img, (0,0), fx=0.8, fy=0.8)

# detect buttons
start = time.time()
buttons, text, tbox = detect_buttons(img)
end = time.time()
print("[INFO] button detection took {:.6f} seconds".format(end - start))
print('num buttons: ', len(buttons))
print(text)

# plot detection
cv2.drawContours( img, buttons, -1, (0, 255, 0), 3 )
cv2.imshow('buttons', img)
ch = cv2.waitKey()
cv2.destroyAllWindows()

# save output
cv2.imwrite('output/output.png', img)

.................................
[INFO] button detection took 11.337340 seconds
num buttons:  33
['', '', '', 'Abc &', '', '', 'Toys', 'Clothes', 'Things', 'Food', 'Nature', 'Household', 'Pronoun', 'People', 'Actions', 'Describing', 'Words', 'Places', 'Time', '', 'you', 'go', 'like', 'play', 'more', 'that', '', 'do', 'help', 'make', 'want', 'not', 'to']


True

In [5]:
# from autocorrect import Speller
# spell = Speller()
# net = cv2.dnn.readNet('frozen_east_text_detection.pb')
# text = []
# start = time.time()
# for i in range(len(buttons)):
#     # read button image
#     btn = cv2.imread('output/btn_%d.png' % i)
    
# #     # plot image
# #     cv2.imshow('btn', btn)
# #     ch = cv2.waitKey()
# #     cv2.destroyAllWindows()
    
#     # detect text
#     tb, rW, rH = find_text(btn, net)
#     txt = ''
#     if len(tb) > 0:
#         startX = int(tb[0][0] * rW)
#         startY = int(tb[0][1] * rH)
#         endX = int(tb[0][2] * rW)
#         endY = int(tb[0][3] * rH)+10
#         cropt = btn[startY:endY,startX:endX]
#         txt = get_text(cropt)
#     print('%d: %s\t%s' % (i, txt, spell(txt)))
    
# end = time.time()
# print("[INFO] button detection took {:.6f} seconds".format(end - start))

In [6]:
# import keras_ocr
# import cv2, time
# import matplotlib.pyplot as plt

# pipeline = keras_ocr.pipeline.Pipeline()

In [7]:
# # Get a set of three example images
# fn = 'data/_comm5_home.png'
# # fn = 'data/empty.png'
# # fn = 'data/i.png'
# # fn = 'data/it.png'
# # fn = 'data/short_words.png'
# # fn = 'data/you_button.png'
# # fn = 'output/btn_10.png'

# # image = [keras_ocr.tools.read(fn)]
# image = cv2.imread(fn)
# # image = cv2.resize(image, (320,320))
# image = cv2.resize(image, (0,0), fx=2, fy=2)
# image = [image]

# start = time.time()
# prediction = pipeline.recognize(image)
# end = time.time()
# print("[INFO] text detection took {:.6f} seconds".format(end - start))

In [8]:
# # prediction_groups
# plt.close('all')
# keras_ocr.tools.drawAnnotations(image=image[0], predictions=prediction[0])