In [None]:
#!unzip '/content/text_detection_2.zip'

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import keras
import os
import matplotlib.pyplot as plt
from PIL  import Image
import cv2
import keras.backend as K
from keras import regularizers
from keras.layers import Conv2D, concatenate, BatchNormalization, Lambda, Input, multiply, add, ZeroPadding2D, Activation, Layer, MaxPooling2D, Dropout
from keras.models import Model
from keras.applications.resnet50 import ResNet50
from keras.preprocessing.image import ImageDataGenerator

## Data Preprocessing

In [None]:
# Directory holding the input images; the loader below reads "<img_dir>/<index>.jpg".
img_dir = r'/content/text_detection_2/images'
# Filename *prefix* (not a directory) of the annotation files; the loader appends "<index>.txt".
label_dir = r'/content/text_detection_2/ground_truth/gt_'

In [None]:
# Load images 100..105 into memory as uint8 arrays of shape (h, w, 3).
imgs = []

for i in range(100,106):
    # Image.open is lazy and keeps the file handle open; the context manager
    # closes it as soon as the pixel data has been copied into the array.
    with Image.open(img_dir+'/'+str(i)+'.jpg') as pil_img:
        # Force three channels: the cropping/padding helpers unpack (h, w, c)
        # and write into a 3-channel canvas, so grayscale/RGBA input would fail.
        img = np.array(pil_img.convert('RGB'), dtype=np.uint8)
    imgs.append(img)
print(len(imgs))

In [None]:
idx_ex = 5 #example index
print(imgs[idx_ex].shape)
fig,ax = plt.subplots(1)
# Display the image selected by idx_ex rather than whatever the loading loop
# last left in `img`, so changing idx_ex actually changes the picture.
ax.imshow(imgs[idx_ex])
plt.show()

In [None]:
# bbox[k] holds the boxes of imgs[k]; each box is four [x, y] corners in the
# order top-left, top-right, bottom-right, bottom-left.
bbox = []

for i in range(100,106):
    b = []
    with open(label_dir+str(i)+'.txt','r') as reader:
        for line in reader:
            fields = line.split()
            if not fields:
                # tolerate blank / trailing empty lines
                continue
            # The first four fields are the box corners (x1 y1 x3 y3). Slicing
            # [:4] instead of [:-1] keeps parsing correct when the trailing
            # text field itself contains whitespace.
            x1,y1,x3,y3 = np.array(fields[:4]).astype(int)
            # derive the two remaining corners of the axis-aligned rectangle
            x2,y2,x4,y4 = x3,y1,x1,y3
            b.append([[x1,y1],[x2,y2],[x3,y3],[x4,y4]])
    bbox.append(b)
print(len(bbox))

In [None]:
# Inspect the corner lists parsed for the example index chosen above.
bbox[idx_ex]

### Cropping the image

In [None]:
def crop_area(img, polys, max_tries=50):
    '''
    Randomly crop a sub-region of the image that keeps at least one text box intact.
    Reference: "https://github.com/kurapan/EAST"

    Crop borders are only drawn from rows/columns that no box covers, so a
    border never cuts through a box. A candidate crop is accepted as soon as
    at least one box has all four corners inside it.

    Args:
        img: array of shape (h, w, c).
        polys: array of shape (N, 4, 2) holding [x, y] corners.
        max_tries: number of random candidates to draw before giving up.

    Returns:
        (img, polys): the cropped image and the boxes fully inside the crop,
        translated to crop coordinates. If every row or column is covered by
        text, or no candidate is accepted within ``max_tries``, the inputs are
        returned unchanged.
    '''
    h, w, _ = img.shape
    pad_h = h//10
    pad_w = w//10
    # occupancy maps over the (padded) axes: 1 where some box covers the position
    h_array = np.zeros((h + pad_h*2), dtype=np.int32)
    w_array = np.zeros((w + pad_w*2), dtype=np.int32)
    for poly in polys:
        poly = np.round(poly, decimals=0).astype(np.int32)
        minx = np.min(poly[:, 0])
        maxx = np.max(poly[:, 0])
        w_array[minx+pad_w:maxx+pad_w] = 1
        miny = np.min(poly[:, 1])
        maxy = np.max(poly[:, 1])
        h_array[miny+pad_h:maxy+pad_h] = 1
    # ensure the cropped area not across a text
    h_axis = np.where(h_array == 0)[0]
    w_axis = np.where(w_array == 0)[0]
    if len(h_axis) == 0 or len(w_axis) == 0:
        return img, polys
    for _attempt in range(max_tries):
        # pick two free columns / rows, undo the padding offset, clamp to the image
        xx = np.random.choice(w_axis, size=2)
        xmin = np.clip(np.min(xx) - pad_w, 0, w-1)
        xmax = np.clip(np.max(xx) - pad_w, 0, w-1)
        yy = np.random.choice(h_axis, size=2)
        ymin = np.clip(np.min(yy) - pad_h, 0, h-1)
        ymax = np.clip(np.max(yy) - pad_h, 0, h-1)
        if polys.shape[0] != 0:
            poly_axis_in_area = (polys[:, :, 0] >= xmin) & (polys[:, :, 0] <= xmax) \
                                & (polys[:, :, 1] >= ymin) & (polys[:, :, 1] <= ymax)
            # keep only boxes whose four corners all lie inside the candidate
            selected_polys = np.where(np.sum(poly_axis_in_area, axis=1) == 4)[0]
        else:
            selected_polys = []
        if len(selected_polys) == 0:
            # no text in this area
            continue
        img = img[ymin:ymax+1, xmin:xmax+1, :]
        # fancy indexing copies, so the in-place shift below leaves the caller's array alone
        polys = polys[selected_polys]
        polys[:, :, 0] -= xmin
        polys[:, :, 1] -= ymin
        return img, polys

    # No acceptable crop found: fall back to the uncropped sample instead of
    # implicitly returning None (which breaks tuple-unpacking in the caller).
    return img, polys

### Padding the image 

In [None]:
def pad_image(img, input_size):
    """Place the image at a random offset on a black square canvas.

    The canvas side is the largest of the image height, the image width and
    ``input_size``; the canvas is uint8 with three channels.

    Returns:
        (canvas, shift_h, shift_w): the padded image and the row / column
        offset at which the original image was pasted.
    """
    src_h, src_w, _ = img.shape
    side = np.max([src_h, src_w, input_size])
    canvas = np.zeros((side, side, 3), dtype=np.uint8)

    # vertical offset is drawn first, then horizontal
    shift_h = np.random.randint(side - src_h + 1)
    shift_w = np.random.randint(side - src_w + 1)

    # slice assignment copies the pixels into the canvas
    canvas[shift_h:shift_h + src_h, shift_w:shift_w + src_w, :] = img

    return canvas, shift_h, shift_w

### Resizing the image

In [None]:
def resize_image(img, text_polys, input_size, shift_h, shift_w):
    """Resize the image to ``input_size`` x ``input_size`` and map the boxes along with it.

    Args:
        img: array of shape (h, w, c).
        text_polys: array of shape (N, 4, 2) holding [x, y] corners expressed
            in the coordinates of the image *before* padding.
        input_size: side length of the square output image.
        shift_h, shift_w: row / column offset that padding added to the image.

    Returns:
        (img, polys): the resized image and a new array of transformed
        corners. The input ``text_polys`` is not modified. Integer inputs are
        rounded to the nearest pixel and keep their dtype.
    """
    src_h, src_w, _ = img.shape
    img = cv2.resize(img, dsize=(input_size, input_size))

    # Use the true fractional scale. Truncating it to int collapses every box
    # to 0 when the padded image is larger than input_size, and silently drops
    # the fractional part when upscaling.
    ratio_x = input_size / float(src_w)
    ratio_y = input_size / float(src_h)

    polys = np.asarray(text_polys)
    scaled = polys.astype(np.float64)  # astype copies, so the caller's array is untouched
    # shift into padded coordinates first, then scale
    scaled[:, :, 0] = (scaled[:, :, 0] + shift_w) * ratio_x
    scaled[:, :, 1] = (scaled[:, :, 1] + shift_h) * ratio_y

    if np.issubdtype(polys.dtype, np.integer):
        # integer boxes stay integer so they can be used directly as slice bounds
        scaled = np.rint(scaled).astype(polys.dtype)

    return img, scaled

### Rescaling the image

In [None]:
def rescale_image(img):
    """Linearly rescale pixel values so that 0 maps to -1 and 255 maps to 1."""
    scaled = img / 127.5
    return scaled - 1.

### Preparing labels

In [None]:
def labels(img, polys):
    """Build a binary text mask for the image, subsampled by 4 along both axes.

    Each polygon contributes the rectangle spanned by its first corner
    (taken as top-left) and its third corner (taken as bottom-right).

    Returns:
        int8 array holding every 4th row and column of the
        (height, width, 1) mask; 1 marks text pixels.
    """
    height, width = img.shape[0], img.shape[1]
    mask = np.zeros((height, width, 1), dtype=np.int8)

    for quad in polys:
        top_left, bottom_right = quad[0], quad[2]
        mask[top_left[1]:bottom_right[1], top_left[0]:bottom_right[0], :] = 1

    # keep every 4th pixel in each spatial direction
    return mask[::4, ::4, :]

## Defining EAST model architecture

In [None]:
# Upsampling factor applied by every resize step in the merge branch.
RESIZE_FACTOR = 2

def resize_bilinear(x):
    """Bilinearly upsample axes 1 and 2 of ``x`` by ``RESIZE_FACTOR``.

    The target size is taken from the dynamic shape, so it works when the
    spatial dimensions are unknown at graph-construction time.
    """
    # assumes x is laid out as (batch, height, width, channels) — TODO confirm
    dynamic_shape = K.shape(x)
    target_h = dynamic_shape[1] * RESIZE_FACTOR
    target_w = dynamic_shape[2] * RESIZE_FACTOR
    return tf.image.resize_bilinear(x, size=[target_h, target_w])

In [None]:
def _conv_bn_relu(tensor, filters, kernel_size):
    """Conv2D (same padding, L2-regularised) -> BatchNormalization -> ReLU."""
    tensor = Conv2D(filters, kernel_size, padding='same', kernel_regularizer=regularizers.l2(1e-5))(tensor)
    tensor = BatchNormalization(momentum=0.997, epsilon=1e-5, scale=True)(tensor)
    return Activation('relu')(tensor)


# Backbone: ImageNet-pretrained ResNet50 without its classification head.
input_image = Input(shape=(None, None,3), name='input_image')
resnet = ResNet50(input_tensor=input_image, weights='imagenet', include_top=False, pooling=None)
# NOTE(review): these auto-generated 'activation_N' layer names presumably depend
# on the installed Keras version — confirm they exist in the target environment.
x = resnet.get_layer('activation_49').output

# Merge branch: three times upsample x2, concatenate with a shallower backbone
# feature map along the channel axis, then fuse with a 1x1 and a 3x3 conv block.
_merge_stages = [('activation_40', 128), ('activation_22', 64), ('activation_10', 32)]
for stage, (skip_layer, filters) in enumerate(_merge_stages, start=1):
    x = Lambda(resize_bilinear, name='resize_%d' % stage)(x)
    x = concatenate([x, resnet.get_layer(skip_layer).output], axis=3)
    x = _conv_bn_relu(x, filters, (1, 1))
    x = _conv_bn_relu(x, filters, (3, 3))

# final 3x3 refinement before the output head
x = _conv_bn_relu(x, 32, (3, 3))

# single-channel sigmoid score map
pred_score_map = Conv2D(1, (1, 1), activation=tf.nn.sigmoid, name='pred_score_map')(x)
#rbox_geo_map, angle_map has been removed for simplicity

model = Model(inputs=input_image, outputs=pred_score_map)

In [None]:
# The model's only output is a sigmoid score map, so it is trained with
# per-pixel binary cross-entropy; 'accuracy' is reported as a metric.
model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

In [None]:
def batch_generator(imgs, bbox, input_size=512):
    '''
    Endlessly yield single-sample training batches (X, y).

    Each iteration picks a random image, then crops, pads, resizes and
    rescales it, and builds the matching label mask from its boxes.

    Args:
        imgs: sequence of (h, w, 3) image arrays.
        bbox: sequence parallel to ``imgs``; bbox[k] is a list of boxes, each
            given as four [x, y] corners.
        input_size: side length of the square network input (default 512).

    Yields:
        (img, label): ``img`` has shape (1, input_size, input_size, 3) and
        ``label`` is the mask returned by ``labels`` with a leading batch
        axis of size 1.
    '''
    while True:
        idx = np.random.randint(0,len(imgs))

        img = imgs[idx]
        # Fresh array per sample so downstream in-place edits never touch `bbox`;
        # the reshape keeps an image without boxes as an empty (0, 4, 2) array.
        polys = np.array(bbox[idx]).reshape(-1, 4, 2)

        cropped = crop_area(img, polys, max_tries=50)                                 #cropping
        if cropped is not None:
            # crop_area can give up without a result; keep the uncropped sample then
            img, polys = cropped
        img, shift_h, shift_w = pad_image(img, input_size)                            #padding
        img, polys = resize_image(img, polys, input_size, shift_h, shift_w)           #resizing
        img = rescale_image(img)                                                      #rescaling
        label = labels(img, polys.tolist())

        # add the leading batch axis expected by Keras
        yield img[np.newaxis, ...], label[np.newaxis, ...]

In [None]:
# Infinite generator of (image, label) batches drawn from the loaded images and boxes.
train_gen = batch_generator(imgs, bbox)

In [None]:
# One epoch of six generator draws.
# NOTE(review): newer Keras/TensorFlow releases deprecate fit_generator in favour of fit — confirm against the installed version.
model.fit_generator(train_gen, steps_per_epoch=6, epochs=1, verbose=1)