## RPN Training - using predefined resnet50 keras model

In [1]:
from keras.applications.resnet50 import ResNet50
from keras.layers import Conv2D, Input
from keras.models import Model
import cv2
from keras.preprocessing.image import load_img, img_to_array
from keras.applications.resnet50 import preprocess_input
from keras import backend as K
import numpy as np
import os
import pandas as pd
import numpy.random as npr
K.set_image_data_format('channels_last')

Using TensorFlow backend.


In [2]:
from utils import bbox_overlaps, bbox_transform, unmap

In [3]:
model_resnet = ResNet50(include_top=False, input_shape = (800, 800, 3)) #random weight initialization



In [4]:
model_resnet.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 800, 800, 3)  0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 806, 806, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 400, 400, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 400, 400, 64) 256         conv1[0][0]                      
__________________________________________________________________________________________________
activation

In [4]:
#define the different loss functions for RPN training
def loss_cls(y_true, y_pred):
    condition = K.not_equal(y_true, -1)
    indices = K.tf.where(condition)

    target = K.tf.gather_nd(y_true, indices)
    #print(target.shape)
    output = K.tf.gather_nd(y_pred, indices)
    #print(output.shape)
    loss = K.binary_crossentropy(target, output)
    return K.mean(loss)


def smoothL1(y_true, y_pred):
    nd = K.tf.where(K.tf.not_equal(y_true,0)) #list of indices where y_true != 0
    y_true = K.tf.gather_nd(y_true,nd)
    y_pred = K.tf.gather_nd(y_pred,nd)
    x = K.tf.losses.huber_loss(y_true,y_pred)
    return x

In [5]:
def RegionProposalNet(k):
    # k is the number of anchors
    feature_map = Input(shape=(None,None,2048)) #just a sample shape
    
    conv1 = Conv2D(filters=512,kernel_size=(3, 3),name="RPN_layer1")(feature_map)

    boxes = Conv2D(filters= 4 * k,kernel_size=(1, 1),activation="linear",kernel_initializer="uniform",name="RPN_boxes")(conv1)

    objectScore = Conv2D(filters=1 * k,kernel_size=(1, 1),activation="sigmoid",kernel_initializer="uniform",name="RPN_score")(conv1)

    model_RPN = Model(inputs=[feature_map], outputs=[boxes, objectScore])
    
    return model_RPN

In [6]:
import math

orientations = [0, math.pi/6, math.pi/3, math.pi/2, 2*(math.pi/3), 5*(math.pi/6)]

In [7]:
k = 27 #anchor no  change k
anchor_ratios = [1, 1/2, 2, 1/3, 3, 1/4, 4, 1/5, 5] 
#anchor_ratios = [0.5, 1, 2]
model_rpn = RegionProposalNet(k)
model_rpn.compile(optimizer='adam', loss={'RPN_score':loss_cls, 'RPN_boxes':smoothL1})

In [8]:
model_rpn.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, None, None, 2 0                                            
__________________________________________________________________________________________________
RPN_layer1 (Conv2D)             (None, None, None, 5 9437696     input_2[0][0]                    
__________________________________________________________________________________________________
RPN_boxes (Conv2D)              (None, None, None, 1 55404       RPN_layer1[0][0]                 
__________________________________________________________________________________________________
RPN_score (Conv2D)              (None, None, None, 2 13851       RPN_layer1[0][0]                 
Total params: 9,506,951
Trainable params: 9,506,951
Non-trainable params: 0
_________________________________

In [8]:
def read_file(filename):  
    col = ['bottomLeftX','bottomLeftY','bottomRightX','bottomRightY','topRightX','topRightY','topLeftX','topLeftY','category','difficult']
    dfr = pd.read_csv(filename,sep=" ",names = col,index_col=None, header=None)
    #print(dfr)
    return dfr

In [9]:
def parse_(filename,w_scale, h_scale):
    df = read_file(filename)
    width=0
    height=0
    list_of_widths = abs(df['topLeftX']-df['topRightX'])
    list_of_hts = abs(df['topLeftY']-df['bottomLeftY'])
    
    category=[]
    xmin=[]
    ymin=[]
    xmax=[]
    ymax=[]
    #print(w_scale, h_scale)
    for i in df['category']:
        category.append(i)
    
    for j in df['bottomLeftX']:
        xmin.append(int(j)*(w_scale))
    
    for k in df['bottomLeftY']:
        ymin.append(int(k)*(h_scale))
    
    for l in df['topRightX']:
        xmax.append(int(l)*(w_scale))
    
    for m in df['topRightY']:
        ymax.append(int(m)*(h_scale))
        
    gt_boxes=[list(box) for box in zip(xmin,ymin,xmax,ymax)]
    
    return category, np.asarray(gt_boxes, np.float)


In [10]:
BG_FG_FRAC=2

In [102]:
########### GENERATE IMAGE BATCHES (PROPOSED IMAGE REGIONS)

In [11]:
def generate_anchors(ratios, base_width, base_height,scales=np.asarray([3,6,12])):
    """
    Generate anchor (reference) windows by enumerating aspect ratios X
    scales wrt a reference (0, 0, w_stride-1, h_stride-1) window.
    """
    base_anchor = np.array([0, 0, base_width-1, base_height-1])
    ratio_anchors = _ratio_enum(base_anchor, ratios)
    anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) for i in range(ratio_anchors.shape[0])]) #shape[0] gives ratio.size number of anchors
    #oriented_anchors = np.vstack([_rotate_enum(anchors[j,:]) for j in range(anchors.shape[0])])
    return anchors

def _whctrs(anchor):
    """
    Return width, height, x center, and y center for an anchor (window).
    """
    w = anchor[2] - anchor[0] + 1
    h = anchor[3] - anchor[1] + 1
    x_ctr = anchor[0] + 0.5 * (w - 1)
    y_ctr = anchor[1] + 0.5 * (h - 1)
    return w, h, x_ctr, y_ctr

def _mkanchors(ws, hs, x_ctr, y_ctr):
    """
    Given a vector of widths (ws) and heights (hs) around a center
    (x_ctr, y_ctr), output a set of anchors (windows).
    """

    ws = ws[:, np.newaxis]
    hs = hs[:, np.newaxis]
    anchors = np.hstack((x_ctr - 0.5 * (ws - 1),
                         y_ctr - 0.5 * (hs - 1),
                         x_ctr + 0.5 * (ws - 1),
                         y_ctr + 0.5 * (hs - 1)))
    return anchors

def _ratio_enum(anchor, ratios):
    """
    Enumerate a set of anchors for each aspect ratio wrt an anchor.
    """

    w, h, x_ctr, y_ctr = _whctrs(anchor)
    size = w * h
    size_ratios = size / ratios
    ws = np.round(np.sqrt(size_ratios))
    hs = np.round(ws * ratios)
    anchors = _mkanchors(ws, hs, x_ctr, y_ctr) # center reamins the same
    return anchors

def _scale_enum(anchor, scales):
    """
    Enumerate a set of anchors for each scale wrt an anchor.
    """

    w, h, x_ctr, y_ctr = _whctrs(anchor)
    ws = w * scales
    hs = h * scales
    anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
    return anchors


In [12]:
def _mkanchorsoriented(w,h,x_ctrs,y_ctrs):
    """
    Given a vector of xcoords (x_ctrs) and y_coords (y_ctrs) with
    (w,h), output a set of anchors (windows).
    """
    xs = x_ctrs[:, np.newaxis]
    ys = y_ctrs[:, np.newaxis]
    anchors = np.hstack((xs - 0.5 * (w - 1),
                         ys - 0.5 * (h - 1),
                         xs + 0.5 * (w - 1),
                         ys + 0.5 * (h - 1)))
    return anchors
    
def _rotate_enum(anchor):
    """
    Enumerate a set of anchors for each orientation angle
    """
    w, h, x_ctr, y_ctr = _whctrs(anchor) # single anchor
    x_tleft = x_ctr - 0.5*(w-1)
    y_tleft = y_ctr - 0.5*(h-1)
    
    xx_tleft = [((x_tleft-x_ctr)*math.cos(theta) + (y_tleft-y_ctr)*math.sin(theta) + x_ctr) for theta in orientations]
    yy_tleft = [((y_tleft-y_ctr)*math.cos(theta) - (x_tleft-x_ctr)*math.sin(theta) + y_ctr) for theta in orientations]
    
    x_ctr = xx_tleft + 0.5(w-1)
    y_ctr = yy_tleft + 0.5(h-1)
    
    anchors = _mkanchorsoriented(w,h,x_ctr,y_ctr)
    return anchors

In [13]:
def minibatch(filepath, gt_boxes, img):#, scale):
    #create the dataset to train the RPN
    #img = cv2.imread(filepath)
    img_width = np.shape(img)[1]#*scale[1]
    img_height = np.shape(img)[0]#*scale[0]
    #dim = (int(img_width),int(img_height))
    #img = cv2.resize(img, dim, interpolation = cv2.INTER_AREA)
    
    #feed image to pretrained model and get feature map
    img = np.expand_dims(img, axis=0)
    feature_map = model_resnet.predict(img)
    height = np.shape(feature_map)[1]
    width = np.shape(feature_map)[2]
    num_feature_map = width*height
    
    #calculate output w, h stride
    strideX = img_width / width  
    strideY = img_height / height

    #generate base anchors according to output stride. For every tile we have 27 different size anchors defined
    #base anchors are 9*3 anchors wrt a tile (0,0,strideX-1,strideY-1)
    base_anchors = generate_anchors(anchor_ratios,strideX,strideY) #create the anchors which each tile will have
    #returns anchors (x1,y1,x2,y2) which is transformed to (deltax,deltay,deltaw,deltaH) by bboxtransform->these
    # are the bbox targets
    shift_x = np.arange(0, width) * strideX
    shift_y = np.arange(0, height) * strideY
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),shift_y.ravel())).transpose()
    total_anchors = num_feature_map*27
    all_anchors = (base_anchors.reshape((1, 27, 4)) + shifts.reshape((1, num_feature_map, 4)).transpose((1, 0, 2)))
    
    all_anchors = all_anchors.reshape((total_anchors, 4))
    #border=0
    inds_inside = np.where((all_anchors[:, 0] >= 0) & (all_anchors[:, 1] >= 0) & 
            (all_anchors[:, 2] < img_width) &  # width
            (all_anchors[:, 3] < img_height) )[0]  #height
    anchors = all_anchors[inds_inside]
    
    # calculate overlaps of each anchors with each gt boxes,
    # a matrix with shape [len(anchors) x len(gt_boxes)]
    overlaps = bbox_overlaps(anchors, gt_boxes)
    # find the gt box with biggest overlap to each anchors,
    # and the overlap ratio. result (len(anchors),)
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    # find the anchor with biggest overlap to each gt boxes,
    # and the overlap ratio. result (len(gt_boxes),)
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
    #labels, 1=fg/0=bg/-1=ignore
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)
    # set positive label, define in Paper3.1.2:
    # We assign a positive label to two kinds of anchors: (i) the
    # anchor/anchors with the highest Intersection-overUnion
    # (IoU) overlap with a ground-truth box, or (ii) an
    # anchor that has an IoU overlap higher than 0.7 with any gt boxes
    labels[gt_argmax_overlaps] = 1
    labels[max_overlaps >= .6] = 1
    labels[max_overlaps <= .3] = 0
    
    # subsample positive labels if we have too many
    # num_fg = int(RPN_FG_FRACTION * RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    # subsample negative labels if we have too many
    num_bg = int(len(fg_inds) * BG_FG_FRAC)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1
        
    batch_inds = inds_inside[labels!=-1]
    
    batch_inds = (batch_inds / k).astype(np.int) # represents which anchor box belong to which feature map point
    # it will range from 0 to total_num_feature_map_pts
    full_labels = unmap(labels, total_anchors, inds_inside, fill=-1)
    
    batch_label_targets = full_labels.reshape(-1,1,1,1*k)[batch_inds]
    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    # bbox_targets = bbox_transform(anchors, gt_boxes[argmax_overlaps, :]
    pos_anchors = all_anchors[inds_inside[labels==1]]
    bbox_targets = bbox_transform(pos_anchors, gt_boxes[argmax_overlaps, :][labels==1])
    bbox_targets = unmap(bbox_targets, total_anchors, inds_inside[labels==1], fill=0) #require unmapping because each target
    # represent a partcular pixel in feature map
    batch_bbox_targets = bbox_targets.reshape(-1,1,1,4*k)[batch_inds]
    padded_fcmap = np.pad(feature_map,((0,0),(1,1),(1,1),(0,0)),mode='constant') ##############
    padded_fcmap = np.squeeze(padded_fcmap)
    
    batch_tiles=[]
    for ind in batch_inds:
        x = ind % width
        y = int(ind/width)
        fc_3x3=padded_fcmap[y:y+3,x:x+3,:]
        batch_tiles.append(fc_3x3)
    return np.asarray(batch_tiles), batch_label_targets.tolist(), batch_bbox_targets.tolist()

In [14]:
import pandas as pd

In [16]:
def bbox_overlaps(boxes, query_boxes):
    """ `-
    Standard IOU
    Parameters
    ----------
    boxes: (N, 4) ndarray of float
    query_boxes: (K, 4) ndarray of float
    Returns
    -------
    overlaps: (N, K) ndarray of overlap between boxes and query_boxes
    """
    boxes=boxes.astype(int)
    N = boxes.shape[0]
    K = query_boxes.shape[0]
    #print(N*K)
    overlaps = np.zeros((N, K), dtype=np.float64)
    count=0
    for k in range(K):
        box_area = ((query_boxes[k, 2] - query_boxes[k, 0] + 1) * (query_boxes[k, 3] - query_boxes[k, 1] + 1))
        for n in range(N):
            iw = (min(boxes[n, 2], query_boxes[k, 2]) - max(boxes[n, 0], query_boxes[k, 0]) + 1)
            if iw > 0:
                ih = (min(boxes[n, 3], query_boxes[k, 3]) - max(boxes[n, 1], query_boxes[k, 1]) + 1)
                #count+=1
                if ih > 0:
                    ua = float((boxes[n, 2] - boxes[n, 0] + 1) * (boxes[n, 3] - boxes[n, 1] + 1) + box_area - (iw * ih))
                    overlaps[n, k] = round((iw * ih / ua)*10,2)
                    #print(n,k)
                    #print(overlaps[n,k])
                    count+=1
                    
    #print(count)
    return overlaps

In [17]:
img_path = r'F:\DOTA (Dataset)\Training Set\part 1\Sample Training images'
anno_path = r'F:\DOTA (Dataset)\Training Set\part 1\Sample Training annotations'

In [18]:
import glob
import traceback
from keras.callbacks import ModelCheckpoint
BATCH_SIZE = 512 #######change batch size

In [19]:
def preprocess_(file):
    img = cv2.imread(file)
    dim = (800,800)
    img_ = cv2.resize(img, dim, interpolation = cv2.INTER_AREA)
    scale_w = img_.shape[0]/img.shape[0]
    scale_h = img_.shape[1]/img.shape[1]
    return img_,scale_w, scale_h

In [20]:
batch_tiles = []
batch_labels = []
batch_bboxes = []

In [21]:
################# testing - not part of main code

for filename in os.listdir(anno_path):
    file = anno_path + '\\'
    filepath = img_path + '\\' + filename[:-4]+ '.png'
    img, scw, sch = preprocess_(filepath)
    category, gt_boxes = parse_(file+filename,scw,sch)
    print('building...')
    tiles, labels, bboxes = minibatch(filepath, gt_boxes, img)
    print('One image down')
    for i in range(len(tiles)):
        batch_tiles.append(tiles[i])
        batch_labels.append(labels[i])
        batch_bboxes.append(bboxes[i])
        
a=np.asarray(batch_tiles)
b=np.asarray(batch_labels)
c=np.asarray(batch_bboxes)
if not a.any() or not b.any() or not c.any():
    print("empty array found.")
print('done predicting')

building...
One image down
done predicting


In [22]:
print(a.shape, b.shape, c.shape)# 800x800 image

(7114, 3, 3, 2048) (7114, 1, 1, 27) (7114, 1, 1, 108)


In [23]:
model_rpn.fit(a, [c,b], epochs = 10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x27267d137f0>

In [24]:
model_rpn.save('rpnmodel0412.h5')

In [32]:
def input_generator():
    batch_tiles = []
    batch_labels = []
    batch_bboxes = []
    count = 0
    while 1:
        for filename in os.listdir(anno_path):
            if filename.endswith(".txt"):
                try:
                    file = anno_path + '\\'
                    #im_path = img_path + '\\'
                    filepath = img_path + '\\' + filename[:-4]+ '.png'
                    img, scw, sch = preprocess_(filepath)
                    category, gt_boxes = parse_(file+filename,scw,sch)
                    
                    if len(gt_boxes)==0:
                        continue
                    tiles, labels, bboxes = minibatch(filepath, gt_boxes, img)#, scale)
                    #print('minibatch done')
                except Exception:
                    print('parse label or produce batch failed: for: '+ filename)
                    traceback.print_exc()
                    continue
                print("len titles- ", len(tiles))
                for i in range(len(tiles)):
                    batch_tiles.append(tiles[i])
                    batch_labels.append(labels[i])
                    batch_bboxes.append(bboxes[i])
                    if(len(batch_tiles)==BATCH_SIZE):
                        a=np.asarray(batch_tiles)
                        b=np.asarray(batch_labels)
                        c=np.asarray(batch_bboxes)
                        if not a.any() or not b.any() or not c.any():
                            print("empty array found.")
                        yield a, [c, b]
                        batch_tiles=[]
                        batch_labels=[]
                        batch_bboxes=[]

In [27]:
### uncomment to use the generator for training
from keras.callbacks import ModelCheckpoint
checkpointer = ModelCheckpoint(filepath='./weights.hdf5', verbose=1, save_best_only=True)
model_rpn.fit_generator(input_generator(), steps_per_epoch=60, epochs=3, callbacks=[checkpointer])

Epoch 1/3
len titles-  2997
len titles-  3219
 1/60 [..............................] - ETA: 49:57 - loss: 1.6231 - RPN_boxes_loss: 0.7586 - RPN_score_loss: 0.8646len titles-  2871
 6/60 [==>...........................] - ETA: 19:57 - loss: 6.0556 - RPN_boxes_loss: 2.6656 - RPN_score_loss: 3.3900len titles-  7595
 7/60 [==>...........................] - ETA: 20:24 - loss: 5.9864 - RPN_boxes_loss: 2.8540 - RPN_score_loss: 3.1324empty array found.
empty array found.
Epoch 2/3




len titles-  3219
 6/60 [==>...........................] - ETA: 14:15 - loss: 12.8589 - RPN_boxes_loss: 9.8422 - RPN_score_loss: 3.0167len titles-  2871
11/60 [====>.........................] - ETA: 13:23 - loss: 14.8336 - RPN_boxes_loss: 11.9418 - RPN_score_loss: 2.8918len titles-  7595
12/60 [=====>........................] - ETA: 13:56 - loss: 14.6886 - RPN_boxes_loss: 11.9574 - RPN_score_loss: 2.7312empty array found.
Epoch 3/3
 5/60 [=>............................] - ETA: 12:15 - loss: 9.3706 - RPN_boxes_loss: 8.9318 - RPN_score_loss: 0.4388len titles-  3219
11/60 [====>.........................] - ETA: 12:07 - loss: 10.8924 - RPN_boxes_loss: 10.1486 - RPN_score_loss: 0.7438len titles-  2871


<keras.callbacks.History at 0x1d475180dd8>

In [29]:
model_rpn.save('rpnweights_input_generator.h5')