In [1]:
from __future__ import print_function


from keras.models import Sequential, Model
from keras.layers import Reshape, Activation, Conv2D, Input, MaxPooling2D, BatchNormalization, Flatten, Dense, Lambda
from keras.layers.advanced_activations import LeakyReLU
from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard,ReduceLROnPlateau
from keras.optimizers import SGD, Adam, RMSprop
from keras.layers.merge import concatenate
import matplotlib.pyplot as plt
import keras.backend as K
import tensorflow as tf
# import imgaug as ia
from tqdm import tqdm

# from imgaug import augmenters as iaa
import numpy as np
import pickle
import os, cv2
from preprocessing import parse_annotation, BatchGenerator
from utils import WeightReader, decode_netout, draw_boxes

# os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
# os.environ["CUDA_VISIBLE_DEVICES"] = ""

%matplotlib inline

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import os

Using TensorFlow backend.


In [2]:
# Class names to detect. A single generic 'obj' class is active; the longer
# multi-class list (person, bicycle, car, ...) that used to follow is disabled.
LABELS = ['obj']

IMAGE_H, IMAGE_W = 224, 224  # previously 416, 416
# NOTE(review): the model summaries printed in this notebook show a 7x7 feature
# map for a 224x224 input, and the model cells take their grid size from the
# feature extractor rather than from these constants. Confirm that 9x9 is what
# the consumers of GRID_H / GRID_W expect.
GRID_H,  GRID_W  = 9, 9  # previously 13, 13
BOX              = 10  # read as nb_box when sizing the detection layer
CLASS            = len(LABELS)
CLASS_WEIGHTS    = np.ones(CLASS, dtype='float32')  # one weight of 1.0 per class
OBJ_THRESHOLD    = 0.3  # previously 0.5
NMS_THRESHOLD    = 0.3  # previously 0.45

# Earlier anchor sets, kept for reference:
# ANCHORS          = [0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828]
# ANCHORS = [12.02, 11.4, 10.27, 9.23, 8.97, 7.48, 6.85, 5.9, 4.76, 3.54, 2.4, 1.51]
# ANCHORS = [10.27, 9.23, 8.97, 7.48, 6.85, 5.9, 4.76, 3.54, 2.4, 1.51]

# flipkart data
# ANCHORS = [6.2, 6.13, 5.81, 5.78, 5.34, 4.36, 3.71, 3.21, 2.29, 1.33]

# Active anchors: 20 numbers, i.e. two per box for BOX = 10.
# Presumably (width, height) pairs in grid-cell units -- TODO confirm.
ANCHORS = [1.15,4.87, 2.11,5.05, 3.01,5.72, 3.71,3.79, 4.09,1.77, 4.42,5.69, 4.96,2.48, 5.55,3.28, 5.77,4.46, 6.12,6.22]

# Presumably the relative weights of the loss terms; the loss itself is not
# defined in the visible cells -- verify against the training code.
NO_OBJECT_SCALE  = 1.0
OBJECT_SCALE     = 5.0
COORD_SCALE      = 1.0
CLASS_SCALE      = 1.0

BATCH_SIZE       = 64
WARM_UP_BATCHES  = 0
# Read as max_box_per_image, the size of the box axis of the `true_boxes` input.
TRUE_BOX_BUFFER  = 5  # previously 0

In [3]:
from keras.models import Model
import tensorflow as tf
from keras.layers import Reshape, Activation, Conv2D, Input, MaxPooling2D, BatchNormalization, Flatten, Dense, Lambda
from keras.layers.advanced_activations import LeakyReLU

import numpy as np
import os
import cv2
from utils import decode_netout, compute_overlap, compute_ap
from keras.applications.mobilenet import MobileNet
from keras.layers.merge import concatenate
from keras.optimizers import SGD, Adam, RMSprop
from preprocessing import BatchGenerator
from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
# from backend import MobileNetFeature 

MOBILENET_BACKEND_PATH  = "mobile-net-yolo.h5"   # should be hosted on a server

class BaseFeatureExtractor(object):
    """Common interface for backbone networks used as feature extractors.

    A subclass builds its network in ``__init__`` and stores it on
    ``self.feature_extractor``; ``get_output_shape`` and ``extract`` then
    operate on that attribute.
    """

    def __init__(self, input_size):
        """Build the backbone for the given input size. Must be overridden.

        Raises:
            NotImplementedError: always, on the base class.
        """
        raise NotImplementedError(
            "%s must implement __init__(input_size)" % type(self).__name__)

    def normalize(self, image):
        """Preprocess ``image`` for the backbone. Must be overridden.

        Raises:
            NotImplementedError: always, on the base class.
        """
        raise NotImplementedError(
            "%s must implement normalize(image)" % type(self).__name__)

    def get_output_shape(self):
        """Return elements 1 and 2 of the extractor's most recent output shape.

        For the 4-D feature maps built in this notebook these are the two
        spatial dimensions (the leading batch axis and trailing channel axis
        are dropped).
        """
        return self.feature_extractor.get_output_shape_at(-1)[1:3]

    def extract(self, input_image):
        """Apply the wrapped extractor to ``input_image`` and return the result."""
        return self.feature_extractor(input_image)

    
class MobileNetFeature(BaseFeatureExtractor):
    """MobileNet backbone without its classifier head, used as a feature extractor."""

    def __init__(self, input_size):
        """Wrap a MobileNet loaded from MOBILENET_BACKEND_PATH behind a fresh input.

        Args:
            input_size: side length of the square, 3-channel input image.
        """
        image_in = Input(shape=(input_size, input_size, 3))

        # NOTE(review): the backbone is declared for 224x224 regardless of
        # input_size; the two agree in this notebook (input_size = 224).
        backbone = MobileNet(input_shape=(224,224,3), include_top=False, weights=None)
        backbone.load_weights(MOBILENET_BACKEND_PATH)

        self.feature_extractor = Model(image_in, backbone(image_in))

    def normalize(self, image):
        """Rescale pixel values so that inputs in [0, 255] land in [-1, 1]."""
        return (image / 255. - 0.5) * 2.
 

In [4]:
# Model inputs and backbone shared by the model-building cells.
input_size = 224  # side length of the square network input
max_box_per_image = TRUE_BOX_BUFFER  # size of the box axis of `true_boxes`
nb_box = BOX
nb_class = CLASS


input_image     = Input(shape=(input_size, input_size, 3), name='input_img')
# Second model input: max_box_per_image entries of 4 values each.
true_boxes = Input(shape=(1, 1, 1, max_box_per_image , 4))  

# Building the extractor loads the backbone weights from MOBILENET_BACKEND_PATH.
feature_extractor = MobileNetFeature(input_size)

In [5]:
# Baseline detector: MobileNet features -> 1x1 detection convolution.
# Requires `feature_extractor`, `input_image`, `true_boxes`, `nb_box` and
# `nb_class` from the setup cell.
grid_h, grid_w = feature_extractor.get_output_shape()
features = feature_extractor.extract(input_image)

# Detection layer: nb_box * (4 + 1 + nb_class) channels per grid cell,
# reshaped so that every box gets its own axis.
output = Conv2D(nb_box * (4 + 1 + nb_class),
                (1,1), strides=(1,1),
                padding='same',
                name='DetectionLayer',
                kernel_initializer='lecun_normal')(features)
output = Reshape((grid_h, grid_w, nb_box, 4 + 1 + nb_class))(output)
# Identity pass-through whose only purpose is to wire `true_boxes` into the graph.
output = Lambda(lambda args: args[0])([output, true_boxes])

model = Model([input_image, true_boxes], output)


# Re-initialise the detection layer with small random weights. The layer is
# looked up by name: a positional index such as model.layers[-4] silently picks
# a different layer as soon as anything is added after or beside the head.
layer = model.get_layer('DetectionLayer')
weights = layer.get_weights()

new_kernel = np.random.normal(size=weights[0].shape)/(grid_h*grid_w)
new_bias   = np.random.normal(size=weights[1].shape)/(grid_h*grid_w)

layer.set_weights([new_kernel, new_bias])

# print a summary of the whole model
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_img (InputLayer)          (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
model_1 (Model)                 (None, 7, 7, 1024)   3228864     input_img[0][0]                  
__________________________________________________________________________________________________
DetectionLayer (Conv2D)         (None, 7, 7, 60)     61500       model_1[1][0]                    
__________________________________________________________________________________________________
reshape_1 (Reshape)             (None, 7, 7, 10, 6)  0           DetectionLayer[0][0]             
__________________________________________________________________________________________________
input_1 (I

In [11]:
# Display the backbone output tensor to inspect its shape.
features

<tf.Tensor 'model_1/mobilenet_1.00_224/conv_pw_13_relu/Relu6:0' shape=(?, 7, 7, 1024) dtype=float32>

In [13]:
# Shape probe: a 3x3 convolution with dilation 3 and 'same' padding keeps the
# 7x7 grid (see the displayed result). The layer itself is thrown away.
Conv2D(512, (3,3), dilation_rate=3, padding='same')(features)

<tf.Tensor 'conv2d_2/BiasAdd:0' shape=(?, 7, 7, 512) dtype=float32>

In [None]:
    # Add classifier on top.
    # v2 has BN-ReLU before Pooling
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    # out-size = (None, 8, 8, 256)
    
    
    # start attention block
#     res_feat = Input(shape=(hh,hh,cc))
#     enc_feat = Input(shape=(cc,))
    res_feat = x
    enc_feat_c = RepeatVector(64)(enc_feat)
#     enc_feat_c = K.repeat_elements(enc_feat, 64, axis=1)
    enc_feat_c = Reshape(target_shape=(8,8,256))(enc_feat_c)
    
    x = keras.layers.concatenate([res_feat, enc_feat_c], axis=-1) #(8,8,512)
    x = Conv2D(256,(1, 1), #gfhfghfg
               padding='same',
               kernel_initializer='he_normal')(x) #(8,8,256)
    x = Activation('relu')(x)
#     print(x)
    # softmax to calculate weights
    x = Reshape(target_shape=(64,256))(x)
#     print(x)
    x = Dense(1, activation='softmax')(x) #(8,8,1)
#     print(x)
    x = Reshape(target_shape=(8,8,1))(x)
#     print(x)
#     x = Dense(1)(x) #(8,8,1)
#     print(x)
    # weighted multiply
    x = multiply([x, res_feat]) # (8,8,256)
#     print(x)
    # skip connection
    x = keras.layers.concatenate([x, res_feat], axis=-1)
#     print(x)
#     print(x)
    x = Conv2D(256,(1, 1), 
               padding='same',
               kernel_initializer='he_normal')(x) #(8,8,256)
#     print(x)
#     print(x)
#     att_feat = Model(inputs=[res_feat, enc_feat], outputs=x)

    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    
    x = Conv2D(256,(3, 3), 
               padding='same',
               kernel_initializer='he_normal')(x) #(8,8,256)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    
    x = AveragePooling2D(pool_size=8)(x)
    x = Flatten()(x)
    
    final_out = Dense(num_classes,
                    activation='softmax',
                    kernel_initializer='he_normal')(x)

In [14]:
import keras

from keras.layers import GlobalMaxPool2D, concatenate

In [None]:
# Rebuilds the model inputs, the backbone and its feature tensor in one cell.
# Re-running it creates fresh Keras layers and reloads the backbone weights.
input_size        = 224
max_box_per_image = TRUE_BOX_BUFFER  # size of the box axis of `true_boxes`
nb_box            = BOX
nb_class          = CLASS


input_image       = Input(shape=(input_size, input_size, 3), name='input_img')
# Second model input: max_box_per_image entries of 4 values each.
true_boxes        = Input(shape=(1, 1, 1, max_box_per_image , 4))  

feature_extractor = MobileNetFeature(input_size)

# Spatial size of the backbone output, and the output tensor itself.
grid_h, grid_w    = feature_extractor.get_output_shape()        
features          = feature_extractor.extract(input_image)            


In [49]:
# Detector with a spatial-attention block between the backbone and the head.
# Requires `features`, `grid_h`, `grid_w`, `input_image`, `true_boxes`,
# `nb_box` and `nb_class` from the setup cell.

#################################
# features: (7, 7, 1024) from the backbone

feat1 = Conv2D(128, (1, 1), dilation_rate=2, padding='same')(features)
feat11 = Conv2D(256, (3, 3), dilation_rate=2, padding='same')(feat1)

# Global context: channel-wise max over the grid, tiled back onto every cell.
global_feat = keras.layers.GlobalMaxPool2D()(feat11)
global_feat_ = keras.layers.RepeatVector(grid_h * grid_w)(global_feat)
global_feat_ = keras.layers.Reshape(target_shape=(grid_h, grid_w, 256))(global_feat_)

mix_feat = keras.layers.Concatenate(axis=-1)([feat11, global_feat_])
mix_feat = Conv2D(256, (1, 1), padding='same')(mix_feat)

# One logit per grid cell, normalised ACROSS the cells. Applying softmax inside
# Dense(1) would normalise over the single output unit and always yield 1.0,
# which turns the attention into a no-op.
spatial_w_ = Dense(1)(mix_feat)
spatial_w_ = Reshape(target_shape=(grid_h * grid_w,))(spatial_w_)
spatial_w_ = Activation('softmax')(spatial_w_)
spatial_w_ = Reshape(target_shape=(grid_h, grid_w, 1))(spatial_w_)

# The per-cell weight broadcasts over the 256 channels; the Add keeps a
# residual path around the attention.
weighted_feat = keras.layers.Multiply()([feat11, spatial_w_])
new_feat = keras.layers.Add()([feat11, weighted_feat])

#################################


# make the object detection layer
output = Conv2D(nb_box * (4 + 1 + nb_class),
                (1,1), strides=(1,1),
                padding='same',
                name='DetectionLayer',
                kernel_initializer='lecun_normal')(new_feat)
output = Reshape((grid_h, grid_w, nb_box, 4 + 1 + nb_class))(output)
# Identity pass-through whose only purpose is to wire `true_boxes` into the graph.
output = Lambda(lambda args: args[0])([output, true_boxes])

model = Model([input_image, true_boxes], output)


# Re-initialise the detection layer with small random weights. Looked up by
# name because a positional index depends on how many layers surround the head.
layer = model.get_layer('DetectionLayer')
weights = layer.get_weights()

new_kernel = np.random.normal(size=weights[0].shape)/(grid_h*grid_w)
new_bias   = np.random.normal(size=weights[1].shape)/(grid_h*grid_w)

layer.set_weights([new_kernel, new_bias])

# print a summary of the whole model
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_img (InputLayer)          (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
model_5 (Model)                 (None, 7, 7, 1024)   3228864     input_img[0][0]                  
__________________________________________________________________________________________________
conv2d_19 (Conv2D)              (None, 7, 7, 128)    131200      model_5[1][0]                    
__________________________________________________________________________________________________
conv2d_20 (Conv2D)              (None, 7, 7, 256)    295168      conv2d_19[0][0]                  
__________________________________________________________________________________________________
global_max

In [50]:
#################################
# Spatial-attention prototype on a stand-alone (7, 7, 1024) input, so the
# block can be inspected without the backbone.

inp = Input(shape=(7,7,1024))
feat1 = Conv2D(128, (1, 1), dilation_rate=2, padding='same')(inp)
feat11 = Conv2D(256, (3, 3), dilation_rate=2, padding='same')(feat1)

# Global context: channel-wise max over the grid, tiled back onto all 49 cells.
global_feat = keras.layers.GlobalMaxPool2D()(feat11)
global_feat_ = keras.layers.RepeatVector(49)(global_feat)
global_feat_ = keras.layers.Reshape(target_shape=(7,7,256))(global_feat_)

mix_feat = keras.layers.Concatenate(axis=-1)([feat11, global_feat_])
mix_feat = Conv2D(256, (1, 1), padding='same')(mix_feat)

# One logit per grid cell, normalised ACROSS the 49 cells. Applying softmax
# inside Dense(1) would normalise over the single output unit and always yield
# 1.0, which turns the attention into a no-op.
spatial_w_ = Dense(1)(mix_feat)
spatial_w_ = Reshape(target_shape=(49,))(spatial_w_)
spatial_w_ = Activation('softmax')(spatial_w_)
spatial_w_ = Reshape(target_shape=(7, 7, 1))(spatial_w_)

# The per-cell weight broadcasts over the 256 channels; the Add keeps a
# residual path around the attention.
weighted_feat = keras.layers.Multiply()([feat11, spatial_w_])
new_feat = keras.layers.Add()([feat11, weighted_feat])

#################################
vgg_attention = Model(inp, new_feat)
vgg_attention.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_13 (InputLayer)           (None, 7, 7, 1024)   0                                            
__________________________________________________________________________________________________
conv2d_22 (Conv2D)              (None, 7, 7, 128)    131200      input_13[0][0]                   
__________________________________________________________________________________________________
conv2d_23 (Conv2D)              (None, 7, 7, 256)    295168      conv2d_22[0][0]                  
__________________________________________________________________________________________________
global_max_pooling2d_8 (GlobalM (None, 256)          0           conv2d_23[0][0]                  
__________________________________________________________________________________________________
repeat_vec

In [67]:
#################################
# Channel-attention prototype on a stand-alone (7, 7, 1024) input.

inp = Input(shape=(7,7,1024))
# NOTE(review): dilation_rate has no effect on a 1x1 kernel (single tap).
feat1 = Conv2D(128, (1, 1), dilation_rate=2, padding='same')(inp)
feat11 = Conv2D(256, (3, 3), dilation_rate=2, padding='same')(feat1)

# Pool the whole 7x7 grid down to one value per channel -> (1, 1, 256), then
# pass it through two 1x1 convolutions to get a sigmoid gate per channel.
global_feat = keras.layers.AveragePooling2D(pool_size=(7,7))(feat11)
global_feat = Conv2D(256, (1, 1), padding='same')(global_feat)
global_feat = Activation('relu')(global_feat)
global_feat = Conv2D(256, (1, 1), padding='same')(global_feat)
global_feat = Activation('sigmoid')(global_feat)

# The (1, 1, 256) gate is multiplied onto the (7, 7, 256) feature map.
weighted_feat = keras.layers.Multiply()([feat11, global_feat])

# Residual connection around the gated features.
new_feat = keras.layers.Add()([feat11, weighted_feat])

#################################
channel_att = Model(inp, new_feat)
channel_att.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_17 (InputLayer)           (None, 7, 7, 1024)   0                                            
__________________________________________________________________________________________________
conv2d_33 (Conv2D)              (None, 7, 7, 128)    131200      input_17[0][0]                   
__________________________________________________________________________________________________
conv2d_34 (Conv2D)              (None, 7, 7, 256)    295168      conv2d_33[0][0]                  
__________________________________________________________________________________________________
average_pooling2d_2 (AveragePoo (None, 1, 1, 256)    0           conv2d_34[0][0]                  
__________________________________________________________________________________________________
conv2d_35 

<keras.layers.core.Activation at 0x7f4509d1c090>

In [72]:
#################################
# Spatial-attention prototype (sigmoid gate per grid cell) on a stand-alone
# (7, 7, 1024) input.

inp = Input(shape=(7,7,1024))
# NOTE(review): dilation_rate has no effect on a 1x1 kernel (single tap).
feat1 = Conv2D(128, (1, 1), dilation_rate=2, padding='same')(inp)
feat11 = Conv2D(256, (3, 3), dilation_rate=2, padding='same')(feat1)

# Two 1x1 convolutions reduce the 256 channels to a single sigmoid value per
# grid cell.
global_feat = Conv2D(256, (1, 1), padding='same')(feat11)
global_feat = Activation('relu')(global_feat)
global_feat = Conv2D(1, (1, 1), padding='same')(global_feat)
global_feat = Activation('sigmoid')(global_feat)

# The single-channel gate is multiplied onto all 256 channels of feat11.
weighted_feat = keras.layers.Multiply()([feat11, global_feat])

# Residual connection around the gated features.
new_feat = keras.layers.Add()([feat11, weighted_feat])

#################################
spatial_att = Model(inp, new_feat)
spatial_att.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_21 (InputLayer)           (None, 7, 7, 1024)   0                                            
__________________________________________________________________________________________________
conv2d_49 (Conv2D)              (None, 7, 7, 128)    131200      input_21[0][0]                   
__________________________________________________________________________________________________
conv2d_50 (Conv2D)              (None, 7, 7, 256)    295168      conv2d_49[0][0]                  
__________________________________________________________________________________________________
conv2d_51 (Conv2D)              (None, 7, 7, 256)    65792       conv2d_50[0][0]                  
__________________________________________________________________________________________________
activation

In [73]:
#################################
# Parallel channel + spatial attention prototype on a stand-alone
# (7, 7, 1024) input.

inp = Input(shape=(7,7,1024))

# NOTE(review): dilation_rate has no effect on a 1x1 kernel (single tap).
feat1 = Conv2D(128, (1, 1), dilation_rate=2, padding='same')(inp)
feat11 = Conv2D(256, (3, 3), dilation_rate=2, padding='same')(feat1)

#################################


#################################
# channel attention: pool the 7x7 grid to (1, 1, 256) and derive a sigmoid
# gate per channel

global_feat = keras.layers.AveragePooling2D(pool_size=(7,7))(feat11)
global_feat = Conv2D(256, (1, 1), padding='same')(global_feat)
global_feat = Activation('relu')(global_feat)
global_feat = Conv2D(256, (1, 1), padding='same')(global_feat)
global_feat = Activation('sigmoid')(global_feat)

channel_weighted_feat = keras.layers.Multiply()([feat11, global_feat])
channel_weighted_feat = Conv2D(256, (1, 1), padding='same')(channel_weighted_feat)

#################################


#################################
# spatial attention: derive a single sigmoid gate per grid cell
# (`global_feat` is reused as a scratch name from here on)

global_feat = Conv2D(256, (1, 1), padding='same')(feat11)
global_feat = Activation('relu')(global_feat)
global_feat = Conv2D(1, (1, 1), padding='same')(global_feat)
global_feat = Activation('sigmoid')(global_feat)

spatial_weighted_feat = keras.layers.Multiply()([feat11, global_feat])
spatial_weighted_feat = Conv2D(256, (1, 1), padding='same')(spatial_weighted_feat)

#################################


#################################
# concat both attention features (256 + 256 channels) and project back to 256

concat_s_c_feat = keras.layers.Concatenate(axis=-1)([channel_weighted_feat, spatial_weighted_feat])
concat_s_c_feat = Conv2D(256, (1, 1), padding='same')(concat_s_c_feat)

# Residual connection around the fused attention features.
final_feat = keras.layers.Add()([feat11, concat_s_c_feat])

#################################
parallel_attention = Model(inp, final_feat)
parallel_attention.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_22 (InputLayer)           (None, 7, 7, 1024)   0                                            
__________________________________________________________________________________________________
conv2d_53 (Conv2D)              (None, 7, 7, 128)    131200      input_22[0][0]                   
__________________________________________________________________________________________________
conv2d_54 (Conv2D)              (None, 7, 7, 256)    295168      conv2d_53[0][0]                  
__________________________________________________________________________________________________
average_pooling2d_3 (AveragePoo (None, 1, 1, 256)    0           conv2d_54[0][0]                  
__________________________________________________________________________________________________
conv2d_55 

In [None]:
def spatial_attention(inputs=None):
    """Build a spatial-attention block and return its output tensor.

    The input is reduced to 256 channels, mixed with a globally max-pooled
    copy of itself, and turned into one weight per grid cell by a softmax over
    all cells. The weighted features are added back onto the 256-channel
    features (residual connection).

    Args:
        inputs: 4-D feature tensor with a fully defined spatial size. When
            omitted, the notebook-global ``features`` tensor is used, which is
            what the parameterless version of this function read.

    Returns:
        Tensor with the spatial size of ``inputs`` and 256 channels.
    """
    if inputs is None:
        inputs = features

    feat1 = Conv2D(128, (1, 1), dilation_rate=2, padding='same')(inputs)
    feat11 = Conv2D(256, (3, 3), dilation_rate=2, padding='same')(feat1)

    # Take the grid size from the tensor instead of assuming 7x7.
    rows, cols, channels = keras.backend.int_shape(feat11)[1:]
    n_cells = rows * cols

    # Global context: channel-wise max over the grid, tiled back onto every cell.
    global_feat = keras.layers.GlobalMaxPool2D()(feat11)
    global_feat_ = keras.layers.RepeatVector(n_cells)(global_feat)
    global_feat_ = keras.layers.Reshape(target_shape=(rows, cols, channels))(global_feat_)

    mix_feat = keras.layers.Concatenate(axis=-1)([feat11, global_feat_])
    mix_feat = Conv2D(256, (1, 1), padding='same')(mix_feat)

    # One logit per cell, normalised ACROSS the cells. A softmax inside
    # Dense(1) would normalise over the single output unit and always give 1.0.
    spatial_w_ = Dense(1)(mix_feat)
    spatial_w_ = Reshape(target_shape=(n_cells,))(spatial_w_)
    spatial_w_ = Activation('softmax')(spatial_w_)
    spatial_w_ = Reshape(target_shape=(rows, cols, 1))(spatial_w_)

    # The per-cell weight broadcasts over the channels.
    weighted_feat = keras.layers.Multiply()([feat11, spatial_w_])

    new_feat = keras.layers.Add()([feat11, weighted_feat])
    return new_feat


In [9]:
from __future__ import print_function


from keras.models import Sequential, Model
from keras.layers import Reshape, Activation, Conv2D, Input, MaxPooling2D, BatchNormalization, Flatten, Dense, Lambda
from keras.layers.advanced_activations import LeakyReLU
from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard,ReduceLROnPlateau
from keras.optimizers import SGD, Adam, RMSprop
from keras.layers.merge import concatenate
from keras.layers import AveragePooling2D, Multiply, Concatenate, Add

import matplotlib.pyplot as plt
import keras.backend as K
import tensorflow as tf
# import imgaug as ia
from tqdm import tqdm

# from imgaug import augmenters as iaa
import numpy as np
import pickle
import os, cv2
from preprocessing import parse_annotation, BatchGenerator
from utils import WeightReader, decode_netout, draw_boxes

# os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
# os.environ["CUDA_VISIBLE_DEVICES"] = ""

%matplotlib inline

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import os




from keras.regularizers import l2


from keras.models import Model
import tensorflow as tf
from keras.layers import Reshape, Activation, Conv2D, Input, MaxPooling2D, BatchNormalization, Flatten, Dense, Lambda
from keras.layers.advanced_activations import LeakyReLU

import numpy as np
import os
import cv2
from utils import decode_netout, compute_overlap, compute_ap
from keras.applications.mobilenet import MobileNet
from keras.layers.merge import concatenate
from keras.optimizers import SGD, Adam, RMSprop
from preprocessing import BatchGenerator
from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
# from backend import MobileNetFeature 

In [15]:
# Three-head detector: a direct head, a channel+spatial attention head and an
# atrous-convolution attention head, concatenated along the box axis.
# Requires `features`, `grid_h`, `grid_w`, `input_image`, `true_boxes`,
# `nb_box` and `nb_class` from the setup cell.
# Layers are constructed in a fixed order on purpose: Keras numbers
# auto-named layers by creation order.


def _conv(filters, kernel_size, **kwargs):
    """Return the 'same'-padded, he_normal, L2(1e-4) convolution used in this cell."""
    return Conv2D(filters, kernel_size,
                  padding='same',
                  kernel_initializer='he_normal',
                  kernel_regularizer=l2(1e-4),
                  **kwargs)


def _bn_relu(tensor):
    """Apply a fresh BatchNormalization followed by a ReLU activation."""
    tensor = BatchNormalization()(tensor)
    return Activation('relu')(tensor)


def _detection_head(tensor, name):
    """Apply a 1x1 detection convolution and split its channels per box.

    The output has shape (grid_h, grid_w, nb_box, 4 + 1 + nb_class).
    """
    tensor = Conv2D(nb_box * (4 + 1 + nb_class),
                    (1,1), strides=(1,1),
                    padding='same',
                    name=name,
                    kernel_initializer='lecun_normal')(tensor)
    return Reshape((grid_h, grid_w, nb_box, 4 + 1 + nb_class))(tensor)


##################################################################
# direct feature
block_res1 = features
output_direct = _detection_head(block_res1, 'DetectionLayer_direct')

##################################################################
# bottleneck residual block on the backbone features: 1024 -> 256 -> 256 -> 1024

features_ = _bn_relu(block_res1)
features_ = _conv(256, (1, 1))(features_)
features_ = _bn_relu(features_)

features_ = _conv(256, (3, 3))(features_)
features_ = _bn_relu(features_)

block_res2 = features_  # not used in this cell; kept as a handle on the 256-channel features
features_ = _conv(1024, (1, 1))(features_)

features_ = Add()([block_res1, features_])

features_ = _conv(256, (1, 1))(features_)
features_ = _bn_relu(features_)

features_ = _conv(256, (3, 3))(features_)

# pre-activation tensor feeds the residual Add; activated tensor feeds the attention
new_block1 = features_
features_ = _bn_relu(features_)
new_block2 = features_

#################################
# channel attention: pool the whole grid to one value per channel, then derive
# a sigmoid gate per channel (pool size follows the grid instead of a fixed 7x7)

c_global_feat = AveragePooling2D(pool_size=(grid_h, grid_w))(new_block2)
c_global_feat = _conv(256, (1, 1))(c_global_feat)
c_global_feat = Activation('relu')(c_global_feat)

c_global_feat = _conv(256, (1, 1))(c_global_feat)
c_global_feat = Activation('sigmoid')(c_global_feat)

channel_weighted_feat = Multiply()([new_block2, c_global_feat])
channel_weighted_feat = _conv(256, (1, 1))(channel_weighted_feat)

#################################
# spatial attention: a single sigmoid gate per grid cell

s_global_feat = _conv(256, (1, 1))(new_block2)
s_global_feat = Activation('relu')(s_global_feat)

s_global_feat = _conv(1, (1, 1))(s_global_feat)
s_global_feat = Activation('sigmoid')(s_global_feat)

spatial_weighted_feat = Multiply()([new_block2, s_global_feat])
spatial_weighted_feat = _conv(256, (1, 1))(spatial_weighted_feat)

#################################
# concat both attention features and project back to 256 channels

concat_s_c_feat = Concatenate(axis=-1)([channel_weighted_feat, spatial_weighted_feat])
concat_s_c_feat = _conv(256, (1, 1))(concat_s_c_feat)

final_feat = Add()([new_block1, concat_s_c_feat])

spatial_channel_feat = _bn_relu(final_feat)

output_sc = _detection_head(spatial_channel_feat, 'DetectionLayer_sc')

##################################################################
# atrous conv: same bottleneck as above but with a dilated 3x3 convolution,
# used to produce one sigmoid attention weight per grid cell

features_at = _bn_relu(block_res1)
features_at = _conv(256, (1, 1))(features_at)
features_at = _bn_relu(features_at)

features_at = _conv(256, (3, 3), dilation_rate=2)(features_at)
features_at = _bn_relu(features_at)

features_at = _conv(1024, (1, 1))(features_at)

features_at = Add()([block_res1, features_at])

features_at = _bn_relu(features_at)

# attn weights
features_at = _conv(1, (1, 1))(features_at)
features_at = Activation('sigmoid')(features_at)

# gate the original backbone features with the per-cell weight
features_at = Multiply()([block_res1, features_at])
features_at = _bn_relu(features_at)

output_atrous = _detection_head(features_at, 'DetectionLayer_atrous')

##################################################################
# concatenate every box: the three heads are stacked along the box axis,
# giving 3 * nb_box boxes per grid cell
output_concat = Concatenate(axis=-2)([output_direct, output_sc, output_atrous])

# NOTE(review): unlike the single-head cells, `true_boxes` is not passed
# through a Lambda here, so it is a declared model input that no layer reads.
# Confirm this is intended before training with a loss that needs it.
model = Model([input_image, true_boxes], output_concat)

model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_img (InputLayer)          (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
model_1 (Model)                 (None, 7, 7, 1024)   3228864     input_img[0][0]                  
__________________________________________________________________________________________________
batch_normalization_39 (BatchNo (None, 7, 7, 1024)   4096        model_1[1][0]                    
__________________________________________________________________________________________________
activation_53 (Activation)      (None, 7, 7, 1024)   0           batch_normalization_39[0][0]     
__________________________________________________________________________________________________
conv2d_54 