In [1]:
import pandas as pd
import batch_generate
import tools
import bbox_encode
import bbox_decode
from bbox import bbox_overlaps

In [2]:
data = pd.read_csv('kitti.csv')
data = data.drop('Unnamed: 0', 1)
data.head()

Unnamed: 0,index,cls,x1,y1,x2,y2,File_Path
0,1221,car,790.01,184.59,1210.73,374.0,./KITTI/training/image_2/001221.png
1,1221,cyclist,113.04,166.59,291.61,374.0,./KITTI/training/image_2/001221.png
2,1221,car,54.64,200.99,321.79,344.57,./KITTI/training/image_2/001221.png
3,1221,car,661.86,164.18,813.92,274.49,./KITTI/training/image_2/001221.png
4,1221,car,229.61,184.82,388.04,274.36,./KITTI/training/image_2/001221.png


In [3]:
data_car = data[(data['cls']=='car')].reset_index()
data_car = data_car.drop('level_0', 1)
data_car.head()

Unnamed: 0,index,cls,x1,y1,x2,y2,File_Path
0,1221,car,790.01,184.59,1210.73,374.0,./KITTI/training/image_2/001221.png
1,1221,car,54.64,200.99,321.79,344.57,./KITTI/training/image_2/001221.png
2,1221,car,661.86,164.18,813.92,274.49,./KITTI/training/image_2/001221.png
3,1221,car,229.61,184.82,388.04,274.36,./KITTI/training/image_2/001221.png
4,1221,car,589.7,174.77,653.16,221.56,./KITTI/training/image_2/001221.png


In [4]:
gen = batch_generate.generate_train_anchor_batch(data_car)

In [5]:
img, bbox = next(gen)

./KITTI/training/image_2/003041.png


In [6]:
import tensorflow as tf
from keras import backend as K
from keras.objectives import categorical_crossentropy
import cv2
import keras
from keras.applications.imagenet_utils import preprocess_input
from keras.backend.tensorflow_backend import set_session
from keras.models import Model
from keras.layers import Input, merge, Convolution2D, MaxPooling2D, UpSampling2D,Lambda
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, LearningRateScheduler
from keras.preprocessing import image

Using TensorFlow backend.


In [7]:
_num_anchors = 9

In [8]:
img_rows = 640
img_cols = 960

In [9]:
#Region Proposal Nets
def vgg16():
    inputs = Input((img_rows, img_cols,3))
    inputs_norm = Lambda(lambda x: x/127.5 - 1.)
    conv1 = Convolution2D(64, 3, 3, activation='relu', border_mode='same', name = 'conv1_1')(inputs)
    conv1 = Convolution2D(64, 3, 3, activation='relu', border_mode='same', name = 'conv1_2')(conv1)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)

    conv2 = Convolution2D(128, 3, 3, activation='relu', border_mode='same', name = 'conv2_1')(pool1)
    conv2 = Convolution2D(128, 3, 3, activation='relu', border_mode='same', name = 'conv2_2')(conv2)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)

    conv3 = Convolution2D(256, 3, 3, activation='relu', border_mode='same', name = 'conv3_1')(pool2)
    conv3 = Convolution2D(256, 3, 3, activation='relu', border_mode='same', name = 'conv3_2')(conv3)
    conv3 = Convolution2D(256, 3, 3, activation='relu', border_mode='same', name = 'conv3_3')(conv3)
    pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)

    conv4 = Convolution2D(512, 3, 3, activation='relu', border_mode='same', name = 'conv4_1')(pool3)
    conv4 = Convolution2D(512, 3, 3, activation='relu', border_mode='same', name = 'conv4_2')(conv4)
    conv4 = Convolution2D(512, 3, 3, activation='relu', border_mode='same', name = 'conv4_4')(conv4)
    pool4 = MaxPooling2D(pool_size=(2, 2))(conv4)

    conv5 = Convolution2D(512, 3, 3, activation='relu', border_mode='same', name = 'conv5_1')(pool4)
    conv5 = Convolution2D(512, 3, 3, activation='relu', border_mode='same', name = 'conv5_2')(conv5)
    conv5 = Convolution2D(512, 3, 3, activation='relu', border_mode='same', name = 'conv5_3')(conv5)
    
    #Region Proposal Network
    rpn = Convolution2D(512, 3, 3, activation='relu', border_mode='same', name='rpn_conv1')(conv5)
    rpn_cls = Convolution2D(_num_anchors, 1, 1, activation='sigmoid', name='rpn_out_class')(rpn)
    rpn_reg = Convolution2D(_num_anchors *4, 1, 1, activation='linear', name='rpn_out_regress')(rpn)

    model = Model(input=inputs, output=[rpn_cls, rpn_reg])

    return model

In [10]:
#def loss
lambda_rpn_regr = 1.0
lambda_rpn_class = 1.0
epsilon = 1e-4
def rpn_loss_regr(num_anchors):
    def rpn_loss_regr_fixed_num(y_true, y_pred):
        x = y_true[:, :, :, 4 * num_anchors:] - y_pred
        x_abs = K.abs(x)
        x_bool = K.cast(tf.less_equal(x_abs, 1.0), tf.float32)

        return lambda_rpn_regr * K.sum(y_true[:, :, :, :4 * num_anchors] 
                                       * (x_bool * (0.5 * x * x) + (1 - x_bool) 
                                       * (x_abs - 0.5))) / K.sum(epsilon + y_true[:, :, :, :4 * num_anchors]
                                       )
    return rpn_loss_regr_fixed_num


def rpn_loss_cls(num_anchors):
    def rpn_loss_cls_fixed_num(y_true, y_pred):
        return lambda_rpn_class * K.sum(y_true[:, :, :, :num_anchors] * 
                                        K.binary_crossentropy(y_pred[:, :, :, :], y_true[:, :, :, num_anchors:])) / K.sum(epsilon + y_true[:, :, :, :num_anchors])
    return rpn_loss_cls_fixed_num

In [11]:
rpn = vgg16()
rpn.load_weights('vgg16_weights_tf_dim_ordering_tf_kernels.h5', by_name=True)
optimizer = Adam(lr=1e-4)
rpn.compile(optimizer=optimizer, loss=[rpn_loss_cls(_num_anchors), rpn_loss_regr(_num_anchors)])
gen = batch_generate.generate_train_anchor_batch(data_car)

In [12]:
while True:
    x, y = next(gen)#gen.next()#next(gen)
    loss_rpn = rpn.train_on_batch(x, y)
    P_rpn = rpn.predict_on_batch(x)
    print(loss_rpn)

./KITTI/training/image_2/005228.png


ValueError: Error when checking model input: expected input_1 to have 4 dimensions, but got array with shape (640, 960, 3)