In [1]:
import tensorflow as tf
import keras
from keras.applications import vgg19
from keras.layers import Conv2D, Input
from keras.models import Model
from functools import partial, update_wrapper
from DataGenerator import DataGenerator
%load_ext autoreload
%autoreload 2
from keras import optimizers
from keras.metrics import sparse_categorical_accuracy

Using TensorFlow backend.


In [2]:
def wrapped_partial(func, *args, **kwargs):
    partial_func = partial(func, *args, **kwargs)
    update_wrapper(partial_func, func)
    return partial_func

In [3]:
num_classes = 21
aspect_ratios = [1, 2, 3, 1 / 2.0, 1 / 3.0]
num_aspect_ratios = len(aspect_ratios)+1 # +1 for the last box with aspect_ratio 1 but bigger size

feature_sizes = [26, 12, 5]

batch_size = 32

In [4]:
# y_ph1 = tf.placeholder(tf.int32, shape=[None, 26, 26])
# yph_1 = Input(shape=[None, 26, 26], dtype='int32', name='y1')
# yph_2 = Input(shape=[None, 12, 12], dtype='int32', name='y2')
# yph_3 = Input(shape=[None, 5, 5], dtype='int32', name='y3')
# y_ph2 = tf.placeholder(tf.int32, shape=[None, 12, 12])
# y_ph3 = tf.placeholder(tf.int32, shape=[None, 5, 5])

In [5]:
model = vgg19.VGG19(include_top=False, input_shape=(224, 224, 3))

In [6]:
for layer in model.layers:
    print layer, layer.output_shape
    layer.trainable = False

<keras.engine.topology.InputLayer object at 0x1044c7410> (None, 224, 224, 3)
<keras.layers.convolutional.Conv2D object at 0x11489b290> (None, 224, 224, 64)
<keras.layers.convolutional.Conv2D object at 0x1148b3c50> (None, 224, 224, 64)
<keras.layers.pooling.MaxPooling2D object at 0x1148d6dd0> (None, 112, 112, 64)
<keras.layers.convolutional.Conv2D object at 0x1148d6990> (None, 112, 112, 128)
<keras.layers.convolutional.Conv2D object at 0x114a1da90> (None, 112, 112, 128)
<keras.layers.pooling.MaxPooling2D object at 0x114a59d90> (None, 56, 56, 128)
<keras.layers.convolutional.Conv2D object at 0x114a59c50> (None, 56, 56, 256)
<keras.layers.convolutional.Conv2D object at 0x114a9b7d0> (None, 56, 56, 256)
<keras.layers.convolutional.Conv2D object at 0x114aabd90> (None, 56, 56, 256)
<keras.layers.convolutional.Conv2D object at 0x114af7550> (None, 56, 56, 256)
<keras.layers.pooling.MaxPooling2D object at 0x114aba910> (None, 28, 28, 256)
<keras.layers.convolutional.Conv2D object at 0x114b5a490> 

In [7]:
out1 = Conv2D(padding='valid', filters=num_classes*num_aspect_ratios, kernel_size=3,
              activation=None, name='26')(model.layers[-7].output)

out2 = Conv2D(padding='valid', filters=num_classes*num_aspect_ratios, kernel_size=3, 
              activation=None, name='12')(model.layers[-2].output)

out3 = Conv2D(padding='valid', filters=num_classes*num_aspect_ratios, kernel_size=3,
              activation=None, name='5')(model.layers[-1].output)

In [8]:
print out1.shape
print out2.shape
print out3.shape

(?, 26, 26, 126)
(?, 12, 12, 126)
(?, 5, 5, 126)


In [9]:
# out1_flat = tf.reshape(out1, [-1, num_classes])
# out2_flat = tf.reshape(out2, [-1, num_classes])
# out3_flat = tf.reshape(out3, [-1, num_classes])

In [10]:
# all_outputs = tf.concat([out1_flat, out2_flat, out3_flat], 0)

In [11]:
# print all_outputs.shape

In [12]:
# loss1 = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_ph1, logits=out1)
# loss2 = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_ph2, logits=out2)
# loss3 = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_ph3, logits=out3)

In [13]:
def loss_with_negative_mining(y_true, y_pred, k, num_aspect_ratios, num_classes):
#     zero = tf.constant(0.0)
    print y_true, y_pred
    zero = tf.constant(0, dtype=tf.int32)
    
    # remove the zero padding from the input to get sparse labels
    y_true = tf.slice(y_true, begin=[0, 0, 0, 0], size=[-1, -1, -1, num_aspect_ratios])
    y_pred_shape = tf.shape(y_pred)
    y_pred = tf.reshape(y_pred, shape = [y_pred_shape[0], y_pred_shape[1], y_pred_shape[2],
                                         num_aspect_ratios, num_classes])
    y_true = tf.cast(y_true, tf.int32)
    
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_true, logits=y_pred)
    print y_true, y_pred, loss
    
    pos_samples = tf.not_equal(y_true, zero)
    pos_loss = tf.multiply(loss, tf.cast(pos_samples, tf.float32))
    pos_loss = tf.reduce_sum(pos_loss, [1, 2, 3])
    
    neg_samples = tf.equal(y_true, zero)
    neg_loss = tf.multiply(loss, tf.cast(neg_samples, tf.float32))
#     neg_loss = tf.reduce_sum(neg_loss, [1, 2, 3])
    
    neg_loss_flattened = tf.reshape(neg_loss, [tf.shape(y_true)[0], -1])
    top_k_neg_loss, _ = tf.nn.top_k(neg_loss_flattened, k=k, sorted=False)
    top_k_neg_loss = tf.reduce_sum(top_k_neg_loss, axis=1)
    N = tf.cast(tf.reduce_sum(y_true, [1, 2, 3]), tf.float32) + tf.ones([y_pred_shape[0]], dtype=tf.float32, name='smoothing')
    total_loss = (pos_loss + top_k_neg_loss)/N
    
    return tf.reduce_mean(total_loss)


def my_accuracy(y_true, y_pred, num_aspect_ratios, num_classes):
#     sparse_categorical_accuracy
    y_pred_shape = tf.shape(y_pred)
    y_pred = tf.reshape(y_pred, shape = [y_pred_shape[0], y_pred_shape[1], y_pred_shape[2],
                                         num_aspect_ratios, num_classes])
    y_pred = tf.reshape(y_pred, shape = [-1, num_classes])
    y_true = tf.reshape(y_true, shape=[ -1])
    return sparse_categorical_accuracy(y_true, y_pred)

In [14]:
k = 40 # set randomly for now
# hard_neg_loss1 = loss_with_negative_mining(loss1, y_ph1, k)
# hard_neg_loss2 = loss_with_negative_mining(loss2, y_ph2, k)
# hard_neg_loss3 = loss_with_negative_mining(loss3, y_ph3, k)

In [15]:
# total_loss = hard_neg_loss1 + hard_neg_loss2 + hard_neg_loss3


In [16]:
ssd_model = Model(inputs=model.input, outputs = [out1, out2, out3])
loss_fun = wrapped_partial(loss_with_negative_mining, k=k, num_aspect_ratios=num_aspect_ratios, num_classes=num_classes)
acc_fun = wrapped_partial(my_accuracy, num_aspect_ratios=num_aspect_ratios, num_classes=num_classes)
sgd = optimizers.SGD(lr=0.00001, decay=1e-6, momentum=0.9)

ssd_model.compile(optimizer="adam", 
              loss={'26': loss_fun, '12': loss_fun, '5':loss_fun},
                 metrics=[acc_fun])

Tensor("26_target:0", shape=(?, ?, ?, ?), dtype=float32) Tensor("26/BiasAdd:0", shape=(?, 26, 26, 126), dtype=float32)
Tensor("Cast:0", shape=(?, ?, ?, 6), dtype=int32) Tensor("Reshape:0", shape=(?, ?, ?, 6, 21), dtype=float32) Tensor("SparseSoftmaxCrossEntropyWithLogits/Reshape_2:0", shape=(?, ?, ?, 6), dtype=float32)
Tensor("12_target:0", shape=(?, ?, ?, ?), dtype=float32) Tensor("12/BiasAdd:0", shape=(?, 12, 12, 126), dtype=float32)
Tensor("Cast_5:0", shape=(?, ?, ?, 6), dtype=int32) Tensor("Reshape_2:0", shape=(?, ?, ?, 6, 21), dtype=float32) Tensor("SparseSoftmaxCrossEntropyWithLogits_1/Reshape_2:0", shape=(?, ?, ?, 6), dtype=float32)
Tensor("5_target:0", shape=(?, ?, ?, ?), dtype=float32) Tensor("5/BiasAdd:0", shape=(?, 5, 5, 126), dtype=float32)
Tensor("Cast_10:0", shape=(?, ?, ?, 6), dtype=int32) Tensor("Reshape_4:0", shape=(?, ?, ?, 6, 21), dtype=float32) Tensor("SparseSoftmaxCrossEntropyWithLogits_2/Reshape_2:0", shape=(?, ?, ?, 6), dtype=float32)


In [20]:
data_gen = DataGenerator(data_dir='../data/VOCdevkit/VOC2012/JPEGImages/', 
                        label_dir='../data/VOCdevkit/VOC2012/Preprocessed/', 
                        num_classes=num_classes, num_aspect_ratios=num_aspect_ratios,
                        feature_sizes=feature_sizes, 
                        batch_size=batch_size)

(17125,)


In [21]:
# t = data_gen.generate()
# print t.next()

In [None]:
ssd_model.fit_generator(generator=data_gen.generate(),steps_per_epoch=50, epochs=50, 
                       validation_data=data_gen.generate(train=False), validation_steps=32)

Epoch 1/50
 2/50 [>.............................] - ETA: 2089s - loss: 40874.0596 - 26_loss: 39486.4023 - 12_loss: 556.4903 - 5_loss: 831.1667 - 26_my_accuracy: 0.0356 - 12_my_accuracy: 0.0374 - 5_my_accuracy: 0.0322 

In [None]:
Epoch 1/50
50/50 [==============================] - 490s - loss: 1604.6983 - 26_loss: 1485.4940 - 12_loss: 56.4526 - 5_loss: 62.7517   
Epoch 2/50
50/50 [==============================] - 466s - loss: 67.7751 - 26_loss: 2.2011 - 12_loss: 27.7908 - 5_loss: 37.7832   
Epoch 3/50
50/50 [==============================] - 502s - loss: 66.2491 - 26_loss: 1.1493 - 12_loss: 27.4748 - 5_loss: 37.6250    
Epoch 4/50
28/50 [===============>..............] - ETA: 226s - loss: 68.4365 - 26_loss: 1.2382 - 12_loss: 31.2384 - 5_loss: 35.9600