In [1]:
import sys
!{sys.executable} -m pip install --user import_ipynb

[33mYou are using pip version 10.0.1, however version 18.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [2]:
import numpy as np
import tensorflow as tf
import pandas as pd
import import_ipynb
#from loss_function import loss_op
import shutil

In [3]:
# 1 if using cudnn on GPU else using CPU
CUDNN_GPU = 0
model_path = 'my_model.csv'
data_path = '../dataset/my_dict.npy'
model_saver = '../model_saver/'
n_classes = 4
n_input = [224,224,3]
n_output = [7,7,9]
batch_size = 32

In [4]:
data = np.load(data_path).item()
pd.read_csv(model_path)

Unnamed: 0.1,Unnamed: 0,Layer,Filter,kernel,Stride,Output
0,0,Input,,,,224x224x3
1,1,Conv,16.0,3x3,1.0,224x224x16
2,2,MaxPooling,,2x2,2.0,112x112x16
3,3,Conv,32.0,3x3,1.0,112x112x32
4,4,MaxPooling,,2x2,2.0,56x56x32
5,5,Conv,64.0,3x3,1.0,56x56x64
6,6,MaxPooling,,2x2,2.0,28x28x64
7,7,Conv,128.0,3x3,1.0,28x28x128
8,8,MaxPooling,,2x2,2.0,14x14x128
9,9,Conv,256.0,3x3,1.0,14x14x256


In [5]:
def cnn_model(input, model_path, is_training=True):
    use_cudnn_on_gpu = True if CUDNN_GPU == 1 else False
    my_model = pd.read_csv(model_path)
    
    ### Define some function for making layers
    def make_input(input_, out_shape, name):
        #result = tf.reshape(input_, [1,out_shape[0],out_shape[1],out_shape[2]], name=name)
        result = input_/255.0
        #tf.summary.histogram(name, result)
        return result

    def make_conv(input_, in_channel, out_channel, filter_, strides, name):
        out_channel = int(out_channel)
        strides = int(strides)
        filter_ = list(map(int,filter_.split('x')))
        filter_ = tf.Variable(tf.random_normal([filter_[0],filter_[1],in_channel,out_channel],stddev=0.1), name=name+'_filter')
        # conv
        result = tf.nn.conv2d(input=input_, 
                         filter=filter_,
                         strides=[1, strides, strides, 1],
                         padding='SAME',
                         use_cudnn_on_gpu=use_cudnn_on_gpu,
                         name=name)
        # add bias
        bias = tf.Variable(tf.random_normal([out_channel]))
        result = tf.nn.bias_add(result, bias)
        # relu
        result = tf.nn.leaky_relu(result,alpha=0.1)
        
        #tf.summary.histogram(name, result)
        return result
        
    def make_maxpool(input_, in_channel, filter_, strides, name):
        strides = int(strides)
        filter_ = list(map(int,filter_.split('x')))
        result = tf.nn.max_pool(value=input_,
                             ksize=[1, filter_[0], filter_[1], 1],
                             strides=[1, strides, strides, 1],
                             padding='SAME',
                             name=name)
        return result
        
    def make_flatten(input_, name):
        return tf.contrib.layers.flatten(inputs=input_, scope=name)
        
    def make_fc(input_, out_shape, name):
        result = tf.contrib.layers.fully_connected(inputs=input_,
                                                    activation_fn=tf.nn.relu,
                                                    num_outputs=int(out_shape[0]),
                                                    scope=name)
        #tf.summary.histogram(name, result)
        return result
    
    def make_dropout(input_, name):
        return tf.nn.dropout(input_, 0.75)
    
    def make_reshape(input_, out_shape, name):
        result = tf.reshape(tensor=input_,
                         shape=[tf.shape(input_)[0],out_shape[0],out_shape[1],out_shape[2]],
                         name=name)
        #tf.summary.histogram(name, result)
        return result
    
    
    ### Generate the model base on the model file
    output = input
    layer_match = {
            'Input':      lambda input_, params, _:       make_input(input_, params[5], 'input_image'),
            'Conv':       lambda input_, params, channel: make_conv(input_, channel, params[2], params[3], params[4], 'layer_'+str(params[0])),
            'MaxPooling': lambda input_, params, channel: make_maxpool(input_, channel, params[3], params[4], 'layer_'+str(params[0])),
            'Flatten':    lambda input_, params, _:       make_flatten(input_, 'layer_'+str(params[0])),
            'Fc':         lambda input_, params, _:       make_fc(input_, params[5], 'layer_'+str(params[0])),
            'Reshape':    lambda input_, params, _:       make_reshape(input_, params[5], 'layer_'+str(params[0])),
            'Dropout':    lambda input_, params, _:       make_dropout(input_, 'layer_'+str(params[0]))
        }
    prev_channel = None
    for layer in my_model.values:
        # preprocessing layer input
        layer[-1] = np.array(list(map(int,layer[-1].split('x'))))
        # map layer
        output = layer_match[layer[1]](output, layer, prev_channel)
        prev_channel = layer[-1][-1]
        
    return output

In [6]:
def lstm_model(X, weights, biases, isTraining, num_classes):
    # Preprocess data input
    
    # Create LSTM cell
    lstm_cell = None;
    if CUDNN_GPU == 0:
        lstm_cell = tf.contrib.rnn.LSTMBlockCell(n_hidden, 
                                                forget_bias=1.0)
    else:
        lstm_cell = tf.contrib.cudnn_rnn.CudnnLSTM(num_layers=1, 
                                                   num_units=n_hidden,
                                                   kernel_initializer=tf.initializers.random_uniform(-0.01, 0.01),
                                                   bias_initializer=tf.initializers.constant(0))
    
    # Creates a recurrent neural network specified by RNNCell cell.
    lstm_out, _ = tf.contrib.rnn.static_rnn(cell=lstm_cell, 
                                            inputs=X,
                                            dtype=tf.float32)
    # Dropout layer
    dropout = tf.layers.dropout(inputs=lstm_out, 
                               rate=0.5,
                               training=isTraining)
    
    # Fully connected layer
    # weights_initializer is gaussian distribution
    # bias_initializer is constant by zero
    fc = tf.contrib.layers.fully_connected(inputs=dropout,
                                            num_outputs=num_classes,
                                            activation_fn=None,
                                            weights_initializer=tf.initializers.truncated_normal(stddev=0.01),
                                            bias_initializer=tf.initializers.constant(0))
    
    # Batch Norm + Scale layer
    batch_norm = tf.layers.batch_normalization(inputs=fc,
                                                axis=2,
                                                training=isTraining)
    
    # ReLU activation
    relu = tf.nn.relu_layer(batch_norm)
    
    lstm_last_output = outputs[-1]
    return lstm_last_output

In [7]:
train_len = data['X_train'].shape[0]
test_len = data['X_test'].shape[0]

In [8]:
# reset graph
tf.reset_default_graph()

# the placeholders will be used for the feed_dict param of tf.Session.run()
is_training = tf.placeholder(tf.bool, name='is_training')

X = tf.placeholder(tf.float32, [None, n_input[0], n_input[1], n_input[2]], name='X_train')
y = tf.placeholder(tf.float32, [None, n_output[0], n_output[1], n_output[2]], name='y_train')

In [9]:
LAMDA_NOOBJ = 0.5
LAMDA_COORD = 5.0

def loss_op(y_pred, y_true):
    mask_shape = tf.shape(y_true)[:3]
    
    cell_x = tf.to_float(tf.reshape(tf.tile(tf.range(7), [7]), (1, 7, 7, 1)))
    cell_y = tf.transpose(cell_x, (0,2,1,3))
    cell_grid = tf.tile(tf.concat([cell_x,cell_y], -1), [batch_size, 1, 1, 1])

    coord_mask = tf.zeros(mask_shape)
    conf_mask  = tf.zeros(mask_shape)
    class_mask = tf.zeros(mask_shape)

    seen = tf.Variable(0.)
    total_recall = tf.Variable(0.)

    """
    Adjust prediction
    """
    ### adjust x and y      
    pred_box_xy = tf.sigmoid(y_pred[..., :2]) + cell_grid

    ### adjust w and h
    pred_box_wh = tf.exp(tf.sigmoid(y_pred[..., 2:4]))
    #tf.summary.scalar("pred_box_wh_1", tf.reduce_max(y_pred[..., 2:4]))

    ### adjust confidence
    pred_box_conf = tf.sigmoid(y_pred[..., 4])

    ### adjust class probabilities
    pred_box_class = y_pred[..., 5:]

    """
    Adjust ground truth
    """
    ### adjust x and y
    true_box_xy = y_true[..., 0:2] # relative position to the containing cell

    ### adjust w and h
    true_box_wh = y_true[..., 2:4] # number of cells accross, horizontally and vertically

    ### adjust confidence
    true_wh_half = true_box_wh / 2.
    true_mins    = true_box_xy - true_wh_half
    true_maxes   = true_box_xy + true_wh_half

    pred_wh_half = pred_box_wh / 2.
    pred_mins    = pred_box_xy - pred_wh_half
    pred_maxes   = pred_box_xy + pred_wh_half       

    intersect_mins  = tf.maximum(pred_mins,  true_mins)
    intersect_maxes = tf.minimum(pred_maxes, true_maxes)
    intersect_wh    = tf.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

    true_areas = true_box_wh[..., 0] * true_box_wh[..., 1]
    pred_areas = pred_box_wh[..., 0] * pred_box_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    iou_scores  = tf.truediv(intersect_areas, union_areas)

    true_box_conf = iou_scores * y_true[..., 4]

    ### adjust class probabilities
    true_box_class = tf.argmax(y_true[..., 5:], -1)

    """
    Determine the masks
    """
    ### coordinate mask: simply the position of the ground truth boxes (the predictors)
    coord_mask = tf.expand_dims(y_true[..., 4], axis=-1)

    ### confidence mask: penalize predictors + penalize boxes with low IOU
    conf_noobj_mask = 1 - y_true[..., 4]

    # penalize the confidence of the boxes, which are reponsible for corresponding ground truth box
    conf_obj_mask = y_true[..., 4]

    ### class mask: simply the position of the ground truth boxes (the predictors)
    class_mask = y_true[..., 4] * tf.ones(tf.shape(y_true[..., 4])) 

    """
    Finalize the loss
    """
    loss_xy    = tf.reduce_sum(tf.square(true_box_xy-pred_box_xy)           * coord_mask)
    loss_wh    = tf.reduce_sum(tf.square(true_box_wh-pred_box_wh)           * coord_mask)
    loss_conf_obj  = tf.reduce_sum(tf.square(true_box_conf-pred_box_conf)   * conf_obj_mask) 
    loss_conf_noobj  = tf.reduce_sum(tf.square(true_box_conf-pred_box_conf) * conf_noobj_mask) 
    loss_class = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true_box_class, logits=pred_box_class)
    loss_class = tf.reduce_sum(loss_class * class_mask)

    loss = (loss_xy + loss_wh) * LAMDA_COORD + loss_conf_obj + loss_conf_noobj * LAMDA_NOOBJ + loss_class
    #loss = loss_wh

    nb_true_box = tf.reduce_sum(y_true[..., 4])
    nb_pred_box = tf.reduce_sum(tf.to_float(true_box_conf > 0.5) * tf.to_float(pred_box_conf > 0.3))

    """
    Debugging code
    """    
    current_recall = nb_pred_box/(nb_true_box + 1e-6)
    total_recall = tf.assign_add(total_recall, current_recall) 

    loss = tf.Print(loss, [loss_xy], message='Loss XY \t', summarize=1000)
    loss = tf.Print(loss, [loss_wh], message='Loss WH \t', summarize=1000)
    loss = tf.Print(loss, [loss_conf_obj], message='Loss Conf Obj \t', summarize=1000)
    loss = tf.Print(loss, [loss_conf_noobj], message='Loss Conf Noobj \t', summarize=1000)
    loss = tf.Print(loss, [loss_class], message='Loss Class \t', summarize=1000)
    loss = tf.Print(loss, [loss], message='Total Loss \t', summarize=1000)
    loss = tf.Print(loss, [current_recall], message='Current Recall \t', summarize=1000)
    loss = tf.Print(loss, [total_recall/seen], message='Average Recall \t', summarize=1000)

    return loss


In [10]:
# model
cnn_model = cnn_model(X, model_path, is_training)

# learning rate
global_step = tf.Variable(0, trainable=False, name='global_step')
#learning_rate = tf.train.exponential_decay(
#                        0.001,  # Base learning rate.
#                        global_step,  # Current index into the dataset.
#                        train_len,  # Decay step.
#                        0.95,  # Decay rate.
#                        staircase=True,
#                        name='learning_rate')
learning_rate = 0.0005
# loss 
loss_op = loss_op(cnn_model, y);
# optimizer 
optimal = tf.train.AdamOptimizer(learning_rate, name='adam_optimizer')
# increment global_step at each step.
train_op = optimal.minimize(loss_op, name='optimal_min')

# evaluate model
correct_prediction = tf.equal(tf.argmax(cnn_model, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [11]:
# tao summary cua cac monitor de quan sat cac bien
tf.summary.scalar('loss_op', loss_op)
tf.summary.scalar('learning_rate', learning_rate)
tf.summary.scalar('accuracy', accuracy)

# gop cac summaries vao mot operation
merged_summary_op = tf.summary.merge_all()

# tao doi tuong log writer va ghi vao Tensorboard
tf_writer = tf.summary.FileWriter('../checkpoint', graph=tf.get_default_graph())

# khoi tao cac variables
init = tf.global_variables_initializer()
# Add ops to save and restore all the variables.
saver = tf.train.Saver()

In [12]:
def get_batch(dict_field_name, batch):
    return data[dict_field_name][batch*batch_size:min((batch+1)*batch_size,train_len)]

In [None]:
# remove model_saver folder
shutil.rmtree(model_saver, ignore_errors=True)

# training
with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
    sess.run(init, feed_dict={is_training:True})
    # Training cycle
    epoch = 0;
    while True:
        total_batch =  train_len // batch_size
        for batch in range(total_batch):
            # lay batch tiep theo
            batch_input = get_batch('X_train', batch) 
            batch_label = get_batch('y_train', batch)
            # chay train_op, loss_op, accuracy
            _, cost, acc, summary = sess.run([train_op, loss_op, accuracy, merged_summary_op], feed_dict={X:batch_input, y:batch_label, is_training:True})
            # Write logs at every iteration
            tf_writer.add_summary(summary, epoch * total_batch + batch)
            print("---Batch:" + ('%04d,' % (batch)) + ("cost={%.9f}, training accuracy %.5f" % (cost, acc)) + "\n")

        epoch += 1;
        
        # hien thi ket qua sau moi epoch
        print("Epoch:" + ('%04d,' % (epoch)) + ("cost={%.9f}, training accuracy %.5f" % (cost, acc)) + "\n")
        
        if epoch % 1 == 0:
            # Luu tru variables vao disk.
            save_path = saver.save(sess, model_saver + 'nn_model_%04d.ckpt'%(epoch))
            print("Model saved in path: %s \n" % save_path)

---Batch:0000,cost={6832.280761719}, training accuracy 0.18204

---Batch:0001,cost={12618.822265625}, training accuracy 0.15526

---Batch:0002,cost={8532.790039062}, training accuracy 0.14683

---Batch:0003,cost={8782.526367188}, training accuracy 0.17808

---Batch:0004,cost={7843.971679688}, training accuracy 0.21081

---Batch:0005,cost={6778.716308594}, training accuracy 0.23859

---Batch:0006,cost={5760.497070312}, training accuracy 0.25744

---Batch:0007,cost={6387.741210938}, training accuracy 0.24256

---Batch:0008,cost={5586.432617188}, training accuracy 0.23264

---Batch:0009,cost={6495.916503906}, training accuracy 0.23760

---Batch:0010,cost={5793.320312500}, training accuracy 0.27331

---Batch:0011,cost={6715.105468750}, training accuracy 0.28919

---Batch:0012,cost={6174.022949219}, training accuracy 0.25744

---Batch:0013,cost={5534.975585938}, training accuracy 0.27431

---Batch:0014,cost={6659.664550781}, training accuracy 0.33135

---Batch:0015,cost={4985.574218750}, tr

---Batch:0017,cost={5604.184082031}, training accuracy 0.47421

---Batch:0018,cost={4688.659179688}, training accuracy 0.48413

---Batch:0019,cost={5103.925292969}, training accuracy 0.47173

---Batch:0020,cost={4857.295410156}, training accuracy 0.50397

---Batch:0021,cost={4879.206542969}, training accuracy 0.51538

---Batch:0022,cost={4616.955566406}, training accuracy 0.51984

---Batch:0023,cost={4886.441406250}, training accuracy 0.49306

---Batch:0024,cost={4875.813964844}, training accuracy 0.51141

---Batch:0025,cost={4422.620605469}, training accuracy 0.51141

---Batch:0026,cost={5889.623535156}, training accuracy 0.51637

---Batch:0027,cost={5968.868164062}, training accuracy 0.50744

---Batch:0028,cost={4968.015136719}, training accuracy 0.51885

---Batch:0029,cost={5001.129394531}, training accuracy 0.53075

---Batch:0030,cost={5481.179687500}, training accuracy 0.53621

---Batch:0031,cost={4555.959960938}, training accuracy 0.53522

---Batch:0032,cost={5334.738769531}, tra

In [None]:
# evaluate
# TODO: Change the eval_epoch_num variable by a suitable number of epoch.
eval_epoch_num = 300

with tf.Session() as sess:
    saver.restore(sess, model_saver + 'nn_model_%04d.ckpt'%(eval_epoch_num))
    avg_acc = 0.
    total_batch =  test_len // batch_size
    for batch in range(total_batch):
        # get next batch
        batch_input = get_batch('X_test', batch) 
        batch_label = get_batch('y_test', batch) 
        acc = sess.run(accuracy, feed_dict={X:batch_input, y:batch_label, is_training:False})
        avg_acc += acc / total_batch
    print("Accuracy on test set: %.5f \n" % (avg_acc))