In [1]:
# code examples from shekkizh [https://github.com/shekkizh/FCN.tensorflow]

In [2]:
import tensorflow as tf
import numpy as np
import scipy.misc as misc
import os, sys
from six.moves import urllib
import tarfile
import zipfile
import scipy.io
import matplotlib.pyplot as plt
import TensorflowUtils as utils
import vgg19 as vgg
import pickle
import imgaug as ia
from imgaug import augmenters as iaa
import random
import cv2
import pandas

  from ._conv import register_converters as _register_converters


In [None]:
'''setting'''
# Indices of the GPUs CUDA is allowed to see; every other device is hidden
# from the process through the CUDA_VISIBLE_DEVICES environment variable.
gpus = [1]
os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(map(str, gpus))

# Load VGG-19

In [3]:
# Where the pre-trained VGG-19 weights are fetched from, plus the local
# directories used for the dataset and for checkpoints / summaries.
MODEL_URL = "http://www.vlfeat.org/matconvnet/models/beta16/imagenet-vgg-verydeep-19.mat"
data_dir = "./dataset/"   # trailing slash required: file names are appended by string concatenation
logs_dir = "./fcn/logs/"  # trailing slash required for the same reason

# debug    -> add extra activation / gradient / variable summaries to the graph
# training -> True: initialise and train; False: restore a checkpoint and evaluate
debug = training = False

In [4]:
# Hyper-parameters shared by the graph definition and the training loop.
NUM_OF_CLASSESS = 68 + 1      # 68 landmark labels plus label 0 for "no landmark"
IMAGE_SIZE = 224              # input images are IMAGE_SIZE x IMAGE_SIZE
learning_rate = 0.0001
batch_size = 4
MAX_ITERATION = 100000 + 1    # +1 so the final multiple of the logging interval is reached

# inference

In [5]:
def inference(image, keep_prob):
    """Build the FCN graph on top of VGG-19 and return predictions and logits.

    The network takes the ``conv5_3`` activation of the VGG net, max-pools it,
    applies three convolutions (7x7x4096, 1x1x4096, 1x1xNUM_OF_CLASSESS) with
    dropout after the first two, and then upsamples with three transposed
    convolutions (strides 2, 2 and 8). The first two upsampled tensors are
    added to the VGG ``pool4`` and ``pool3`` activations respectively.

    Args:
        image: input image tensor handed to ``utils.process_image``.
        keep_prob: keep probability passed to both ``tf.nn.dropout`` calls.

    Returns:
        A tuple ``(prediction, logits)`` where ``logits`` is the output of the
        last transposed convolution and ``prediction`` is its argmax over
        axis 3 with a trailing axis of size 1 added back. For a 224x224 input
        the recorded run shows logits of shape (?, 224, 224, 69).

    NOTE(review): the automatically numbered layer names (the recorded output
    shows e.g. "Conv2d_transpose_2") depend on the order in which layers are
    created here; presumably a saved checkpoint relies on them, so do not
    reorder the layer-creating statements without re-training — confirm.
    """
    # load VGG19
    # NOTE(review): presumably downloads the .mat file into "Model_zoo/" when
    # it is not present yet; verify against TensorflowUtils.get_model_data.
    model_data = utils.get_model_data("Model_zoo/", MODEL_URL)
    # preprocessing
    mean = model_data['normalization'][0][0][0]
    # average the stored mean image over its first two axes -> one value per channel
    mean_pixel = np.mean(mean, axis=(0, 1));#print(mean_pixel)
    weights = np.squeeze(model_data['layers'])
    processed_image = utils.process_image(image, mean_pixel)
    
    # model
    with tf.variable_scope("inference"):
        # transfer VGG-19
        image_net = vgg.vgg_net(weights, processed_image)
        # recorded shapes for a 224x224 input: conv5_3 (?, 14, 14, 512), pool5 (?, 7, 7, 512)
        conv_final_layer = image_net["conv5_3"];print("conv_final_layer",conv_final_layer)
        pool5 = utils.max_pool_2x2(conv_final_layer); print("pool5",pool5)

        # conv layers
        # NOTE(review): tf.contrib.layers.conv2d is called without activation_fn,
        # so its default activation (documented as ReLU in TF 1.x — confirm) is
        # applied in addition to the explicit tf.nn.relu below. The same holds
        # for conv8, whose output is the class-score map fed to the upsampling path.
        conv6=tf.contrib.layers.conv2d(pool5, 4096, [7,7], stride=1,padding='SAME')
        relu6 = tf.nn.relu(conv6, name="relu6")
        if debug:
            utils.add_activation_summary(relu6)
        relu_dropout6 = tf.nn.dropout(relu6, keep_prob=keep_prob)

        conv7=tf.contrib.layers.conv2d(relu_dropout6, 4096, [1,1], stride=1,padding='SAME')
        relu7 = tf.nn.relu(conv7, name="relu7")
        if debug:
            utils.add_activation_summary(relu7)
        relu_dropout7 = tf.nn.dropout(relu7, keep_prob=keep_prob)

        # one output channel per class
        conv8=tf.contrib.layers.conv2d(relu_dropout7, NUM_OF_CLASSESS, [1,1], stride=1,padding='SAME')

        # now to upscale to actual image size
        # x2 upsampling to the channel count of pool4, then skip connection with pool4
        deconv_shape1 = image_net["pool4"].get_shape()
        conv_t1 = tf.contrib.layers.conv2d_transpose(conv8, deconv_shape1[3].value, [4, 4],
                                                     stride=2, padding='SAME',activation_fn=None)
        fuse_1 = tf.add(conv_t1, image_net["pool4"], name="fuse_1");print("fuse_1",fuse_1)

        # x2 upsampling to the channel count of pool3, then skip connection with pool3
        deconv_shape2 = image_net["pool3"].get_shape()
        conv_t2 = tf.contrib.layers.conv2d_transpose(fuse_1, deconv_shape2[3].value, [4, 4],
                                                     stride=2, padding='SAME',activation_fn=None)
        fuse_2 = tf.add(conv_t2, image_net["pool3"], name="fuse_2");print("fuse_2",fuse_2)

        # final x8 upsampling producing one score map per class
        conv_t3 = tf.contrib.layers.conv2d_transpose(fuse_2, NUM_OF_CLASSESS, [16, 16],
                                                     stride=8, padding='SAME',activation_fn=None);print("conv_t3",conv_t3)

        # per-pixel class with the highest score
        annotation_pred = tf.argmax(conv_t3, axis=3, name="prediction");print("annotation_pred",annotation_pred)
        return tf.expand_dims(annotation_pred, dim=3), conv_t3

In [6]:
# setting inputs
# Graph inputs: dropout keep probability, a batch of RGB images and the
# matching per-pixel integer label maps (one channel).
# NOTE: the placeholder name "keep_probabilty" is misspelled but is part of the
# graph's op names, so it is left untouched.
keep_probability = tf.placeholder(tf.float32, name="keep_probabilty")
image = tf.placeholder(tf.float32, shape=[None, IMAGE_SIZE, IMAGE_SIZE, 3], name="input_image")
annotation = tf.placeholder(tf.int32, shape=[None, IMAGE_SIZE, IMAGE_SIZE, 1], name="annotation")

# Build the network once; `logits` holds the per-class score maps and
# `pred_annotation` their per-pixel argmax.
pred_annotation, logits = inference(image, keep_probability)

conv_final_layer Tensor("inference/BiasAdd_14:0", shape=(?, 14, 14, 512), dtype=float32)
pool5 Tensor("inference/MaxPool:0", shape=(?, 7, 7, 512), dtype=float32)
fuse_1 Tensor("inference/fuse_1:0", shape=(?, 14, 14, 512), dtype=float32)
fuse_2 Tensor("inference/fuse_2:0", shape=(?, 28, 28, 256), dtype=float32)
conv_t3 Tensor("inference/Conv2d_transpose_2/BiasAdd:0", shape=(?, 224, 224, 69), dtype=float32)
annotation_pred Tensor("inference/prediction:0", shape=(?, 224, 224), dtype=int64)


In [7]:
def _count_params(var):
    """Return the number of scalar entries of one variable (product of its dims)."""
    n_entries = 1
    for axis_len in var.get_shape():  # each item is a tf.Dimension
        n_entries *= axis_len.value
    return n_entries

# Total number of trainable scalars in the graph.
total_parameters = sum(_count_params(v) for v in tf.trainable_variables())
print('total_parameters', total_parameters)

total_parameters 147038154


In [8]:
# loss
# Drop the trailing channel axis of the label map so it matches the spatial
# shape of the logits, compute the per-pixel cross entropy and average it.
flat_labels = tf.squeeze(annotation, squeeze_dims=[3])
per_pixel_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
    labels=flat_labels,
    logits=logits,
    name="entropy",
)
loss = tf.reduce_mean(per_pixel_entropy)
# Scalar summary used for both the training and the validation writer.
loss_summary = tf.summary.scalar("entropy", loss)

In [9]:
def train(loss_val, var_list):
    """Create the Adam update op minimising `loss_val` over `var_list`.

    Uses the module-level `learning_rate`. When the module-level `debug` flag
    is set, a gradient summary is registered for every (gradient, variable)
    pair via `utils.add_gradient_summary`.

    Returns:
        The op returned by `apply_gradients`.
    """
    adam = tf.train.AdamOptimizer(learning_rate)
    grads_and_vars = adam.compute_gradients(loss_val, var_list=var_list)
    if debug:
        for gradient, variable in grads_and_vars:
            utils.add_gradient_summary(gradient, variable)
    update_op = adam.apply_gradients(grads_and_vars)
    return update_op

In [10]:
# Collect every trainable variable and build the optimisation op over them.
trainable_var = tf.trainable_variables()
if debug:
    # extra per-variable summaries are only registered in debug mode
    for var in trainable_var:
        utils.add_to_regularization_and_summary(var)
train_op = train(loss, trainable_var)

print("Setting up summary op...")
# Merges every summary registered so far into a single op.
summary_op = tf.summary.merge_all()

print("Setting up Saver...")
# Created after train_op so that variables created while building the
# optimiser are part of what the saver stores and restores.
saver = tf.train.Saver()

Setting up summary op...
Setting up Saver...


In [11]:
# Open the session, then either restore the latest checkpoint from logs_dir
# (evaluation mode) or initialise every variable from scratch (training mode).
sess = tf.InteractiveSession(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=False))
if not training:
    ckpt = tf.train.get_checkpoint_state(logs_dir)
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
        print('Loading sucessfully')
    else:
        print('No checkpoint file found')
        # A bare `raise` outside an `except` block only produces
        # "RuntimeError: No active exception to reraise"; raise the same
        # exception type with a message that names the actual problem.
        raise RuntimeError('No checkpoint file found in ' + logs_dir)
else:
    init = tf.global_variables_initializer()
    sess.run(init)

# create two summary writers to show training loss and validation loss in the same graph
# (they write to the 'train' and 'validation' sub-folders of logs_dir)
train_writer = tf.summary.FileWriter(logs_dir+'/train', sess.graph)
validation_writer = tf.summary.FileWriter(logs_dir+'/validation', sess.graph)

INFO:tensorflow:Restoring parameters from ./fcn/logs/model.ckpt-91000
Loading sucessfully


In [12]:
def data_augmentation(images, pts, rot=(-30, 30), s=(0.7, 1.0)):
    """Apply one random affine augmentation to a batch of images and landmarks.

    Args:
        images: batch of images, indexed along axis 0.
        pts: landmark array indexed as ``pts[image, landmark, 0]`` (used as the
            keypoint x) and ``pts[image, landmark, 1]`` (used as the keypoint y).
        rot: value passed to ``iaa.Affine`` as ``rotate``.
        s: value passed to ``iaa.Affine`` as ``scale``.

    Returns:
        ``(images_aug, pts_aug)``: the augmented images and the augmented
        landmark coordinates rounded and cast to int32. Augmented landmarks
        are not clipped here, so they may lie outside the image.
    """
    # Wrap every image's landmarks in the imgaug keypoint containers.
    keypoints_on_images = [
        ia.KeypointsOnImage(
            [ia.Keypoint(x=pts[n, k, 0], y=pts[n, k, 1]) for k in range(pts.shape[1])],
            shape=images[n].shape,
        )
        for n in range(images.shape[0])
    ]

    # call to_deterministic() for each batch again, NOT only once at the start,
    # so images and keypoints of this batch share one set of sampled parameters
    augmenter = iaa.Sequential([iaa.Affine(rotate=rot, scale=s)]).to_deterministic()

    # augment keypoints and images
    images_aug = augmenter.augment_images(images)
    keypoints_aug = augmenter.augment_keypoints(keypoints_on_images)

    # Back from keypoint objects to an integer coordinate array.
    pts_aug = np.asarray([
        np.asarray([[round(kp.x), round(kp.y)] for kp in per_image.keypoints])
        for per_image in keypoints_aug
    ]).astype(np.int32)

    return images_aug, pts_aug

def get_batch(X, Y, batch_size = 32):
    """Endlessly yield shuffled mini-batches ``(X[idx], Y[idx])``.

    The sample order is reshuffled with ``np.random.shuffle`` at the start of
    every pass. Only full batches are produced: the last
    ``len(X) % batch_size`` samples of each shuffled pass are skipped.

    Args:
        X: array of inputs, indexable with an integer index array.
        Y: array of targets aligned with ``X`` along axis 0.
        batch_size: number of samples per batch; must be positive and not
            larger than ``len(X)``.

    Yields:
        Tuples ``(batch_x, batch_y)`` with ``batch_size`` samples each.

    Raises:
        ValueError: if no full batch can be formed. Previously this case made
            the generator spin forever without yielding anything. Because this
            is a generator, the error surfaces on the first ``next()`` call.
    """
    n_samples = len(X)
    if batch_size <= 0:
        raise ValueError('batch_size must be positive, got %r' % (batch_size,))
    # number of full batches per pass; constant, so computed once
    n_batches = n_samples // batch_size
    if n_batches == 0:
        raise ValueError('batch_size (%d) is larger than the number of samples (%d)'
                         % (batch_size, n_samples))

    idx = np.arange(n_samples)
    while True:
        np.random.shuffle(idx)
        for b_idx in range(n_batches):
            tar_idx = idx[(b_idx*batch_size):((b_idx+1)*batch_size)]
            yield X[tar_idx], Y[tar_idx]
            
def pts2map(ys, image_size=None):
    """Rasterise landmark coordinates into integer label maps.

    Args:
        ys: array indexed as ``ys[image, landmark, coord]`` with two
            coordinates per landmark. Coordinate 0 indexes axis 1 of the map
            and coordinate 1 indexes axis 2 (the same convention ``map2pts``
            uses when decoding). Non-integer values are truncated with
            ``int()``.
        image_size: side length of the square label map. Defaults to the
            module-level ``IMAGE_SIZE``.

    Returns:
        Array of shape ``(len(ys), image_size, image_size, 1)`` that is 0
        everywhere except at landmark positions, where it holds the landmark
        index plus one. Landmarks outside the map are skipped; if two
        landmarks share a pixel the one with the higher index wins.
    """
    size = IMAGE_SIZE if image_size is None else image_size
    ys = np.asarray(ys)
    maps = np.zeros(shape=(ys.shape[0], size, size, 1))
    for i in range(ys.shape[0]):
        for p in range(ys.shape[1]):
            # cast so float coordinate arrays can be used as indices
            first, second = int(ys[i, p, 0]), int(ys[i, p, 1])
            if 0 <= first < size and 0 <= second < size:
                # landmark index p (0-based) becomes label p+1; label 0 is background
                maps[i, first, second, 0] = p + 1
    return maps

def map2pts(pts_maps):
    """Decode per-class score maps into landmark coordinates.

    For every sample and every channel the position of the maximum value is
    located (first occurrence in row-major order on ties). Channel 0 is then
    discarded, so the result has one entry per remaining channel.

    Args:
        pts_maps: 4-D array indexed as ``[sample, axis1, axis2, channel]``.

    Returns:
        Integer array of shape ``(samples, channels - 1, 2)`` holding the
        ``(axis1, axis2)`` index of each channel's maximum. An empty batch
        yields an empty array of that shape instead of raising.
    """
    batch, height, width, channels = pts_maps.shape
    # One argmax over the flattened spatial axes for all samples and channels at once.
    flat_idx = pts_maps.reshape(batch, height * width, channels).argmax(axis=1)
    first, second = np.unravel_index(flat_idx, (height, width))
    b_idxs = np.stack([first, second], axis=-1)
    # drop channel 0
    return b_idxs[:, 1:, :]

# training

In [13]:
# Load the pickled HELEN dataset. The file is opened in a `with` block so the
# handle is closed as soon as loading finishes.
# NOTE: pickle.load can execute arbitrary code — only load dataset files from a
# trusted source.
with open(data_dir+"HELEN.pickle", "rb") as helen_file:
    db_helen = pickle.load(helen_file)
# print the data structure
print(db_helen.keys())
print(db_helen['pts'].keys())
# print the shape of the training set
print(db_helen['pts']['trainset'].shape)
print(db_helen['img']['trainset'].shape)
# print the shape of the testing set
print(db_helen['pts']['testset'].shape)
print(db_helen['img']['testset'].shape)

dict_keys(['img', 'pts'])
dict_keys(['testset', 'trainset'])
(2000, 68, 2)
(2000, 224, 224, 3)
(330, 68, 2)
(330, 224, 224, 3)


In [14]:
# declare the data iterators
# NOTE: the "validation" iterator draws its batches from the test split.
helen_images, helen_points = db_helen['img'], db_helen['pts']
train_batches = get_batch(helen_images['trainset'], helen_points['trainset'], batch_size=batch_size)
valid_batches = get_batch(helen_images['testset'], helen_points['testset'], batch_size=batch_size)

In [15]:
# write image to 
def write_result(batch_xs_valid, pts_maps, iter_num):
    b = random.randint(0, batch_xs_valid.shape[0]-1)
    img = batch_xs_valid[b].copy()
    pts = map2pts(pts_maps)[b] #print(pts)
#     print(img.shape)
#     print(pts.shape)
    for p in range(pts.shape[0]):
        #print("p",p, pts[p+1,0],pts[p+1,1])
        cv2.circle(img,(pts[p,0],pts[p,1]), 2, (255,0,0), -1)
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    cv2.imwrite('./fcn/imgs/infer_'+str(iter_num)+'.png', img) 

def eval_norm_error_image(infer, gt):
    """Return the normalised landmark error of every image in a batch.

    For each image the Euclidean distances between predicted and ground-truth
    landmarks are summed, divided by the ground-truth distance between
    landmarks 36 and 45 (the eye corners), and divided by 68.

    Args:
        infer: predicted landmarks, indexed ``[image, landmark, coord]``.
        gt: ground-truth landmarks with the same layout.

    Returns:
        1-D array with one error value per image.
    """
    # summed point-to-point distance over all landmarks
    offsets = infer - gt
    per_landmark = np.sqrt(np.square(offsets).sum(axis=2))
    summed_dist = per_landmark.sum(axis=1)
    # distance between the eye corners in the ground truth
    eye_span = gt[:, 45, :] - gt[:, 36, :]
    eye_dist = np.sqrt(np.square(eye_span).sum(axis=1))
    return summed_dist / eye_dist / 68

In [None]:
# Build the softmax op once. Calling tf.nn.softmax(logits) inside the loops
# would add a new op to the graph on every iteration.
softmax_op = tf.nn.softmax(logits)

if training:
    # best (lowest) validation loss seen so far
    max_validloss = float('inf')
    for itr in range(MAX_ITERATION):
        # prepare training input: augment the batch, then rasterise the landmarks
        batch_xs, batch_ys = next(train_batches)
        batch_xs_aug, batch_ys_aug = data_augmentation(batch_xs, batch_ys)
        batch_ymap_aug = pts2map(batch_ys_aug)

        feed_dict = {image: batch_xs_aug, annotation: batch_ymap_aug, keep_probability: 0.85}
        sess.run(train_op, feed_dict=feed_dict)

        if itr % 500 == 0:
            # loss on the batch that was just used for the update (dropout still active)
            train_loss, summary_str = sess.run([loss, loss_summary], feed_dict=feed_dict)
            print("[T] Step: %d, loss:%g" % (itr, train_loss))
            train_writer.add_summary(summary_str, itr)
        # validation
        if itr % 1000 == 0:
            # prepare inputs (a single un-augmented batch from valid_batches)
            batch_xs_valid, batch_ys_valid = next(valid_batches)
            batch_ymap_valid = pts2map(batch_ys_valid)

            feed_dict = {image: batch_xs_valid, annotation: batch_ymap_valid, keep_probability: 1.0}
            valid_loss, pts_maps, summary_sva = sess.run([loss, softmax_op, loss_summary], feed_dict=feed_dict)
            # write result figure to the imgs/
            write_result(batch_xs_valid, pts_maps, itr)
            # save validation log
            validation_writer.add_summary(summary_sva, itr)
            # save the ckpt if reaching a better loss
            if valid_loss < max_validloss:
                saver.save(sess, logs_dir + "model.ckpt", itr)
                print("[V*] Step: %d, loss:%g" % (itr, valid_loss))
                max_validloss = valid_loss
            else:
                print("[V] Step: %d, loss:%g" % (itr, valid_loss))
else:
    testing_batch = 30
    test_images = db_helen['img']['testset']
    test_points = db_helen['pts']['testset']
    neis = []
    # Step through the whole test set; the final batch may be smaller than
    # testing_batch, so no image is left out when the set size is not a multiple.
    for start in range(0, test_images.shape[0], testing_batch):
        t_batch_x = test_images[start:start+testing_batch]
        t_batch_y = test_points[start:start+testing_batch]
        feed_dict = {image: t_batch_x, keep_probability: 1.0}
        pts_maps = sess.run(softmax_op, feed_dict=feed_dict)
        infered_pts = map2pts(pts_maps)
        norm_error_image = eval_norm_error_image(infered_pts, t_batch_y)
        neis.append(norm_error_image)
    # one error value per test image; concatenate copes with a shorter last batch
    neis = np.concatenate(neis) if neis else np.zeros(0)
    pandas.DataFrame({'loss':neis}).to_csv('./fcn/norm_error_image.csv')

In [None]:
# Close the summary writers first so pending events are flushed to disk,
# then release the session.
train_writer.close()
validation_writer.close()
sess.close()