In [1]:
import pickle
import os
import numpy as np
# Google's neural-network library (TensorFlow)
import tensorflow as tf
import imgaug as ia
from imgaug import augmenters as iaa
import random
import cv2
import matplotlib.pyplot as plt
import pandas

  from ._conv import register_converters as _register_converters


In [2]:
'''setting'''
# Indices of the GPUs that CUDA is allowed to see (a single device here).
gpus = [0]
# CUDA expects a comma-separated list of device indices.
os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(map(str, gpus))

In [3]:
# Side length, in pixels, of the square images fed to the network.
IMAGE_SIZE = 224
# Number of samples per mini-batch (training and evaluation).
batch_size = 16
# Number of training iterations (100001, so that step 100000 is still reached).
MAX_ITERATION = 10 ** 5 + 1

In [4]:
# Folder holding the pickled dataset.
data_dir = './dataset/'
# Folder where checkpoints are written / restored.
logs_dir = './dan/logs/'
# Same value as the definition in the hyper-parameter cell (100001 iterations).
MAX_ITERATION = 10 ** 5 + 1
# True: initialise variables and train; False: restore the latest checkpoint.
training = True

In [5]:
# Load the pickled HELEN dataset (a dict with 'img' and 'pts' entries).
# NOTE(security): pickle.load can execute arbitrary code -- only load dataset
# files that come from a trusted source.
# The context manager closes the file handle once loading is done.
with open(os.path.join(data_dir, "HELEN.pickle"), "rb") as helen_file:
    db_helen = pickle.load(helen_file)
print(db_helen.keys())

dict_keys(['img', 'pts'])


In [6]:
def batch_norm(x, train_phase, name='bn_layer'):
    """Apply a batch-normalization layer to `x`.

    Args:
        x: input tensor.
        train_phase: bool (or bool tensor) passed as the layer's `training` flag.
        name: name given to the batch-normalization layer.

    Returns:
        The normalized tensor.
    """
    normalized = tf.layers.batch_normalization(
        x,
        momentum=0.9,
        epsilon=1e-5,
        center=True,
        scale=True,
        training=train_phase,
        name=name,
    )
    return normalized

def conv_blk (inputs,n_filter, train_phase, name = 'conv_blk'):
    """Two stacked 3x3 conv -> batch-norm -> ReLU stages inside one variable scope.

    Args:
        inputs: input feature map.
        n_filter: sequence whose first two entries give the filter counts of
            the first and second convolution.
        train_phase: training flag forwarded to the batch-norm layers.
        name: variable-scope name for the block.

    Returns:
        The ReLU output of the second stage.
    """
    with tf.variable_scope(name):
        net = inputs
        # Stage i uses n_filter[i] filters and a batch-norm layer named 'c<i+1>_bn'.
        for stage in (0, 1):
            net = tf.layers.conv2d(
                net,
                filters=n_filter[stage],
                kernel_size=[3, 3],
                strides=(1, 1),
                padding='same',
            )
            net = batch_norm(net, train_phase, name='c%d_bn' % (stage + 1))
            net = tf.nn.relu(net)
        return net

# DAN

In [7]:
def FF_NN(inputs, train_phase, keeprate):
    """Feed-forward stage: four conv blocks with pooling, then two dense layers.

    Args:
        inputs: NHWC input tensor.
        train_phase: bool tensor; True while training (drives batch-norm and dropout).
        keeprate: probability of KEEPING a unit in the dropout layer.

    Returns:
        (y_out, f1_relu): `y_out` is the 136-unit output reshaped to
        [-1, 68, 2]; `f1_relu` is the 256-unit hidden activation.
    """
    h1 = conv_blk(inputs, [64,64], train_phase, name='conv_blk1')
    m1 = tf.layers.max_pooling2d(h1,pool_size=[2,2],strides=(2,2))

    h2 = conv_blk(m1, [128,128], train_phase, name='conv_blk2')
    m2 = tf.layers.max_pooling2d(h2,pool_size=[2,2],strides=(2,2))

    h3 = conv_blk(m2, [256,256], train_phase, name='conv_blk3')
    m3 = tf.layers.max_pooling2d(h3,pool_size=[2,2],strides=(2,2))

    h4 = conv_blk(m3, [512,512], train_phase, name='conv_blk4')
    m4 = tf.layers.max_pooling2d(h4,pool_size=[2,2],strides=(2,2))

    flt = tf.layers.flatten(m4)

    # fully connected part
    # tf.layers.dropout takes the DROP rate and is a no-op unless `training`
    # is true, so convert the keep probability and forward the phase flag.
    f1_do = tf.layers.dropout(flt, rate=1.0 - keeprate, training=train_phase)
    f1 = tf.layers.dense(f1_do,256,activation=None)
    f1_bn = batch_norm(f1, train_phase, name='f1_bn')
    f1_relu = tf.nn.relu(f1_bn)

    f2 = tf.layers.dense(f1_relu,136,activation=None)
    y_out = tf.reshape(f2, shape=[-1,68,2])

    return y_out, f1_relu

In [8]:
#https://github.com/zjjMaiMai/Deep-Alignment-Network-A-convolutional-neural-network-for-robust-face-alignment/blob/master/DAN_V2/dan_model.py
# "shape" means points
def __calc_affine_params(from_shape,to_shape):
    """Estimate the transform mapping `from_shape` landmarks onto `to_shape`.

    Both inputs are cast to float32 and reshaped to [-1, 68, 2].

    Returns:
        (r, t): `r` is a [-1, 2, 2] matrix of the form [[a, b], [-b, a]] and
        `t` a [-1, 1, 2] offset, to be applied as `points @ r + t`.
    """
    src = tf.reshape(tf.cast(from_shape, dtype=tf.float32), [-1, 68, 2])
    dst = tf.reshape(tf.cast(to_shape, dtype=tf.float32), [-1, 68, 2])

    # Per-sample centroids, kept as [-1, 1, 2] so they broadcast over the points.
    src_mean = tf.reduce_mean(src, axis=1, keepdims=True)
    dst_mean = tf.reduce_mean(dst, axis=1, keepdims=True)

    src_centered = src - src_mean
    dst_centered = dst - dst_mean

    # Squared norm of the centred source landmarks (shared denominator).
    src_sq_norm = tf.pow(tf.norm(src_centered, axis=[1, 2]), 2)

    dot_term = tf.reduce_sum(tf.multiply(src_centered, dst_centered), axis=[1, 2])
    cross_term = tf.reduce_sum(
        tf.multiply(src_centered[:, :, 0], dst_centered[:, :, 1])
        - tf.multiply(src_centered[:, :, 1], dst_centered[:, :, 0]),
        1,
    )

    a = dot_term / src_sq_norm
    b = cross_term / src_sq_norm

    r = tf.reshape(tf.stack([a, b, -b, a], axis=1), [-1, 2, 2])
    t = dst_mean - tf.matmul(src_mean, r)
    return r, t

def __affine_image(imgs,r,t):
    """Warp a batch of NHWC images with the transform described by (r, t).

    Args:
        imgs: image tensor in NHWC layout.
        r: [-1, 2, 2] matrix as returned by __calc_affine_params.
        t: offset tensor, reshaped to [-1, 2] here.

    Returns:
        The bilinearly resampled images.
    """
    # tf.contrib.image.transform takes 8-parameter projective transforms, so
    # the inverted, transposed 2x2 matrix is padded into that layout.
    inv_r_t = tf.matrix_transpose(tf.matrix_inverse(r))
    padded_rows = tf.pad(inv_r_t, [[0, 0], [0, 0], [0, 1]], mode='CONSTANT')
    linear_part = tf.pad(tf.reshape(padded_rows, [-1, 6]), [[0, 0], [0, 2]], mode='CONSTANT')

    shift_part = tf.contrib.image.translations_to_projective_transforms(tf.reshape(t, [-1, 2]))
    projective = tf.contrib.image.compose_transforms(linear_part, shift_part)

    # The transform op is pinned to the CPU (original note: crashes on GPU).
    with tf.device('/cpu:0'):
        warped = tf.contrib.image.transform(imgs, projective, interpolation="BILINEAR")

    return warped

def __affine_shape(shapes,r,t,isinv=False):
    """Apply `shapes @ r + t`, or its inverse when `isinv` is true.

    Args:
        shapes: landmark tensor, multiplied on the right by `r`.
        r: transform matrix.
        t: offset added after the matrix product.
        isinv: when true, use inverse(r) and the offset `-t @ inverse(r)`.

    Returns:
        The transformed landmarks.
    """
    if not isinv:
        return tf.matmul(shapes, r) + t

    r_inv = tf.matrix_inverse(r)
    t_inv = tf.matmul(-t, r_inv)
    return tf.matmul(shapes, r_inv) + t_inv
def __gen_heatmap(shapes, IMAGE_SIZE=224):
    """Build a landmark heatmap of size IMAGE_SIZE x IMAGE_SIZE.

    Each pixel gets 1 / (1 + distance to the nearest landmark).

    Args:
        shapes: landmark tensor; the shape comments below assume (?, 68, 2).
        IMAGE_SIZE: side length of the generated map.

    Returns:
        Tensor of shape (?, IMAGE_SIZE, IMAGE_SIZE, 1).
    """
    # (x, y) coordinate of every pixel, row by row -> (1, S, S, 2)
    coords = [(col, row) for row in range(IMAGE_SIZE) for col in range(IMAGE_SIZE)]
    pixel_grid = tf.constant(coords, dtype=tf.float32, shape=[1, IMAGE_SIZE, IMAGE_SIZE, 2])

    # (?, 68, 2) -> (?, 68, 1, 1, 2) so landmarks broadcast against the grid
    landmarks = shapes[:, :, tf.newaxis, tf.newaxis, :]

    # Distance from every pixel to every landmark -> (?, 68, S, S)
    distances = tf.norm(pixel_grid - landmarks, axis=-1)

    # Keep the closest landmark per pixel -> (?, S, S)
    nearest = tf.reduce_min(distances, axis=1)

    # Invert the distance and add a channel axis -> (?, S, S, 1)
    return tf.expand_dims(1.0 / (nearest + 1.0), axis=-1)

In [9]:
def conn_layers(imgs, mean_s, s_current, fc_current):
    """Build the inputs that connect one stage to the next.

    Args:
        imgs: image batch to warp.
        mean_s: reference landmark shape the current estimate is aligned to.
        s_current: current landmark estimate.
        fc_current: dense feature vector from the previous stage.

    Returns:
        (T_img, hmap, fmap, T_pts, r, t): warped images, landmark heatmap,
        upsampled feature map, transformed landmarks and the transform (r, t).
    """
    half_size = IMAGE_SIZE // 2
    quarter_size = IMAGE_SIZE // 4

    # Transform that aligns the current estimate with the reference shape.
    r, t = __calc_affine_params(s_current, mean_s)

    # Apply it to the images and to the landmarks.
    warped_imgs = __affine_image(imgs, r, t)
    warped_pts = __affine_shape(s_current, r, t, isinv=False)

    # Heatmap rendered from the transformed landmarks.
    heatmap = __gen_heatmap(warped_pts, half_size)

    # Dense projection of the previous features, reshaped to a one-channel
    # map and upsampled (nearest neighbour) to the heatmap resolution.
    projected = tf.layers.dense(fc_current, quarter_size ** 2, activation=tf.nn.relu)
    coarse_map = tf.reshape(projected, shape=[-1, quarter_size, quarter_size, 1])
    feature_map = tf.image.resize_images(
        coarse_map,
        (half_size, half_size),
        method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
    )

    return warped_imgs, heatmap, feature_map, warped_pts, r, t

In [10]:
def DAN_blk(imgs, s_mean, s_current, fc_current, train_phase, keeprate, name = 'DAN_blk'):
    """One refinement stage: align, predict a landmark update, map it back.

    Args:
        imgs: image batch.
        s_mean: reference landmark shape.
        s_current: landmark estimate coming from the previous stage.
        fc_current: dense features coming from the previous stage.
        train_phase: training flag forwarded to FF_NN.
        keeprate: dropout keep-rate forwarded to FF_NN.
        name: variable-scope name of this stage.

    Returns:
        (landmarks, fc_next): the refined landmarks after the inverse
        transform, and the dense features for the next stage.
    """
    with tf.variable_scope(name):
        warped_img, heatmap, feat_map, warped_pts, rot, trans = conn_layers(
            imgs, s_mean, s_current, fc_current)

        # Stack image, heatmap and feature map along the channel axis.
        stage_input = tf.concat([warped_img, heatmap, feat_map], axis=3)

        residual, fc_next = FF_NN(stage_input, train_phase, keeprate)

        # The update is predicted in the transformed frame ...
        refined = warped_pts + residual

        # ... and then mapped back with the inverse transform.
        return __affine_shape(refined, rot, trans, isinv=True), fc_next

In [11]:
# Define Model Input (imgs, mean_s) and Output (pts_),  pts_ ~ f(imgs, mean_s)
# The image side length comes from IMAGE_SIZE (instead of a repeated literal
# 224) so the placeholder stays consistent with the rest of the graph.
imgs = tf.placeholder(tf.float32, [None, IMAGE_SIZE, IMAGE_SIZE, 3])
s_mean = tf.placeholder(tf.float32, [None,68,2]) # 68 landmarks x (x, y) = 136 values
pts_ = tf.placeholder(tf.float32, [None,68,2]) # ground-truth landmarks, same layout
# control
train_phase = tf.placeholder(tf.bool, name='phase_train')  # True while training
keeprate = tf.placeholder(tf.float32, name="keeprate")  # dropout keep probability

In [12]:
# DAN model
# Flattened views of the ground truth and of the mean shape.
# NOTE(review): neither tensor is referenced again in the visible cells.
pts_flatten = tf.layers.flatten(pts_)
mean_s_flatten = tf.layers.flatten(tf.cast(s_mean,dtype=tf.float32))

# The network consumes images downsampled to half of IMAGE_SIZE per side.
# NOTE(review): the landmark placeholders are not rescaled in this cell --
# confirm the dataset coordinates match the downsampled image frame.
imgs_in = tf.image.resize_images(imgs, (IMAGE_SIZE//2, IMAGE_SIZE//2), method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)

# Stage 1: predict an offset from the mean shape directly from the image.
delta_s, fc1 = FF_NN(imgs_in, train_phase, keeprate)
s1_out = tf.cast(s_mean,dtype=tf.float32) + delta_s

# DAN block
# Four refinement stages; each takes the previous stage's landmarks and dense
# features. s2_out is the final landmark prediction.
ds2, fc2 = DAN_blk(imgs_in, s_mean, s1_out, fc1, train_phase, keeprate, name = 'DAN_blk1')
ds3, fc3 = DAN_blk(imgs_in, s_mean, ds2, fc2, train_phase, keeprate, name = 'DAN_blk2')
ds4, fc4 = DAN_blk(imgs_in, s_mean, ds3, fc3, train_phase, keeprate, name = 'DAN_blk3')
s2_out, _ = DAN_blk(imgs_in, s_mean, ds4, fc4, train_phase, keeprate, name = 'DAN_blk4')
# print(s_out)
# s_out_flatten = tf.layers.flatten(s_out)

In [13]:
# Count every trainable weight in the graph: for each variable, multiply
# the sizes of its dimensions and add the product to the running total.
total_parameters = 0
for variable in tf.trainable_variables():
    dims = variable.get_shape().as_list()
    total_parameters += int(np.prod(dims, dtype=np.int64))
print('total_parameters', total_parameters)

total_parameters 58965736


# Loss function

In [14]:
# Define the Model Loss (4)
def _mean_landmark_distance(target, predicted):
    """Euclidean distance per landmark, averaged over landmarks and then over the batch."""
    squared = tf.squared_difference(target, predicted)
    per_point = tf.sqrt(tf.reduce_sum(squared, -1))
    return tf.reduce_mean(tf.reduce_mean(per_point, -1))


# s1: first-stage prediction, s2: prediction after the last DAN block.
s1_losses = _mean_landmark_distance(pts_, s1_out)
s2_losses = _mean_landmark_distance(pts_, s2_out)

# Define the Optimizer (5)
s1_train_step = tf.train.AdamOptimizer(0.001).minimize(s1_losses)
s2_train_step = tf.train.AdamOptimizer(0.001).minimize(s2_losses)

# Ops registered under UPDATE_OPS; the training loop runs them together with
# the train step.
extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

print("Setting up Saver...")
saver = tf.train.Saver(tf.global_variables())

Setting up Saver...


# Session start ~

In [15]:
# Open the session; soft placement lets ops without a GPU kernel run on the CPU.
sess = tf.InteractiveSession(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=False))
if not training:
    # Evaluation mode: restore the most recent checkpoint from logs_dir.
    ckpt = tf.train.get_checkpoint_state(logs_dir)
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
        print('Loading sucessfully')
    else:
        print('No checkpoint file found')
        # A bare `raise` has no active exception here and only produces an
        # opaque RuntimeError; raise the same type with a useful message.
        raise RuntimeError('No checkpoint file found in ' + logs_dir)
else:
    # Training mode: start from freshly initialised variables.
    init = tf.global_variables_initializer()
    sess.run(init)

# Training

In [16]:
# data iterator
def get_batch(X, Y, batch_size = 32):
    """Endlessly yield shuffled mini-batches of (X, Y).

    The index order is reshuffled at the start of every pass. Each pass
    yields `len(X) // batch_size` full batches; a trailing partial batch is
    dropped.

    Args:
        X: array indexable by an integer index array (e.g. a numpy array).
        Y: array indexed with the same indices as `X`.
        batch_size: number of samples per batch.

    Yields:
        (batch_x, batch_y) pairs of `batch_size` samples each.

    Raises:
        ValueError: on the first `next()` if `batch_size` is not positive or
            exceeds `len(X)`. Without this check the generator would spin
            forever without ever yielding a batch.
    """
    n_samples = len(X)
    if batch_size <= 0:
        raise ValueError('batch_size must be positive, got %r' % (batch_size,))
    if batch_size > n_samples:
        raise ValueError(
            'batch_size (%d) is larger than the number of samples (%d)'
            % (batch_size, n_samples))

    idx = np.arange(n_samples)
    n_batches = n_samples // batch_size
    while True:
        np.random.shuffle(idx)
        for b_idx in range(n_batches):
            tar_idx = idx[b_idx * batch_size:(b_idx + 1) * batch_size]
            yield X[tar_idx], Y[tar_idx]

def data_augmentation(images, pts, rot=(-30, 30), s=(0.6, 1.0)):
    """Apply one affine augmentation to a batch of images and their landmarks.

    Args:
        images: batch of images, indexed along the first axis.
        pts: landmarks per image; `pts[i, p]` holds the (x, y) of point p.
        rot: passed as `rotate` to `iaa.Affine`.
        s: passed as `scale` to `iaa.Affine`.

    Returns:
        (images_aug, pts_aug): the augmented images and the augmented
        landmarks, rounded to whole pixels and stored as int32.
    """
    # Wrap every image's landmarks in imgaug keypoint containers.
    keypoints_on_images = [
        ia.KeypointsOnImage(
            [ia.Keypoint(x=point[0], y=point[1]) for point in image_pts],
            shape=image.shape,
        )
        for image, image_pts in zip(images, pts)
    ]

    seq = iaa.Sequential([iaa.Affine(rotate=rot, scale=s)])
    # Freeze the augmenter so images and keypoints are augmented consistently;
    # call this for each batch again, NOT only once at the start.
    seq_det = seq.to_deterministic()

    # augment keypoints and images
    images_aug = seq_det.augment_images(images)
    keypoints_aug = seq_det.augment_keypoints(keypoints_on_images)

    # Convert back to a plain integer array of rounded coordinates.
    pts_aug = np.asarray([
        np.asarray([[round(keypoint.x), round(keypoint.y)]
                    for keypoint in keypoints_after.keypoints])
        for keypoints_after in keypoints_aug
    ]).astype(np.int32)

    return images_aug, pts_aug
      
# write image to file
def write_result(batch_xs_valid, batch_pts, iter_num):
    """Draw the landmarks of one random sample and save the image as a PNG.

    Args:
        batch_xs_valid: batch of RGB images (converted to BGR before writing).
        batch_pts: landmarks per image; `batch_pts[b, p]` is the (x, y) of point p.
        iter_num: iteration number used in the output file name.

    The file is written to './dan/imgs/infer_<iter_num>.png'.
    """
    out_dir = './dan/imgs/'
    # cv2.imwrite fails silently when the target folder is missing.
    os.makedirs(out_dir, exist_ok=True)

    b = random.randint(0, batch_pts.shape[0]-1)
    img = batch_xs_valid[b].copy()
    pts = batch_pts[b]
    for point in pts:
        # Predicted landmarks are floats; cv2.circle needs integer pixel
        # coordinates (truncated here).
        center = (int(point[0]), int(point[1]))
        cv2.circle(img, center, 2, (255,0,0), -1)
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

    cv2.imwrite(out_dir + 'infer_' + str(iter_num) + '.png', img)
    
def eval_norm_error_image(infer, gt):
    """Per-image landmark error normalised by the distance between points 36 and 45.

    Args:
        infer: predicted landmarks, indexed as (image, point, coordinate).
        gt: ground-truth landmarks with the same layout.

    Returns:
        One value per image: the summed point-to-point distance divided by
        the ground-truth distance between landmarks 36 and 45, then by 68.
    """
    # Euclidean distance of every predicted point to its ground truth,
    # summed over all points of an image.
    point_dist = np.sqrt(np.square(infer - gt).sum(axis=2))
    summed_dist = point_dist.sum(axis=1)

    # Normaliser: distance between the two eye-corner landmarks of the ground truth.
    corner_gap = gt[:, 45, :] - gt[:, 36, :]
    corner_dist = np.sqrt(np.square(corner_gap).sum(axis=1))

    return summed_dist / corner_dist / 68

In [None]:
# Mean landmark layout of the training set, rounded, with a leading axis of
# size 1; it is fed as `s_mean` for every batch below.
avg_shape = np.expand_dims(np.round(np.mean(db_helen['pts']['trainset'], axis=0)),axis =0);#print(avg_shape.shape)
if training == True:
    # Endless shuffled mini-batch generators (see get_batch).
    # NOTE(review): valid_batches is only referenced by the commented-out
    # evaluation code at the bottom of this cell.
    batches = get_batch(db_helen['img']['trainset'], db_helen['pts']['trainset'], batch_size = batch_size)
    valid_batches = get_batch(db_helen['img']['testset'], db_helen['pts']['testset'], batch_size = batch_size)
    # pretrain FF_NN net (disabled): would optimise the first-stage loss only
#     for step in range(10000):
#         batch_xs, batch_ys = next(batches)
#         batch_xs_aug, batch_ys_aug =data_augmentation(batch_xs, batch_ys)
#         sess.run([extra_update_ops,s1_train_step], feed_dict={imgs: batch_xs_aug,
#                                                            pts_: batch_ys_aug,
#                                                            s_mean:avg_shape,
#                                                            train_phase: True,
#                                                            keeprate:0.5})
#         if (step % 1000 == 0):
#             print("[PreT] Step: %d" % (step))
    
    # Train the full model for MAX_ITERATION steps, evaluating every 500 steps.
    hist_train_acc = []
    hist_valid_acc = []
    # Lowest mean validation loss seen so far (despite the "max" in the name).
    max_validloss = 99999
    for step in range(MAX_ITERATION):
        batch_xs, batch_ys = next(batches)
        batch_xs_aug, batch_ys_aug =data_augmentation(batch_xs, batch_ys)

        # One optimisation step on the final-stage loss; the UPDATE_OPS
        # collection is run in the same call.
        sess.run([extra_update_ops,s2_train_step], feed_dict={imgs: batch_xs_aug,
                                                           pts_: batch_ys_aug,
                                                           s_mean:avg_shape,
                                                           train_phase: True,
                                                           keeprate:0.5})

        if (step % 500 == 0):
            # Mean s2 loss over the un-augmented training set, in order;
            # the trailing partial batch is skipped.
            idx = np.arange(len(db_helen['img']['trainset']))    
            tb = int(len(idx)/batch_size)
            acc_train= []
            for b_idx in range(tb):
                tar_idx = idx[(b_idx*batch_size):((b_idx+1)*batch_size)]
                t_batch_x = db_helen['img']['trainset'][tar_idx]
                t_batch_y = db_helen['pts']['trainset'][tar_idx]
                acc_train.append(sess.run(s2_losses, feed_dict={imgs: t_batch_x,
                                                                 pts_: t_batch_y,
                                                                 s_mean:avg_shape,
                                                                 train_phase: False,
                                                                 keeprate:1.0}))
            print("[T] Step: %d, loss:%g" % (step, np.mean(acc_train)))
            
            # Same evaluation on the test split, also collecting the predicted
            # landmarks of every full batch.
            idx = np.arange(len(db_helen['img']['testset']))  
            tb = int(len(idx)/batch_size)
#             t_batch_x = db_helen['img']['testset']#[tar_idx]
#             t_batch_y = db_helen['pts']['testset']#[tar_idx]
            acc_valid=[]
            pts_valid=[]
            for b_idx in range(tb):
                tar_idx = idx[(b_idx*batch_size):((b_idx+1)*batch_size)]
                t_batch_x = db_helen['img']['testset'][tar_idx]
                t_batch_y = db_helen['pts']['testset'][tar_idx]

                infered_pts, acc_loss= sess.run([s2_out, s2_losses],
                                                 feed_dict={imgs: t_batch_x,
                                                            pts_: t_batch_y,
                                                            s_mean:avg_shape,
                                                            train_phase: False,
                                                            keeprate:1.0})
                acc_valid.append(acc_loss)
                pts_valid.append(infered_pts)
                
            # Merge the per-batch predictions into one (n_evaluated, 68, 2) array.
            infered_pts = np.reshape(np.asarray(pts_valid), newshape = [-1,68,2])
    #             acc_valid.append(valid_loss)
            # Save a visualisation of one randomly chosen evaluated test image.
            write_result(db_helen['img']['testset'][np.arange(infered_pts.shape[0])], infered_pts, step)

            # Checkpoint only when the mean validation loss improves; "[V*]"
            # marks an improvement, "[V]" a non-improving evaluation.
            if np.mean(acc_valid) < max_validloss:
                saver.save(sess, logs_dir + "model.ckpt", step)
                print("[V*] Step: %d, loss:%g" % (step, np.mean(acc_valid)))
                max_validloss = np.mean(acc_valid)
            else:
                print("[V] Step: %d, loss:%g" % (step, np.mean(acc_valid)))

            # Histories hold mean losses (not accuracies), one entry per evaluation.
            hist_train_acc.append(np.mean(acc_train))
            hist_valid_acc.append(np.mean(acc_valid))
# Disabled evaluation branch.
# NOTE(review): it refers to `y` and `avg_losses`, which are not defined in the
# visible cells -- it needs updating before it can be re-enabled.
# else: # evaluate
#     batch_xs_valid, batch_ys_valid = next(valid_batches)
#     t_batch_x = db_helen['img']['testset']#[tar_idx]
#     t_batch_y = db_helen['pts']['testset']#[tar_idx]
#     infered_pts, acc_valid= sess.run([tf.reshape(y,shape=(-1,68,2)), avg_losses], feed_dict={imgs: t_batch_x,
#                                                                                              pts_: t_batch_y,
#                                                                                              s_mean:avg_shape,
#                                                                                              train_phase: False,
#                                                                                              keeprate:1.0})
#     norm_error_image = eval_norm_error_image(infered_pts, t_batch_y)
#     pandas.DataFrame({'loss':norm_error_image}).to_csv('./dan/norm_error_image.csv')

[T] Step: 0, loss:206.895
[V*] Step: 0, loss:207.059
[T] Step: 500, loss:3.91331
[V*] Step: 500, loss:4.45444
[T] Step: 1000, loss:3.76438
[V*] Step: 1000, loss:4.245
[T] Step: 1500, loss:4.14252
[V] Step: 1500, loss:4.54797
[T] Step: 2000, loss:3.62823
[V*] Step: 2000, loss:4.12358
[T] Step: 2500, loss:3.71995
[V] Step: 2500, loss:4.17129
[T] Step: 3000, loss:3.75447
[V] Step: 3000, loss:4.35928
[T] Step: 3500, loss:3.73383
[V] Step: 3500, loss:4.22593
[T] Step: 4000, loss:3.61738
[V] Step: 4000, loss:4.17972
[T] Step: 4500, loss:3.57443
[V*] Step: 4500, loss:4.1135
[T] Step: 5000, loss:3.5207
[V] Step: 5000, loss:4.12888
[T] Step: 5500, loss:3.60988
[V] Step: 5500, loss:4.25603
[T] Step: 6000, loss:3.46964
[V*] Step: 6000, loss:4.09751
[T] Step: 6500, loss:3.59425
[V] Step: 6500, loss:4.16251
[T] Step: 7000, loss:3.51819
[V*] Step: 7000, loss:4.02841
[T] Step: 7500, loss:3.57499
[V] Step: 7500, loss:4.21789
[T] Step: 8000, loss:3.35026
[V*] Step: 8000, loss:4.02535
[T] Step: 8500, lo

In [None]:
# sess.close()