In [1]:
import tensorflow as tf
import tensorlayer as tl
import cv2
import numpy as np
import matplotlib.pyplot as plt
import glob
import os
import math
import re

In [2]:
batch_size = 2# 한 iteration에 몇개의 batch를 사용할지
image_resize_w = 256
image_resize_h = 256
nstack = 4  # num of hourglass stack

net_ouput_w = 64
net_ouput_h = 64

iterations = 60000000 # 몇 iteration을 학습할지
learning_rate = 0.0001
result_save_freq = 10000 # result를 save하는 주기 (몇 iter마다 save할지)
summary_dir = "./log"

save_freq = 20000 # model weights를 저장하는 주기
save_dir = "./model"

vis_train_dir = "./vis/train"
vis_val_dir = "./vis/val"

#train_ratio = 0.8 # train의 비율, validation 비율 = 1-train_ratio
validation_num = 500

path_pose_joint = "/media/vision/Seagate Expansion Drive/coco2014data/densepose_coco" # 데이터셋의 경로


if not os.path.exists(summary_dir):
    os.makedirs(summary_dir)
if not os.path.exists(save_dir):
    os.makedirs(save_dir)
if not os.path.exists(vis_train_dir):
    os.makedirs(vis_train_dir)
if not os.path.exists(vis_val_dir):
    os.makedirs(vis_val_dir)

In [3]:
def _data_aug_fn(image, joints_image, segs_image):
    h = len(image)
    w = len(image[0])

    joint = np.zeros([net_ouput_h,net_ouput_w,18], dtype=np.float32)  # 17 + background
    joint_ori = np.zeros([net_ouput_h,net_ouput_w,18], dtype=np.float32)  # 17 + background
    existing_joints = np.unique(joints_image)
    
    for existing_joint in existing_joints:
        if existing_joint != 0:
            tmp_y = np.where(joints_image == existing_joint)[0][0]
            tmp_x = np.where(joints_image == existing_joint)[1][0]
            resized_x = int(tmp_x/w * net_ouput_w)
            resized_y = int(tmp_y/h * net_ouput_h)
            joint[resized_y][resized_x][int(existing_joint/10)] = 1.0
            joint_ori[resized_y][resized_x][int(existing_joint/10)] = 1.0
    
    for i in range(0,18):
         joint[:,:,i] = keypoint_gauusian(joint[:,:,i])
    
    joint_bg = 1 - np.sum(joint[:,:,1:], axis=-1)
    joint_bg[joint_bg < 0] = 0
    joint[:,:,0] = joint_bg
        
    segs_image = np.squeeze(segs_image, axis=-1)
    seg = np.eye(15,dtype=np.float32)[np.int32(segs_image/10)]
    
    
    image = cv2.resize(image, (image_resize_h, image_resize_w))#, interpolation=cv2.INTER_NEAREST)
    seg = cv2.resize(seg, (net_ouput_h, net_ouput_w), interpolation=cv2.INTER_NEAREST)
    
    return image, joint, seg, joint_ori


In [4]:
def _map_fn(frame_name, joint_name, seg_name):
    image = tf.image.decode_jpeg(tf.read_file(frame_name), channels=3)
    joints_image = tf.image.decode_jpeg(tf.read_file(joint_name), channels=1)
    segs_image = tf.image.decode_png(tf.read_file(seg_name), channels=1)
     
    img, joint, seg, joint_ori  = tf.py_func(_data_aug_fn, [image, joints_image, segs_image], [tf.uint8, tf.float32, tf.float32, tf.float32])
    
    img = tf.image.convert_image_dtype(img, dtype=tf.float32)
   # img = img/255
    
    
    return img, joint, seg, joint_ori

    

In [5]:
def densepose_agu(directory):
    frame_name = []
    joint_name = []
    seg_name = []
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file.endswith('.jpg'):
                tmp = []
                frame_name.append(root + '/' + file)
                joint_name.append(root + '/' + file[:-4] + '_keypoint.png')
                seg_name.append(root + '/' + file[:-4] + '_seg.png')
    dataset = tf.data.Dataset.from_tensor_slices((frame_name, joint_name, seg_name))
    
    ds_val = dataset.take(validation_num) 
    ds_train = dataset.skip(validation_num)

    ds_train = ds_train.repeat()
    ds_val = ds_val.repeat()

    ds_train = ds_train.shuffle(buffer_size=4096)
    ds_val = ds_val.shuffle(buffer_size=4096)
        
    ds_train = ds_train.map(_map_fn)
    ds_val = ds_val.map(_map_fn)
    
    ds_train = ds_train.batch(batch_size)  
    ds_val = ds_val.batch(batch_size) 
    
    return ds_train, ds_val


In [6]:
def keypoint_gauusian(single_keypoint_image):
    if np.max(single_keypoint_image) == 0:
        return single_keypoint_image
    else:
        h = len(single_keypoint_image)
        w = len(single_keypoint_image[0])

        ind = np.argmax(single_keypoint_image)

        center_x = (ind) % w 
        center_y = (ind) // w 


      #  sigma = 0.5
        sigma = 2
        
        th = 3.6052
      #  th = 4.6052

        delta = math.sqrt(th * 2)

        x0 = int(max(0, center_x - delta * sigma + 0.5))
        y0 = int(max(0, center_y - delta * sigma + 0.5))

        x1 = int(min(w - 1, center_x + delta * sigma + 0.5))
        y1 = int(min(h - 1, center_y + delta * sigma + 0.5))

        exp_factor = 1 / 2.0 / sigma / sigma

        ## fast - vectorize
        arr_heatmap = single_keypoint_image[y0:y1 + 1, x0:x1 + 1]
        y_vec = (np.arange(y0, y1 + 1) - center_y)**2  # y1 included
        x_vec = (np.arange(x0, x1 + 1) - center_x)**2
        xv, yv = np.meshgrid(x_vec, y_vec)
        arr_sum = exp_factor * (xv + yv)
        arr_exp = np.exp(-arr_sum)
        arr_exp[arr_sum > th] = 0
        single_keypoint_image[y0:y1 + 1, x0:x1 + 1] = np.maximum(arr_heatmap, arr_exp)
        return single_keypoint_image

In [7]:
def residual(inputs, input_ch, ouput_ch, is_train):
    if input_ch != ouput_ch:
        identity = tf.layers.conv2d(inputs, ouput_ch, [1, 1], padding='same')#, activation=tf.nn.relu)
        identity = tf.nn.relu(tf.layers.batch_normalization(identity, training=is_train))

    else:
        identity = inputs

    net = tf.layers.conv2d(inputs, input_ch, [1, 1], padding='same')#, activation=tf.nn.relu)
    net = tf.nn.relu(tf.layers.batch_normalization(net, training=is_train))

    net = tf.layers.conv2d(net, input_ch, [3, 3], padding='same')#, activation=tf.nn.relu)
    net = tf.nn.relu(tf.layers.batch_normalization(net, training=is_train))

    net = tf.layers.conv2d(net, ouput_ch, [1, 1], padding='same')
    net = tf.layers.batch_normalization(net, training=is_train)
           
    net += identity
    return net

def pre_hourglass(img, is_train):
    save256 = residual(img, 3, 64, is_train) # 256 256 64
    save256 = residual(save256, 64, 256, is_train) # 256 256 256
    save256 = residual(save256, 256, 256, is_train) # 256 256 256
 
    net = tf.layers.conv2d(img, 64, [7, 7], strides = 2, padding='same')#, activation=tf.nn.relu)
    net = tf.nn.relu(tf.layers.batch_normalization(net, training=is_train))

  #  print("128x128")
  #  print(net.shape)   #128 128 64
    
    net = residual(net, 64, 128, is_train)
    
    save128 = residual(net, 128, 256, is_train) # 128 128 64
    save128 = residual(save128, 256, 256, is_train) # 128 128 256
    save128 = residual(save128, 256, 256, is_train) # 128 128 256
    
    net = tf.layers.max_pooling2d(net, [2, 2], [2, 2])
   # print("64x64") 
   # print(net.shape) # 64 64 128
    
    net = residual(net, 128, 256, is_train) # 64 64 256
    return net, save256, save128


def post_hourglass(net, save256, save128, is_train):      ## input : 64 x 64 x 256
    net = residual(net, 256, 256, is_train)
    
    net = tf.image.resize_nearest_neighbor(net, (128,128))
    net += save128
    
    net = residual(net, 256, 256, is_train)
    
    net = tf.image.resize_nearest_neighbor(net, (256,256))
    net += save256
    
    return net   
    
def hourglass(net, is_train):#, is_first = True):   ### input : 64 x 64 x 256
    net = residual(net, 256, 256, is_train)
    net = residual(net, 256, 256, is_train)

    save64 = residual(net, 256, 256, is_train) # 64 64 256
    save64 = residual(save64, 256, 256, is_train) # 64 64 256
    save64 = residual(save64, 256, 256, is_train) # 64 64 256

    
    net = tf.layers.max_pooling2d(net, [2, 2], [2, 2]) # 32 32 256
    net = residual(net, 256, 256, is_train)
    net = residual(net, 256, 256, is_train)
    net = residual(net, 256, 256, is_train)

    save32 = residual(net, 256, 256, is_train) # 32 32 256
    save32 = residual(save32, 256, 256, is_train) # 32 32 256
    save32 = residual(save32, 256, 256, is_train) # 32 32 256

    
    net = tf.layers.max_pooling2d(net, [2, 2], [2, 2]) # 16 16 256
    net = residual(net, 256, 256, is_train)
    net = residual(net, 256, 256, is_train)
    net = residual(net, 256, 256, is_train)

    save16 = residual(net, 256, 256, is_train) # 16 16 256
    save16 = residual(save16, 256, 256, is_train) # 16 16 256
    save16 = residual(save16, 256, 256, is_train) # 16 16 256

    
    net = tf.layers.max_pooling2d(net, [2, 2], [2, 2]) # 8 8 256
    net = residual(net, 256, 256, is_train)
    net = residual(net, 256, 256, is_train)
    net = residual(net, 256, 256, is_train)

    
    save8 = residual(net, 256, 256, is_train) # 8 8 256
    save8 = residual(save8, 256, 256, is_train) # 8 8 256
    save8 = residual(save8, 256, 256, is_train) # 8 8 256

    
    net = tf.layers.max_pooling2d(net, [2, 2], [2, 2]) # 4 4 256
    
    net = residual(net, 256, 256, is_train)
    net = residual(net, 256, 256, is_train)
    net = residual(net, 256, 256, is_train)
    
    
    net = tf.image.resize_nearest_neighbor(net, (8,8))
    net += save8
    
    net = residual(net, 256, 256, is_train)

    
    net = tf.image.resize_nearest_neighbor(net, (16,16))
    net += save16
    
    net = residual(net, 256, 256, is_train)

    
    net = tf.image.resize_nearest_neighbor(net, (32,32))
    net += save32

    net = residual(net, 256, 256, is_train)

    
    net = tf.image.resize_nearest_neighbor(net, (64,64))
    net += save64
    
    net = residual(net, 256, 256, is_train)
    
    inter_seg = tf.layers.conv2d(net, 15, [1, 1], padding='same')#, activation=tf.nn.relu)  ###input : 256 channel
    interpredict_seg = tf.layers.batch_normalization(inter_seg, training=is_train)
    
    net = residual(interpredict_seg, 15, 128, is_train)
    net = residual(net, 128, 256, is_train)
    net += save64
    
    inter_key = tf.layers.conv2d(net, 18, [1, 1], padding='same')#, activation=tf.nn.relu)  ###input : 256 channel
    interpredict_key = tf.layers.batch_normalization(inter_key, training=is_train)
    
    return net, interpredict_key, interpredict_seg   ## 64x64x256,  64x64x33
#######################################################
 
    

In [8]:
def SHGnet(img, numstack, is_train): ##img = [batch, w, h, 3], hourglass-ing
    with tf.variable_scope("SegNet"):
        net, save256, save128 = pre_hourglass(img, is_train)  ### 256 -> 64

        interpredicts_key = []
        interpredicts_seg = []
        for i in range(numstack):
            savenet = net
            net, interpredict_key, interpredict_seg = hourglass(net, is_train)#, False)
            interpredicts_key.append(interpredict_key)
            interpredicts_seg.append(interpredict_seg)

            inter_key = tf.layers.conv2d(interpredict_key, 256, [1, 1], padding='same')#, activation=tf.nn.relu)  ###input : 256 channel
            inter_key = tf.nn.relu(tf.layers.batch_normalization(inter_key, training=is_train))
            inter_seg = tf.layers.conv2d(interpredict_seg, 256, [1, 1], padding='same')#, activation=tf.nn.relu)  ###input : 256 channel
            inter_seg = tf.nn.relu(tf.layers.batch_normalization(inter_seg, training=is_train))
            
            net += inter_key
            net += inter_seg
            net += savenet
        

        return interpredicts_key, interpredicts_seg

In [None]:
def dice_loss(labels, logits, smooth = 2.220446049250313e-16):
    product = labels*logits 
    intersection = tf.reduce_sum(product) 
    coefficient = (2. * intersection + smooth) / (tf.reduce_sum(logits) + tf.reduce_sum(labels) + smooth) 
    loss = 1. - tf.reduce_mean(coefficient) 
    return loss

In [None]:
def save_train(save_dir, input_image, joint_gt, seg_gt, joint_predict, seg_predict, step):   #batch result,img
    image = input_image[0]
    joint_gt = joint_gt[0]
    seg_gt = seg_gt[0]
    joint_pred = joint_predict[0]
    seg_pred = seg_predict[0]

    
    tmp = joint_predict[0]
    
   # image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

######################joint
 #   joint_gt = np.argmax(joint_gt,-1)
    joint_gt = np.argmax(joint_gt,-1)
    
    joint_pred = np.argmax(joint_pred,-1)

###################seg
   # seg_gt = np.argmax(seg_gt,-1)
    seg_gt = np.argmax(seg_gt,-1)

    
  #  seg_pred = np.argmax(seg_pred,-1)
    seg_pred = np.argmax(seg_pred,-1)
    
#     cv2.imwrite(save_dir + '/img'+ str(step) +'_.jpg', image)
    
#     cv2.imwrite(save_dir + '/img'+ str(step) +'_joint_gt.jpg', joint_gt)
#     cv2.imwrite(save_dir + '/img'+ str(step) +'_joint_pred.jpg', joint_pred)   
    
#     cv2.imwrite(save_dir + '/img'+ str(step) +'_seg_gt.jpg', seg_gt)
#     cv2.imwrite(save_dir + '/img'+ str(step) +'_seg_pred.jpg', seg_pred)
    
 #   print(np.unique(joint_gt))
 #   print(np.unique(seg_gt))

    plt.imsave(save_dir + '/img' + str(step) +'_.png', image)#*255)
    plt.imsave(save_dir + '/img' + str(step) +'_joint_gt.png', joint_gt)
    plt.imsave(save_dir + '/img' + str(step) +'_joint_pred.png', joint_pred)
    plt.imsave(save_dir + '/img' + str(step) +'_seg_gt.png', seg_gt)
    plt.imsave(save_dir + '/img' + str(step) +'_seg_pred.png', seg_pred)
    
    


    tmp = np.amax(tmp[:,:,1:],-1)
    tmp *= 255
    plt.imsave(save_dir + '/img' + str(step) +'_joint_amax.png', tmp)

In [None]:
if __name__ == '__main__':
    tf.reset_default_graph()
    ds_train, ds_val = densepose_agu(path_pose_joint)
    
   # train(ds_train, ds_val)  
    iterator = ds_train.make_one_shot_iterator()
    one_element = iterator.get_next()
    
    iterator_val = ds_val.make_one_shot_iterator()
    one_element_val = iterator_val.get_next()
    
    

    img_ph = tf.placeholder(tf.float32, [batch_size, image_resize_w, image_resize_h, 3])
    joint_gt_ph = tf.placeholder(tf.float32, [batch_size, net_ouput_w, net_ouput_h, 18])
    seg_gt_ph = tf.placeholder(tf.float32, [batch_size, net_ouput_w, net_ouput_h, 15])
    
    is_train = tf.placeholder(tf.bool, shape=())
    
    predictions_key, predictions_seg = SHGnet(img_ph, nstack, is_train)
    
    losses_joint = 0
    losses_seg = 0
    for stack in range(0,nstack):
        joint_predict = predictions_key[stack]
     #   joint_predict = tf.nn.softmax(joint_predict, -1)
        seg_predict = predictions_seg[stack]
     #  seg_predict = tf.nn.softmax(seg_predict, -1)


     #   loss_joint = dice_loss(joint_gt_ph, joint_predict)
     #   loss_seg = dice_loss(seg_gt_ph, seg_predict)
        loss_joint = tf.losses.mean_squared_error(joint_gt_ph , joint_predict)
        loss_seg = tf.losses.mean_squared_error(seg_gt_ph , seg_predict)
        
        
        
        losses_joint += loss_joint
        losses_seg += loss_seg
    

    loss = losses_joint + losses_seg   
    loss /= batch_size
    loss /= nstack
    
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_op = tf.train.AdamOptimizer(0.0005).minimize(loss)
    
    tf.summary.scalar('loss_joint', loss_joint)
    tf.summary.scalar('loss_seg', loss_seg)

    merged = tf.summary.merge_all()

    saver = tf.train.Saver()
    
    init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())

Instructions for updating:
tf.py_func is deprecated in TF V2. Instead, use
    tf.py_function, which takes a python function which manipulates tf eager
    tensors instead of numpy arrays. It's easy to convert a tf eager tensor to
    an ndarray (just call tensor.numpy()) but having access to eager tensors
    means `tf.py_function`s can use accelerators such as GPUs as well as
    being differentiable using a gradient tape.
    
Instructions for updating:
Use keras.layers.conv2d instead.
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use keras.layers.batch_normalization instead.
Instructions for updating:
Use keras.layers.max_pooling2d instead.
Instructions for updating:
Use tf.cast instead.


In [None]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())
step = 0
try:
    saver.restore(sess, tf.train.latest_checkpoint(save_dir))
    with open(save_dir + '/checkpoint') as f:
        model_checkpoint_path = f.readline()
        global_step = int(re.search(r'\d+', model_checkpoint_path).group())
    step = global_step

except:
    print("no pre-trained model")   

writer_train = tf.summary.FileWriter("./log/train", sess.graph)
writer_val = tf.summary.FileWriter("./log/val", sess.graph)

print('step', step)

Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from ./model/model-400000
no pre-trained model


In [None]:
while(True):
    GT_imgNseg = sess.run(one_element)
    GT_imgNseg_val = sess.run(one_element_val)
    _, train_summary, loss_ = sess.run([train_op, merged, loss], feed_dict={img_ph:GT_imgNseg[0], joint_gt_ph:GT_imgNseg[1], seg_gt_ph:GT_imgNseg[2], is_train:True})
    val_summary = sess.run(merged, feed_dict={img_ph:GT_imgNseg_val[0], joint_gt_ph:GT_imgNseg_val[1], seg_gt_ph:GT_imgNseg_val[2], is_train:True})
    print(loss_)
    writer_train.add_summary(train_summary, step)
    writer_train.flush()
    writer_val.add_summary(val_summary, step)
    writer_val.flush()
    step+=1
    if step % result_save_freq == 0 or step < 3:
        joint_predicted, seg_predicted = sess.run([joint_predict, seg_predict], feed_dict={img_ph:GT_imgNseg[0], joint_gt_ph:GT_imgNseg[1], seg_gt_ph:GT_imgNseg[2], is_train:False})
        joint_predicted_val, seg_predicted_val = sess.run([joint_predict, seg_predict], feed_dict={img_ph:GT_imgNseg_val[0], joint_gt_ph:GT_imgNseg_val[1], seg_gt_ph:GT_imgNseg_val[2], is_train:False})

        save_train(vis_train_dir, GT_imgNseg[0], GT_imgNseg[3], GT_imgNseg[2], joint_predicted, seg_predicted, step)
        save_train(vis_val_dir, GT_imgNseg_val[0], GT_imgNseg_val[3], GT_imgNseg_val[2], joint_predicted_val, seg_predicted_val, step)

#          accuracy()

        print('step = ', step)
    if step % save_freq == 0:
        saver.save(sess, os.path.join(save_dir, 'model-' + str(step)))

    if step > iterations:
        break