In [1]:
import tensorflow as tf
import tensorlayer as tl
from tensorflow.contrib.keras import layers
from tensorflow.contrib import slim
import numpy as np
import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

def weights_model(inputs, timesteps=101, dim=512, unit=128, emotion_embedding_dim=64, n_class=5):
    x = tf.unstack(inputs, axis=1) # len(x)=timestaps
    batch_norm_params = {
        'decay': 0.99,
        'epsilon': 0.001,
        'updates_collections': None,
        'variables_collections': [ tf.GraphKeys.TRAINABLE_VARIABLES ],
    }
    xs = []
    for x_i in x:
        xs.append(tf.contrib.layers.fully_connected(x_i, int(dim/2), activation_fn=tf.nn.relu,\
                                            normalizer_fn=slim.batch_norm, \
                                            normalizer_params=batch_norm_params, \
                                            scope="fc_1", reuse=tf.AUTO_REUSE))
    x = tf.stack(xs,axis=1)
    x = tl.layers.InputLayer(x, name='lstm1_input')
    x = tl.layers.DynamicRNNLayer(layer=x, cell_fn = tf.contrib.rnn.BasicLSTMCell, n_hidden = unit,\
                                  return_last = False, return_seq_2d = False, name = 'lstm_1').outputs
    x = tf.reshape(x, (-1,timesteps,unit))

    # FC (W^h) for mapping the dim from unit to emotion_embedding_dim
    x = tf.unstack(x, axis=1)
    xs = []
    for x_i in x:
        xs.append(tf.contrib.layers.fully_connected(x_i, emotion_embedding_dim, activation_fn=tf.nn.relu,\
                                            normalizer_fn=slim.batch_norm, \
                                            normalizer_params=batch_norm_params, \
                                            scope="fc_W", reuse=tf.AUTO_REUSE))
    
    x = xs
    # FC (e={e_1,e_2,...}) for embedding mapping from emotion_embedding_dim to n_class
    xs = []
    for x_i in x:
        xs.append(tf.contrib.layers.fully_connected(x_i, n_class, activation_fn=tf.nn.softmax,\
                                            scope="fc_E", reuse=tf.AUTO_REUSE))
    x = tf.stack(xs,axis=1)

    return x

def attention_model(inputs, f, timesteps=101, dim=512, unit=128, n_class=5):
    x = tf.unstack(inputs, axis=1) # len(x)=timestaps
    batch_norm_params = {
        'decay': 0.99,
        'epsilon': 0.001,
        'updates_collections': None,
        'variables_collections': [ tf.GraphKeys.TRAINABLE_VARIABLES ],
    }
    xs = []
    for x_i in x:
        xs.append(tf.contrib.layers.fully_connected(x_i, int(dim/2), activation_fn=tf.nn.relu,\
                                            normalizer_fn=slim.batch_norm, \
                                            normalizer_params=batch_norm_params, \
                                            scope="fc_2", reuse=tf.AUTO_REUSE))
    x = tf.stack(xs,axis=1)
    x = tl.layers.InputLayer(x, name='lstm2_input')
    x = tl.layers.DynamicRNNLayer(layer=x, cell_fn = tf.contrib.rnn.BasicLSTMCell, n_hidden = unit,\
                                  return_last = False, return_seq_2d = False, name = 'lstm_2').outputs
    x = tf.reshape(x, (-1,timesteps,unit))
    # f: weights (batch_size, timestep , n_class)
    x = tf.matmul(f, x, transpose_a=True) # n_class * units
    x = tf.unstack(x, axis=1) # len(x)=n_class
    xs = []
    for x_i in x:
        xs.append(layers.Dense(1)(x_i))
    x = tf.stack(xs,axis=1)
    x = tf.reshape(x, (-1,n_class))
    x = tf.nn.softmax(x)

    return x

# def LSTM_model(inputs, input_dim=512, output_dim=256, unit=128, n_class=5):
#     x = tf.unstack(inputs, axis=1) # len(x)=timestaps
#     batch_norm_params = {
#         # Decay for the moving averages.
#         'decay': 0.99,
#         # epsilon to prevent 0s in variance.
#         'epsilon': 0.001,
#         # force in-place updates of mean and variance estimates
#         'updates_collections': None,
#         # Moving averages ends up in the trainable variables collection
#         'variables_collections': [ tf.GraphKeys.TRAINABLE_VARIABLES ],
#     }
#     xs = []
#     for x_i in x:
#         xs.append(tf.contrib.layers.fully_connected(x_i, output_dim, activation_fn=tf.nn.relu,\
#                                             normalizer_fn=slim.batch_norm, \
#                                             normalizer_params=batch_norm_params, \
#                                             scope="fc256", reuse=tf.AUTO_REUSE))
#     x = tf.stack(xs,axis=1)
#     x = tl.layers.InputLayer(x, name='input_for_lstm')
#     x = tl.layers.DynamicRNNLayer(layer=x, cell_fn = tf.contrib.rnn.BasicLSTMCell, n_hidden = unit,\
#                                   return_last = True, return_seq_2d = True, name = 'lstm3').outputs
#     x = layers.Dense(n_class,activation='softmax')(x)
    
#     return x
    
dim=512
timesteps=141
n_class=5
sess=tf.InteractiveSession()
inputs = tf.placeholder(tf.float32, [None, timesteps, dim])
print('input shape:',inputs.shape)
y = tf.placeholder(tf.int64, [None, 1])
y_onehot = tf.reshape(tf.one_hot(y, n_class),(-1,n_class))
y_onehot_stacked = tf.reshape(tf.stack([y_onehot]*timesteps,axis=1),(-1,timesteps,n_class))

# model & loss
f_weights=weights_model(inputs, timesteps=timesteps, dim=dim, unit=128, emotion_embedding_dim=256, n_class=5)
outputs=attention_model(inputs, f_weights, timesteps=timesteps, dim=dim, unit=128, n_class=5)
# lstm_outputs=LSTM_model(inputs)
print('f_weights shape:',f_weights.shape)
print('outputs shape:',outputs.shape)
# print('lstm_outputs shape:',lstm_outputs.shape)
loss1 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=f_weights, labels=y_onehot_stacked))
loss2 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=outputs, labels=y_onehot))
# loss3 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=lstm_outputs, labels=y_onehot))
# tf.summary.scalar('loss', softmax_loss)

# cal acc
result=tf.argmax(outputs,1)
ground_truth = tf.reshape(y, [-1])
correct_prediction = tf.equal(result, ground_truth)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# tf.summary.scalar('accuracy', accuracy)

# train
# train_step = tf.train.AdamOptimizer(lr).minimize(softmax_loss)
# learning_rate=tf.placeholder(tf.float32)
update_ops=tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
#     train_op3 = tf.train.AdamOptimizer(0.001).minimize(loss3)
    # train_op1 = tf.train.MomentumOptimizer(learning_rate1, 0.9).minimize(loss1)
    # train_op2 = tf.train.MomentumOptimizer(learning_rate2, 0.9).minimize(loss2)
    train_op1 = tf.train.AdamOptimizer(0.0005).minimize(loss1)
    train_op2 = tf.train.AdamOptimizer(0.0005).minimize(loss2)

  return f(*args, **kwds)


input shape: (?, 141, 512)
  [TL] InputLayer  lstm1_input: (?, 141, 256)
  [TL] DynamicRNNLayer lstm_1: n_hidden:128, in_dim:3 in_shape:(?, 141, 256) cell_fn:BasicLSTMCell dropout:None n_layer:1
       non specified batch_size, uses a tensor instead.
  [TL] InputLayer  lstm2_input: (?, 141, 256)
  [TL] DynamicRNNLayer lstm_2: n_hidden:128, in_dim:3 in_shape:(?, 141, 256) cell_fn:BasicLSTMCell dropout:None n_layer:1
       non specified batch_size, uses a tensor instead.
f_weights shape: (?, 141, 5)
outputs shape: (?, 5)


In [2]:
import pandas as pd
# # CASME2

# file_head='CASME2/CASME2_vgg16_feature/'
# def cat(x,file_head):
#     if x[0]<10:
#         return file_head+'sub0'+str(x[0])+'/'+x[1]+'.csv'
#     else:
#         return file_head+'sub'+str(x[0])+'/'+x[1]+'.csv'

# label_path='CASME2-ObjectiveClasses.xlsx'
# data_labels=pd.read_excel(label_path)
# data_labels.head()
# name_list=data_labels[['Subject','Filename']].values
# label_list=data_labels['Objective Class'].values
# name_list=np.array(list(map(lambda x:cat(x,file_head),name_list)))
# index_useful=np.where(label_list<=5)[0] # 去除label6 label7
# label_list=label_list[index_useful]
# name_list=name_list[index_useful]
# label_list=label_list-1
# X_train=name_list
# y_train=label_list
# print('X_train:',X_train.shape)
# print('y_train:',y_train.shape)

# SAMM
data_path='SAMM_vggface_vgg16_feature/All/'
data_labels=os.listdir(data_path)
#去除label6 label7
data_labels.remove('6')
data_labels.remove('7')

people={}
X_train=[]
y_train=[]
for l in data_labels:
    list_temp=os.listdir(data_path+l+'/')
    for j in list_temp:
        X_train.append(data_path+l+'/'+j)
        y_train.append(int(l)-1)
        if j.split('_')[0] not in people:
            people[j.split('_')[0]]=[int(l)]
        elif int(l) not in people[j.split('_')[0]]:
            people[j.split('_')[0]].append(int(l))          
X_train=np.array(X_train)
y_train=np.array(y_train)
print('X_train:',X_train.shape)
print('y_train:',y_train.shape)

X_train: (68,)
y_train: (68,)


In [4]:
n_epochs = 300
batch_size = 8
n_batch = int(X_train.shape[0] / batch_size)
sess.run(tf.global_variables_initializer())
lr=0.0005
print('train for f_weights...')
for epoch in range(n_epochs):
#     if epoch%100==0 and epoch!=0:
#         lr*=0.5
    # training step
    index = np.arange(X_train.shape[0])
    np.random.shuffle(index)
    X_train = X_train[index]
    y_train = y_train[index]
    sum_loss = 0
    last_train_str = ""
    for i in range(n_batch):
        x_path = X_train[i * batch_size:(i + 1) * batch_size]
        x_temp = []
        for path in x_path:
            x_i= pd.read_csv(path)
            x_i.pop('id')
            x_temp.append(x_i.values)
        x_temp = np.reshape(x_temp, (batch_size, timesteps, dim))
        y_temp = y_train[i * batch_size:(i + 1) * batch_size]
        y_temp = np.reshape(y_temp, (batch_size, 1))
        feed_dict = {inputs: x_temp, y: y_temp}
        _, loss_value = sess.run([train_op1, loss1], feed_dict=feed_dict)
        sum_loss += loss_value
        last_train_str = "\r[epoch:%d/%d, steps:%d/%d] -loss: %.4f" % \
                         (epoch + 1, n_epochs, i + 1, n_batch, sum_loss / (i + 1))
        print(last_train_str, end='      ', flush=True)
    print('\n')

print('train for the attention model...')
lr=0.0005
for epoch in range(n_epochs):
    if epoch%10==0 and epoch!=0:
        lr*=0.5
    index = np.arange(X_train.shape[0])
    np.random.shuffle(index)
    X_train = X_train[index]
    y_train = y_train[index]
    sum_loss = 0
    sum_acc = 0
    last_train_str = ""
    for i in range(n_batch):
        x_path = X_train[i * batch_size:(i + 1) * batch_size]
        x_temp = []
        for path in x_path:
            x_i= pd.read_csv(path)
            x_i.pop('id')
            x_temp.append(x_i.values)
        x_temp = np.reshape(x_temp, (batch_size, timesteps, dim))
        y_temp = y_train[i * batch_size:(i + 1) * batch_size]
        y_temp = np.reshape(y_temp, (batch_size, 1))
        feed_dict = {inputs: x_temp, y: y_temp}
        _, loss_value, acc_value = sess.run([train_op2, loss2, accuracy], feed_dict=feed_dict)
        sum_loss += loss_value
        sum_acc += (acc_value*100)
        last_train_str = "\r[epoch:%d/%d, steps:%d/%d] -loss: %.4f - acc: %.2f%%" % \
                         (epoch + 1, n_epochs, i + 1, n_batch, sum_loss / (i + 1), sum_acc / (i + 1))
        print(last_train_str, end='      ', flush=True)
    print('\n')
    
# print('train for the simple lstm model...')
# lr=0.01
# for epoch in range(n_epochs):
# #     if epoch%10==0 and epoch!=0:
# #         lr*=0.5
#     index = np.arange(X_train.shape[0])
#     np.random.shuffle(index)
#     X_train = X_train[index]
#     y_train = y_train[index]
#     sum_loss = 0
#     sum_acc = 0
#     last_train_str = ""
#     for i in range(n_batch):
#         x_path = X_train[i * batch_size:(i + 1) * batch_size]
#         x_temp = []
#         for path in x_path:
#             x_i= pd.read_csv(path)
#             x_i.pop('id')
#             x_temp.append(x_i.values)
#         x_temp = np.reshape(x_temp, (batch_size, timesteps, dim))
#         y_temp = y_train[i * batch_size:(i + 1) * batch_size]
#         y_temp = np.reshape(y_temp, (batch_size, 1))
#         feed_dict = {inputs: x_temp, y: y_temp}
#         _, loss_value, acc_value = sess.run([train_op3, loss3, accuracy], feed_dict=feed_dict)
#         sum_loss += loss_value
#         sum_acc += (acc_value*100)
#         last_train_str = "\r[epoch:%d/%d, steps:%d/%d] -loss: %.4f - acc: %.2f%%" % \
#                          (epoch + 1, n_epochs, i + 1, n_batch, sum_loss / (i + 1), sum_acc / (i + 1))
#         print(last_train_str, end='      ', flush=True)
#     print('\n')

train for f_weights...
[epoch:1/300, steps:8/8] -loss: 1.5856      

[epoch:2/300, steps:8/8] -loss: 1.4999      

[epoch:3/300, steps:8/8] -loss: 1.4749      

[epoch:4/300, steps:8/8] -loss: 1.4295      

[epoch:5/300, steps:8/8] -loss: 1.3779      

[epoch:6/300, steps:8/8] -loss: 1.3458      

[epoch:7/300, steps:8/8] -loss: 1.3198      

[epoch:8/300, steps:8/8] -loss: 1.3317      

[epoch:9/300, steps:8/8] -loss: 1.2894      

[epoch:10/300, steps:8/8] -loss: 1.3855      

[epoch:11/300, steps:8/8] -loss: 1.4014      

[epoch:12/300, steps:8/8] -loss: 1.2994      

[epoch:13/300, steps:8/8] -loss: 1.2839      

[epoch:14/300, steps:8/8] -loss: 1.2943      

[epoch:15/300, steps:8/8] -loss: 1.3090      

[epoch:16/300, steps:8/8] -loss: 1.3021      

[epoch:17/300, steps:8/8] -loss: 1.2491      

[epoch:18/300, steps:8/8] -loss: 1.2732      

[epoch:19/300, steps:8/8] -loss: 1.2566      

[epoch:20/300, steps:8/8] -loss: 1.2648      

[epoch:21/300, steps:8/8] -loss: 1.2515      


[epoch:173/300, steps:8/8] -loss: 1.0491      

[epoch:174/300, steps:8/8] -loss: 1.1105      

[epoch:175/300, steps:8/8] -loss: 1.0790      

[epoch:176/300, steps:8/8] -loss: 1.0725      

[epoch:177/300, steps:8/8] -loss: 1.0936      

[epoch:178/300, steps:8/8] -loss: 1.1038      

[epoch:179/300, steps:8/8] -loss: 1.0792      

[epoch:180/300, steps:8/8] -loss: 1.0828      

[epoch:181/300, steps:8/8] -loss: 1.1016      

[epoch:182/300, steps:8/8] -loss: 1.0577      

[epoch:183/300, steps:8/8] -loss: 1.0689      

[epoch:184/300, steps:8/8] -loss: 1.1350      

[epoch:185/300, steps:8/8] -loss: 1.0743      

[epoch:186/300, steps:8/8] -loss: 1.0362      

[epoch:187/300, steps:8/8] -loss: 1.1045      

[epoch:188/300, steps:8/8] -loss: 1.0812      

[epoch:189/300, steps:8/8] -loss: 1.0913      

[epoch:190/300, steps:8/8] -loss: 1.0982      

[epoch:191/300, steps:8/8] -loss: 1.1457      

[epoch:192/300, steps:8/8] -loss: 1.1681      

[epoch:193/300, steps:8/8] -loss: 1.1271

[epoch:34/300, steps:8/8] -loss: 1.4282 - acc: 46.88%      

[epoch:35/300, steps:8/8] -loss: 1.3868 - acc: 50.00%      

[epoch:36/300, steps:8/8] -loss: 1.3862 - acc: 51.56%      

[epoch:37/300, steps:8/8] -loss: 1.4033 - acc: 48.44%      

[epoch:38/300, steps:8/8] -loss: 1.3934 - acc: 51.56%      

[epoch:39/300, steps:8/8] -loss: 1.4096 - acc: 48.44%      

[epoch:40/300, steps:8/8] -loss: 1.3699 - acc: 51.56%      

[epoch:41/300, steps:8/8] -loss: 1.3659 - acc: 51.56%      

[epoch:42/300, steps:8/8] -loss: 1.4094 - acc: 50.00%      

[epoch:43/300, steps:8/8] -loss: 1.3614 - acc: 53.12%      

[epoch:44/300, steps:8/8] -loss: 1.4254 - acc: 46.88%      

[epoch:45/300, steps:8/8] -loss: 1.4039 - acc: 51.56%      

[epoch:46/300, steps:8/8] -loss: 1.3745 - acc: 51.56%      

[epoch:47/300, steps:8/8] -loss: 1.4191 - acc: 46.88%      

[epoch:48/300, steps:8/8] -loss: 1.3875 - acc: 51.56%      

[epoch:49/300, steps:8/8] -loss: 1.3956 - acc: 50.00%      

[epoch:50/300, steps:8/8

[epoch:167/300, steps:8/8] -loss: 1.4297 - acc: 46.88%      

[epoch:168/300, steps:8/8] -loss: 1.4158 - acc: 48.44%      

[epoch:169/300, steps:8/8] -loss: 1.4304 - acc: 48.44%      

[epoch:170/300, steps:8/8] -loss: 1.4510 - acc: 45.31%      

[epoch:171/300, steps:8/8] -loss: 1.4823 - acc: 42.19%      

[epoch:172/300, steps:8/8] -loss: 1.5428 - acc: 35.94%      

[epoch:173/300, steps:8/8] -loss: 1.4647 - acc: 43.75%      

[epoch:174/300, steps:8/8] -loss: 1.5067 - acc: 39.06%      

[epoch:175/300, steps:8/8] -loss: 1.5431 - acc: 35.94%      

[epoch:176/300, steps:8/8] -loss: 1.4952 - acc: 40.62%      

[epoch:177/300, steps:8/8] -loss: 1.5706 - acc: 32.81%      

[epoch:178/300, steps:8/8] -loss: 1.5084 - acc: 39.06%      

[epoch:179/300, steps:8/8] -loss: 1.4962 - acc: 40.62%      

[epoch:180/300, steps:8/8] -loss: 1.5579 - acc: 34.38%      

[epoch:181/300, steps:8/8] -loss: 1.4327 - acc: 46.88%      

[epoch:182/300, steps:8/8] -loss: 1.4526 - acc: 45.31%      

[epoch:1

[epoch:299/300, steps:8/8] -loss: 1.4644 - acc: 43.75%      

[epoch:300/300, steps:8/8] -loss: 1.4646 - acc: 42.19%      

