In [None]:
"""
merge frame count, openface, openpose, and resnet data per frame (test set)
 * rows are joined on the "frame" column (openface data can contain multiple persons and missing frames)
 * creates the "Merged_VA/Test" and "Merged_EXP/Test" subfolders in the output folder
 * filename of openface: <video name>.csv
 * filename of openpose: <video name>_openpose.csv
"""

import numpy as np
import pandas as pd
import glob
import re
import datetime
import os
import pathlib
from pathlib import Path

In [None]:
# Path configuration for the test-set merge.
# Every folder variable keeps a trailing path separator because later code
# builds file paths by plain string concatenation (folder + file name).
# os.path.join is used instead of hard-coded "\\" so the same strings are
# produced on Windows while the paths also resolve on other platforms.

# root folders: current working directory and its sibling "base_data"
dir_submit = str(Path().resolve())
dir_base = os.path.join(str(Path().resolve().parent), "base_data")

# openface, openpose, resnet folders
dir_of = os.path.join(dir_base, "OpenFace", "")
dir_op = os.path.join(dir_base, "OpenPose", "")
dir_rn = os.path.join(dir_base, "Resnet", "")

# folder holding the VA / EXP test set name lists
dir_test_set = os.path.join(dir_base, "test_set", "")

# test set name files
file_test_va = os.path.join(dir_test_set, "va_test_set.txt")
file_test_exp = os.path.join(dir_test_set, "expression_test_set.txt")

# folder with the saved input standardization parameters
dir_norm = os.path.join(dir_base, "Merged_with_resnet", "Norm", "")

# frame count folder
dir_count = os.path.join(dir_base, "Frame_Count", "")

# output dataset folders (created, including parents, when missing)
dir_out_va = os.path.join(dir_base, "Merged_with_resnet", "Merged_VA", "Test", "")
os.makedirs(dir_out_va, exist_ok=True)

dir_out_exp = os.path.join(dir_base, "Merged_with_resnet", "Merged_EXP", "Test", "")
os.makedirs(dir_out_exp, exist_ok=True)

# frame count files found in dir_count, sorted by path
# (no exclusion filter is applied here)
file_count = os.path.join(dir_count, "*.csv")
files_count = sorted(glob.glob(file_count))
log = "file number of count: {0}".format(len(files_count))
print(log)


In [None]:
# load the VA and EXP test-set name lists (the files have no header row)
df_name_va, df_name_exp = [
    pd.read_csv(list_path, header=None)
    for list_path in (file_test_va, file_test_exp)
]
# show both tables, VA first, one after the other
print(df_name_va, df_name_exp, sep="\n")

In [None]:
# load the saved standardization parameters
def get_standardize_param(dir_norm_param):
    """Load the stored mean / std tables and stack them into two frames.

    Six HDF5 files are read from ``dir_norm_param`` with the HDF key "key":
    ``raw_mean_of.h5``, ``raw_std_of.h5``, ``raw_mean_op.h5``,
    ``raw_std_op.h5``, ``raw_mean_rn.h5`` and ``raw_std_rn.h5``.
    The mean tables are concatenated row-wise in the order
    openface -> openpose -> resnet, and likewise the std tables.

    Parameters
    ----------
    dir_norm_param : str
        Folder prefix. File names are appended by plain string
        concatenation, so it must already end with a path separator.

    Returns
    -------
    tuple
        ``(mean_data, std_data)``, each the concatenated table after
        ``reset_index()``, i.e. with a fresh 0..n-1 row index and the former
        index moved into a column.
        NOTE(review): presumably the stored objects are Series indexed by
        feature name, which makes column position 1 the value column --
        confirm against the code that wrote these files.
    """
    sources = ("of", "op", "rn")

    # Files are read in the order mean_of, std_of, mean_op, std_op, mean_rn, std_rn.
    loaded = {
        (stat, src): pd.read_hdf(
            dir_norm_param + "raw_{0}_{1}.h5".format(stat, src), "key"
        )
        for src in sources
        for stat in ("mean", "std")
    }

    # pd.concat replaces Series/DataFrame.append, which was removed in pandas 2.0.
    mean_data = pd.concat([loaded[("mean", src)] for src in sources]).reset_index()
    std_data = pd.concat([loaded[("std", src)] for src in sources]).reset_index()

    return mean_data, std_data

In [None]:
# load the standardization tables from the Norm folder
mean_data, std_data = get_standardize_param(dir_norm)

# show the mean table, then the std table
print(mean_data, std_data, sep="\n")

In [None]:
# standardize the feature columns of a merged frame (in place)
def _standardize_columns(frame, param_mean, param_std):
    """Apply (x - mean) / std to column positions 5-35 and 56 onward.

    Column positions 0-4 and 36-55 are left untouched. The mean and std for
    column position ``col`` are read from row ``col``, column position 1 of
    ``param_mean`` / ``param_std``, so both tables must have at least as many
    rows as ``frame`` has columns.
    """
    for col in range(len(frame.columns)):
        if not (5 <= col <= 35 or col >= 56):
            continue
        frame.iloc[:, col] = (
            frame.iloc[:, col] - param_mean.iloc[col, 1]
        ) / param_std.iloc[col, 1]


# merge frame count, openface, openpose and resnet data per video
def merge_of_data(data_names,dir_count, dir_of, dir_op, dir_rn, dir_out, param_mean, param_std, str_type):
    """Merge the per-frame feature files of every listed video and save them.

    For each name in the first column of ``data_names`` the function reads
    ``<dir_count><name>.csv``, ``<dir_of><name>.csv``,
    ``<dir_op><name>_openpose.csv`` and ``<dir_rn><name>_resnet50.h5``,
    left-joins openface, openpose and resnet onto the frame count table on
    the "frame" column, standardizes the feature columns and writes the
    result to ``<dir_out><name>.h5`` (HDF key "key"). One progress line is
    printed per video and "** finished **" at the end.

    Parameters
    ----------
    data_names : pandas.DataFrame
        Table whose first column holds the video names.
    dir_count, dir_of, dir_op, dir_rn, dir_out : str
        Folder prefixes. File names are appended by plain string
        concatenation, so each must end with a path separator.
    param_mean, param_std : pandas.DataFrame
        Standardization tables; row ``i``, column position 1 holds the value
        applied to column position ``i`` of the merged frame.
    str_type : str
        Not used by this function; kept so existing callers keep working.

    Returns
    -------
    None
        Also when ``data_names`` is empty, in which case only a message is
        printed and nothing is read or written.
    """
    max_count = len(data_names)
    if max_count < 1:
        print("test files name are not found")
        return

    for count, name in enumerate(data_names.iloc[:, 0].values, start=1):
        # An all-numeric name list is parsed as integers by read_csv;
        # coerce to str so the path concatenations below cannot fail.
        name = str(name)

        # set save file name
        file_out = dir_out + name + ".h5"

        # read openface data, keep the first row of every duplicated frame
        f_of = dir_of + name + ".csv"
        data_of = pd.read_csv(f_of).drop_duplicates(["frame"])

        # read openpose data, keep the first row of every duplicated
        # "Unnamed: 0" value, and derive "frame" as "Unnamed: 0" + 1
        f_op = dir_op + name + "_openpose.csv"
        data_op = pd.read_csv(f_op).drop_duplicates(["Unnamed: 0"])
        data_op["frame"] = data_op["Unnamed: 0"] + 1

        # read resnet data, keeping only the first 201 columns
        # NOTE(review): the merge below needs a "frame" column among them -- confirm
        f_rn = dir_rn + name + "_resnet50.h5"
        data_rn = pd.read_hdf(f_rn).iloc[:, 0:201]

        # read the frame count table; it is the left side of every join,
        # so its rows decide which frames end up in the output
        f_count = dir_count + name + ".csv"
        data_count = pd.read_csv(f_count)

        # left-join openface, openpose and resnet on "frame"
        data_tmp = data_count.merge(data_of, on='frame', how='left')
        data_tmp = data_tmp.merge(data_op, on='frame', how='left')
        data_tmp = data_tmp.merge(data_rn, on='frame', how='left')
        data_tmp = data_tmp.reset_index(drop=True)

        # standardize the feature columns
        _standardize_columns(data_tmp, param_mean, param_std)

        # names recovered from the three input paths, used only for the log line
        name_of = os.path.splitext(os.path.basename(f_of))[0]
        name_op = os.path.splitext(os.path.basename(f_op))[0].replace("_openpose", "")
        name_rn = os.path.splitext(os.path.basename(f_rn))[0].replace("_resnet50", "")

        # save merged file
        data_tmp.to_hdf(file_out, key='key', mode="w", complevel=5, complib="lzo")
        log = "{0}/{1}: {2}, {3}, {4}".format(count, max_count, name_of, name_op, name_rn)
        print(log)

    log = "** finished **"
    print(log)


In [None]:
# build and save the merged data for the "VA Test" set
merge_of_data(
    data_names=df_name_va,
    dir_count=dir_count,
    dir_of=dir_of,
    dir_op=dir_op,
    dir_rn=dir_rn,
    dir_out=dir_out_va,
    param_mean=mean_data,
    param_std=std_data,
    str_type="VA",
)

# build and save the merged data for the "EXP Test" set
merge_of_data(
    data_names=df_name_exp,
    dir_count=dir_count,
    dir_of=dir_of,
    dir_op=dir_op,
    dir_rn=dir_rn,
    dir_out=dir_out_exp,
    param_mean=mean_data,
    param_std=std_data,
    str_type="EXP",
)
