In [None]:
"""
evaluate multi time-window model that is a fusion model of multi-target models
 * select type: "EXP", "VA_V", "VA_A"
 * select sub1, sub2 type: "EXP", "VA_V", "VA_A"
 * evaluate validation per frame
"""
import numpy as np
import pandas as pd
import glob
import sklearn #機械学習のライブラリ
import lightgbm as lgb
from sklearn.metrics import accuracy_score,mean_squared_error,f1_score
from statistics import mean, median,variance,stdev
import math
from scipy import stats
import pickle
import os
import pathlib
from pathlib import Path

In [None]:
# create base dataset: concat the per-file feature tables (HDF5), add 'count' & 'subject' columns & drop unnecessary columns
#   str_type: "EXP" drops the Neutral std/range/slope columns, any other value drops the arousal/valence ones
#   str_time: window suffix removed from the file name to build 'subject', ex. '_01s'
def crate_base_data(data_file_names, str_type, str_time):
    """Build one feature table by stacking per-file HDF5 feature tables.

    For every usable file the table is read with ``pd.read_hdf``, tagged with
    two extra columns and stripped of bookkeeping / label-statistic columns:

    * ``count``   - position of the file in ``data_file_names`` (skipped files
      still consume a position, so the numbering can have gaps)
    * ``subject`` - file name without extension and without ``str_time``

    Files that do not exist, are 32 bytes or smaller, or contain no rows are
    skipped silently.

    Args:
        data_file_names: iterable of paths to HDF5 files.
        str_type: ``"EXP"`` drops the Neutral std/range/slope columns; any
            other value drops the arousal/valence std/range/slope columns.
        str_time: window suffix removed from the file name, e.g. ``"_01s"``.

    Returns:
        pandas.DataFrame with the rows of all usable files; each file keeps
        its own index (no re-indexing is done).

    Raises:
        ValueError: if no file in ``data_file_names`` yields any rows.
        KeyError: if a file lacks one of the columns that are dropped.
    """
    # bookkeeping statistics that are not features
    meta_columns = ['frame-avg',' face_id-avg',' timestamp-avg',' confidence-avg',' success-avg',
                    'frame-std',' face_id-std',' timestamp-std',' confidence-std',' success-std',
                    'frame-range', ' face_id-range', ' timestamp-range', ' confidence-range', ' success-range',
                    'frame-slope', ' face_id-slope', ' timestamp-slope', ' confidence-slope', ' success-slope',
                    'Unnamed: 0-avg', 'Unnamed: 0-std', 'Unnamed: 0-range', 'Unnamed: 0-slope']
    # label statistics; the '-avg' label columns are kept
    if str_type == "EXP":
        label_columns = ['Neutral-std','Neutral-range','Neutral-slope']
    else:
        label_columns = ['arousal-std', 'arousal-range', 'arousal-slope',
                         'valence-std', 'valence-range', 'valence-slope']

    data_list = []
    for count, data_file in enumerate(data_file_names):
        # skip missing files and files too small to hold any data
        if not (os.path.isfile(data_file) and os.path.getsize(data_file) > 32):
            continue

        data_tmp = pd.read_hdf(data_file)
        if len(data_tmp) < 1:
            continue

        # convert filename to 'subject'
        name_train = os.path.splitext(os.path.basename(data_file))[0].replace(str_time,'')

        data_tmp["count"] = count
        data_tmp["subject"] = name_train

        data_tmp = data_tmp.drop(meta_columns, axis=1)
        data_tmp = data_tmp.drop(label_columns, axis=1)

        data_list.append(data_tmp)

        log = 'count: {0}, name: {1}, data shape: {2}'.format(count, name_train, data_tmp.shape)
        print(log)

    if not data_list:
        # pd.concat would raise an opaque "No objects to concatenate" here
        raise ValueError(
            "no usable data file found for window '{0}' ({1} candidate files)".format(
                str_time, len(data_file_names)))

    data = pd.concat(data_list)

    log = '**** finished creating base dataset, data shape: {0}'.format(data.shape)
    print(log)

    return data


In [None]:
def load_model(file_model):
    """Read the pickled object stored at ``file_model`` and return it.

    NOTE(review): unpickling can execute arbitrary code, so only pass files
    that come from a trusted source.
    """
    with open(file_model, "rb") as model_file:
        return pickle.load(model_file)

In [None]:
# split base data into <AU>, <pose>, <gaze>, <openpose> and <resnet (all remaining feature columns)>
# ** 'count' and 'subject' are not contained in any of the splits
def split_data_no_label(in_data):
    """Split a feature table into five column groups selected by column name.

    Columns are chosen by regex substring match on their names:

    * AU       - names containing "AU"
    * pose     - names containing "pose_"
    * gaze     - names containing "gaze"
    * openpose - names containing "hand_flag" or "<k>x" / "<k>y" / "<k>c"
      for k in 0..24
    * resnet   - every column matching none of the above and not containing
      "count", "subject", "Neutral", "valence" or "arousal"

    The shape of each group is printed. Rows are never filtered.

    Returns:
        tuple ``(df_au, df_pose, df_lmk, df_op, df_rn)`` of DataFrames.
    """
    # presumably OpenPose keypoint columns (index + x / y / confidence) - TODO confirm
    keypoint_terms = [str(idx) + axis for idx in range(25) for axis in "xyc"]
    openpose_pattern = "|".join(["hand_flag"] + keypoint_terms)
    # everything that must NOT end up in the leftover (resnet) group
    leftover_exclusions = "|".join(
        ["AU", "pose_", "gaze", openpose_pattern,
         "count", "subject", "Neutral", "valence", "arousal"]
    )

    def pick(mask, label):
        # select the flagged columns and report the resulting shape
        subset = in_data.loc[:, mask]
        print(label, subset.shape)
        return subset

    df_au = pick(in_data.columns.str.contains("AU"), "AU data shape: ")
    df_pose = pick(in_data.columns.str.contains("pose_"), "Gaze & Pose data shape: ")
    df_lmk = pick(in_data.columns.str.contains("gaze"), "Landmark data shape: ")
    df_op = pick(in_data.columns.str.contains(openpose_pattern), "Opepose data shape: ")
    df_rn = pick(~in_data.columns.str.contains(leftover_exclusions), "Resnet data shape: ")

    print("** end **")
    return df_au, df_pose, df_lmk, df_op, df_rn
    

In [None]:
# collect per-feature importances from the trained models
def calc_imprtances(data_val, models, str_type, window_time):
    """Build one feature-importance table per feature group for one time window.

    ``data_val`` is split with ``split_data_no_label`` only to obtain the
    column names of each group; its rows are not used. ``models[0..4]`` must
    expose ``feature_importance()`` returning one value per column of the
    matching group (presumably LightGBM boosters - TODO confirm).

    Args:
        data_val: feature table whose column names label the importances.
        models: sequence whose first five entries are the AU, pose, gaze,
            openpose and resnet models, in that order.
        str_type: unused by this function.
        window_time: window length; used as the column name, zero-padded to
            two digits with an "s" suffix (e.g. ``6`` -> ``"06s"``).

    Returns:
        tuple of five single-column DataFrames (au, pose, lmk, op, rn)
        indexed by feature name.
    """
    print("split data to AU ,pose, gaze, openpose")
    feature_groups = split_data_no_label(data_val)
    group_columns = [list(group.columns) for group in feature_groups]

    window_label = "{0}s".format(str(window_time).zfill(2))
    tables = [
        pd.DataFrame(models[pos].feature_importance(), index=names, columns=[window_label])
        for pos, names in enumerate(group_columns)
    ]
    return tuple(tables)

In [None]:
def read_models(dir_model, str_type, window_time):
    """Load the six pickled models of one target type and time window.

    File names follow ``model_<group>_gbm_<str_type>_<WW>s.pickle`` where
    ``<WW>`` is ``window_time`` zero-padded to two digits. ``dir_model`` is
    concatenated as-is, so it must end with a path separator.

    Returns:
        list ``[au, pose, lmk, op, rn, ens]`` of the loaded objects.
    """
    suffix = "_" + str(window_time).zfill(2) + "s.pickle"
    group_names = ("au", "pose", "lmk", "op", "rn", "ens")
    return [
        load_model(dir_model + "model_" + group + "_gbm_" + str_type + suffix)
        for group in group_names
    ]

In [None]:
def _list_window_files(dir_validation, window_tag, max_files):
    """Return the sorted validation files of one window, limited to ``max_files``."""
    files = sorted(glob.glob(dir_validation + "*_" + window_tag + ".h5"))
    print("file number of val {0}: {1}".format(window_tag, len(files)))
    return files if max_files is None else files[:max_files]


def _write_importances(dir_out, str_type, per_window):
    """Join the per-window tables column-wise and write one CSV per feature group."""
    group_names = ("au", "pose", "lmk", "op", "rn")
    # per_window holds one (au, pose, lmk, op, rn) tuple per window; transpose it
    for group, tables in zip(group_names, zip(*per_window)):
        fp = dir_out + "importances_" + str_type + "_" + group + ".csv"
        pd.concat(list(tables), axis=1).to_csv(fp)


def run_get_importances(dir_validation, dir_model_01s, dir_model_06s, dir_model_12s, dir_out,
                        max_files=2):
    """Export feature importances of the 1 s / 6 s / 12 s models as CSV files.

    For each target type ("VA_A", "VA_V", "EXP") and each feature group
    (au, pose, lmk, op, rn) one file ``importances_<type>_<group>.csv`` is
    written to ``dir_out``, with one column per window ("01s", "06s", "12s").

    The validation tables are loaded only to obtain the feature names; they
    are always built with the "VA_A" column layout, also for the other two
    target types.

    Args:
        dir_validation: folder with ``*_01s.h5`` / ``*_06s.h5`` / ``*_12s.h5``
            files; must end with a path separator.
        dir_model_01s, dir_model_06s, dir_model_12s: model folders per window;
            must end with a path separator.
        dir_out: output folder; must end with a path separator and exist.
        max_files: number of validation files read per window (default 2, the
            previously hard-coded limit - looks like a speed-up since only
            column names are needed, TODO confirm). ``None`` reads all files.
    """
    str_type = "VA_A"
    str_type_sub1 = "VA_V"
    str_type_sub2 = "EXP"
    target_types = (str_type, str_type_sub1, str_type_sub2)
    windows = ((1, dir_model_01s), (6, dir_model_06s), (12, dir_model_12s))
    window_tags = [str(window_time).zfill(2) + "s" for window_time, _ in windows]

    # read models: one list (01s, 06s, 12s) per target type
    models = {}
    for target in target_types:
        models[target] = [read_models(dir_model, target, window_time)
                          for window_time, dir_model in windows]

    # search files of validation data
    files_val = [_list_window_files(dir_validation, tag, max_files) for tag in window_tags]

    # create base dataset
    print("data loading....")
    frames = []
    for tag, files in zip(window_tags, files_val):
        frame = crate_base_data(files, str_type, "_" + tag)
        print("data validation {0} shape: {1}".format(tag, frame.shape))
        frames.append(frame)
    print("finished data loading")

    # adjust data shape (same frame): keep only rows present and complete in all windows
    shape_log = "val data shape) 01s: {0}, 06s: {1}, 12s: {2}"
    print(shape_log.format(*[frame.shape for frame in frames]))

    # all windows are assumed to share the column layout of the 01s table
    base_columns = frames[0].columns
    length_columns = len(base_columns)

    for tag, frame in zip(window_tags, frames):
        frame.columns = frame.columns + "_" + tag

    # NOTE(review): axis=1 concat aligns on the index; this relies on the
    # indices of the three windows being compatible - verify against the data
    data_val = pd.concat(frames, axis=1)
    if str_type == "EXP":
        for tag in window_tags:
            data_val = data_val.loc[data_val["Neutral-avg_" + tag] >= 0]
    data_val = data_val.dropna(how='any')

    # cut the aligned table back into one table per window with the original names
    aligned = []
    for pos in range(len(frames)):
        part = data_val.iloc[:, length_columns * pos:length_columns * (pos + 1)].copy()
        part.columns = base_columns
        aligned.append(part)

    print(shape_log.format(*[part.shape for part in aligned]))

    # importances per target type; the resnet table is written to "_rn.csv"
    # for every target (VA_A was previously written to "_en.csv")
    for target in target_types:
        per_window = [
            calc_imprtances(part, window_models, target, window_time)
            for part, window_models, (window_time, _) in zip(aligned, models[target], windows)
        ]
        _write_importances(dir_out, target, per_window)

    

In [None]:
# root folder: the current working directory; base data lives in a sibling folder
dir_submit = str(Path().resolve())
dir_base = os.path.join(str(Path().resolve().parent), "base_data")

# merged data folder (va, exp : train, validation)
# the trailing "" keeps a trailing path separator, which the functions above
# rely on because they build file names by plain string concatenation
dir_data_va_val = os.path.join(dir_base, "Merged_with_resnet", "Merged_VA_roll", "Validation_Frame", "")
dir_data_exp_val = os.path.join(dir_base, "Merged_with_resnet", "Merged_EXP_roll", "Validation_Frame", "")

# set model folder: 01, 06, 12s window
dir_model_01s = os.path.join(dir_submit, "models", "t01", "")
dir_model_06s = os.path.join(dir_submit, "models", "t06", "")
dir_model_12s = os.path.join(dir_submit, "models", "t12", "")

# set output folder (created if missing)
dir_out = os.path.join(dir_submit, "models", "ensemble", "")
os.makedirs(dir_out, exist_ok=True)

dir_data_val = dir_data_va_val
run_get_importances(dir_data_val, dir_model_01s, dir_model_06s, dir_model_12s, dir_out)

print("*** finished ***")
