# Import Libraries & Data

In [1]:
import os
import glob
import re
import numpy as np
import pandas as pd

In [2]:
dataset = pd.read_pickle('dolos_openface_merged.pkl')

# Aggregate all data again

In [3]:
# at first we used only the AU column -> no good results so we added features for pose and gaze as well
dataset.columns.values

array(['frame', 'face_id', 'timestamp', 'confidence', 'success',
       'gaze_0_x', 'gaze_0_y', 'gaze_0_z', 'gaze_1_x', 'gaze_1_y',
       'gaze_1_z', 'gaze_angle_x', 'gaze_angle_y', 'eye_lmk_x_0',
       'eye_lmk_x_1', 'eye_lmk_x_2', 'eye_lmk_x_3', 'eye_lmk_x_4',
       'eye_lmk_x_5', 'eye_lmk_x_6', 'eye_lmk_x_7', 'eye_lmk_x_8',
       'eye_lmk_x_9', 'eye_lmk_x_10', 'eye_lmk_x_11', 'eye_lmk_x_12',
       'eye_lmk_x_13', 'eye_lmk_x_14', 'eye_lmk_x_15', 'eye_lmk_x_16',
       'eye_lmk_x_17', 'eye_lmk_x_18', 'eye_lmk_x_19', 'eye_lmk_x_20',
       'eye_lmk_x_21', 'eye_lmk_x_22', 'eye_lmk_x_23', 'eye_lmk_x_24',
       'eye_lmk_x_25', 'eye_lmk_x_26', 'eye_lmk_x_27', 'eye_lmk_x_28',
       'eye_lmk_x_29', 'eye_lmk_x_30', 'eye_lmk_x_31', 'eye_lmk_x_32',
       'eye_lmk_x_33', 'eye_lmk_x_34', 'eye_lmk_x_35', 'eye_lmk_x_36',
       'eye_lmk_x_37', 'eye_lmk_x_38', 'eye_lmk_x_39', 'eye_lmk_x_40',
       'eye_lmk_x_41', 'eye_lmk_x_42', 'eye_lmk_x_43', 'eye_lmk_x_44',
       'eye_lmk_x_45', 'eye_lm

In [4]:
# keep only au, pose & gaze columns

au_cols = [c for c in dataset.columns if c.endswith("_r")]

pose_cols = [c for c in dataset.columns if c.startswith("pose_")]

gaze_cols = [c for c in dataset.columns if c.startswith("gaze_")]

final_columns = au_cols + pose_cols + pose_cols
final_columns

['AU01_r',
 'AU02_r',
 'AU04_r',
 'AU05_r',
 'AU06_r',
 'AU07_r',
 'AU09_r',
 'AU10_r',
 'AU12_r',
 'AU14_r',
 'AU15_r',
 'AU17_r',
 'AU20_r',
 'AU23_r',
 'AU25_r',
 'AU26_r',
 'AU45_r',
 'pose_Tx',
 'pose_Ty',
 'pose_Tz',
 'pose_Rx',
 'pose_Ry',
 'pose_Rz',
 'pose_Tx',
 'pose_Ty',
 'pose_Tz',
 'pose_Rx',
 'pose_Ry',
 'pose_Rz']

In [5]:
# compute aggregates (mean/max/std) per video

# compute  mean per video
video_mean = (dataset.groupby('video_id')[final_columns].mean())
video_mean

# compute AU mean per video
video_max = (dataset.groupby('video_id')[final_columns].max())
video_max

# compute AU mean per video
video_std = (dataset.groupby('video_id')[final_columns].std())
video_std

Unnamed: 0_level_0,AU01_r,AU02_r,AU04_r,AU05_r,AU06_r,AU07_r,AU09_r,AU10_r,AU12_r,AU14_r,...,pose_Tz,pose_Rx,pose_Ry,pose_Rz,pose_Tx,pose_Ty,pose_Tz,pose_Rx,pose_Ry,pose_Rz
video_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AN_WILTY_EP15_lie10,0.131243,0.085327,0.562402,0.033261,0.132124,0.181448,0.057571,0.446373,0.352630,0.328930,...,360.916228,0.084786,0.498067,0.071552,174.032103,69.130514,360.916228,0.084786,0.498067,0.071552
AN_WILTY_EP15_lie11,0.314456,0.219906,0.000000,0.130074,0.005639,0.000000,0.095572,0.297845,0.164786,0.574241,...,11.860857,0.060232,0.058292,0.065948,23.471155,9.319948,11.860857,0.060232,0.058292,0.065948
AN_WILTY_EP15_lie12,0.095647,0.070883,0.217000,0.079565,0.381883,0.461531,0.253215,0.538801,0.348212,0.470164,...,6.395445,0.051518,0.036717,0.035931,5.857887,4.081321,6.395445,0.051518,0.036717,0.035931
AN_WILTY_EP15_lie13,0.095652,0.076767,0.000000,0.048791,0.109656,0.129917,0.066048,0.419778,0.430428,0.564710,...,6.893409,0.031178,0.050602,0.050558,10.382623,4.785112,6.893409,0.031178,0.050602,0.050558
AN_WILTY_EP15_lie14,0.385574,0.348092,0.000000,0.113538,0.440128,0.313956,0.082497,0.566972,0.704785,0.606506,...,113.857880,0.070308,0.172690,0.041356,67.525139,20.701165,113.857880,0.070308,0.172690,0.041356
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
YW_WILTY_EP70_truth5,0.147764,0.145458,0.101270,0.152342,0.292310,0.311208,0.064355,0.373886,0.475253,0.197343,...,4.847969,0.057337,0.063470,0.061649,10.083449,5.163165,4.847969,0.057337,0.063470,0.061649
YW_WILTY_EP70_truth6,0.527518,0.200851,0.521999,0.367288,0.429061,0.612170,0.167712,0.636275,0.572024,0.434525,...,89.687619,0.340680,0.408428,0.178965,52.138436,24.575249,89.687619,0.340680,0.408428,0.178965
YW_WILTY_EP70_truth7,0.436879,0.281196,0.361892,0.057731,0.114382,0.298365,0.099114,0.140731,0.228547,0.000000,...,58.006466,0.055712,0.126236,0.032992,44.884650,14.471699,58.006466,0.055712,0.126236,0.032992
YW_WILTY_EP70_truth8,0.301876,0.182715,0.172107,0.656657,0.452309,0.531807,0.094904,0.408353,0.513274,0.409628,...,7.234302,0.097613,0.157272,0.136141,17.375231,6.708019,7.234302,0.097613,0.157272,0.136141


In [6]:
# bring label column as well

labels = (dataset[['video_id', 'label']].drop_duplicates(subset = 'video_id'))

dataset_mean = video_mean.merge(labels, on = 'video_id')
dataset_mean

dataset_max = video_max.merge(labels, on = 'video_id')
dataset_max

dataset_std = video_std.merge(labels, on = 'video_id')
dataset_std

Unnamed: 0,video_id,AU01_r,AU02_r,AU04_r,AU05_r,AU06_r,AU07_r,AU09_r,AU10_r,AU12_r,...,pose_Rx,pose_Ry,pose_Rz,pose_Tx,pose_Ty,pose_Tz,pose_Rx.1,pose_Ry.1,pose_Rz.1,label
0,AN_WILTY_EP15_lie10,0.131243,0.085327,0.562402,0.033261,0.132124,0.181448,0.057571,0.446373,0.352630,...,0.084786,0.498067,0.071552,174.032103,69.130514,360.916228,0.084786,0.498067,0.071552,lie
1,AN_WILTY_EP15_lie11,0.314456,0.219906,0.000000,0.130074,0.005639,0.000000,0.095572,0.297845,0.164786,...,0.060232,0.058292,0.065948,23.471155,9.319948,11.860857,0.060232,0.058292,0.065948,lie
2,AN_WILTY_EP15_lie12,0.095647,0.070883,0.217000,0.079565,0.381883,0.461531,0.253215,0.538801,0.348212,...,0.051518,0.036717,0.035931,5.857887,4.081321,6.395445,0.051518,0.036717,0.035931,lie
3,AN_WILTY_EP15_lie13,0.095652,0.076767,0.000000,0.048791,0.109656,0.129917,0.066048,0.419778,0.430428,...,0.031178,0.050602,0.050558,10.382623,4.785112,6.893409,0.031178,0.050602,0.050558,lie
4,AN_WILTY_EP15_lie14,0.385574,0.348092,0.000000,0.113538,0.440128,0.313956,0.082497,0.566972,0.704785,...,0.070308,0.172690,0.041356,67.525139,20.701165,113.857880,0.070308,0.172690,0.041356,lie
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1464,YW_WILTY_EP70_truth5,0.147764,0.145458,0.101270,0.152342,0.292310,0.311208,0.064355,0.373886,0.475253,...,0.057337,0.063470,0.061649,10.083449,5.163165,4.847969,0.057337,0.063470,0.061649,truth
1465,YW_WILTY_EP70_truth6,0.527518,0.200851,0.521999,0.367288,0.429061,0.612170,0.167712,0.636275,0.572024,...,0.340680,0.408428,0.178965,52.138436,24.575249,89.687619,0.340680,0.408428,0.178965,truth
1466,YW_WILTY_EP70_truth7,0.436879,0.281196,0.361892,0.057731,0.114382,0.298365,0.099114,0.140731,0.228547,...,0.055712,0.126236,0.032992,44.884650,14.471699,58.006466,0.055712,0.126236,0.032992,truth
1467,YW_WILTY_EP70_truth8,0.301876,0.182715,0.172107,0.656657,0.452309,0.531807,0.094904,0.408353,0.513274,...,0.097613,0.157272,0.136141,17.375231,6.708019,7.234302,0.097613,0.157272,0.136141,truth


#### ---> From now on we have one row per video with AU, Pose, Gaze mean/max/std & label.

# Output Files

In [7]:
dataset_mean.to_pickle('dolos_aggr_mean_v2.pkl')
dataset_max.to_pickle('dolos_aggr_max_v2.pkl')
dataset_std.to_pickle('dolos_aggr_std_v2.pkl')