In [None]:
#######################################################################################################################
# Project: Deep Virtual Rapport Agent (data preprocessing)
#
#     Jan Ondras (jo951030@gmail.com)
#     Institute for Creative Technologies, University of Southern California
#     April-October 2019
#
#######################################################################################################################
# Annotate OpenFace features from the SEWA dataset with ground-truth head gestures.
#
#     Each frame will be annotated with ground-truth annotations of nod and shake head gestures
#
#     Input features: dvra_datasets/sewa/openface_features 
#     Input annotations: dvra_datasets/sewa/original_data 
#     Output features: dvra_datasets/sewa/annotated_features
#######################################################################################################################

In [1]:
###########################################################
import numpy as np
random_seed = 37
np.random.seed(random_seed)
from tensorflow import set_random_seed
set_random_seed(random_seed)
###########################################################

# For each recording
#     Resample feature dataframe to the unified frame rate (if needed)
#     Add first and second derivatives of selected features
#     Annotate frames with 'nod' and 'shake' columns (0 => gesture absent, 1 => gesture present)
#     Save as new annotated dataframe

import os
import glob
import pandas as pd
import scipy.signal
from scipy import interpolate
import time
import pympi    # Import pympi to work with elan files

# Unified frame rate (frames per second) that every recording is brought to
FRAME_RATE = 30.

# Features whose first and second derivatives will be calculated.
# The names are used verbatim as dataframe column keys, so the leading space
# in each name is significant and must be kept.
diff_selected_features = [
    ' pose_Tx',
    ' pose_Ty',
    ' pose_Tz',

    ' pose_Rx',
    ' pose_Ry',
    ' pose_Rz',

    ' p_rx',
    ' p_ry',
    ' p_rz',

    # TODO: add landmarks?
]

# Root of the SEWA data; the three working directories below live under it.
sewa_dir = '/home/ICT2000/jondras/dvra_datasets/sewa'
input_features_dir = sewa_dir + '/openface_features'    # one <sid>.csv per recording
input_annotations_dir = sewa_dir + '/original_data'     # <sid>/<sid>-HeadGesture.csv per recording
output_dir = sewa_dir + '/annotated_features'           # annotated <sid>.csv files are written here

# exist_ok=True makes this a no-op when the directory is already there and
# avoids the check-then-create race of a separate os.path.exists() test.
os.makedirs(output_dir, exist_ok=True)

# Reference point for the cumulative "Time taken" progress messages
start_time = time.time()
def estimate_frame_rate(timestamps):
    """Return the average frame rate implied by a sequence of timestamps.

    The rate is (number of frames - 1) divided by the sum of the differences
    between consecutive timestamps.

    Raises:
        ValueError: if fewer than two timestamps are given (no interval to measure).
    """
    if len(timestamps) < 2:
        raise ValueError('At least two frames are needed to estimate the frame rate.')
    return (len(timestamps) - 1) / np.sum(np.diff(timestamps))


def resample_features(org_df, csv_frame_rate, frame_rate, time_col=' timestamp'):
    """Return the features of ``org_df`` sampled at ``frame_rate``.

    If ``csv_frame_rate`` rounds to ``frame_rate`` a plain copy of ``org_df`` is
    returned. Otherwise every column (including ``time_col`` itself) is linearly
    interpolated onto a regular grid that starts at 0, has step 1 / frame_rate
    and stops before the last original timestamp.

    NOTE(review): every column is interpolated, so any discrete-valued column
    ends up with fractional values in the resampled output.
    NOTE(review): the grid starts at 0, so the first timestamp must not be
    greater than 0 (interp1d rejects query points outside the sampled range).
    (An earlier attempt based on scipy.signal.resample did not work properly.)
    """
    if round(csv_frame_rate) == frame_rate:
        print(f'\tNOT resampling')
        return org_df.copy()

    print(f'\tREsampling from {csv_frame_rate} to {frame_rate}')
    source_times = org_df[time_col]
    target_times = np.arange(0., source_times.iloc[-1], step=1. / frame_rate)
    resampled_columns = [
        interpolate.interp1d(x=source_times, y=org_df[col_name], kind='linear')(target_times)
        for col_name in org_df.columns
    ]
    return pd.DataFrame(np.array(resampled_columns).T, columns=org_df.columns)


def add_derivatives(df, feature_names):
    """Return a copy of ``df`` with first and second differences of ``feature_names``.

    For each feature ``f`` two columns are appended: ``'diff_' + f`` (frame-to-frame
    difference) and ``'diff2_' + f`` (difference of that difference). The first
    value is prepended before differencing, so both new columns have the same
    length as ``df`` and start with 0.
    """
    diff_features = dict()
    for feature_name in feature_names:
        values = df[feature_name]
        # .iloc[0] is positional, so this works for any index, not only 0..n-1
        first_diff = np.diff(values, prepend=values.iloc[0])
        diff_features['diff_' + feature_name] = first_diff
        diff_features['diff2_' + feature_name] = np.diff(first_diff, prepend=first_diff[0])
    return df.assign(**diff_features)


def annotate_head_gestures(df, hg_df, csv_frame_rate, time_col=' timestamp'):
    """Return a copy of ``df`` with integer 0/1 columns ``nod`` and ``shake``.

    Each row of ``hg_df`` (columns ``first_frame``, ``last_frame``, ``head_gesture``)
    is converted to the time interval
    [(first_frame - 1) / csv_frame_rate, last_frame / csv_frame_rate];
    frames of ``df`` whose timestamp lies inside it (bounds included) are set to 1
    in the column of the corresponding gesture.
    (Frame numbers are presumably 1-based -- TODO confirm against the annotation format.)

    Raises:
        ValueError: if ``head_gesture`` is neither 'Nod' nor 'Shake'.
    """
    gesture_columns = {'Nod': 'nod', 'Shake': 'shake'}

    annotated = df.assign(nod=np.zeros(len(df), dtype=int))
    annotated = annotated.assign(shake=np.zeros(len(annotated), dtype=int))
    timestamps = annotated[time_col]

    for first_frame, last_frame, gesture in zip(hg_df['first_frame'],
                                                hg_df['last_frame'],
                                                hg_df['head_gesture']):
        if gesture not in gesture_columns:
            raise ValueError('Unknown head gesture!')
        # Annotation begin/end time
        interval_begin = (first_frame - 1) / csv_frame_rate
        interval_end = last_frame / csv_frame_rate
        in_interval = (timestamps >= interval_begin) & (timestamps <= interval_end)
        # Label-based assignment replaces pd.np.where: the pandas.np alias was
        # deprecated in pandas 1.0 and removed in pandas 2.0.
        annotated.loc[in_interval, gesture_columns[gesture]] = 1
    return annotated


# In a notebook kernel __name__ is '__main__', so the loop below runs whenever the
# cell is executed; the guard only keeps it from running if this code is imported.
if __name__ == '__main__':
    cnt = 0
    for feature_file in sorted(glob.glob(input_features_dir + '/*.csv')):

        # Recording id = file name up to the first dot
        sid = os.path.basename(feature_file).split('.')[0]
        print(f'Processing SESID {sid}')

        org_df = pd.read_csv(feature_file)
        print(len(org_df))

        # Resample feature dataframe to common frame rate, if needed
        csv_frame_rate = estimate_frame_rate(org_df[' timestamp'])
        print(f'\tcsv frame rate: {csv_frame_rate}')
        new_df = resample_features(org_df, csv_frame_rate, FRAME_RATE)

        # Add first and second derivatives of selected features
        new_df = add_derivatives(new_df, diff_selected_features)

        # Annotate frames with nod/shake labels taken from the recording's annotation file
        hg_df = pd.read_csv(f'{input_annotations_dir}/{sid}/{sid}-HeadGesture.csv')
        new_df = annotate_head_gestures(new_df, hg_df, csv_frame_rate)

        # Save as new annotated dataframe
        new_df.to_csv(f'{output_dir}/{sid}.csv', index=False)
        cnt += 1
        # Cumulative time since start_time, not the time for this recording alone
        print(f'Time taken: {time.time() - start_time} s\n')

    print(f'\nGenerated {cnt} annotated feature files.')

Processing SESID SAH_C1_S001_P001_VC1_003857_004977
1121
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 1.4785840511322021 s

Processing SESID SAH_C1_S005_P010_VC1_000001_000801
801
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 2.610422134399414 s

Processing SESID SAH_C1_S008_P016_VC1_005601_006984
1384
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 4.253813028335571 s

Processing SESID SAH_C1_S009_P017_VC1_004001_006201
2201
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 6.649529933929443 s

Processing SESID SAH_C1_S009_P018_VC1_004321_005367
1047
	csv frame rate: 49.99999999999999
	REsampling from 49.99999999999999 to 30.0
Time taken: 8.105289936065674 s

Processing SESID SAH_C1_S013_P025_VC1_000997_002939
1943
	csv frame rate: 49.99999999999999
	REsampling from 49.99999999999999 to 30.0
Time taken: 10.204917430877686 s

Processing SESID SAH_C1_S013_P025_VC1_006901_008783
1883
	csv frame rate: 50.0
	REsampling from

1300
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 74.09485077857971 s

Processing SESID SAH_C4_S111_P221_VC1_001951_003500
1550
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 76.09259104728699 s

Processing SESID SAH_C4_S112_P224_VC1_004801_005450
650
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 77.1158800125122 s

Processing SESID SAH_C4_S113_P226_VC1_002701_003600
900
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 78.48111629486084 s

Processing SESID SAH_C4_S116_P232_VC1_007551_008500
950
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 79.82185864448547 s

Processing SESID SAH_C4_S118_P236_VC1_001151_002250
1100
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 81.1899163722992 s

Processing SESID SAH_C4_S120_P239_VC1_006551_007250
700
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 82.28253817558289 s

Processing SESID SAH_C4_S122_P244_VC1_003301_004400
1100
	csv fr

1231
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 145.55414867401123 s

Processing SESID SAL_C2_S038_P076_VC1_001881_002782
902
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 146.73221850395203 s

Processing SESID SAL_C2_S041_P081_VC1_003415_004258
844
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 147.78695368766785 s

Processing SESID SAL_C2_S042_P083_VC1_001958_003364
1407
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 149.44669580459595 s

Processing SESID SAL_C2_S044_P087_VC1_004155_004862
708
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 150.45340609550476 s

Processing SESID SAL_C2_S046_P092_VC1_002832_004061
1230
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 151.9069640636444 s

Processing SESID SAL_C2_S048_P095_VC1_003085_003710
626
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 152.85993313789368 s

Processing SESID SAL_C2_S048_P096_VC1_000837_001711
875


	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 216.43781542778015 s

Processing SESID SAL_C6_S188_P375_VC1_004920_006091
1172
	csv frame rate: 49.99999999999999
	REsampling from 49.99999999999999 to 30.0
Time taken: 217.89582991600037 s

Processing SESID SAL_C6_S191_P381_VC1_004792_006827
2036
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 220.0943341255188 s

Processing SESID SSD_C1_S001_P001_VC1_004201_005201
1001
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 221.35920596122742 s

Processing SESID SSD_C1_S001_P002_VC1_002001_002701
701
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 222.34079217910767 s

Processing SESID SSD_C1_S004_P007_VC1_001001_001901
901
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 223.48878622055054 s

Processing SESID SSD_C1_S006_P012_VC1_002101_002801
701
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 224.55274295806885 s

Processing SESID SSD_C1_S012_P023_

Time taken: 285.6525478363037 s

Processing SESID SSD_C4_S097_P193_VC1_000601_001000
400
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 286.3526945114136 s

Processing SESID SSD_C4_S097_P194_VC1_002451_003100
650
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 287.2876799106598 s

Processing SESID SSD_C4_S101_P202_VC1_001251_001750
500
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 288.0750937461853 s

Processing SESID SSD_C4_S101_P202_VC1_004951_006350
1400
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 289.64163279533386 s

Processing SESID SSD_C4_S106_P212_VC1_003701_004200
500
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 290.43596839904785 s

Processing SESID SSD_C4_S107_P214_VC1_003801_005300
1500
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 292.1910183429718 s

Processing SESID SSD_C4_S111_P221_VC1_000651_001500
850
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time take

1001
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 361.9792790412903 s

Processing SESID SSL_C1_S029_P057_VC1_003151_003901
751
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 363.0080246925354 s

Processing SESID SSL_C1_S030_P060_VC1_002301_002701
401
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 363.70176815986633 s

Processing SESID SSL_C1_S194_P387_VC1_002301_003501
1201
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 365.1685564517975 s

Processing SESID SSL_C1_S194_P387_VC1_004401_005501
1101
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 366.5844886302948 s

Processing SESID SSL_C1_S194_P388_VC1_005501_005901
401
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 367.2820806503296 s

Processing SESID SSL_C2_S034_P067_VC1_001606_002132
527
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 368.1006586551666 s

Processing SESID SSL_C2_S035_P070_VC1_002850_003342
493
	csv 

Time taken: 421.9295530319214 s

Processing SESID SSL_C5_S153_P305_VC1_006651_007001
351
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 422.66086530685425 s

Processing SESID SSL_C5_S153_P306_VC1_006701_007101
401
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 423.3699641227722 s

Processing SESID SSL_C5_S157_P314_VC1_000601_001101
501
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 424.1690740585327 s

Processing SESID SSL_C5_S158_P316_VC1_000751_001201
451
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 424.92189836502075 s

Processing SESID SSL_C5_S159_P318_VC1_007201_007401
201
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 425.4487326145172 s

Processing SESID SSL_C6_S166_P332_VC1_001336_004670
3335
	csv frame rate: 49.99999999999999
	REsampling from 49.99999999999999 to 30.0
Time taken: 428.676641702652 s

Processing SESID SSL_C6_S168_P335_VC1_000756_003159
2404
	csv frame rate: 50.0
	REsampling fr

Time taken: 498.3091022968292 s

Processing SESID SVH_C3_S076_P151_VC1_002931_003181
251
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 498.89536356925964 s

Processing SESID SVH_C3_S077_P153_VC1_001141_001901
761
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 499.91161823272705 s

Processing SESID SVH_C3_S081_P161_VC1_003521_004011
491
	csv frame rate: 49.99999999999999
	REsampling from 49.99999999999999 to 30.0
Time taken: 500.69931721687317 s

Processing SESID SVH_C3_S083_P166_VC1_002701_003301
601
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 501.65225315093994 s

Processing SESID SVH_C3_S084_P168_VC1_001121_002001
881
	csv frame rate: 49.99999999999999
	REsampling from 49.99999999999999 to 30.0
Time taken: 502.76215839385986 s

Processing SESID SVH_C3_S085_P170_VC1_000901_001561
661
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 503.6980381011963 s

Processing SESID SVH_C3_S088_P175_VC1_000401_000981
581
	csv fra

Time taken: 555.8996167182922 s

Processing SESID SVL_C1_S014_P027_VC1_004045_006075
2031
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 558.099134683609 s

Processing SESID SVL_C1_S014_P028_VC1_002595_004043
1449
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 559.7461578845978 s

Processing SESID SVL_C1_S017_P034_VC1_001001_003801
2801
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 562.6059303283691 s

Processing SESID SVL_C1_S018_P036_VC1_002718_004073
1356
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 564.1910774707794 s

Processing SESID SVL_C1_S021_P041_VC1_002541_003447
907
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 565.3025951385498 s

Processing SESID SVL_C1_S022_P044_VC1_003301_004095
795
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 566.4537672996521 s

Processing SESID SVL_C1_S022_P044_VC1_007329_008693
1365
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time take

Time taken: 625.2222535610199 s

Processing SESID SVL_C5_S146_P292_VC1_005401_005601
201
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 625.7395269870758 s

Processing SESID SVL_C5_S156_P312_VC1_000601_000851
251
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 626.3147628307343 s

Processing SESID SVL_C5_S159_P318_VC1_000851_001401
551
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 627.1699900627136 s

Processing SESID SVL_C5_S159_P318_VC1_002001_002601
601
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 628.0368602275848 s

Processing SESID SVL_C5_S161_P321_VC1_002851_003201
351
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 628.697735786438 s

Processing SESID SVL_C5_S161_P321_VC1_007901_008251
351
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 629.3538720607758 s

Processing SESID SVL_C5_S163_P325_VC1_001701_001851
151
	csv frame rate: 50.0
	REsampling from 50.0 to 30.0
Time taken: 62