In [2]:
#######################################################################################################################
# Project: Deep Virtual Rapport Agent (head gesture detector)
#
#     Jan Ondras (jo951030@gmail.com)
#     Institute for Creative Technologies, University of Southern California
#     April-October 2019
#
#######################################################################################################################
# Annotate OpenFace vision features from the mimicry dataset with smile, gaze away, voice activity, and take turn 
#     binary labels
# 
#     Run after the vision features are annotated with nod, shake, and tilt head gestures using the developed Head 
#     Gesture Detector (HGD) (deep-virtual-rapport-agent/head_gesture_detector/hgd_annotate_frames.ipynb).
# 
#     Note: it changes the annotated vision featurefiles generated by HGD in-place (!) adding smile, gaze_away, 
#     voice_active and take_turn columns. 
# 
#    Input/output features: dvra_datasets/mimicry/vision_features/annotated_features
#    Input voice activity: dvra_datasets/mimicry/voice_activity_detection/voice_activity_ibm_watson
#######################################################################################################################

###########################################################
import numpy as np
random_seed = 37
np.random.seed(random_seed)
from tensorflow import set_random_seed
set_random_seed(random_seed)
###########################################################

# from utils import SMOOTHING_KERNEL_SIZE
from collections import defaultdict
import pandas as pd
import glob
import time
# from matplotlib import pyplot as plt
import scipy.signal
from scipy import interpolate

# Unified frame rate
FRAME_RATE = 30.

# Based on the fact that smile duration is 500-4000 ms [https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5826373/]
SMILE_SMOOTHING_KERNEL_SIZE = 15
# Based on minimum gaze glance duration ~ 2 s 
GAZE_SMOOTHING_KERNEL_SIZE = 59

DATASET_NAME = 'mimicry'
inputoutput_features_dir = f'/home/ICT2000/jondras/dvra_datasets/{DATASET_NAME}/vision_features/annotated_features'
voice_activity_dir = f'/home/ICT2000/jondras/dvra_datasets/mimicry/voice_activity_detection/voice_activity_ibm_watson'

start_time = time.time()
cnt = 0
for feature_file in sorted(glob.glob(inputoutput_features_dir + '/*.csv')):
    
    file_basename = feature_file.split('/')[-1].split('.')[0]
    print(f'Processing {file_basename}')
    df = pd.read_csv(feature_file)
    print(f'\tDataframe length: {len(df)}')
    
    # Add SMILE annotations & smooth
    smile_ann = np.where((df[' AU06_r'] >= 1.) & (df[' AU12_r'] >= 1.), 1, 0)
    smile_ann = scipy.signal.medfilt(smile_ann, kernel_size=SMILE_SMOOTHING_KERNEL_SIZE).astype(int)
    df['smile'] = smile_ann
    
    # Add GAZE_AWAY annotations & smooth
    gaze_angle_x = df[' gaze_angle_x']
    gaze_angle_y = df[' gaze_angle_y']
    gaze_away_ann = np.where(
        ( (abs(np.mean(gaze_angle_x) - df[' gaze_angle_x']) > np.std(gaze_angle_x)) 
            | (abs(np.mean(gaze_angle_y) - df[' gaze_angle_y']) > np.std(gaze_angle_y)) )
        & (df[' AU45_r'] < 1.), 1, 0)
    gaze_away_ann = scipy.signal.medfilt(gaze_away_ann, kernel_size=GAZE_SMOOTHING_KERNEL_SIZE).astype(int)
    df['gaze_away'] = gaze_away_ann
    
    # Add VOICE_ACTIVE annotations
    va_df = pd.read_csv(f'{voice_activity_dir}/{file_basename}.csv')
    df['voice_active'] = np.zeros(len(df), dtype=int)
    for _, row in va_df.iterrows():
        df['voice_active'] = pd.np.where(
            (df[' timestamp'] >= row['start_time']) & (df[' timestamp'] <= row['end_time']), 1, df['voice_active'])
                
    # Add TAKE_TURN annotations
    take_turn_ann = np.zeros(len(df), dtype=int)
    for j in range(len(df) - 1):
        # End of VA interval => set take_turn in the first frame after the voice active interval
        if (df.iloc[j]['voice_active'] == 1) and (df.iloc[j + 1]['voice_active'] == 0):
            take_turn_ann[j + 1] = 1
    df['take_turn'] = take_turn_ann
    
    # Replace the original dataframe
    df.to_csv(feature_file, index=False)
    cnt += 1
    print(f'\tTime taken: {time.time() - start_time} s\n')        
#     break

print(f'\nGenerated {cnt} fully-annotated feature files.')

Processing sessid_01_P1_sid_09
	Dataframe length: 18217
	Time taken: 59.08472728729248 s

Processing sessid_01_P2_sid_02
	Dataframe length: 18217
	Time taken: 117.74326634407043 s

Processing sessid_02_P1_sid_09
	Dataframe length: 29598
	Time taken: 212.04612016677856 s

Processing sessid_02_P2_sid_17
	Dataframe length: 29598
	Time taken: 305.9755229949951 s

Processing sessid_03_P1_sid_17
	Dataframe length: 13336
	Time taken: 349.88799691200256 s

Processing sessid_03_P2_sid_02
	Dataframe length: 13336
	Time taken: 390.83993434906006 s

Processing sessid_04_P1_sid_12
	Dataframe length: 28631
	Time taken: 474.8661289215088 s

Processing sessid_04_P2_sid_23
	Dataframe length: 28631
	Time taken: 568.4601290225983 s

Processing sessid_05_P1_sid_12
	Dataframe length: 29297
	Time taken: 658.5412974357605 s

Processing sessid_05_P2_sid_21
	Dataframe length: 29297
	Time taken: 755.7943522930145 s

Processing sessid_06_P1_sid_23
	Dataframe length: 29025
	Time taken: 845.7748095989227 s

Proces

	Dataframe length: 29503
	Time taken: 7317.513409614563 s

Processing sessid_48_P1_sid_58
	Dataframe length: 23904
	Time taken: 7392.31408572197 s

Processing sessid_48_P2_sid_37
	Dataframe length: 23904
	Time taken: 7467.653558015823 s

Processing sessid_49_P1_sid_56
	Dataframe length: 26394
	Time taken: 7548.868389368057 s

Processing sessid_49_P2_sid_49
	Dataframe length: 26394
	Time taken: 7632.138125896454 s

Processing sessid_50_P1_sid_12
	Dataframe length: 25721
	Time taken: 7713.060355186462 s

Processing sessid_50_P2_sid_31
	Dataframe length: 25721
	Time taken: 7797.866845846176 s

Processing sessid_51_P1_sid_12
	Dataframe length: 19426
	Time taken: 7857.78262758255 s

Processing sessid_51_P2_sid_59
	Dataframe length: 19426
	Time taken: 7925.205075740814 s

Processing sessid_52_P1_sid_12
	Dataframe length: 20088
	Time taken: 7987.160039901733 s

Processing sessid_52_P2_sid_08
	Dataframe length: 20088
	Time taken: 8053.369354486465 s

Processing sessid_53_P1_sid_59
	Dataframe l