In [None]:
import os
# Make the parent directory (the project root) the working directory so that
# relative paths used later resolve from there.
os.chdir(os.pardir)
# Last expression of the cell: shows the resulting working directory.
os.getcwd()

In [None]:
import os
import subprocess
import codecs
import pandas as pd
import emrecdem as erd
import textgrid
import magic

import importlib
# Reload emrecdem (re-executes the module's code); presumably so that local
# edits to the package are picked up without restarting the session.
importlib.reload(erd)

# Directory holding the per-recording topics files (*.txt) read by process_file.
TOPICS_DIR = "/Users/peter/repos/esc/data/Deniece/Textgrids_Timestamps_Participants_Face"
# Directory holding the OpenFace per-frame CSVs (<video_title>_features.csv).
FEATURES_DIR = "/Users/peter/repos/esc/data/Deniece/openface-features"
# Output directory for the per-topic aggregated CSVs written by process_file.
AGG_FEATURES_DIR = "/Users/peter/repos/esc/data/Deniece/aggregated-features"
# Root of the video database; videos are looked up under
# <participant>/<session>/Video/Single-angle below this directory.
VIDEO_DIR = '/Volumes/vera_emrec/MEMOA_database'
# Alternative location of the same database on another machine.
#VIDEO_DIR = '/media/peter/Data/esc/emrecdem/MEMOA_database'
# Directory holding the silence annotations (*_Mparticipant_SIL.TextGrid).
SILENCES_DIR = '/Users/peter/repos/esc/data/Deniece/Silence_textgrids_N17'

def process_video(video_dir, video_title):
    """Print the shell command that processes one video.

    The command is only printed, never executed: the notebook shell escape
    that used to run it (``!{cmd}``) is disabled.

    Args:
        video_dir: Directory that contains the video.
        video_title: Title of the video, passed as the second argument.
    """
    # The script is addressed relative to the current working directory.
    command_line = "./process_video.sh {} {}".format(video_dir, video_title)
    print(command_line)
    
def parseTime(s):
    """Convert a clock-style timestamp string into seconds.

    Accepted forms are ``SS[.fff]``, ``MM:SS[.fff]`` and ``HH:MM:SS[.fff]``.
    Previously only the first two colon-separated fields were read, so an
    ``H:MM:SS`` value silently lost its seconds and treated hours as minutes.

    Args:
        s: Timestamp string, e.g. ``"1:30"``, ``"0:05.5"`` or ``"1:02:03"``.

    Returns:
        The total number of seconds as a float.

    Raises:
        ValueError: If a field is not numeric or there are more than three
            colon-separated fields.
    """
    *leading, seconds = s.split(':')
    if len(leading) > 2:
        raise ValueError(f"Unsupported timestamp format: {s!r}")

    # Fold hours (if present) into minutes; every leading field is integral.
    minutes = 0
    for field in leading:
        minutes = 60 * minutes + int(field)

    # Only the last field may carry a fractional part.
    return 60 * minutes + float(seconds)
    
def process_file(topics_file):
    """Aggregate OpenFace features per topic for one recording.

    Steps:
      1. Derive participant/session/experiment/memory from the file name.
      2. Print the video-processing command (see ``process_video``).
      3. Load the per-frame OpenFace CSV and the silence TextGrid, and flag
         each frame as silent or not.
      4. Compute the mean/std over the silent frames; these are handed to
         ``erd.aggregate_frames`` (used for z-score calculation).
      5. For every line of the topics file that starts with "timestamp",
         aggregate the frames between its start and end time.
      6. Write the result to ``<recording>_aggregated.csv`` in
         AGG_FEATURES_DIR.

    Args:
        topics_file: File name (not a path) inside TOPICS_DIR. It must consist
            of exactly five underscore-separated fields, e.g.
            ``P12_S2_LSB_HM1_topics.txt``.

    Returns:
        DataFrame with the rows produced by ``erd.aggregate_frames`` plus the
        columns ``start_time``, ``end_time``, ``duration``, ``topic_index``
        and ``topic_label``. Empty if the file has no timestamp lines.

    Raises:
        ValueError: If the file name does not split into five fields.
        IndexError: If a frame timestamp is not covered by any silence
            interval (raised by the ``.iat[0]`` lookup in ``isSilent``).
    """
    participant, session, experiment, memory, _ = topics_file.split("_")
    recording_id = "_".join([participant, session, experiment, memory])

    # Construct video path
    video_title = f'{recording_id}_Cfront'
    video_dir = os.path.join(VIDEO_DIR, participant, session, 'Video', 'Single-angle')

    # Process video with openface (currently this only prints the command)
    process_video(video_dir, video_title)

    # Load openface features; skipinitialspace strips the blank that follows
    # each comma so that column names such as 'timestamp' match exactly.
    openface_file = os.path.join(FEATURES_DIR, video_title + '_features.csv')
    openface_features = pd.read_csv(openface_file, skipinitialspace=True)

    # Load silences. The TextGrid encoding is detected from the raw bytes
    # first; the context manager makes sure the handle is closed again.
    silences_file = os.path.join(SILENCES_DIR, f'{recording_id}_Mparticipant_SIL.TextGrid')
    with open(silences_file, 'rb') as handle:
        blob = handle.read()
    encoding = magic.Magic(mime_encoding=True).from_buffer(blob)
    silences_tgrid = textgrid.read_textgrid(silences_file, encoding)
    silences = pd.DataFrame(silences_tgrid)

    # Add silences in column
    def isSilent(timestamp):
        # Intervals are half-open: start <= timestamp < stop.
        filt = (silences['start'] <= timestamp) & (timestamp < silences['stop'])
        return silences.loc[filt, 'name'].iat[0] == 'silent'

    # Only the timestamp is needed, so map over that column rather than
    # building a full row object for every frame.
    openface_features['silence'] = openface_features['timestamp'].apply(isSilent)

    # Aggregate values (used for z-score calculation)
    silent_features = openface_features[openface_features['silence'] == True]
    video_aggregates = silent_features.aggregate(['mean', 'std'])

    # Debug aid: uncomment to dump the per-frame table with the silence flag.
    # openface_features.to_csv(os.path.join(AGG_FEATURES_DIR, recording_id + "_raw.csv"), float_format='%g', index=False)

    # Process topics from file
    aggregated_features = pd.DataFrame()
    topics_path = os.path.join(TOPICS_DIR, topics_file)
    with codecs.open(topics_path, 'r', encoding='cp1252') as topics:
        for line in topics:
            # Only lines that start with "timestamp" describe a topic.
            if not line.lower().startswith('timestamp'):
                continue

            # Line layout: <keyword> <index>. <label words...> <start> <end>
            fields = line.strip().split(' ')
            start_time = parseTime(fields[-2])
            end_time = parseTime(fields[-1])
            duration = end_time - start_time
            topic_index = int(fields[1].replace('.', ''))
            topic_label = ' '.join(fields[2:-2])

            # Extract frames for this topic/fragment
            fragment_frames = erd.extract_fragment(openface_features, start_time, end_time)

            # Aggregate the values
            aggregated_row = erd.aggregate_frames(fragment_frames, video_aggregates)

            # Combine results with information for this topic
            aggregated_row['start_time'] = start_time
            aggregated_row['end_time'] = end_time
            aggregated_row['duration'] = duration
            aggregated_row['topic_index'] = topic_index
            aggregated_row['topic_label'] = topic_label

            aggregated_features = pd.concat([aggregated_features, aggregated_row], ignore_index=True)

    output_path = os.path.join(AGG_FEATURES_DIR, recording_id + "_aggregated.csv")
    print(output_path)
    aggregated_features.to_csv(output_path, float_format='%g', index=False)
    return aggregated_features
    

# Run the pipeline for a single recording. The commented-out calls below are
# alternative topics files that can be swapped in.
# print(os.getcwd())
results = process_file('P12_S2_LSB_HM1_topics.txt')
#results = process_file('P1_S2_LSB_HM1_topics.txt')
#process_file('P2_S2_LSB_HM1_topics.txt')
#process_file('P11_S2_LSB_SM1_topics.txt')
#print(results[['AU04_int_freq', 'AU04_pres_freq']])
# Last expression of the cell: shows the aggregated table returned above.
results

In [None]:
# Batch run: process every .txt topics file in TOPICS_DIR in sorted name order.
files = sorted(os.listdir(TOPICS_DIR))
for file in files:
    # Skip anything that is not a topics text file.
    if not file.endswith('.txt'):
        continue
    print("Processing", file)
    aggregated_features = process_file(file)