# Run only once! (after OpenFace feature extraction)
###  *DONE*

Naming convention: *sessid_{session id}_P{subject position either 1 or 2}_sid_{subject id}.csv*

In [1]:
#######################################################################################################################
# Project: Deep Virtual Rapport Agent (rapport model)
#
#     Jan Ondras (jo951030@gmail.com)
#     Institute for Creative Technologies, University of Southern California
#     April-October 2019
#
#######################################################################################################################
# Rename the extracted vision features csvs to follow the naming convention: 
#     sessid_{session id}_P{subject position either 1 or 2}_sid_{subject id}.csv
#     It uses the session.xml files from the Mimicry dataset and also the vision_feature_extraction_time_log.txt.
#######################################################################################################################


from os import rename
import glob
import pandas as pd
import xml.etree.ElementTree as ET

# Rename every OpenFace feature csv in the features directory to
#     sessid_{sessid}_{pid}_sid_{sid}.csv
# where sessid comes from the feature extraction log, pid (P1/P2) from the
# original filename and sid from the session.xml of that session.
# Files that already follow the convention are skipped, so the cell can be
# re-run after an interrupted run without crashing.

# Load feature extraction log (contains sessid-filename associations);
# the 'filename' and 'sessid' columns are used below
df = pd.read_csv('/home/ICT2000/jondras/dvra_datasets/mimicry/vision_feature_extraction_time_log.txt') #, header=None)
# print(df['sessid'])

# Set of subject ids
sids = set()

# Iterate over original csv feature files
for old_file_path in sorted(glob.glob('/home/ICT2000/jondras/dvra_datasets/mimicry/vision_features/original_openface_features/*.csv')):
    split_file_path = old_file_path.split('/')
    old_filename = split_file_path[-1]

    # Already renamed by an earlier run: such files have no entry in the log,
    # so only record the subject id (the part after the last '_sid_') and move on
    if old_filename.startswith('sessid_') and '_sid_' in old_filename:
        sids.add(old_filename[:-len('.csv')].rsplit('_sid_', 1)[-1])
        print(f'{old_file_path}\n\t already follows the naming convention, skipped')
        continue

    # Get session ID (sessid), as string.
    # The log lists the video files, so look up the .avi name of this csv.
    video_filename = old_filename[:-3] + 'avi'
    matching_sessids = df.loc[df['filename'] == video_filename, 'sessid']
    if len(matching_sessids) != 1:
        # Fail loudly instead of letting int() choke on an empty/ambiguous Series
        raise Exception(
            f'Expected exactly one log entry for {video_filename}, found {len(matching_sessids)}!'
        )
    # .iloc[0] extracts the scalar; int() on a Series itself is deprecated in pandas
    sessid = f'{int(matching_sessids.iloc[0]):02d}'

    # Get subject position (pid) from the filename
    if 'P1' in old_filename:
        pid = 'P1'
    elif 'P2' in old_filename:
        pid = 'P2'
    else:
        raise Exception('PID not recognized within the csv filename of extracted features!')

    # Get subject ID (sid) of the subject sitting at that position in this session
    session_xml_path = f'/home/ICT2000/jondras/dvra_datasets/mimicry/original_data/sessid_{sessid}/Sessions/{int(sessid)}/session.xml'
    xml_root = ET.parse(session_xml_path).getroot()
    subject = xml_root.find(f"./subject/[@position='{pid}']")
    if subject is None:
        # find() returns None when no element matches
        raise Exception(f'No subject with position {pid} found in {session_xml_path}!')
    sid = f"{int(subject.attrib['id']):02d}"
    sids.add(sid)

    print(f'{sessid} {pid} {sid}')

    # Rename in place (same directory, new filename)
    new_file_path = '/'.join(split_file_path[:-1]) + f'/sessid_{sessid}_{pid}_sid_{sid}.csv'
    rename(old_file_path, new_file_path)
    print(f'{old_file_path}\n\t renamed to: {new_file_path}')

print(f'# unique SIDs: {len(sids)}')

01 P1 09
/home/ICT2000/jondras/datasets/mimicry/features/2010.11.15.10.05.12_P1_FaceFar2_06068_BW1024.csv
	 renamed to: /home/ICT2000/jondras/datasets/mimicry/features/sessid_01_P1_sid_09.csv
01 P2 02
/home/ICT2000/jondras/datasets/mimicry/features/2010.11.15.10.05.12_P2_FaceFar2_06060_BW1024.csv
	 renamed to: /home/ICT2000/jondras/datasets/mimicry/features/sessid_01_P2_sid_02.csv
02 P1 09
/home/ICT2000/jondras/datasets/mimicry/features/2010.11.15.10.33.56_P1_FaceFar2_06068_BW1024.csv
	 renamed to: /home/ICT2000/jondras/datasets/mimicry/features/sessid_02_P1_sid_09.csv
02 P2 17
/home/ICT2000/jondras/datasets/mimicry/features/2010.11.15.10.33.56_P2_FaceFar2_06060_BW1024.csv
	 renamed to: /home/ICT2000/jondras/datasets/mimicry/features/sessid_02_P2_sid_17.csv
03 P1 17
/home/ICT2000/jondras/datasets/mimicry/features/2010.11.15.11.09.07_P1_FaceFar2_06068_BW1024.csv
	 renamed to: /home/ICT2000/jondras/datasets/mimicry/features/sessid_03_P1_sid_17.csv
03 P2 02
/home/ICT2000/jondras/datasets/