In [1]:
from collections import defaultdict
import pandas as pd
import os


LOGGING_RATE = 0.1

def prep_tracking_data(df):
    """ Preprocess facial tracking data of subject """
    name = df.name
    df = df.drop(["Max", "None", "26"], axis=1)
    df["Time"] = df["Time"].apply(lambda x: pd.to_datetime(x, unit='ms', origin='unix'))  # Convert time to datetime objects
    df = df.set_index("Time")  # Time as index
    df.index = df.index.floor(freq=f"{int(1000 * LOGGING_RATE)}L")  # Round datetime to ticks
    df = df.drop_duplicates()
    df.name = name
    return df

In [2]:
data = []  # All facetracking data
for _, _, files in os.walk(SOURCE_DIR):
    for f in files:
        df_subj = pd.read_csv(fr"{SOURCE_DIR}\{f}", sep='\t')
        name = f.lstrip("subj_").rstrip(".tsv")  # Give dataframe subject id as name
        df_subj.name = name
        data.append(df_subj)

# Combine
new_data = []
for i, df in enumerate(data):
    subj = df.name
    if subj.endswith('_a'):
        stem_df = data[i-1]
        if stem_df.name == subj.rstrip('_a'):
            new_df = stem_df.append(df)
            new_df.name = subj.rstrip('_a')
            new_data.pop()
            new_data.append(new_df)
        else:
            new_data.append(df)
    else:
        new_data.append(df)
data = new_data

print("Unprocessed face tracking data:")
[d.name for d in data]

NameError: name 'SOURCE_DIR' is not defined

In [None]:
data[0].head()

In [None]:
data = list(map(prep_tracking_data, data))
print("Processed face tracking data:")

In [None]:
data[3].iloc[225:230][["Eye_Left_Blink", "Eye_Left_Wide", "Mouth_Smile_Left", "Mouth_Philtrum_Right"]]

In [None]:
tbl = data[3].iloc[225:230][["Eye_Left_Blink", "Eye_Left_Wide", "Mouth_Smile_Left", "Mouth_Philtrum_Right"]].to_latex()
with open("out/result/fs_exmpl.txt", 'w') as f:
    f.write(tbl)

In [None]:
data_by_subj = defaultdict(tuple)  # Paired face tracking data sets
for df in data:
    subj = df.name.split('_')[0]  # e.g. 3_2 -> 3
    data_by_subj[subj] += (df,)
data_by_subj = dict(data_by_subj)

print("Data paired by subject:\n"
      "========================")
for subj, df_pair in data_by_subj.items():
    names = [df.name for df in df_pair]
    print(f"Data for subject {subj}: {names}")  # Sanity check data

In [None]:
from importlib import reload
from facetracking_helpers import calculations

reload(calculations)

for subj, pair in data_by_subj.items():
    if len(pair) == 2:
        pair = calculations.get_overlapping_slice(*pair)
        pair = calculations.sync_pair(*pair)
        data_by_subj[subj] = pair


In [None]:
# Sanity check
for subj, (s1, s2) in data_by_subj.items():
    same_start = s1.index[0] == s2.index[0]
    same_end = s1.index[-1] == s2.index[-1]
    same_len = len(s1.index) == len(s2.index)
    if not same_start: print(f"Data for dyad {subj} does not start at the same time")
    if not same_end: print(f"Data for dyad {subj} does not start at the end time")
    if not same_len: print(f"Data for dyad {subj} does not have same length")


In [None]:
for pair in data_by_subj.values():
    if len(pair) == 2:
        df1, df2 = pair
        df1.to_csv(f"./prepped_data/facetracking/{df1.name}.csv")
        df2.to_csv(f"./prepped_data/facetracking/{df2.name}.csv")

print("Wrote to files")