# Data processing

## Imports

In [69]:
import pandas as pd
import random

## Data preparation
Iterate over each participant and, for each expression, keep only the first 5 frames (relabelled as "Neutral") and the 30 rows up to and including the row where `Apex == True`.

In [70]:
def first_20(df):
    """Return the opening frames of ``df`` relabelled as neutral.

    Despite the name, the rows kept are those whose ``Frame`` value is
    strictly below 5. Their ``Expression`` column is overwritten with
    ``"Neutral"``. The result is a new DataFrame; ``df`` is not modified.
    """
    is_opening_frame = df['Frame'] < 5
    # ``assign`` works on a copy, so the caller's frame keeps its labels.
    return df.loc[is_opening_frame].assign(Expression="Neutral")

# Get the 30 rows leading up to each "Apex" == True row (apex row included)
def first_20_before_apex(df, window=30):
    """Return the rows leading up to every apex row of ``df``.

    For each row where ``Apex`` is True, the apex row itself and the
    ``window - 1`` rows immediately before it are selected (30 rows in total
    with the default). Rows are selected by position, returned in their
    original order, and never duplicated when windows overlap.

    Windows are clamped at the start of the frame: an apex closer than
    ``window`` rows to the top yields fewer rows instead of producing
    negative positions, which ``iloc`` would interpret as offsets from the
    end of the frame.

    Args:
        df: DataFrame with an ``Apex`` column.
        window: Number of rows to keep per apex, counting the apex row.

    Returns:
        The selected rows of ``df``, in positional order.
    """
    # Positions (not index labels) of the apex rows, so the selection is
    # correct whatever index the frame carries.
    apex_positions = df["Apex"].eq(True).to_numpy().nonzero()[0]

    selected = set()
    for position in apex_positions:
        position = int(position)
        start = max(position - window + 1, 0)  # clamp at the first row
        selected.update(range(start, position + 1))

    return df.iloc[sorted(selected)]

# Set the subject number column to the given value
def add_correct_subject_number(df, nb):
    """Write ``nb`` into every row of the ``Subject number`` column.

    The column is created if missing and overwritten otherwise. ``df`` is
    modified in place and the same object is returned.
    """
    column = "Subject number"
    df[column] = nb
    return df

In [71]:
def _load_filtered_blendshapes(path, expression):
    """Read one blendshape CSV and keep its neutral and pre-apex rows."""
    frame = pd.read_csv(path)
    # Renumber rows 0..n-1 after filtering on the requested expressions.
    frame = frame[frame["Expression"].isin(expression)].reset_index(drop=True)
    return pd.concat([first_20(frame), first_20_before_apex(frame)])


def get_data(subjects_eyes, subjects_lips, expression, subjects_number):
    """Load and filter the eye and lip blendshape data of several subjects.

    The i-th entry of each subject list describes one output subject: eye
    data is read from ``Data/subject <subjects_eyes[i]>/Eyes/eye_blendshapes.csv``,
    lip data from ``Data/subject <subjects_lips[i]>/Lips/lips_blendshapes.csv``,
    and both are tagged with ``subjects_number[i]`` in ``Subject number``.

    Args:
        subjects_eyes: Subject ids whose eye recordings are read.
        subjects_lips: Subject ids whose lip recordings are read.
        expression: Expression labels to keep; other rows are discarded.
        subjects_number: Subject number written into each loaded frame.

    Returns:
        A ``(eyes, lips)`` tuple of DataFrames, each the concatenation of the
        per-subject frames in input order.

    Raises:
        ValueError: If the three subject lists do not have the same length.
    """
    # Validate up front (and not with ``assert``, which is stripped under -O)
    # so a mismatch is reported before any file is read.
    if not (len(subjects_eyes) == len(subjects_lips) == len(subjects_number)):
        raise ValueError(
            "subjects_eyes, subjects_lips and subjects_number must have the same length"
        )

    eyes_li = []
    lips_li = []
    for eye_subject, lip_subject, number in zip(subjects_eyes, subjects_lips, subjects_number):
        eyes_path = "Data/subject {}/Eyes/eye_blendshapes.csv".format(eye_subject)
        lips_path = "Data/subject {}/Lips/lips_blendshapes.csv".format(lip_subject)

        eyes = _load_filtered_blendshapes(eyes_path, expression)
        lips = _load_filtered_blendshapes(lips_path, expression)

        eyes_li.append(add_correct_subject_number(eyes, number))
        lips_li.append(add_correct_subject_number(lips, number))

    return pd.concat(eyes_li), pd.concat(lips_li)
    
# Expressions kept from every recording.
expression = ["Happy", "Sad", "Angry", "Fear", "Disgust", "Surprise"]
# Subjects 0 to 26, skipping subject 7.
subjects = [subject for subject in range(27) if subject != 7]
# Each subject's eye and lip data are paired with each other and keep their own id.
Eyes_df, Lips_df = get_data(
    subjects_eyes=subjects,
    subjects_lips=subjects,
    expression=expression,
    subjects_number=subjects,
)

## Data augmentation

In [72]:
# Fixed seed so the eye/lip pairing below is reproducible.
random.seed(0)
# One new subject id per original subject, continuing after the highest
# original id (27..52 for the subject list above).
first_augmented_id = max(subjects) + 1
subjects_augmented = list(range(first_augmented_id, first_augmented_id + len(subjects)))
# Pair each subject's eye data with the lip data of a randomly drawn subject.
subjects_shuffled = random.sample(subjects, len(subjects))
Eyes_df2, Lips_df2 = get_data(
    subjects_eyes=subjects,
    subjects_lips=subjects_shuffled,
    expression=expression,
    subjects_number=subjects_augmented,
)

# DataFrame.append is deprecated and was removed in pandas 2.0;
# pd.concat with ignore_index=True gives the same renumbered result.
Eyes_df = pd.concat([Eyes_df, Eyes_df2], ignore_index=True)
Lips_df = pd.concat([Lips_df, Lips_df2], ignore_index=True)

## Remove unused features

In [73]:
# Drop the columns that are not used as features. The expression and subject
# number labels are kept on the eyes frame only.
lips_columns_to_drop = ["Subject number", "Expression", "Multiple expression", "Frame", "Apex"]
eyes_columns_to_drop = ["Multiple expression", "Frame", "Apex", "Max", "None"]
Lips_df = Lips_df.drop(columns=lips_columns_to_drop)
Eyes_df = Eyes_df.drop(columns=eyes_columns_to_drop)

## Merge lips and eyes together

In [74]:
def merge_df(df1, df2):
    """Place the columns of ``df1`` and ``df2`` side by side.

    Rows are matched on their index labels by ``pd.concat``; the result is
    then given a fresh 0..n-1 index.
    """
    side_by_side = pd.concat([df1, df2], axis="columns")
    return side_by_side.reset_index(drop=True)


# Combine the eye and lip features column-wise and write the result to disk.
output_path = "final_dataset.csv"
expr_df = merge_df(df1=Eyes_df, df2=Lips_df)
expr_df.to_csv(output_path)