# Environment setup

In [176]:
import os
import pandas as pd
import numpy as np
import xml.etree.ElementTree as ET

# Load MMI Database

**Parsing images and AUs from sessions**

In [77]:
# Build `sessions`: session id -> {'aus': [AU numbers as strings], 'img': JPG path}.
# Each session directory is expected to hold one XML annotation file and one
# JPG frame. Both are picked by extension rather than by position, because
# os.listdir() returns entries in arbitrary order and stray files
# (e.g. .DS_Store, Thumbs.db) would otherwise shift the positions.
sessions = {}
sessions_root = './data/mmi/Sessions'
directory_template = sessions_root + '/$session$/'

# Sorted so the insertion order (and therefore the row order of every
# DataFrame built from `sessions`) is the same on every machine.
for session in sorted(os.listdir(sessions_root)):
    directory = directory_template.replace('$session$', session)
    if not os.path.isdir(directory):
        # Ignore stray non-directory entries in the sessions root.
        continue

    files = sorted(os.listdir(directory))
    xml_files = [name for name in files if name.lower().endswith('.xml')]
    jpg_files = [name for name in files if name.lower().endswith(('.jpg', '.jpeg'))]
    if not xml_files or not jpg_files:
        raise FileNotFoundError(
            f'Session {session!r} must contain an XML and a JPG file, found: {files}'
        )

    # Load AUs: the Number attribute of every ActionUnit element in the XML
    xml_filename = directory + xml_files[0]
    root = ET.parse(xml_filename).getroot()
    aus = [au.attrib['Number'] for au in root.iter('ActionUnit')]

    # Load JPG filename
    jpg_filename = directory + jpg_files[0]

    sessions[session] = {
        'aus': aus,
        'img': jpg_filename
    }

assert len(sessions) == 493, f'expected 493 sessions, found {len(sessions)}'
print(f'{len(sessions)} sessions loaded')

493 sessions loaded


**Creating base dataset**

In [158]:
# One row per session, indexed by session id (index named 'session').
df_sessions = (
    pd.DataFrame
    .from_dict(sessions, orient='index')
    .rename_axis('session')
)

# Split on whether the session carries any AU annotation at all:
# unannotated sessions go to df_sessions_null, the rest stay in df_sessions.
has_aus = df_sessions['aus'].map(len) > 0
df_sessions_null = df_sessions[~has_aus]
df_sessions = df_sessions[has_aus]

print(f'df_sessions_null: {df_sessions_null.shape}')
print(f'df_sessions: {df_sessions.shape}')
print()
df_sessions.info()
print()
df_sessions.head()

df_sessions_null: (265, 2)
df_sessions: (228, 2)

<class 'pandas.core.frame.DataFrame'>
Index: 228 entries, 2401 to 2653
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   aus     228 non-null    object
 1   img     228 non-null    object
dtypes: object(2)
memory usage: 5.3+ KB



Unnamed: 0_level_0,aus,img
session,Unnamed: 1_level_1,Unnamed: 2_level_1
2401,"[45, 5]",./data/mmi/Sessions/2401/S001-001.jpg
2402,"[10, 25]",./data/mmi/Sessions/2402/S001-002.jpg
2403,"[10, 11, 25]",./data/mmi/Sessions/2403/S001-003.jpg
2404,"[10, 11, 17, 18, 25, 38]",./data/mmi/Sessions/2404/S001-004.jpg
2405,"[10, 11, 25, 38]",./data/mmi/Sessions/2405/S001-005.jpg


# Machine Learning

**Creating dataset for ML algorithm**

In [178]:
# Long format: one row per (session, AU) pair.
aus_long = df_sessions['aus'].explode()

# Multi-hot encoding: one 0/1 column per AU label, collapsed back to one row
# per session by taking the max over that session's exploded rows.
df_oh = (
    pd.get_dummies(aus_long)
    .groupby(level='session')
    .max()
    .astype('bool')
    .astype('int')
)

# Image path per session, joined with its AU indicator columns on the index.
df_img = df_sessions['img'].drop_duplicates().to_frame()
df_ml = df_img.merge(df_oh, how='inner', left_index=True, right_index=True)

assert len(df_ml) == 228
df_ml.to_csv('./data/df_ml.csv', index=True)
print(f'df_ml: {df_ml.shape}')
print()
df_ml.head()

df_ml: (228, 53)



Unnamed: 0_level_0,img,1,10,11,12,13,14,15,16,17,...,44,45,46,46L,46R,5,6,7,8,9
session,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2401,./data/mmi/Sessions/2401/S001-001.jpg,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,1,0,0,0,0
2402,./data/mmi/Sessions/2402/S001-002.jpg,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2403,./data/mmi/Sessions/2403/S001-003.jpg,0,1,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2404,./data/mmi/Sessions/2404/S001-004.jpg,0,1,1,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
2405,./data/mmi/Sessions/2405/S001-005.jpg,0,1,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [177]:
# Display df_ml as this cell's output (bare last expression -> rich repr).
df_ml

Unnamed: 0_level_0,img,1,10,11,12,13,14,15,16,17,...,44,45,46,46L,46R,5,6,7,8,9
session,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2401,./data/mmi/Sessions/2401/S001-001.jpg,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,1,0,0,0,0
2402,./data/mmi/Sessions/2402/S001-002.jpg,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2403,./data/mmi/Sessions/2403/S001-003.jpg,0,1,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2404,./data/mmi/Sessions/2404/S001-004.jpg,0,1,1,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
2405,./data/mmi/Sessions/2405/S001-005.jpg,0,1,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2625,./data/mmi/Sessions/2625/S014-070.jpg,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2626,./data/mmi/Sessions/2626/S014-071.jpg,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2644,./data/mmi/Sessions/2644/S014-089.jpg,0,0,0,0,0,0,0,0,0,...,0,0,1,0,1,0,0,0,0,0
2645,./data/mmi/Sessions/2645/S014-090.jpg,0,0,0,0,0,0,0,0,0,...,0,0,1,0,1,0,0,0,0,0
