In [159]:
import ast
import pandas as pd
import matplotlib.pyplot as plt
import re

In [160]:
answer_regex = re.compile('\[([\-\+\.0-9e]+),\s*([\-\+\.0-9e]+)\]')
import math

def handle_valence (row):
    if row['widget_type'] == "emospace1" :
        return answer_regex.match(row["ans"])[1]
    else:
        return -1
    

def handle_arousal (row):
    if row['widget_type'] == "emospace1" :
        return answer_regex.match(row["ans"])[2]
    else:
        return -1
    
def handle_ans_time (row):
    if math.isnan(row['ans_time']):
        return 100.0
    else:
        return row['ans_time']

In [161]:
meta_data = pd.read_csv("BIRAFFE-metadata.csv", sep=";")
meta_data = meta_data.drop(["PROCEDURE-BEGIN-TIMESTAMP", "PROCEDURE-END-TIMESTAMP", "BIOSIGS-BEGIN-TIMESTAMP", "BIOSIGS-END-TIMESTAMP"], axis=1)
meta_data = meta_data.dropna(subset=['OPENNESS', 'CONSCIENTIOUSNESS', 'EXTRAVERSION', 'AGREEABLENESS', 'NEUROTICISM', 'NEO-FFI', 'PROCEDURE'])
meta_data = meta_data.rename(columns={"ID": "subject_id", 
                                      "AGE": "age",
                                      "OPENNESS": "openness",
                                      "CONSCIENTIOUSNESS": "conscientiousness",
                                      "EXTRAVERSION": "extraversion",
                                      "AGREEABLENESS": "agreeableness",
                                      "NEUROTICISM": "neurocism",
                                      "NEO-FFI": "neo_ffi",
                                      "BIOSIGS": "biosigs",
                                      "PROCEDURE": "procedure",
                                      "SPACE": "space"
                                     })
meta_data.head()


Unnamed: 0,subject_id,age,SEX,openness,conscientiousness,extraversion,agreeableness,neurocism,neo_ffi,biosigs,procedure,space,FREUD,PHOTOS
0,1107,21.0,F,4.0,6.0,10.0,4.0,3.0,Y,Y,Y,Y,Y,Y
1,1153,22.0,F,5.0,3.0,8.0,3.0,8.0,Y,Y,Y,Y,Y,Y
2,1233,21.0,F,6.0,8.0,7.0,10.0,2.0,Y,,Y,Y,Y,Y
5,1318,21.0,M,3.0,6.0,5.0,5.0,2.0,Y,Y,Y,Y,Y,Y
6,1400,22.0,M,4.0,6.0,5.0,6.0,6.0,Y,Y,Y,Y,Y,Y


In [162]:
iads_data = pd.read_csv("IADS-database.csv", sep=";")
iads_data = iads_data.rename(columns={"Number": "iads_id", "ValenceMean": "sound_valance", "ArousalMean": "sound_arousal"})
iads_data = iads_data.drop(["Sound", "ValenceSD", "ArousalSD"], axis=1)
iads_data.head()



Unnamed: 0,iads_id,sound_valance,sound_arousal
0,102,4.63,4.91
1,104,4.96,5.37
2,105,2.88,6.4
3,106,3.37,6.39
4,107,5.47,5.85


In [163]:
iaps_data = pd.read_csv("IAPS-database.csv", sep=";")
iaps_data = iaps_data.rename(columns={"IAPS": "iaps_id", "ValenceMean": "picture_valance", "ArousalMean": "picture_arousal"})
iaps_data = iaps_data.drop(["ValenceSD", "ArousalSD", "set", "Description"], axis=1)
iaps_data.astype({'iaps_id': 'int32'})
iaps_data.head()



Unnamed: 0,iaps_id,picture_valance,picture_arousal
0,1019.0,3.95,5.77
1,1022.0,4.26,6.02
2,1026.0,4.09,5.61
3,1030.0,4.3,5.46
4,1033.0,3.87,6.13


In [164]:

def preprocess_face_data(subject_id):
    face_data = pd.read_csv(f'BIRAFFE-photo/SUB{subject_id}-Face.csv', sep=";")
    face_data = face_data.rename(columns={"IAPS-ID": "iaps_id", "IADS-ID": "iads_id"})
    face_data = face_data.dropna(subset=['iads_id', 'iaps_id'])
    face_data = face_data.dropna(subset=['ANGER', 'CONTEMPT', 'DISGUST', 'FEAR', 'HAPPINESS', 'NEUTRAL', 'SADNESS',	'SURPRISE'])

    face_data = face_data.rename(columns={"ANGER": "anger", 
                                          "CONTEMPT": "contempt", 
                                          "DISGUST": "disgust", 
                                          "FEAR": "fear", 
                                          "HAPPINESS": "happiness",
                                          "NEUTRAL": "neutral",
                                          "SADNESS": "sadness",
                                          "SURPRISE": "surprise"
                                         
                                         })

    
    merged = face_data.merge(iaps_data, how='left', on='iaps_id')
    merged = merged.merge(iads_data, how='left', on='iads_id', suffixes=('_picture', '_sound'))
    
    grouped = merged.groupby(['iads_id', 'iaps_id']).mean()
    grouped = grouped.drop(columns=["GAME-TIMESTAMP", "PICTURE-TIMESTAMP"])
    return grouped


In [165]:
def preprocess_answer_data(subject_id):
    answer_data = pd.read_csv(f'BIRAFFE-procedure/SUB{subject_id}-Procedure.csv', sep=";")
    answer_data = answer_data[["IADS-ID", "IAPS-ID", "WIDGET-TYPE", "ANS", "ANS-TIME"]]
    answer_data = answer_data.rename(columns={"IADS-ID": "iads_id", 
                                              "IAPS-ID": "iaps_id", 
                                              "WIDGET-TYPE": "widget_type", 
                                              "ANS": "ans", 
                                              "ANS-TIME": "ans_time"})

    answer_data["emotion_ans"] = -1

    answer_data.loc[answer_data["widget_type"] == "emoscale1", 'emotion_ans'] = answer_data["ans"]

    answer_data['valence_ans'] = answer_data.apply (lambda row: handle_valence(row), axis=1)
    answer_data['arousal_ans'] = answer_data.apply (lambda row: handle_arousal(row), axis=1)
    answer_data['ans_time'] = answer_data.apply (lambda row: handle_ans_time(row), axis=1)
    
    answer_data = answer_data.drop(['ans'], axis=1)

    return answer_data


In [166]:
for index, row in meta_data.iterrows():
    subject_id = row['subject_id']
    data = preprocess_face_data(subject_id)

    
    data['SUB-ID'] = subject_id
    data['openness'] = row['openness']
    data['conscientiousness'] = row['conscientiousness']
    data['extraversion'] = row['extraversion']
    data['agreeableness'] = row['agreeableness']
    data['neurocism'] = row['neurocism']
    
    answer_data = preprocess_answer_data(subject_id)
    data = pd.merge(data, answer_data, how='inner', on=['iads_id','iaps_id'])
    
    data = data.dropna()
    data.to_csv(f'preprocessed/DATA-{subject_id}.csv')
    

In [167]:
import glob

path = 'preprocessed'
all_files = glob.glob(path + "/DATA-*.csv")

li = []

for filename in all_files:
    df = pd.read_csv(filename, index_col=None, header=0)
    li.append(df)

all_dfs = pd.concat(li, axis=0, ignore_index=True)
all_dfs.to_csv('preprocessed/ALL-DATA.csv')



In [168]:
all_dfs.head()

Unnamed: 0.1,Unnamed: 0,iads_id,iaps_id,anger,contempt,disgust,fear,happiness,neutral,sadness,...,openness,conscientiousness,extraversion,agreeableness,neurocism,widget_type,ans_time,emotion_ans,valence_ans,arousal_ans
0,0,104.0,2810.0,0.161789,0.000684,0.0,0.0,0.0,0.834421,0.002947,...,3.0,6.0,5.0,5.0,2.0,emoscale1,1.184649,1.0,-1.0,-1.0
1,1,106.0,6213.0,0.044895,0.000105,0.0,0.0,0.0,0.936842,0.018105,...,3.0,6.0,5.0,5.0,2.0,emoscale1,1.71851,2.0,-1.0,-1.0
2,2,106.0,7405.0,0.029789,0.000211,0.0,0.0,0.0,0.964368,0.005474,...,3.0,6.0,5.0,5.0,2.0,emospace1,1.785309,-1.0,0.483333,0.163721
3,3,109.0,3261.0,0.123,0.000316,0.0,0.0,0.0,0.873474,0.003263,...,3.0,6.0,5.0,5.0,2.0,emospace1,2.653014,-1.0,-0.995714,0.180954
4,4,110.0,6213.0,0.013316,0.000263,0.0,0.0,0.0,0.982737,0.003474,...,3.0,6.0,5.0,5.0,2.0,emoscale1,0.033457,4.0,-1.0,-1.0
