In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import glob

## Data loading

In [2]:


dfImages = pd.read_csv('../data/IAPS.csv',
                       sep=';',
                       usecols=['IAPS', 'ValenceMean', 'ArousalMean']
                      )



dfSounds = pd.read_csv('../data/IADS2.csv', sep=';', usecols=['Number', 'ValenceMean', 'ArousalMean'])


studyDatafiles = glob.glob('../data/procedura/*.txt')

li = []

for filename in studyDatafiles:
    df = pd.read_csv(filename, sep='\t', header=None, names=['ID', 'nn', 'Condition', 'SpecificCondition', 'Number', 'IAPS', 'Widget', 'Response', 'Delay', 'ApperanceTimestamp'])
    li.append(df)
                         
dfStudy = pd.concat(li, axis=0, ignore_index=True)

dfStudy = dfStudy[dfStudy.Condition != 'con']
dfStudy = dfStudy[dfStudy.Widget != 'emoscale1']
                                     


## Preprocessing

In [3]:


dfStudy['Number'] = pd.to_numeric(dfStudy['Number'])



def convertResponseStringIntoColumns(string, i):
    string = string[1:-1]
    array = (string.split(','))
    array[0] = float(array[0])
    array[1] = float(array[1][1:])
    return array[i]
    
    
dfStudy['Valence'] = dfStudy.Response.apply(func = convertResponseStringIntoColumns, i = 0)
dfStudy['Arousal'] = dfStudy.Response.apply(func = convertResponseStringIntoColumns, i = 1)

dfStudy.Valence = dfStudy.Valence * 10
dfStudy.Arousal = dfStudy.Arousal * 10

dfStudy = dfStudy[['SpecificCondition', 'Number', 'IAPS', 'Valence', 'Arousal']]



             

In [4]:

def findSoundValence(number):
    list = dfSounds[dfSounds.Number == number].ValenceMean.tolist()
    return list.pop() if list else np.nan
def findSoundArousal(number):
    list = dfSounds[dfSounds.Number == number].ArousalMean.tolist()
    return list.pop() if list else np.nan
def findImageValence(iaps):
    list = dfImages[dfImages.IAPS == str(iaps)].ValenceMean.tolist()
    return list.pop() if list else np.nan
def findImageArousal(iaps):
    list = dfImages[dfImages.IAPS == str(iaps)].ArousalMean.tolist()
    return list.pop() if list else np.nan
    



dfStudy['SoundValence'] = dfStudy.Number.apply(findSoundValence)
dfStudy['SoundArousal'] = dfStudy.Number.apply(findSoundValence)
dfStudy['ImageValence'] = dfStudy.IAPS.apply(findImageValence)
dfStudy['ImageArousal'] = dfStudy.IAPS.apply(findImageArousal)







In [5]:
def applySpecificCondition(row, df):
    if row.SpecificCondition[1] == '-':
        row.ImageValence = -row.ImageValence
        row.ImageArousal = -row.ImageArousal
    if row.SpecificCondition[3] == '-':
        row.SoundValence = -row.SoundValence
        row.SoundArousal = -row.SoundArousal
    df.loc[row.name] = row

dfStudy.apply(applySpecificCondition, axis=1, df = dfStudy)

dfStudy = dfStudy[['ImageValence',
                   'ImageArousal',
                   'SoundValence',
                   'SoundArousal',
                   'Valence',
                   'Arousal'
                  ]]


In [6]:
print(dfStudy.dtypes)
dfStudy

ImageValence    float64
ImageArousal    float64
SoundValence    float64
SoundArousal    float64
Valence         float64
Arousal         float64
dtype: object


Unnamed: 0,ImageValence,ImageArousal,SoundValence,SoundArousal,Valence,Arousal
0,-1.91,-5.60,6.47,6.47,0.843677,1.573651
3,6.27,6.06,-2.04,-2.04,-4.317224,1.926666
4,5.89,6.21,-1.63,-1.63,-0.771283,2.728180
6,6.44,7.07,-2.22,-2.22,0.085718,3.900079
7,6.22,6.62,-1.65,-1.65,-3.004779,3.631299
9,-1.31,-6.91,5.68,5.68,-4.581738,3.844299
12,6.67,7.13,-2.71,-2.71,-0.995416,3.265729
16,7.01,6.84,-3.08,-3.08,0.000000,2.187518
18,-2.83,-6.54,7.32,7.32,0.000000,1.368903
19,-2.06,-6.36,6.53,6.53,-0.982904,1.706378
