In [1]:
import pandas as pd
import numpy as np
import glob

## Data loading

In [2]:


dfImages = pd.read_csv('../data/IAPS.csv',
                       sep=';',
                       usecols=['IAPS', 'ValenceMean', 'ArousalMean']
                      )



dfSounds = pd.read_csv('../data/IADS2.csv', sep=';', usecols=['Number', 'ValenceMean', 'ArousalMean'])


studyDatafiles = glob.glob('../data/procedura/*.txt')

li = []

for filename in studyDatafiles:
    df = pd.read_csv(filename, sep='\t', header=None, names=['ID', 'nn', 'Condition', 'SpecificCondition', 'Number', 'IAPS', 'Widget', 'Response', 'Delay', 'ApperanceTimestamp'])
    li.append(df)
                         
dfStudy = pd.concat(li, axis=0, ignore_index=True)

dfStudy = dfStudy[dfStudy.Condition != 'con']
dfStudy = dfStudy[dfStudy.Widget != 'emoscale1']
                                     


## Preprocessing

In [3]:


dfStudy['Number'] = pd.to_numeric(dfStudy['Number'])



def convertResponseStringIntoColumns(string, i):
    string = string[1:-1]
    array = (string.split(','))
    array[0] = float(array[0])
    array[1] = float(array[1][1:])
    return array[i]
    
    
dfStudy['Valence'] = dfStudy.Response.apply(func = convertResponseStringIntoColumns, i = 0)
dfStudy['Arousal'] = dfStudy.Response.apply(func = convertResponseStringIntoColumns, i = 1)

dfStudy.Valence = dfStudy.Valence * 10
dfStudy.Arousal = dfStudy.Arousal * 10

dfStudy = dfStudy[['SpecificCondition', 'IAPS', 'Number', 'Valence', 'Arousal']]



             

In [4]:

def findSoundValence(number):
    list = dfSounds[dfSounds.Number == number].ValenceMean.tolist()
    return list.pop() if list else np.nan
def findSoundArousal(number):
    list = dfSounds[dfSounds.Number == number].ArousalMean.tolist()
    return list.pop() if list else np.nan
def findImageValence(iaps):
    list = dfImages[dfImages.IAPS == str(iaps)].ValenceMean.tolist()
    return list.pop() if list else np.nan
def findImageArousal(iaps):
    list = dfImages[dfImages.IAPS == str(iaps)].ArousalMean.tolist()
    return list.pop() if list else np.nan
    
  




dfStudy['SoundValence'] = dfStudy.Number.apply(findSoundValence)
dfStudy['SoundArousal'] = dfStudy.Number.apply(findSoundValence)
dfStudy['ImageValence'] = dfStudy.IAPS.apply(findImageValence)
dfStudy['ImageArousal'] = dfStudy.IAPS.apply(findImageArousal)





In [5]:
print(dfStudy.dtypes)
dfStudy

SpecificCondition     object
IAPS                  object
Number                 int64
Valence              float64
Arousal              float64
SoundValence         float64
SoundArousal         float64
ImageValence         float64
ImageArousal         float64
dtype: object


Unnamed: 0,SpecificCondition,IAPS,Number,Valence,Arousal,SoundValence,SoundArousal,ImageValence,ImageArousal
0,p-s+,3101,215,0.843677,1.573651,6.47,6.47,1.91,5.60
3,p+s-,4490,260,-4.317224,1.926666,2.04,2.04,6.27,6.06
4,p+s-,4647,277,-0.771283,2.728180,1.63,1.63,5.89,6.21
6,p+s-,4800,422,0.085718,3.900079,2.22,2.22,6.44,7.07
7,p+s-,4697,290,-3.004779,3.631299,1.65,1.65,6.22,6.62
9,p-s+,3053,204,-4.581738,3.844299,5.68,5.68,1.31,6.91
12,p+s-,4668,288,-0.995416,3.265729,2.71,2.71,6.67,7.13
16,p+s-,8186,289,0.000000,2.187518,3.08,3.08,7.01,6.84
18,p-s+,6250,351,0.000000,1.368903,7.32,7.32,2.83,6.54
19,p-s+,3195,601,-0.982904,1.706378,6.53,6.53,2.06,6.36
