In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import glob
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.sandbox.regression.predstd import wls_prediction_std
from sklearn.linear_model import LinearRegression

## Data loading

In [2]:


# Image table: only the IAPS id and the mean valence/arousal columns are loaded.
dfImages = pd.read_csv(
    '../data/IAPS.csv',
    sep=';',
    usecols=['IAPS', 'ValenceMean', 'ArousalMean'],
)

# Sound table: only the sound number and the mean valence/arousal columns are loaded.
dfSounds = pd.read_csv(
    '../data/IADS2.csv',
    sep=';',
    usecols=['Number', 'ValenceMean', 'ArousalMean'],
)

# glob.glob returns paths in arbitrary (filesystem-dependent) order. Sorting
# makes the concatenated row order -- and therefore dfStudy's index and the
# rows shown by .head() -- identical on every run.
studyDatafiles = sorted(glob.glob('../data/procedura/*.txt'))
if not studyDatafiles:
    # Name the missing input explicitly instead of letting pd.concat fail
    # with its generic "No objects to concatenate" error.
    raise FileNotFoundError(
        "No study data files found matching '../data/procedura/*.txt'"
    )

# The study files are tab-separated and read with header=None, so the column
# names are supplied here.
studyColumns = ['ID', 'nn', 'Condition', 'SpecificCondition', 'Number', 'IAPS',
                'Widget', 'Response', 'Delay', 'ApperanceTimestamp']

dfStudy = pd.concat(
    [pd.read_csv(filename, sep='\t', header=None, names=studyColumns)
     for filename in studyDatafiles],
    axis=0,
    ignore_index=True,
)

# Drop rows whose Condition is 'con' and rows recorded with the 'emoscale1'
# widget. .copy() makes dfStudy an independent frame, so the column
# assignments performed later do not raise SettingWithCopyWarning.
dfStudy = dfStudy[
    (dfStudy.Condition != 'con') & (dfStudy.Widget != 'emoscale1')
].copy()
                                     


## Preprocessing

In [3]:


# Convert the 'Number' column to a numeric dtype (pd.to_numeric raises on
# values it cannot parse).
# NOTE(review): presumably needed so the values compare equal to
# dfSounds.Number in the sound lookup -- confirm.
# .assign rebinds dfStudy to a new frame instead of assigning a column in
# place; in-place assignment on a frame obtained by boolean filtering triggers
# SettingWithCopyWarning.
dfStudy = dfStudy.assign(Number=pd.to_numeric(dfStudy['Number']))



def convertResponseStringIntoColumns(string):
    """Parse a bracketed response string into a list of scaled numbers.

    The first and last characters of ``string`` (the enclosing brackets) are
    dropped and the remainder is split on commas, e.g. ``'(0.5, -0.25)'``.

    Parameters
    ----------
    string : str
        Response text with at least two comma-separated numeric fields.

    Returns
    -------
    list
        The comma-separated parts, with the first two converted to ``float``
        and multiplied by 10. Any further parts are left as strings.

    Raises
    ------
    ValueError
        If one of the first two parts is not a valid float literal.
    IndexError
        If the string contains no comma.
    """
    parts = string[1:-1].split(',')
    # float() already ignores surrounding whitespace. The earlier code dropped
    # the first character of the second field unconditionally, which silently
    # lost the sign or the leading digit whenever the comma was not followed
    # by exactly one space (e.g. '(0.5,-0.25)').
    # NOTE(review): the factor 10 presumably rescales the widget output to the
    # range of the rating scales -- confirm.
    parts[0] = float(parts[0]) * 10
    parts[1] = float(parts[1]) * 10
    return parts

def findExpectedImageReaction(iaps):
    """Look up the mean valence and arousal of an image in ``dfImages``.

    ``iaps`` is converted with ``str`` and compared against the ``IAPS``
    column. When several rows match, their ``ValenceMean`` and ``ArousalMean``
    values are each averaged.

    Returns ``[valence, arousal]``, or ``[np.nan, np.nan]`` when no row
    matches.
    """
    matches = dfImages[dfImages.IAPS == str(iaps)]
    valence_values = matches.ValenceMean.tolist()
    if not valence_values:
        return [np.nan, np.nan]

    arousal_values = matches.ArousalMean.tolist()
    mean_valence = sum(valence_values) / len(valence_values)
    mean_arousal = sum(arousal_values) / len(arousal_values)
    return [mean_valence, mean_arousal]

def findExpectedSoundReaction(number):
    """Look up the mean valence and arousal of a sound in ``dfSounds``.

    ``number`` is compared as-is against the ``Number`` column. When several
    rows match, their ``ValenceMean`` and ``ArousalMean`` values are each
    averaged.

    Returns ``[valence, arousal]``, or ``[np.nan, np.nan]`` when no row
    matches.
    """
    matches = dfSounds[dfSounds.Number == number]
    valence_values = matches.ValenceMean.tolist()
    if not valence_values:
        return [np.nan, np.nan]

    arousal_values = matches.ArousalMean.tolist()
    mean_valence = sum(valence_values) / len(valence_values)
    mean_arousal = sum(arousal_values) / len(arousal_values)
    return [mean_valence, mean_arousal]

def applySpecificCondition(condition, i):
    """Translate one sign character of ``condition`` into ``-1`` or ``1``.

    ``i == 0`` inspects ``condition[1]`` and ``i == 1`` inspects
    ``condition[3]``. The result is ``-1`` when the inspected character is
    ``'-'`` and ``1`` in every other case (including any other value of ``i``).
    """
    is_negative = condition[1] == '-' and i == 0
    if not is_negative:
        is_negative = condition[3] == '-' and i == 1
    return -1 if is_negative else 1


    
def getValence(array):
    """Return the first element of ``array``, the valence slot of a reaction pair."""
    valence = array[0]
    return valence

def getArousal(array):
    """Return the second element of ``array``, the arousal slot of a reaction pair."""
    arousal = array[1]
    return arousal
    
# Parse every raw response string into a [valence, arousal] list.
dfStudy['ActualReaction'] = dfStudy['Response'].apply(convertResponseStringIntoColumns)

# Look up the expected [valence, arousal] pair for each row's image and sound.
dfStudy['ExpectedImageReaction'] = dfStudy['IAPS'].apply(findExpectedImageReaction)
dfStudy['ExpectedSoundReaction'] = dfStudy['Number'].apply(findExpectedSoundReaction)

# +1 / -1 flags derived from SpecificCondition: i=0 -> image, i=1 -> sound.
dfStudy['ImageCondition'] = dfStudy['SpecificCondition'].apply(applySpecificCondition, i=0)
dfStudy['SoundCondition'] = dfStudy['SpecificCondition'].apply(applySpecificCondition, i=1)

# Split each [valence, arousal] list column into two scalar columns named
# <prefix>Valence and <prefix>Arousal.
for prefix, reactionColumn in [('Image', 'ExpectedImageReaction'),
                               ('Sound', 'ExpectedSoundReaction'),
                               ('Actual', 'ActualReaction')]:
    dfStudy[prefix + 'Valence'] = dfStudy[reactionColumn].apply(getValence)
    dfStudy[prefix + 'Arousal'] = dfStudy[reactionColumn].apply(getArousal)

modelColumns = [
    'ImageCondition', 'ImageValence', 'ImageArousal',
    'SoundCondition', 'SoundValence', 'SoundArousal',
    'ActualValence', 'ActualArousal',
]

# Keep only the columns used by the regressions and drop every row with a
# missing value (e.g. rows whose image or sound lookup returned NaN).
df = dfStudy[modelColumns].dropna()
df.head()


Unnamed: 0,ImageCondition,ImageValence,ImageArousal,SoundCondition,SoundValence,SoundArousal,ActualValence,ActualArousal
0,-1,1.91,5.6,1,6.47,7.32,0.843677,1.573651
3,1,6.27,6.06,-1,2.04,6.87,-4.317224,1.926666
4,1,5.89,6.21,-1,1.63,7.79,-0.771283,2.72818
6,1,6.44,7.07,-1,2.22,7.52,0.085718,3.900079
7,1,6.22,6.62,-1,1.65,7.61,-3.004779,3.631299


## Regression Model



In [4]:


# One estimator is refitted for every specification below; after the loop it
# holds the last fit (ImageArousal + SoundArousal -> ActualArousal).
reg = LinearRegression()

# Each entry: (header line to print, predictor columns, target column(s)).
# A list of targets fits both outcomes at once (one coefficient row per
# target); a single column name fits that outcome only.
# NOTE(review): in the recorded output the ImageCondition and SoundCondition
# coefficients are exact negatives of each other, which suggests the two
# columns are perfectly collinear in this data -- confirm before interpreting
# them separately.
regressionSpecs = [
    ('Image Condition ImageValence ImageArousal SoundCondition SoundValence SoundArousal || ActualValence || ActualArousal',
     ['ImageCondition', 'ImageValence', 'ImageArousal',
      'SoundCondition', 'SoundValence', 'SoundArousal'],
     ['ActualValence', 'ActualArousal']),
    ('ImageValence ImageArousal SoundValence SoundArousal || ActualValence || ActualArousal',
     ['ImageValence', 'ImageArousal', 'SoundValence', 'SoundArousal'],
     ['ActualValence', 'ActualArousal']),
    ('ImageCondition ImageValence SoundCondition SoundValence || ActualValence',
     ['ImageCondition', 'ImageValence', 'SoundCondition', 'SoundValence'],
     'ActualValence'),
    ('ImageValence SoundValence || ActualValence',
     ['ImageValence', 'SoundValence'],
     'ActualValence'),
    ('ImageCondition ImageArousal SoundCondition SoundArousal || ActualArousal',
     ['ImageCondition', 'ImageArousal', 'SoundCondition', 'SoundArousal'],
     'ActualArousal'),
    ('ImageArousal SoundArousal || ActualArousal',
     ['ImageArousal', 'SoundArousal'],
     'ActualArousal'),
]

for header, predictors, targets in regressionSpecs:
    reg.fit(df[predictors], df[targets])
    print(header)
    print(reg.coef_)





Image Condition ImageValence ImageArousal SoundCondition SoundValence SoundArousal || ActualValence || ActualArousal
[[ 4.51016952 -0.59146393  0.82401674 -4.51016952  3.06597205  0.23227946]
 [-0.53123328 -0.14294641  0.42247949  0.53123328 -0.48367879  0.70934449]]
ImageValence ImageArousal SoundValence SoundArousal || ActualValence || ActualArousal
[[ 1.70079038  0.8088175   1.18945502  0.17757559]
 [-0.41294109  0.42426974 -0.262652    0.71578783]]
ImageCondition ImageValence SoundCondition SoundValence || ActualValence
[ 3.319829   -0.05649586 -3.319829    2.49296978]
ImageValence SoundValence || ActualValence
[1.63568656 1.11984701]
ImageCondition ImageArousal SoundCondition SoundArousal || ActualArousal
[-0.22222942  0.45323944  0.22222942  0.89041624]
ImageArousal SoundArousal || ActualArousal
[0.57706722 0.6971077 ]
