In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import glob
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.sandbox.regression.predstd import wls_prediction_std
from sklearn.linear_model import LinearRegression

## Data loading

In [2]:


# Per-stimulus mean ratings, read from the IAPS.csv (images) and IADS2.csv
# (sounds) tables. Only the identifier and the two rating columns are kept.
dfImages = pd.read_csv('../data/IAPS.csv',
                       sep=';',
                       usecols=['IAPS', 'ValenceMean', 'ArousalMean'])

dfSounds = pd.read_csv('../data/IADS2.csv',
                       sep=';',
                       usecols=['Number', 'ValenceMean', 'ArousalMean'])

# glob gives no ordering guarantee (it depends on the file system), so sort the
# paths to make the row order of dfStudy reproducible across machines and runs.
studyDatafiles = sorted(glob.glob('../data/procedura/*.txt'))
if not studyDatafiles:
    # Without this guard pd.concat fails with an unhelpful
    # "No objects to concatenate" error, typically because the notebook was
    # started from a different working directory.
    raise FileNotFoundError("No study files match '../data/procedura/*.txt'")

# The study files are tab-separated and have no header row.
studyColumns = ['ID', 'nn', 'Condition', 'SpecificCondition', 'Number',
                'IAPS', 'Widget', 'Response', 'Delay', 'ApperanceTimestamp']

li = [pd.read_csv(filename, sep='\t', header=None, names=studyColumns)
      for filename in studyDatafiles]

dfStudy = pd.concat(li, axis=0, ignore_index=True)

# Drop rows of the 'con' condition and rows recorded with the 'emoscale1'
# widget. The explicit .copy() makes dfStudy an independent frame, so the
# column assignments in later cells do not raise SettingWithCopyWarning.
dfStudy = dfStudy[(dfStudy.Condition != 'con')
                  & (dfStudy.Widget != 'emoscale1')].copy()
                                     


## Preprocessing

In [None]:


# Convert the 'Number' column to a numeric dtype so it can be compared
# against dfSounds.Number in findExpectedSoundReaction.
numericNumber = pd.to_numeric(dfStudy['Number'])
dfStudy['Number'] = numericNumber



def convertResponseStringIntoColumns(string):
    """Parse a bracketed, comma-separated response string into scaled numbers.

    The first and last characters of ``string`` (the enclosing brackets) are
    discarded, the remainder is split on commas, and the first two fields are
    converted to ``float`` and multiplied by 10.

    Whitespace around each field is ignored, so ``"(0.5, 0.25)"`` and
    ``"(0.5,0.25)"`` parse identically. The previous implementation always
    removed the first character of the second field, which silently dropped a
    sign or leading digit whenever the comma was not followed by a space.

    Parameters
    ----------
    string : str
        Raw response text, e.g. ``"(0.5, 0.25)"``.

    Returns
    -------
    list
        ``[first * 10, second * 10]``; any fields after the second are
        returned unchanged as strings.

    Raises
    ------
    ValueError
        If one of the first two fields is not a valid float literal.
    IndexError
        If the string contains fewer than two comma-separated fields.
    """
    fields = string[1:-1].split(',')
    # float() already tolerates surrounding whitespace; strip() just makes
    # that explicit instead of slicing off a presumed single space.
    fields[0] = float(fields[0].strip()) * 10
    fields[1] = float(fields[1].strip()) * 10
    return fields

def findExpectedImageReaction(iaps):
    """Look up the expected [valence, arousal] pair for an image identifier.

    Rows of the module-level ``dfImages`` whose ``IAPS`` value equals
    ``str(iaps)`` are selected; their ``ValenceMean`` and ``ArousalMean``
    values are each averaged (plain sum / count). When no row matches,
    ``[np.nan, np.nan]`` is returned.
    """
    matches = dfImages[dfImages.IAPS == str(iaps)]
    valenceValues = matches.ValenceMean.tolist()
    arousalValues = matches.ArousalMean.tolist()

    if not valenceValues:
        return [np.nan, np.nan]

    meanValence = sum(valenceValues) / len(valenceValues)
    meanArousal = sum(arousalValues) / len(arousalValues)
    return [meanValence, meanArousal]

def findExpectedSoundReaction(number):
    """Look up the expected [valence, arousal] pair for a sound number.

    Rows of the module-level ``dfSounds`` whose ``Number`` equals ``number``
    are selected. If several rows match, the values of the last matching row
    are used; when none match, ``[np.nan, np.nan]`` is returned.
    """
    matches = dfSounds[dfSounds.Number == number]
    valenceValues = matches.ValenceMean.tolist()
    arousalValues = matches.ArousalMean.tolist()

    if not valenceValues:
        return [np.nan, np.nan]

    # Both lists come from the same row selection, so they have equal length.
    return [valenceValues[-1], arousalValues[-1]]

def applySpecificCondition(condition, i):
    """Decode one sign from a condition code as -1 or 1.

    Returns -1 when ``i == 0`` and the character at index 1 of ``condition``
    is ``'-'``, or when ``i == 1`` and the character at index 3 is ``'-'``.
    Every other combination returns 1.
    """
    firstIsNegative = condition[1] == '-' and i == 0
    if firstIsNegative:
        return -1

    secondIsNegative = condition[3] == '-' and i == 1
    return -1 if secondIsNegative else 1


    
def getValence(array):
    """Return the first element of a [valence, arousal] pair."""
    valence = array[0]
    return valence

def getArousal(array):
    """Return the second element of a [valence, arousal] pair."""
    arousal = array[1]
    return arousal
    
# Parse each raw response string into a numeric pair.
dfStudy['ActualReaction'] = dfStudy['Response'].apply(convertResponseStringIntoColumns)

# Attach the expected reaction pair for the image and the sound of each row.
dfStudy['ExpectedImageReaction'] = dfStudy['IAPS'].apply(findExpectedImageReaction)
dfStudy['ExpectedSoundReaction'] = dfStudy['Number'].apply(findExpectedSoundReaction)

# Decode the two signs of SpecificCondition: position 0 -> image, 1 -> sound.
for signPosition, conditionColumn in enumerate(['ImageCondition', 'SoundCondition']):
    dfStudy[conditionColumn] = dfStudy['SpecificCondition'].apply(
        applySpecificCondition, args=(signPosition,))

# Split every reaction pair into separate valence / arousal columns.
pairSources = {
    'Image': 'ExpectedImageReaction',
    'Sound': 'ExpectedSoundReaction',
    'Actual': 'ActualReaction',
}
for prefix, pairColumn in pairSources.items():
    dfStudy[prefix + 'Valence'] = dfStudy[pairColumn].apply(getValence)
    dfStudy[prefix + 'Arousal'] = dfStudy[pairColumn].apply(getArousal)

# Keep only the columns used by the regression models and discard rows with
# any missing value (e.g. stimuli that had no match in the rating tables).
modelColumns = [
    'ImageCondition', 'ImageValence', 'ImageArousal',
    'SoundCondition', 'SoundValence', 'SoundArousal',
    'ActualValence', 'ActualArousal',
]
df = dfStudy[modelColumns].dropna()
df.head()


## Regression Model



In [None]:


# Each entry describes one linear model:
#   (predictor columns, target column(s), header printed above the coefficients).
# A list of targets fits both outcomes at once; a single name fits one outcome.
modelSpecs = [
    (['ImageCondition', 'ImageValence', 'ImageArousal',
      'SoundCondition', 'SoundValence', 'SoundArousal'],
     ['ActualValence', 'ActualArousal'],
     'Image Condition ImageValence ImageArousal SoundCondition SoundValence SoundArousal || ActualValence || ActualArousal'),
    (['ImageValence', 'ImageArousal',
      'SoundValence', 'SoundArousal'],
     ['ActualValence', 'ActualArousal'],
     'ImageValence ImageArousal SoundValence SoundArousal || ActualValence || ActualArousal'),
    (['ImageCondition', 'ImageValence',
      'SoundCondition', 'SoundValence'],
     'ActualValence',
     'ImageCondition ImageValence SoundCondition SoundValence || ActualValence'),
    (['ImageValence', 'SoundValence'],
     'ActualValence',
     'ImageValence SoundValence || ActualValence'),
    (['ImageCondition', 'ImageArousal',
      'SoundCondition', 'SoundArousal'],
     'ActualArousal',
     'ImageCondition ImageArousal SoundCondition SoundArousal || ActualArousal'),
    (['ImageArousal', 'SoundArousal'],
     'ActualArousal',
     'ImageArousal SoundArousal || ActualArousal'),
]

# A single estimator is refit for every spec, so after the loop `reg` holds
# the last model (ImageArousal + SoundArousal -> ActualArousal).
reg = LinearRegression()
for predictors, target, header in modelSpecs:
    reg.fit(df[predictors], df[target])
    print(header)
    print(reg.coef_)



