In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import glob
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.sandbox.regression.predstd import wls_prediction_std
from IPython.display import HTML, display

## Data loading

In [2]:


# Per-image mean valence/arousal ratings, keyed by the 'IAPS' column.
dfImages = pd.read_csv(
    '../data/IAPS.csv',
    sep=';',
    usecols=['IAPS', 'ValenceMean', 'ArousalMean'],
)

# Per-sound mean valence/arousal ratings, keyed by the 'Number' column.
dfSounds = pd.read_csv(
    '../data/IADS2.csv',
    sep=';',
    usecols=['Number', 'ValenceMean', 'ArousalMean'],
)

# glob returns paths in arbitrary (filesystem-dependent) order; sorting makes
# the concatenated row order -- and therefore the index -- reproducible.
studyDatafiles = sorted(glob.glob('../data/procedura/*.txt'))

# The study files are tab-separated and have no header row, so the column
# names are supplied here.
studyColumns = ['ID', 'nn', 'Condition', 'SpecificCondition', 'Number', 'IAPS', 'Widget', 'Response', 'Delay', 'ApperanceTimestamp']

li = [
    pd.read_csv(filename, sep='\t', header=None, names=studyColumns)
    for filename in studyDatafiles
]

# NOTE: pd.concat raises ValueError when no study files were found.
dfStudy = pd.concat(li, axis=0, ignore_index=True)

# Drop rows whose Condition is 'con' or whose Widget is 'emoscale1'.
# .copy() detaches the result from the concatenated frame, so the column
# assignments in later cells do not raise SettingWithCopyWarning.
dfStudy = dfStudy[(dfStudy.Condition != 'con') & (dfStudy.Widget != 'emoscale1')].copy()
                                     


## Preprocessing

In [3]:


# Coerce 'Number' to a numeric dtype; findExpectedSoundReaction compares these
# values against dfSounds.Number. With its default settings pd.to_numeric
# raises if a value cannot be parsed.
# NOTE(review): presumably the column was read as text because of values in
# the rows filtered out during loading -- confirm.
dfStudy['Number'] = pd.to_numeric(dfStudy['Number'])



def convertResponseStringIntoColumns(string):
    """Parse a bracketed ``"a, b"`` response string into two scaled floats.

    The first and last characters of ``string`` (the enclosing brackets) are
    dropped, the remainder is split on commas, and the first two components
    are converted to ``float`` and multiplied by 10.

    Parameters
    ----------
    string : str
        Response text such as ``"(0.5, -0.25)"``.

    Returns
    -------
    list
        ``[first * 10, second * 10]``. Any further comma-separated components
        stay in the list as unparsed strings.

    Raises
    ------
    ValueError
        If one of the first two components is not a valid float literal.
    IndexError
        If there are fewer than two comma-separated components.
    """
    parts = string[1:-1].split(',')
    parts[0] = float(parts[0]) * 10
    # float() ignores surrounding whitespace, so the separator's padding does
    # not need to be sliced off. Slicing one character unconditionally dropped
    # the sign (or first digit) whenever no space followed the comma.
    parts[1] = float(parts[1]) * 10
    return parts

def findExpectedImageReaction(iaps):
    """Look up the ``[ValenceMean, ArousalMean]`` pair stored in ``dfImages``.

    ``iaps`` is converted with ``str`` and compared against the ``IAPS``
    column of the notebook-level ``dfImages`` frame. If several rows match,
    the values of the last matching row are returned. If none match, the
    result is ``[np.nan, np.nan]``.
    """
    matches = dfImages[dfImages['IAPS'] == str(iaps)]
    valences = matches['ValenceMean'].tolist()
    arousals = matches['ArousalMean'].tolist()

    if not valences:
        return [np.nan, np.nan]
    return [valences[-1], arousals[-1]]

def findExpectedSoundReaction(number):
    """Look up the ``[ValenceMean, ArousalMean]`` pair stored in ``dfSounds``.

    ``number`` is compared as-is against the ``Number`` column of the
    notebook-level ``dfSounds`` frame. If several rows match, the values of
    the last matching row are returned. If none match, the result is
    ``[np.nan, np.nan]``.
    """
    matches = dfSounds[dfSounds['Number'] == number]
    valences = matches['ValenceMean'].tolist()
    arousals = matches['ArousalMean'].tolist()

    if not valences:
        return [np.nan, np.nan]
    return [valences[-1], arousals[-1]]

def applySpecificCondition(row, df):
    """Negate a row's expected reactions according to its SpecificCondition.

    Written to be used as ``frame.apply(applySpecificCondition, axis=1,
    df=frame)``.

    Parameters
    ----------
    row
        A single row exposing ``SpecificCondition``, ``ExpectedImageReaction``,
        ``ExpectedSoundReaction`` and ``name`` (its index label).
    df
        The frame that receives the updated row at ``df.loc[row.name]``.

    Returns
    -------
    None
        The result is delivered by writing into ``df``, not by return value.
    """
    # Index 1 of SpecificCondition is the flag for the image reaction: a '-'
    # there flips the sign of every element of ExpectedImageReaction.
    if row.SpecificCondition[1] == '-':
        row.ExpectedImageReaction = [i * -1 for i in row.ExpectedImageReaction]
    # Index 3 plays the same role for the sound reaction.
    if row.SpecificCondition[3] == '-':
        row.ExpectedSoundReaction = [i * -1 for i in row.ExpectedSoundReaction]
    # NOTE(review): the notebook passes the same frame that apply() is
    # iterating over, so this writes into it mid-iteration. The pandas docs
    # advise against mutating inside a user-defined function; consider
    # returning the row and assigning the result instead.
    df.loc[row.name] = row

def getValence(array):
    """Return the valence component, i.e. the element at index 0 of ``array``."""
    valence = array[0]
    return valence

def getArousal(array):
    """Return the arousal component, i.e. the element at index 1 of ``array``."""
    arousal = array[1]
    return arousal
    
# Parse each raw response string into a two-element reaction list.
dfStudy['ActualReaction'] = dfStudy['Response'].apply(convertResponseStringIntoColumns)

# Attach the reference ratings looked up from dfImages / dfSounds.
dfStudy['ExpectedImageReaction'] = dfStudy['IAPS'].apply(findExpectedImageReaction)
dfStudy['ExpectedSoundReaction'] = dfStudy['Number'].apply(findExpectedSoundReaction)

# Row-wise sign flip driven by SpecificCondition. The function writes its
# result straight into dfStudy, so the value returned by apply() is discarded.
# This must run before the component columns below are extracted.
dfStudy.apply(applySpecificCondition, axis=1, df=dfStudy)

# Target column -> (source reaction column, component extractor).
# Insertion order defines the column order of the final frame.
reactionColumns = {
    'ImageValence': ('ExpectedImageReaction', getValence),
    'ImageArousal': ('ExpectedImageReaction', getArousal),
    'SoundValence': ('ExpectedSoundReaction', getValence),
    'SoundArousal': ('ExpectedSoundReaction', getArousal),
    'ActualValence': ('ActualReaction', getValence),
    'ActualArousal': ('ActualReaction', getArousal),
}

for targetColumn, (sourceColumn, extractComponent) in reactionColumns.items():
    dfStudy[targetColumn] = dfStudy[sourceColumn].apply(extractComponent)

# Keep only the six scalar columns used by the regression cells.
df = dfStudy[list(reactionColumns)]

df.head()

Unnamed: 0,ImageValence,ImageArousal,SoundValence,SoundArousal,ActualValence,ActualArousal
6,6.44,7.07,-2.22,-7.52,0.0,-8.871155
7,-2.26,-6.55,6.77,6.32,-8.892059,0.0
8,5.89,6.21,-1.63,-7.79,-6.720917,-6.839478
9,6.83,5.4,-2.89,-6.91,-6.523761,6.662744
10,-2.95,-5.91,7.65,7.12,-8.489929,0.0


## Regression Model



In [4]:
# Ordinary least squares fit of ImageValence on ActualValence.
regressionModelImage = ols("ImageValence ~ ActualValence", data=df).fit()
regressionModelSummaryImage = regressionModelImage.summary()

# (plain header cell, colour-highlighted header cell) pairs, applied in order.
# Each replacement is an exact string match, whitespace included, so it only
# takes effect if as_html() emits the header in precisely this form.
imageHeaderHighlights = [
    ('<th>  Adj. R-squared:    </th>', '<th style="background-color:#aec7e8;"> Adj. R-squared: </th>'),
    ('<th>coef</th>', '<th style="background-color:#ffbb78;">coef</th>'),
    ('<th>std err</th>', '<th style="background-color:#c7e9c0;">std err</th>'),
    ('<th>P>|t|</th>', '<th style="background-color:#bcbddc;">P>|t|</th>'),
    ('<th>[0.025</th>    <th>0.975]</th>', '<th style="background-color:#ff9896;">[0.025</th>    <th style="background-color:#ff9896;">0.975]</th>'),
]

imageSummaryHtml = regressionModelSummaryImage.as_html()
for plainHeader, highlightedHeader in imageHeaderHighlights:
    imageSummaryHtml = imageSummaryHtml.replace(plainHeader, highlightedHeader)

HTML(imageSummaryHtml)


0,1,2,3
Dep. Variable:,ImageValence,R-squared:,0.093
Model:,OLS,Adj. R-squared:,0.092
Method:,Least Squares,F-statistic:,277.2
Date:,"Mon, 10 Jun 2019",Prob (F-statistic):,2.4200000000000003e-59
Time:,13:16:02,Log-Likelihood:,-7852.6
No. Observations:,2715,AIC:,15710.0
Df Residuals:,2713,BIC:,15720.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,2.9806,0.089,33.573,0.000,2.806,3.155
ActualValence,0.2909,0.017,16.650,0.000,0.257,0.325

0,1,2,3
Omnibus:,16145.213,Durbin-Watson:,2.017
Prob(Omnibus):,0.0,Jarque-Bera (JB):,272.417
Skew:,-0.135,Prob(JB):,7e-60
Kurtosis:,1.472,Cond. No.,5.41


In [5]:
# Ordinary least squares fit of SoundValence on ActualValence.
regressionModelSound = ols("SoundValence ~ ActualValence", data=df).fit()
regressionModelSummarySound = regressionModelSound.summary()

# (plain header cell, colour-highlighted header cell) pairs, applied in order.
# Each replacement is an exact string match, whitespace included, so it only
# takes effect if as_html() emits the header in precisely this form.
soundHeaderHighlights = [
    ('<th>  Adj. R-squared:    </th>', '<th style="background-color:#aec7e8;"> Adj. R-squared: </th>'),
    ('<th>coef</th>', '<th style="background-color:#ffbb78;">coef</th>'),
    ('<th>std err</th>', '<th style="background-color:#c7e9c0;">std err</th>'),
    ('<th>P>|t|</th>', '<th style="background-color:#bcbddc;">P>|t|</th>'),
    ('<th>[0.025</th>    <th>0.975]</th>', '<th style="background-color:#ff9896;">[0.025</th>    <th style="background-color:#ff9896;">0.975]</th>'),
]

soundSummaryHtml = regressionModelSummarySound.as_html()
for plainHeader, highlightedHeader in soundHeaderHighlights:
    soundSummaryHtml = soundSummaryHtml.replace(plainHeader, highlightedHeader)

HTML(soundSummaryHtml)

0,1,2,3
Dep. Variable:,SoundValence,R-squared:,0.098
Model:,OLS,Adj. R-squared:,0.097
Method:,Least Squares,F-statistic:,298.1
Date:,"Mon, 10 Jun 2019",Prob (F-statistic):,1.62e-63
Time:,13:16:02,Log-Likelihood:,-8131.3
No. Observations:,2760,AIC:,16270.0
Df Residuals:,2758,BIC:,16280.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,1.3199,0.093,14.147,0.000,1.137,1.503
ActualValence,-0.3154,0.018,-17.267,0.000,-0.351,-0.280

0,1,2,3
Omnibus:,17903.7,Durbin-Watson:,2.021
Prob(Omnibus):,0.0,Jarque-Bera (JB):,263.204
Skew:,0.115,Prob(JB):,7.01e-58
Kurtosis:,1.505,Cond. No.,5.46


In [None]:
## Less than 1% >>