# Statistical analysis

In [None]:
#Import Statements
import pandas as pd
import math
import numpy as np
import matplotlib.pyplot as plt
plt.rcParams["font.family"] = "sans-serif"
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf
import chardet
from statsmodels.sandbox.regression.predstd import wls_prediction_std
from statsmodels.formula.api import ols
from statannot import add_stat_annotation
from statsmodels.stats.anova import AnovaRM

sns.set() # Setting seaborn as default style
sns.set_theme()

In [None]:
#read in source csv
df = pd.read_csv (r'Database_AllParticipants_av_experiment_122522_2021-04-16_08h48.38_8b7284ca-9e90-11eb-a72a-ac1f6b405aea.csv',
                  encoding='utf-8-sig')
df.info()
print([*df])

## Statistical Analysis

In [541]:
df_no_na = df[df['choice_response.rt'].notna()]

In [542]:
# define dependent variable
rt = df_no_na['choice_response.rt']

In [544]:
condition = sm.add_constant(condition)

In [545]:
results = ols(data = df_no_na, formula = 'rt ~ condition').fit() 

results.params

Intercept       1.727009
condition[0]    1.727009
condition[1]   -0.255677
condition[2]    0.113647
condition[3]   -0.337089
condition[4]    0.054083
dtype: float64

In [546]:
results.tvalues

Intercept       23.107873
condition[0]    23.107873
condition[1]    -1.222513
condition[2]     0.542997
condition[3]    -1.614156
condition[4]     0.259166
dtype: float64

In [547]:
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                     rt   R-squared:                       0.005
Model:                            OLS   Adj. R-squared:                  0.002
Method:                 Least Squares   F-statistic:                     1.856
Date:                Fri, 16 Apr 2021   Prob (F-statistic):              0.116
Time:                        14:39:48   Log-Likelihood:                -3941.8
No. Observations:                1645   AIC:                             7894.
Df Residuals:                    1640   BIC:                             7921.
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                   coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------
Intercept        1.7270      0.075     23.108   

In [548]:
print('Parameters: ', results.params)
print('R2: ', results.rsquared)
print('Standard errors: ', results.bse)
print('Predicted values: ', results.predict())

Parameters:  Intercept       1.727009
condition[0]    1.727009
condition[1]   -0.255677
condition[2]    0.113647
condition[3]   -0.337089
condition[4]    0.054083
dtype: float64
R2:  0.004506018828513092
Standard errors:  Intercept       0.074737
condition[0]    0.074737
condition[1]    0.209140
condition[2]    0.209295
condition[3]    0.208833
condition[4]    0.208680
dtype: float64
Predicted values:  [3.56766467 3.50810073 3.11692874 ... 3.45401784 3.50810073 3.56766467]


### T test

In [549]:
# Hypothesis =  condition 0 and 1 are the same
t_test = results.t_test('condition[0] = condition[1], condition[0] = condition[3], condition[0] = condition[2], condition[0]')
print(t_test)              

                             Test for Constraints                             
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
c0             1.9827      0.268      7.406      0.000       1.458       2.508
c1             2.0641      0.267      7.717      0.000       1.539       2.589
c2             1.6134      0.268      6.024      0.000       1.088       2.139
c3             1.7270      0.075     23.108      0.000       1.580       1.874


### Outlier test

Returns either an ndarray or a DataFrame if labels is not None. Will attempt to get labels from model_results if available. The columns are the Studentized residuals, the unadjusted p-value, and the corrected p-value according to method.


In [550]:
results.outlier_test(method = 'bonferroni')

Unnamed: 0,student_resid,unadj_p,bonferroni(p)
198,0.489585,0.624493,1.0
199,0.419303,0.675050,1.0
200,0.469826,0.638542,1.0
201,1.474559,0.140523,1.0
212,0.684089,0.494016,1.0
...,...,...,...
2161,-0.626593,0.531013,1.0
2162,-0.086027,0.931456,1.0
2163,-0.759383,0.447733,1.0
2164,-0.308159,0.758001,1.0


## ANOVA RM

In [592]:
aovrm = AnovaRM(df, 
                depvar = 'choice_response.rt', 
                subject = 'participant', 
                within=['cond'],
                #between = ['group'],
               aggregate_func = 'mean')
res = aovrm.fit()

print(res)

              Anova
     F Value Num DF  Den DF Pr > F
----------------------------------
cond  1.5266 4.0000 36.0000 0.2151



## Anova LM

In [593]:

table = sm.stats.anova_lm(results, typ=2) # Type 2 ANOVA DataFrame

print(table)

                 sum_sq      df           F  PR(>F)
condition  18713.766446     5.0  528.448577     0.0
Residual   11615.350401  1640.0         NaN     NaN


## Accuracy in Choice Trial