In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
import pingouin as pg
import sys
from scipy.stats import shapiro, levene
import pickle

In [2]:
# load file with plotting style definitions
# NOTE(review): sys.path only affects Python module imports; plt.style.use()
# below is given a bare file name, so confirm that the style file is actually
# resolved from the notebook's working directory and whether this path entry
# is still needed.
sys.path.append('../')
plt.style.use('plotting_style.mpltstyle')
# apply seaborn's 'whitegrid' axes style after the matplotlib style sheet
sns.set_style('whitegrid')

In [3]:
import warnings

# Disable future warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Descriptive Statistics depending on Gender

In [4]:
# demographic table (age, gender and the mapping to participant IDs)
dfDemographics = pd.read_csv('../data_empirical/demographic_data_IDs.csv')

# trial-wise data (moves, reward, RT, time out, ID of the performed sequence of actions)
df = pd.read_csv('../data_empirical/data_cleaned.csv', header=0)

In [5]:
# show the demographics table (pandas truncates the middle rows in the rendered output)
dfDemographics

Unnamed: 0,Record_ID,Age,Gender,file_name,Participant_ID
0,1,19,weiblich,1634746180.csv,8
1,3,35,weiblich,1634753718.csv,36
2,5,25,weiblich,1634893289.csv,32
3,6,24,weiblich,1635170782.csv,0
4,7,27,weiblich,1634933833.csv,24
...,...,...,...,...,...
65,131,23,weiblich,1636031635.csv,40
66,133,18,männlich,1636044587.csv,58
67,134,26,männlich,1636070864.csv,34
68,136,31,weiblich,1636152235.csv,42


In [8]:
# mean and standard deviation of age within each gender group
(
    dfDemographics
    .groupby('Gender')['Age']
    .agg(['mean', 'std'])
)

Unnamed: 0_level_0,mean,std
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
männlich,25.5,5.246553
weiblich,23.48,4.277325


In [6]:
# add DAS indicator column to df (1 where the trial's sequence_ID equals IdxDAS, else 0)
IdxDAS = 4 # set DAS index
df['DAS'] = np.where(df['sequence_ID'] == IdxDAS, 1, 0)

# set RT and DAS of time out trials to NaN
# Cast to float first: writing NaN into an integer column relies on silent
# upcasting, which pandas >= 2.1 deprecates. Use np.nan because the np.NaN
# alias was removed in NumPy 2.0.
timedOut = df['time_out'] == 1
df[['RT', 'DAS']] = df[['RT', 'DAS']].astype(float)
df.loc[timedOut, ['RT', 'DAS']] = np.nan

# add columns that code for the halves and segments of the experiment
# (pd.cut splits the range of blockN into 2 / 4 equal-width bins labelled 0..k-1)
df['half'] = pd.cut(df['blockN'], 2, labels=np.arange(2))
df['segment'] = pd.cut(df['blockN'], 4, labels=np.arange(4))

# select the measures
measures = ['DAS', 'reward', 'RT', 'time_out']

In [7]:
# one row per participant: gender, indexed by (sorted) participant ID
dfStats = (
    dfDemographics[['Participant_ID', 'Gender']]
    .rename(columns={'Participant_ID': 'ID', 'Gender': 'gender'})
    .sort_values('ID')
    .set_index('ID')
)

In [8]:
# participant-wise mean of every measure, rounded to 2 decimals
# (Participant_ID is kept as a regular column, index is positional)
dfMeans = df[measures].groupby(df['Participant_ID']).mean().round(2).reset_index()

# dfStats is indexed by participant ID, and column assignment aligns on the index.
# Align the means on Participant_ID explicitly instead of relying on the
# positional index of dfMeans happening to coincide with the participant IDs.
meansByID = dfMeans.set_index('Participant_ID')

dfStats['p(DAS)'] = meansByID['DAS']
dfStats['reward'] = meansByID['reward']
dfStats['RT'] = meansByID['RT']
# per-participant sum of the time_out flag (result is indexed by Participant_ID)
dfStats['time_outs'] = df['time_out'].groupby(df['Participant_ID']).sum()

In [9]:
# group-wise mean and standard deviation of every measure, rounded to 2 decimals
(
    dfStats
    .groupby(by='gender')
    .agg(['mean', 'std'])
    .round(decimals=2)
)

Unnamed: 0_level_0,p(DAS),p(DAS),reward,reward,RT,RT,time_outs,time_outs
Unnamed: 0_level_1,mean,std,mean,std,mean,std,mean,std
gender,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
männlich,0.57,0.17,82.69,5.79,1481.13,274.59,3.3,2.74
weiblich,0.53,0.19,79.89,5.93,1755.47,415.56,5.32,3.89


### p(DAS)

In [10]:
# Shapiro-Wilk normality check: p(DAS) in the 'weiblich' group
stats, p = shapiro(dfStats['p(DAS)'][dfStats['gender'] == 'weiblich'])
print(stats, p, sep='\n')

0.9620251590465091
0.10808058730649966


In [11]:
# Shapiro-Wilk normality check: p(DAS) in the 'männlich' group
stats, p = shapiro(dfStats['p(DAS)'][dfStats['gender'] == 'männlich'])
print(stats, p, sep='\n')

0.9265710946889408
0.13262881898168333


In [12]:
# Levene's test for equality of variances of p(DAS) between the gender groups
stats, p = levene(
    dfStats['p(DAS)'][dfStats['gender'] == 'männlich'],
    dfStats['p(DAS)'][dfStats['gender'] == 'weiblich'],
)
print(stats, p, sep='\n')

0.51524311611413
0.475336765614812


In [13]:
# two-sample t-test on p(DAS): 'weiblich' (x) vs. 'männlich' (y)
pg.ttest(
    x=dfStats['p(DAS)'][dfStats['gender'] == 'weiblich'],
    y=dfStats['p(DAS)'][dfStats['gender'] == 'männlich'],
)

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,-0.813594,39.542595,two-sided,0.42075,"[-0.13, 0.06]",0.203805,0.353,0.118245


### Reward

In [14]:
# Shapiro-Wilk normality check: mean reward in the 'weiblich' group
stats, p = shapiro(dfStats['reward'][dfStats['gender'] == 'weiblich'])
print(stats, p, sep='\n')

0.8991672691247901
0.00045105845653684723


In [15]:
# Shapiro-Wilk normality check: mean reward in the 'männlich' group
stats, p = shapiro(dfStats['reward'][dfStats['gender'] == 'männlich'])
print(stats, p, sep='\n')

0.7793868448000035
0.000432523429335665


In [16]:
# Levene's test for equality of variances of mean reward between the gender groups
stats, p = levene(
    dfStats['reward'][dfStats['gender'] == 'männlich'],
    dfStats['reward'][dfStats['gender'] == 'weiblich'],
)
print(stats, p, sep='\n')

0.5393225551124053
0.46523853258559533


In [26]:
# Mann-Whitney U test on mean reward: 'weiblich' (x) vs. 'männlich' (y)
pg.mwu(
    x=dfStats['reward'][dfStats['gender'] == 'weiblich'],
    y=dfStats['reward'][dfStats['gender'] == 'männlich'],
)

Unnamed: 0,U-val,alternative,p-val,RBC,CLES
MWU,302.0,two-sided,0.01024,0.396,0.302


### Reaction Time (RT)

In [18]:
# Shapiro-Wilk normality check: mean RT in the 'weiblich' group
stats, p = shapiro(dfStats['RT'][dfStats['gender'] == 'weiblich'])
print(stats, p, sep='\n')

0.8987823399721637
0.000438107177584692


In [19]:
# Shapiro-Wilk normality check: mean RT in the 'männlich' group
stats, p = shapiro(dfStats['RT'][dfStats['gender'] == 'männlich'])
print(stats, p, sep='\n')

0.8725364031228169
0.01302287545946309


In [20]:
# Levene's test for equality of variances of mean RT between the gender groups
stats, p = levene(
    dfStats['RT'][dfStats['gender'] == 'männlich'],
    dfStats['RT'][dfStats['gender'] == 'weiblich'],
)
print(stats, p, sep='\n')

2.2437369588252967
0.1387843292195007


In [24]:
# Mann-Whitney U test on mean RT: 'weiblich' (x) vs. 'männlich' (y),
# passed as plain NumPy arrays
pg.mwu(
    x=dfStats['RT'][dfStats['gender'] == 'weiblich'].to_numpy(),
    y=dfStats['RT'][dfStats['gender'] == 'männlich'].to_numpy(),
)

Unnamed: 0,U-val,alternative,p-val,RBC,CLES
MWU,723.0,two-sided,0.00382,-0.446,0.723


### Time Outs

In [22]:
# Shapiro-Wilk normality check: time outs in the 'weiblich' group
stats, p = shapiro(dfStats['time_outs'][dfStats['gender'] == 'weiblich'])
print(stats, p, sep='\n')

0.9408818100282254
0.014601528865072532


In [23]:
# Shapiro-Wilk normality check: time outs in the 'männlich' group
stats, p = shapiro(dfStats['time_outs'][dfStats['gender'] == 'männlich'])
print(stats, p, sep='\n')

0.9161057514679756
0.08339821828287507


In [21]:
# Levene's test for equality of variances of time outs between the gender groups
stats, p = levene(
    dfStats['time_outs'][dfStats['gender'] == 'männlich'],
    dfStats['time_outs'][dfStats['gender'] == 'weiblich'],
)
print(stats, p, sep='\n')

3.0026781631719905
0.08766016530733838


In [25]:
# Mann-Whitney U test on time outs: 'weiblich' (x) vs. 'männlich' (y)
pg.mwu(
    x=dfStats['time_outs'][dfStats['gender'] == 'weiblich'],
    y=dfStats['time_outs'][dfStats['gender'] == 'männlich'],
)

Unnamed: 0,U-val,alternative,p-val,RBC,CLES
MWU,654.5,two-sided,0.043799,-0.309,0.6545


In [56]:
# print date of last changes and version numbers
# (requires the `watermark` IPython extension to be installed in the kernel's environment)
%load_ext watermark

# flags: -n date with day/month names, -u "Last updated" label, -v Python/IPython versions,
#        -iv versions of the imported packages, -w version of watermark itself
%watermark -n -u -v -iv -w

Last updated: Tue Dec 10 2024

Python implementation: CPython
Python version       : 3.11.0
IPython version      : 8.20.0

matplotlib: 3.8.4
numpy     : 1.25.2
sys       : 3.11.0 | packaged by conda-forge | (main, Jan 15 2023, 05:44:48) [Clang 14.0.6 ]
pandas    : 2.2.1
seaborn   : 0.12.2
pingouin  : 0.5.4

Watermark: 2.3.1

