In [None]:
# Remember to pip install any of these packages that are missing from your environment (most likely seaborn and pingouin).

import pandas as pd
import seaborn as sns
import pingouin as pg
from matplotlib import pyplot as plt

import warnings
warnings.filterwarnings('ignore')

# Experiment 1

In [None]:
# Load the Experiment 1 data from the CSV hosted on GitHub
ex1_csv_url = 'https://raw.githubusercontent.com/ethanweed/ExPsyLing/master/Notebooks/ANOVA/data_Experiment_1.csv'
df_Ex1 = pd.read_csv(ex1_csv_url)
# Disabled filter: when enabled, keeps only rows whose duration is below 1296.708
#df_Ex1 = df_Ex1[df_Ex1['duration'] < 1296.708]
df_Ex1.head()  # preview the first rows

In [None]:
# Point plot of RT by SOA condition, with one line per congruence level
ax = sns.pointplot(x='soa_condition', y='duration', hue='congruence', data=df_Ex1)
ax.set_xlabel('Stimulus Onset Asynchrony')  # give the axes more understandable labels
ax.set_ylabel('RT (milliseconds)')
ax.legend(frameon=False)                    # draw the legend without its surrounding box (its position is not changed)
sns.despine()                               # seaborn's default despine call on the current figure

In [None]:
# Aggregate to RT means per participant per condition (one row per ID x congruence x SOA).
# numeric_only=True restricts the mean to numeric columns: pandas >= 2.0 raises a
# TypeError if a non-numeric column is averaged, while older versions dropped such
# columns silently, so this keeps the result the same on both.
df_Ex1_agg = (
    df_Ex1
    .groupby(['ID', 'congruence', 'soa_condition'])
    .mean(numeric_only=True)
    .reset_index()
)
df_Ex1_agg

In [None]:
# Box plot of the aggregated RTs, with the SOA conditions shown short-first
order = ['short', 'long']
ax = sns.boxplot(x='soa_condition', y='duration', hue='congruence', order=order, data=df_Ex1_agg)
ax.set_xlabel('Stimulus Onset Asynchrony')  # give the axes more understandable labels
ax.set_ylabel('RT (milliseconds)')
ax.legend(frameon=False)                    # draw the legend without its surrounding box (its position is not changed)
sns.despine()                               # seaborn's default despine call on the current figure

In [None]:
# Quartiles and interquartile range of the aggregated durations, followed by the
# 1.5 * IQR cut-offs below Q1 and above Q3.
Q1, Q3 = (df_Ex1_agg['duration'].quantile(q) for q in (0.25, 0.75))
IQR = Q3 - Q1

for label, value in (('Q1:', Q1), ('Q3:', Q3), ('IQR:', IQR)):
    print(label, value)

# The two printed values are the lower and upper cut-offs, in that order
print('Outliers:', Q1 - 1.5 * IQR, Q3 + 1.5 * IQR)

In [None]:

# Paired t-test: congruent vs. incongruent mean RT within the short-SOA condition.
#
# Reshape to one row per participant (ID) with one column per congruence level, so
# the two samples are paired by participant rather than by row position. A
# participant missing either cell is dropped instead of shifting every later pair.
short_wide = (
    df_Ex1_agg[df_Ex1_agg['soa_condition'] == 'short']
    .pivot(index='ID', columns='congruence', values='duration')
    .loc[:, ['congruent', 'incongruent']]
    .dropna()
)
short_cong = short_wide['congruent']
short_incong = short_wide['incongruent']

print('congruent:', short_cong.mean())
print('incongruent:', short_incong.mean())
print('difference:', short_incong.mean() - short_cong.mean())

pg.ttest(short_cong, short_incong, paired=True)


In [None]:
# Same point plot as shown earlier in the notebook: RT by SOA condition, split by congruence
ax = sns.pointplot(x='soa_condition', y='duration', hue='congruence', data=df_Ex1)
ax.set_xlabel('Stimulus Onset Asynchrony')  # give the axes more understandable labels
ax.set_ylabel('RT (milliseconds)')
ax.legend(frameon=False)                    # draw the legend without its surrounding box (its position is not changed)
sns.despine()                               # seaborn's default despine call on the current figure

In [None]:
# Family-wise error rate for a set of comparisons that each use significance level alpha

alpha = 0.05
number_of_compairisons = 1

# (1 - alpha) ** n is the chance that none of the n comparisons gives a false
# positive; the family-wise error rate is the complement of that.
p_no_false_positive = (1 - alpha) ** number_of_compairisons
fwer = 1 - p_no_false_positive
print('Family-wise error rate:', round(fwer, 4))

In [None]:
# Two-way repeated-measures ANOVA on duration (congruence x SOA condition)
# Note! pg.rm_anova is given the FULL dataset here (df_Ex1), not the aggregated one!

pg.rm_anova(
    data=df_Ex1,
    subject='ID',
    within=['congruence', 'soa_condition'],
    dv='duration',
).round(2)  # every numeric column of the result table is rounded to 2 decimals

We conducted a two-way repeated-measures analysis of variance. There was a significant main effect of congruence (F(1,30) = 12.18, p = 0.0001). There was no significant main effect of SOA length (F(1,30) = 0.23, p = 0.63). There was a significant interaction between congruence and SOA length (F(1,30) = 8.26, p = 0.01).

# Experiment 2

In [None]:
import pandas as pd
import seaborn as sns
import pingouin as pg
from matplotlib import pyplot as plt

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the Experiment 2 data from the CSV hosted on GitHub
ex2_csv_url = 'https://raw.githubusercontent.com/ethanweed/ExPsyLing/master/Notebooks/ANOVA/data_Experiment_2.csv'
df_Ex2 = pd.read_csv(ex2_csv_url)
df_Ex2.head()  # preview the first rows

In [None]:
# Preprocessing for Experiment 2: reduce the raw data to correct, non-filler
# stimulus rows, then average duration per participant ('url') and condition.

df = df_Ex2[df_Ex2['sender'] == 'Stimulus']                         # get only rows with RT data
df = df[df['correct'] == True]                                      # get only correct responses

data = df[['sender', 'url', 'duration', 'condition']].copy()        # get only the columns we need

data = data[data['condition'] != 'Filler']                          # remove filler trials

data = data[data['duration'] < 2000]                                # remove outliers (keep durations below 2000)

# get aggregated data (RT means per participant per condition).
# numeric_only=True is required because `data` still carries the text column
# 'sender': pandas >= 2.0 raises a TypeError when asked to average it, whereas
# older versions dropped it silently. Either way the result holds url, condition
# and the mean duration.
data_agg = data.groupby(['url', 'condition']).mean(numeric_only=True).reset_index()


data_agg.head()

In [None]:
# Plot the aggregated data: a point plot per condition with the individual
# aggregated values overlaid as a strip plot, both using the same condition order.

order = ['Nonword', 'Unrelated', 'Related']
shared_plot_args = dict(data=data_agg, x='condition', y='duration', order=order)
sns.pointplot(**shared_plot_args)
sns.stripplot(**shared_plot_args)

In [None]:
# One-way repeated-measures ANOVA on duration across the conditions.
# remember, pingouin wants the full dataset for ANOVA: `data` is the preprocessed
# trial-level frame (not data_agg), 'url' is the column that data_agg groups by
# as the participant, and 'condition' is the within-subject factor.
pg.rm_anova(data=data, dv='duration', within='condition', subject='url')

In [None]:
# Paired t-test on the aggregated data: Related vs. Unrelated mean duration.
#
# Reshape to one row per participant ('url') with one column per condition, so the
# two samples are paired by participant rather than by row position. Trials were
# filtered before aggregation, so a participant can end up without a mean for one
# condition; such participants are dropped instead of shifting every later pair.
paired_means = (
    data_agg
    .pivot(index='url', columns='condition', values='duration')
    .loc[:, ['Related', 'Unrelated']]
    .dropna()
)
related = paired_means['Related']
unrelated = paired_means['Unrelated']

pg.ttest(related, unrelated, paired=True)