In [None]:
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.stats.anova import AnovaRM
from statsmodels.stats.multitest import multipletests
from scipy import stats
from itertools import combinations
from google.colab import files

# Upload the dataset (Colab file picker); the first uploaded file is analysed.
uploaded = files.upload()
if not uploaded:
    raise ValueError("No file was uploaded; cannot run the analysis.")
file_path = next(iter(uploaded))
df = pd.read_csv(file_path)

# Filter for EPN component only.
# Labels are compared after stripping surrounding whitespace so that padded
# CSV cells (e.g. "EPN ") still match. `.copy()` gives an independent frame,
# so the column assignments below do not write into a slice of `df`.
component_labels = df['COMPONENT'].astype(str).str.strip()
df_epn = df[component_labels == 'EPN'].copy()
if df_epn.empty:
    # Fail early with a readable message: an empty frame would otherwise only
    # surface later as an obscure error inside the ANOVA fit.
    raise ValueError(
        "No rows with COMPONENT == 'EPN' were found. "
        f"Available COMPONENT values: {sorted(component_labels.unique())}"
    )

# Ensure categorical variables are properly defined
# ('File' identifies the subject and must be categorical as well).
categorical_cols = ['EMOTION', 'PRONOUN', 'BLOCK', 'ELEC_LR', 'File']
for col in categorical_cols:
    df_epn[col] = df_epn[col].astype('category')

# Aggregate data to ensure one observation per subject per condition
agg_func = {'PEAK': 'mean'}  # Compute mean PEAK per subject per condition
groupby_cols = ['File', 'EMOTION', 'PRONOUN', 'BLOCK', 'ELEC_LR']
# observed=True keeps only the combinations that actually occur in the data;
# the categorical default would also emit unobserved combinations with a NaN
# PEAK (and triggers a pandas deprecation warning).
df_epn_agg = (
    df_epn.groupby(groupby_cols, observed=True)
    .agg(agg_func)
    .reset_index()
)
if df_epn_agg.empty:
    raise ValueError(
        "Aggregation produced no rows; check for missing values in the "
        f"grouping columns {groupby_cols}."
    )

# Fit the repeated-measures ANOVA on the aggregated data:
# PEAK is the dependent variable, 'File' identifies the subject, and the four
# experimental factors are all treated as within-subject factors.
within_factors = ['EMOTION', 'PRONOUN', 'BLOCK', 'ELEC_LR']
rm_model = AnovaRM(df_epn_agg, 'PEAK', 'File', within=within_factors)
rm_anova = rm_model.fit()


def _anova_stars(p):
    """Map a p-value to its star marker; anything not below 0.05 is 'n.s.'."""
    if p < 0.001:
        return '***'
    if p < 0.01:
        return '**'
    if p < 0.05:
        return '*'
    return 'n.s.'


# Annotate the ANOVA table with significance markers and show it.
# NOTE: `anova_results` is the fitted result's own table object, so the new
# column is added to `rm_anova.anova_table` as well.
anova_results = rm_anova.anova_table
anova_results['Significance'] = [
    _anova_stars(p) for p in anova_results['Pr > F']
]
print("Repeated Measures ANOVA Results:")
print(anova_results)

# Post-hoc analysis for all significant main effects and interactions
def _significance_marker(p):
    """Return the star marker for a p-value ('n.s.' when not below 0.05)."""
    if p < 0.001:
        return '***'
    if p < 0.01:
        return '**'
    if p < 0.05:
        return '*'
    return 'n.s.'


def _pairwise_posthoc(data, factor, subject='File', depvar='PEAK'):
    """Run Bonferroni-corrected paired t-tests between all levels of an effect.

    Parameters
    ----------
    data : DataFrame with one row per subject and condition cell.
    factor : ANOVA term name; an interaction is written as "A:B" and is
        tested on the combined cells, labelled "a_b".
    subject : column identifying the subject (the pairing unit).
    depvar : column holding the dependent variable.

    Returns
    -------
    DataFrame with one row per comparison, or None when the effect has fewer
    than two levels (nothing to compare).
    """
    # Build one label per row without adding columns to `data`.
    components = factor.split(':')
    labels = data[components[0]].astype(str)
    for name in components[1:]:
        labels = labels + '_' + data[name].astype(str)
    labels = labels.rename('level')
    levels = labels.unique().tolist()  # order of first appearance

    # Collapse to ONE value per subject and level (mean over all other
    # factors). The paired test must pair subjects with themselves; feeding
    # every subject-by-cell row into the test would treat cells as independent
    # observations and pair them only by row position.
    subject_means = (
        data[depvar]
        .groupby([data[subject], labels], observed=True)
        .mean()
        .unstack()
        .reindex(columns=levels)
    )

    pairs = []
    p_values = []
    for level1, level2 in combinations(levels, 2):
        # Keep only subjects that have both levels so the pairs line up.
        paired = subject_means[[level1, level2]].dropna()
        if len(paired) < 2:
            p_value = float('nan')  # a paired test needs at least two subjects
        else:
            _, p_value = stats.ttest_rel(paired[level1], paired[level2])
        p_values.append(p_value)
        pairs.append(f"{level1} vs {level2}")

    if not p_values:
        return None

    # Apply Bonferroni correction within this effect's family of comparisons
    _, p_corrected, _, _ = multipletests(p_values, method='bonferroni')
    return pd.DataFrame({
        # Identifies the effect once the per-effect tables are concatenated.
        'Factor': factor,
        'Comparison': pairs,
        'p-Value': p_values,
        'p-Value (Bonferroni)': p_corrected,
        'Significance': [_significance_marker(p) for p in p_corrected],
    })


significant_factors = [
    factor for factor in anova_results.index
    if anova_results.loc[factor, 'Pr > F'] < 0.05
]
posthoc_results = []

for factor in significant_factors:
    print(f"Performing pairwise comparisons for {factor} with Bonferroni correction")
    posthoc_df = _pairwise_posthoc(df_epn_agg, factor)
    if posthoc_df is None:
        continue
    posthoc_results.append(posthoc_df)
    print(posthoc_df)

# Export the post-hoc comparisons (only when at least one effect was tested):
# stack the per-effect tables, write them to CSV and trigger a Colab download.
if len(posthoc_results) > 0:
    posthoc_table_path = "/content/posthoc_comparisons_apa.csv"
    posthoc_combined = pd.concat(posthoc_results, ignore_index=True)
    posthoc_combined.to_csv(posthoc_table_path, index=False)
    print(f"Post-hoc results saved as: {posthoc_table_path}")
    files.download(posthoc_table_path)

# Export the ANOVA table with APA-style column names. The index (effect
# names) is written as the first CSV column.
apa_column_names = {
    'F Value': 'F',
    'Num DF': 'df1',
    'Den DF': 'df2',
    'Pr > F': 'p',
}
apa_table_path = "/content/repeated_measures_anova_apa.csv"
apa_table = anova_results.rename(columns=apa_column_names)
apa_table.to_csv(apa_table_path, index=True)
print(f"APA formatted ANOVA table saved as: {apa_table_path}")
files.download(apa_table_path)

Saving ERPadded_electrodes_final_E3_two_languages_P1-N1-EPN_active-passive.csv to ERPadded_electrodes_final_E3_two_languages_P1-N1-EPN_active-passive.csv


  df_epn_agg = df_epn.groupby(groupby_cols).agg(agg_func).reset_index()


UnboundLocalError: cannot access local variable 'key' where it is not associated with a value