In [None]:
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.stats.anova import AnovaRM
import scipy.stats as stats
from statsmodels.stats.multitest import multipletests
import itertools
from google.colab import files

# Prompt for a CSV through the Colab upload widget and load it into a DataFrame.
# The mapping returned by files.upload() is keyed by uploaded file name; the
# first key is used as the path passed to pandas.
print("Please upload a CSV file")
uploaded = files.upload()
if not uploaded:
    # Nothing came back (e.g. the dialog was dismissed): stop with a clear
    # message instead of an opaque "list index out of range".
    raise ValueError("No file was uploaded; re-run this cell and select a CSV file.")
file_path = next(iter(uploaded))
df = pd.read_csv(file_path)

# Keep only rows whose COMPONENT is 'N100'
# NOTE(review): this step was previously labelled as a P100 filter while the
# code selects 'N100' -- confirm which component is intended.
df = df.loc[df['COMPONENT'] == 'N100']

# Restrict the data to the electrodes named in elec_list
elec_list = ['O1', 'O2', 'PO7', 'PO8', 'PO3', 'PO4']
df = df.loc[df['ELEC'].isin(elec_list)]

# Cast the design factors and the subject identifier (File) to categorical dtype
categorical_cols = ['EMOTION', 'PRONOUN', 'BLOCK', 'ELEC_LR', 'File']
df = df.astype({col: 'category' for col in categorical_cols})

# Aggregate data to ensure one observation per subject per condition:
# PEAK is averaged over all rows sharing the same File x EMOTION x PRONOUN x
# BLOCK x ELEC_LR combination.
groupby_cols = ['File', 'EMOTION', 'PRONOUN', 'BLOCK', 'ELEC_LR']
agg_func = {'PEAK': 'mean'}  # Compute mean PEAK per subject per condition
# observed=True: the grouping columns are categorical, so without it pandas
# emits a FutureWarning and builds a row (with NaN PEAK) for every category
# combination that never occurs in the data. Keeping only observed cells leaves
# a genuinely missing cell absent instead of turning it into a NaN observation.
df_agg = df.groupby(groupby_cols, observed=True).agg(agg_func).reset_index()

# Fit a repeated-measures ANOVA on PEAK, with File identifying the subject and
# the four design factors entered as within-subject effects
within_factors = ['EMOTION', 'PRONOUN', 'BLOCK', 'ELEC_LR']
rm_anova = AnovaRM(data=df_agg, depvar='PEAK', subject='File', within=within_factors)
anova_model = rm_anova.fit()

# Turn the ANOVA table into a flat frame (effect names land in an 'index'
# column) and flag effects with p < 0.05
effects_table = anova_model.anova_table.reset_index()
anova_results_df = effects_table.assign(Significant=effects_table['Pr > F'].lt(0.05))

# Write the table to disk
anova_results_df.to_csv("anova_results.csv", index=False)

# Show the table in the cell output
print("ANOVA Results:", anova_results_df, sep="\n")

# Post-hoc pairwise comparisons for every effect flagged significant in the
# ANOVA table. Each comparison is a paired t-test across subjects: PEAK is first
# averaged per subject (File) within each level -- or level combination, for
# interactions -- of the effect, so every subject contributes exactly one value
# per condition and pairs are matched by subject rather than by row position.
# Testing the un-averaged rows would count each subject once per remaining
# design cell and overstate the degrees of freedom.
significant_factors = anova_results_df.loc[anova_results_df['Significant'], 'index'].tolist()

tests = []
for factor in significant_factors:
    factors = factor.split(':')  # 'A:B' names an interaction between A and B

    # Rows are subjects, columns are conditions; factors that are not part of
    # this effect are averaged out.
    subject_means = (
        df_agg.groupby(['File'] + factors, observed=True)['PEAK']
        .mean()
        .unstack(factors)
    )
    # Column labels are scalars for main effects and tuples for interactions
    conditions = list(subject_means.columns)

    for pos1, pos2 in itertools.combinations(range(len(conditions)), 2):
        # Keep only subjects that have a value in both conditions so that the
        # two samples stay aligned subject-by-subject.
        paired = subject_means.iloc[:, [pos1, pos2]].dropna()
        t_stat, p_value = stats.ttest_rel(paired.iloc[:, 0], paired.iloc[:, 1])
        tests.append((factor, conditions[pos1], conditions[pos2], t_stat, p_value))

# Collect the pairwise tests into a table, one row per comparison
posthoc_df = pd.DataFrame(tests, columns=['Factor', 'Level1', 'Level2', 'T-statistic', 'P-value'])

has_posthoc = not posthoc_df.empty

if has_posthoc:
    # Bonferroni correction across all post-hoc comparisons that were run
    posthoc_df['Corrected P-value'] = multipletests(posthoc_df['P-value'], method='bonferroni')[1]
    posthoc_df['Significant'] = posthoc_df['Corrected P-value'] < 0.05

    # Save post-hoc results to CSV
    posthoc_df.to_csv("posthoc_results.csv", index=False)

    # Display post-hoc results
    print("Post-hoc Comparisons:")
    print(posthoc_df)
else:
    # The table is empty only when no ANOVA effect was flagged significant,
    # i.e. no comparison was attempted at all.
    print("No significant ANOVA effects; no post-hoc comparisons were run.")

# Provide download links for CSV files. The ANOVA table is written to
# anova_results.csv whether or not post-hoc tests exist, so it is always offered.
files.download("anova_results.csv")
if has_posthoc:
    files.download("posthoc_results.csv")


Please upload a CSV file


Saving ERPadded_electrodes_final_E3_two_languages_P1-N1-EPN_active-passive.csv to ERPadded_electrodes_final_E3_two_languages_P1-N1-EPN_active-passive.csv


  df_agg = df.groupby(groupby_cols).agg(agg_func).reset_index()


ANOVA Results:
                            index    F Value  Num DF  Den DF    Pr > F  \
0                         EMOTION   0.511857     2.0    46.0  0.602755   
1                         PRONOUN   0.079969     1.0    23.0  0.779869   
2                           BLOCK  12.919826     1.0    23.0  0.001531   
3                         ELEC_LR   1.249772     1.0    23.0  0.275137   
4                 EMOTION:PRONOUN   2.150173     2.0    46.0  0.128026   
5                   EMOTION:BLOCK   0.546519     2.0    46.0  0.582674   
6                   PRONOUN:BLOCK   0.309599     1.0    23.0  0.583300   
7                 EMOTION:ELEC_LR   0.428286     2.0    46.0  0.654197   
8                 PRONOUN:ELEC_LR   0.088193     1.0    23.0  0.769153   
9                   BLOCK:ELEC_LR   0.314822     1.0    23.0  0.580162   
10          EMOTION:PRONOUN:BLOCK   1.311664     2.0    46.0  0.279256   
11        EMOTION:PRONOUN:ELEC_LR   0.301759     2.0    46.0  0.740969   
12          EMOTION:BLO

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>