# Two-way ANOVA evaluating the effect of data set and model type

### Two-way ANOVA with interaction term

In [None]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# --- Gather the per-condition subject score files into one long-format frame ---
# One CSV per (model, data) combination; each is expected to carry the
# columns "score", "model_type" and "data_type" used below.
paths = [
    "subject_scores_rf_ec.csv",
    "subject_scores_rf_eo.csv",
    "subject_scores_rf_random.csv",
    "subject_scores_mlr_ec.csv",
    "subject_scores_mlr_eo.csv",
    "subject_scores_mlr_random.csv",
]

dfs = list(map(pd.read_csv, paths))
df_combined = pd.concat(dfs, ignore_index=True)

# Both factors are treated as categorical variables.
for factor in ("model_type", "data_type"):
    df_combined[factor] = df_combined[factor].astype("category")

# --- Fit the OLS model with both main effects and their interaction ---
formula = 'score ~ C(model_type) + C(data_type) + C(model_type):C(data_type)'
model = ols(formula, data=df_combined).fit()

# typ=2 requests Type II sums of squares for the ANOVA table.
anova_table = sm.stats.anova_lm(model, typ=2)
print("\n Two-Way ANOVA Results:\n")
print(anova_table)

# --- Interaction plot: mean score per data type, one line per model ---
# NOTE(review): palette keys presumably match the values found in
# "model_type" ("RFC", "MLR") -- confirm against the CSV contents.
palette = {"RFC": "green", "MLR": "blue"}
plt.figure(figsize=(10, 6))
ax = sns.pointplot(
    data=df_combined,
    x="data_type",
    y="score",
    hue="model_type",
    errorbar=('ci', 95),
    dodge=True,
    markers=["o", "s"],
    capsize=.1,
    palette=palette,
)
ax.set_title("Mean Accuracy by Model and Data Type")
ax.set_xlabel("EEG epoch types")
ax.set_ylabel("Accuracy Score")
ax.grid(True)
plt.tight_layout()
plt.show()


### Two-way ANOVA without Interaction term

In [None]:
#  LOAD AND COMBINE SUBJECT-LEVEL SCORES
# One CSV per (model, data) combination; the frames are stacked row-wise.
# Each file is expected to provide the columns "score", "model_type" and
# "data_type" that the formula and the plot below refer to.
paths = [
    "subject_scores_rf_ec.csv",
    "subject_scores_rf_eo.csv",
    "subject_scores_rf_random.csv",
    "subject_scores_mlr_ec.csv",
    "subject_scores_mlr_eo.csv",
    "subject_scores_mlr_random.csv"
]

dfs = [pd.read_csv(path) for path in paths]
df_combined = pd.concat(dfs, ignore_index=True)

# Ensure categorical types
df_combined["model_type"] = df_combined["model_type"].astype("category")
df_combined["data_type"] = df_combined["data_type"].astype("category")

#  TWO-WAY ANOVA (main effects only, no interaction term)
model = ols('score ~ C(model_type) + C(data_type)', data=df_combined).fit()
# typ=2 requests Type II sums of squares for the ANOVA table.
anova_table = sm.stats.anova_lm(model, typ=2)
print("\n Two-Way ANOVA Results:\n")
print(anova_table)

# INTERACTION PLOT
# Mean score per data type with one line per model; error bars are 95%
# confidence intervals.
plt.figure(figsize=(10, 6))
# NOTE(review): palette keys presumably match the values found in
# "model_type" ("RFC", "MLR") -- confirm against the CSV contents.
palette = {"RFC": "green", "MLR": "blue"}
# Use `errorbar=('ci', 95)` rather than the deprecated `ci=95` keyword so this
# cell matches the interaction-term cell and keeps working on current seaborn.
sns.pointplot(
    data=df_combined,
    x="data_type",
    y="score",
    hue="model_type",
    errorbar=('ci', 95),
    dodge=True,
    markers=["o", "s"],
    capsize=.1,
    palette=palette,
)
plt.title("Mean Accuracy by Model and Data Type")
plt.xlabel("EEG epoch types")
plt.ylabel("Accuracy Score")
plt.grid(True)
plt.tight_layout()
plt.show()
