In [3]:
import pickle
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols

In [5]:
# Load the pickled cross-validation metrics for the denoising experiments.
# Paths are relative to the notebook's working directory.
# NOTE: pickle.load can execute arbitrary code while deserialising; only load
# files produced by a trusted run of this project.
with open("denoised_cv_results/models_denoising_accuracies.pkl", "rb") as accuracies_file:
    models_denoising_accuracies = pickle.load(accuracies_file)

# F1 scores are loaded alongside the accuracies; they are not consumed by the
# ANOVA cells visible in this part of the notebook.
with open("denoised_cv_results/models_denoising_f1_scores.pkl", "rb") as f1_file:
    models_denoising_f1_score = pickle.load(f1_file)

In [6]:
# Flatten the nested mapping {model: {denoiser: [accuracy per fold, ...]}}
# into long format: one record per (model, denoiser, fold) observation.
data = [
    {
        'model': model,
        'denoiser': denoiser,
        'fold': fold,
        'accuracy': acc,
    }
    for model, denoise_dict in models_denoising_accuracies.items()
    for denoiser, accuracies in denoise_dict.items()
    for fold, acc in enumerate(accuracies)
]

df = pd.DataFrame(data)

# Two-way ANOVA on accuracy with both factors treated as categorical, plus
# their interaction term.
# NOTE(review): every fold is entered as an independent replicate; 'fold' is
# recorded in the frame but is not part of the model. If the same CV splits
# are shared across models/denoisers, a blocked design may be more
# appropriate -- confirm.
denoising_formula = 'accuracy ~ C(model) + C(denoiser) + C(model):C(denoiser)'
anova_model = ols(denoising_formula, data=df).fit()

# typ=2 requests Type II sums of squares.
anova_table = sm.stats.anova_lm(anova_model, typ=2)

print(anova_table)

                        sum_sq     df             F         PR(>F)
C(model)              0.516350    3.0   6223.460832  1.678917e-181
C(denoiser)           1.400575    4.0  12660.638235  2.437586e-219
C(model):C(denoiser)  0.371716   12.0   1120.054850  4.276723e-162
Residual              0.004978  180.0           NaN            NaN


In [8]:
# Load the pickled cross-validation metrics for the masking experiments.
# Paths are relative to the notebook's working directory.
# NOTE: pickle.load can execute arbitrary code while deserialising; only load
# files produced by a trusted run of this project.
with open("masking_cv_results/models_masks_accuracies.pkl", "rb") as accuracies_file:
    models_masks_accuracies = pickle.load(accuracies_file)

# F1 scores are loaded alongside the accuracies; they are not consumed by the
# ANOVA cells visible in this part of the notebook.
with open("masking_cv_results/models_masks_f1_scores.pkl", "rb") as f1_file:
    models_masks_f1_scores = pickle.load(f1_file)

In [9]:
# Flatten the nested mapping {model: {mask: [accuracy per fold, ...]}}
# into long format: one record per (model, mask, fold) observation.
data = [
    {
        'model': model,
        'mask': mask,
        'fold': fold,
        'accuracy': acc,
    }
    for model, mask_dict in models_masks_accuracies.items()
    for mask, accuracies in mask_dict.items()
    for fold, acc in enumerate(accuracies)
]

df = pd.DataFrame(data)

# Two-way ANOVA on accuracy with both factors treated as categorical, plus
# their interaction term.
# NOTE(review): every fold is entered as an independent replicate; 'fold' is
# recorded in the frame but is not part of the model. If the same CV splits
# are shared across models/masks, a blocked design may be more appropriate
# -- confirm.
masking_formula = 'accuracy ~ C(model) + C(mask) + C(model):C(mask)'
anova_model = ols(masking_formula, data=df).fit()

# typ=2 requests Type II sums of squares.
anova_table = sm.stats.anova_lm(anova_model, typ=2)

print(anova_table)

                    sum_sq     df          F        PR(>F)
C(model)          0.001408    3.0  14.885652  3.533272e-08
C(mask)           0.000451    2.0   7.159453  1.202898e-03
C(model):C(mask)  0.000307    6.0   1.623951  1.473782e-01
Residual          0.003404  108.0        NaN           NaN
