# Comparing each of the six models to the majority-class baseline

In [None]:
import pickle
import numpy as np
from collections import Counter
from sklearn.metrics import accuracy_score
from statsmodels.stats.contingency_tables import mcnemar

# ----------------------- Helper functions -----------------------
def load_pickle(path):
    """Return the object stored in the pickle file at *path*.

    The file is opened in binary mode and closed again before returning.
    NOTE: unpickling can execute arbitrary code, so only pass paths to
    files that come from a trusted source.
    """
    with open(path, mode="rb") as handle:
        payload = pickle.load(handle)
    return payload

def get_baseline_preds(y_true):
    """Build majority-class baseline predictions for *y_true*.

    Args:
        y_true: Iterable of hashable class labels.

    Returns:
        A list with one entry per label in ``y_true``, every entry being
        the most frequent label. Returns an empty list when ``y_true`` is
        empty. Ties are resolved by ``Counter.most_common``.
    """
    # Materialise once so one-shot iterables can be both counted and sized.
    labels = list(y_true)
    if not labels:
        # No labels means no majority class; avoid IndexError on [0][0].
        return []
    majority_class = Counter(labels).most_common(1)[0][0]
    return [majority_class] * len(labels)

def run_baseline_comparison(y_true, y_pred, model_name):
    """Compare a model's predictions with a majority-class baseline.

    The baseline predicts the most frequent label of ``y_true`` itself for
    every sample. The function prints the model name, the model accuracy,
    the baseline accuracy and the p-value of an exact McNemar test on the
    paired correct/incorrect outcomes of the two predictors.

    Args:
        y_true: Ground-truth labels.
        y_pred: Model predictions, aligned element-wise with ``y_true``.
        model_name: Label printed as the heading of the report.

    Returns:
        None. Results are written to standard output.
    """
    baseline_preds = get_baseline_preds(y_true)

    model_acc = accuracy_score(y_true, y_pred)
    baseline_acc = accuracy_score(y_true, baseline_preds)

    # McNemar contingency table indexed as table[baseline, model], where
    # index 0 means "incorrect" and index 1 means "correct" on each axis.
    table = np.zeros((2, 2))
    for true, pred_model, pred_base in zip(y_true, y_pred, baseline_preds):
        correct_model = int(pred_model == true)
        correct_base = int(pred_base == true)
        table[correct_base, correct_model] += 1

    p_value = mcnemar(table, exact=True).pvalue

    print(f"\n {model_name}")
    # The model accuracy was computed but never reported; show it next to
    # the baseline so the two can be compared directly.
    print(f"Model Accuracy: {model_acc:.3f}")
    print(f"Baseline Accuracy: {baseline_acc:.3f}")
    # Presumably 40 decimals are used so very small p-values do not print
    # as zero -- confirm before shortening the format.
    print(f"McNemar's p-value: {p_value:.40f}")

# ----------------------- Load and compare all models -----------------------
# Maps a display name to (ground-truth label file, prediction file).
# NOTE(review): every entry, including the EO and Random models, is scored
# against "y_test_EC_rf.pkl". That is only valid if all six models share
# the same test labels in the same order -- confirm against the training
# notebooks.
model_paths = {
    "RF EC": ("y_test_EC_rf.pkl", "test_preds_EC_rf.pkl"),
    "RF EO": ("y_test_EC_rf.pkl", "y_pred_EO_rf.pkl"),
    "RF Random": ("y_test_EC_rf.pkl", "test_preds_random_rf.pkl"),
    "MLR EC": ("y_test_EC_rf.pkl", "test_preds_EC_mr.pkl"),
    "MLR EO": ("y_test_EC_rf.pkl", "test_preds_EO_mr.pkl"),
    "MLR Random": ("y_test_EC_rf.pkl", "test_preds_random_mr.pkl"),
}

# Load each label file once; several models may share the same file.
_label_cache = {}
for name, (y_path, pred_path) in model_paths.items():
    if y_path not in _label_cache:
        _label_cache[y_path] = load_pickle(y_path)
    y_true = _label_cache[y_path]
    y_pred = load_pickle(pred_path)
    run_baseline_comparison(y_true, y_pred, name)
