In [2]:
# Day 4 — Unweighted ablations: LR and HGB across 5 feature sets
# Outputs are saved under Data/10_day_run/day4_*

import numpy as np
import pandas as pd
from pathlib import Path

from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.calibration import CalibratedClassifierCV
from sklearn.metrics import accuracy_score, f1_score, matthews_corrcoef, make_scorer

In [3]:
# ---------- Load unweighted dataset ----------
df = pd.read_csv("Data/10_day_run/merged_dataset.csv")
df["Week"] = pd.to_datetime(df["Week"], errors="coerce")
df = df.sort_values("Week").reset_index(drop=True)

In [4]:
# ---------- Target: ±1% weekly move ----------
# Week-over-week simple return of the close; the first row has no predecessor, so it is NaN.
df["Return_protocol"] = df["Close"].pct_change()
delta = 0.01  # half-width of the "flat" band around zero

def to_label(r):
    """Three-class label for one return.

    Returns 1 when ``r`` is above ``+delta``, -1 when it is below ``-delta``,
    0 when it lies inside the band, and NaN when ``r`` is missing (so the row
    can be dropped afterwards).
    """
    if pd.isna(r):
        return np.nan
    if r > delta:
        return 1
    return -1 if r < -delta else 0

df["Target_protocol"] = df["Return_protocol"].apply(to_label)
# NOTE: rebinding `df` makes this cell non-idempotent — re-running it recomputes
# pct_change on the already-trimmed frame and drops one more leading row.
df = df.dropna(subset=["Target_protocol"]).reset_index(drop=True)
y = df["Target_protocol"].astype(int).copy()
labels_order = [-1, 0, 1]


In [5]:
# ---------- Rolling-origin expanding splits (same as Day 3) ----------
# Every split trains on rows 0..train_end and tests on the block that follows;
# the training window then grows to absorb the block that was just tested.
# All bounds stored in `splits` are inclusive row positions.
N = len(df)
# At least 52 rows (or half the data) for the first training window.
initial_train_weeks = max(52, int(0.5 * N))
# At least 16 rows (or 10% of the data) per test block.
test_weeks         = max(16, int(0.1 * N))

splits = []
train_end = initial_train_weeks - 1
sid = 1
while True:
    te_start = train_end + 1
    te_end   = te_start + test_weeks - 1
    # No rows left to test on.
    if te_start >= N: break
    # Final block may be shorter than test_weeks: clamp it to the last row.
    if te_end >= N: te_end = N - 1
    splits.append((sid, 0, int(train_end), int(te_start), int(te_end)))
    sid += 1
    # The block just added reached the end of the data.
    if te_end >= N - 1: break
    # Expanding window: the next training set ends where this test block ended.
    train_end = te_end

# Persist the split table (one row per split).
pd.DataFrame(splits, columns=["split_id","train_start","train_end","test_start","test_end"])\
  .to_csv("Data/10_day_run/day4_splits.csv", index=False)

In [6]:
# ---------- Unweighted feature families ----------
# Each family keeps only the candidate columns that actually exist in `df`,
# so a missing column shrinks the family instead of raising later.
news_feats = [
    name
    for name in (
        "mean_news_sentiment",
        "smoothed_news_sentiment",
        "num_news_articles",
        "low_coverage_week",
    )
    if name in df.columns
]

filings_feats = [
    name
    for name in (
        "sent_10k_mean",
        "10q_mda_sent",
        "10q_risk_sent",
        "opt_vs_caut",
        "sent_8k_mean",
        "count_10k",
        "count_10q",
        "count_8k",
    )
    if name in df.columns
]

market_feats = [name for name in ("Volume",) if name in df.columns]

In [7]:
# ---------- Lag builder ----------
def add_lags(df, cols, lags=(1,2)):
    """Return a copy of ``df`` extended with ``<col>_lag<k>`` columns.

    For every lag ``k`` in ``lags`` (outer loop) and every name in ``cols``
    (inner loop) that is present in the frame, a column holding that series
    shifted down by ``k`` rows is added. Names that are not columns are
    skipped silently. The input frame is not modified.
    """
    lagged = df.copy()
    for step in lags:
        for name in cols:
            if name not in lagged.columns:
                continue
            lagged[f"{name}_lag{step}"] = lagged[name].shift(step)
    return lagged

# Build the lagged frame one family at a time; `df` itself is left untouched.
df_feats = df.copy()
for fam in [news_feats, filings_feats, market_feats]:
    df_feats = add_lags(df_feats, fam, lags=(1,2))

In [8]:
# ---------- Design matrices for each feature set ----------
def build_X(df, base_cols, lags=(1, 2)):
    """Select base columns and their lag columns into a design matrix.

    Parameters
    ----------
    df : DataFrame holding the base columns and any ``<col>_lag<k>`` columns.
    base_cols : iterable of base column names; names missing from ``df`` are skipped.
    lags : lags whose ``<col>_lag<k>`` columns are picked up when present.
        Defaults to ``(1, 2)``, the previously hard-coded value.

    Returns
    -------
    (X, cols) : a copy of the selected columns, and the ordered list of their
        names (each base column followed by its lags, duplicates removed,
        first occurrence kept).
    """
    selected = []
    for base in base_cols:
        if base not in df.columns:
            continue
        selected.append(base)
        for k in lags:
            lag_name = f"{base}_lag{k}"
            if lag_name in df.columns:
                selected.append(lag_name)
    # dict.fromkeys de-duplicates while preserving first-seen order.
    selected = list(dict.fromkeys(selected))
    return df[selected].copy(), selected

# One design matrix per ablation: three single families, then the two combinations.
X_news,   cols_news   = build_X(df_feats, news_feats)
X_filings,cols_filings= build_X(df_feats, filings_feats)
X_market, cols_market = build_X(df_feats, market_feats)
X_nf,     cols_nf     = build_X(df_feats, news_feats + filings_feats)
X_nfv,    cols_nfv    = build_X(df_feats, news_feats + filings_feats + market_feats)

In [9]:
# Ablation name -> design matrix; the names are reused in output file names.
feature_sets = {
    "news_only": X_news,
    "filings_only": X_filings,
    "market_only": X_market,
    "news_plus_filings": X_nf,
    "news_filings_volume": X_nfv,
}

In [10]:
# Sanity check: row/split counts and the width of every design matrix.
print("Rows:", N, "Splits:", len(splits))
for name, X in feature_sets.items():
    print(f"{name:<22} -> {X.shape[1]} features")

Rows: 239 Splits: 6
news_only              -> 12 features
filings_only           -> 24 features
market_only            -> 3 features
news_plus_filings      -> 36 features
news_filings_volume    -> 39 features


## Metrics, bootstrap CIs, paired tests vs Random-Walk

In [11]:
# ---------- Metrics, bootstrap, significance vs RW ----------

from math import fabs
from scipy.stats import chi2, norm


In [22]:
def mcc_scorer():
    """Return a scorer object wrapping ``matthews_corrcoef`` (built with ``make_scorer``)."""
    return make_scorer(matthews_corrcoef)

def _onehot_matrix(y_true, classes):
    """Version-safe one-hot (handles both sparse_output and sparse kw)."""
    try:
        oh = OneHotEncoder(categories=[classes], sparse_output=False, handle_unknown="ignore")
    except TypeError:
        oh = OneHotEncoder(categories=[classes], sparse=False, handle_unknown="ignore")
    return oh.fit_transform(y_true.reshape(-1, 1))

def brier_multiclass(y_true, proba, classes):
    """Multiclass Brier score.

    For each row, sums the squared differences between the predicted
    probabilities and the one-hot encoding of the true label, then averages
    over rows. ``proba`` columns must follow the order of ``classes``.
    """
    residual = proba - _onehot_matrix(y_true, classes)
    per_row = np.sum(residual ** 2, axis=1)
    return np.mean(per_row)

def reorder_proba(proba, est_classes, target_order=(-1, 0, 1)):
    """Reorder probability columns to match ``target_order``.

    Parameters
    ----------
    proba : array of shape ``(n_samples, len(est_classes))``.
    est_classes : class labels in the column order of ``proba``.
    target_order : desired class order of the output columns.

    Returns
    -------
    ndarray of shape ``(n_samples, len(target_order))``. A class in
    ``target_order`` that is absent from ``est_classes`` (for example because
    the estimator never saw it) gets an all-zero column instead of raising.
    """
    proba = np.asarray(proba)
    # First position of every estimator class (mirrors list.index semantics).
    position = {}
    for j, label in enumerate(est_classes):
        position.setdefault(label, j)

    out = np.zeros((proba.shape[0], len(target_order)), dtype=proba.dtype)
    for k, label in enumerate(target_order):
        j = position.get(label)
        if j is not None:
            out[:, k] = proba[:, j]
    return out

def metric_tuple(y_true, y_pred, y_proba, classes):
    """Compute the five headline metrics for one set of predictions.

    Returns ``(accuracy, weighted_f1, macro_f1, mcc, brier)``. Both F1 scores
    are computed over ``classes`` with ``zero_division=0``. ``brier`` is NaN
    when ``y_proba`` is None.
    """
    f1_options = dict(labels=classes, zero_division=0)
    accuracy = accuracy_score(y_true, y_pred)
    weighted_f1 = f1_score(y_true, y_pred, average="weighted", **f1_options)
    macro_f1 = f1_score(y_true, y_pred, average="macro", **f1_options)
    mcc = matthews_corrcoef(y_true, y_pred)
    if y_proba is None:
        brier = np.nan
    else:
        brier = brier_multiclass(y_true, y_proba, classes)
    return accuracy, weighted_f1, macro_f1, mcc, brier

def mbb_indices(n, block_len=10, rng=None):
    """Draw ``n`` resampling indices as concatenated blocks of consecutive positions.

    ``ceil(n / block_len)`` block starts are drawn uniformly from ``[0, n)``;
    each block covers ``block_len`` consecutive positions, wrapping around
    modulo ``n``. The concatenation is truncated to exactly ``n`` indices.
    A fresh ``np.random.default_rng()`` is used when ``rng`` is None.
    """
    generator = np.random.default_rng() if rng is None else rng
    n_blocks = int(np.ceil(n / block_len))
    block_starts = generator.integers(low=0, high=n, size=n_blocks)
    # Row b holds block b: start, start+1, ..., start+block_len-1 (mod n).
    offsets = np.arange(block_len)
    wrapped = (block_starts[:, None] + offsets[None, :]) % n
    return wrapped.ravel()[:n].astype(int)

def pooled_with_ci(preds_df, block_len=10, B=2000, seed=42):
    """Pooled metrics with block-bootstrap 95% percentile intervals.

    Parameters
    ----------
    preds_df : frame with ``y_true`` and ``y_pred`` columns and, optionally,
        one ``p_<label>`` probability column per entry of ``labels_order``.
    block_len : block length passed to ``mbb_indices``.
    B : number of bootstrap replicates.
    seed : seed of the bootstrap random generator.

    Returns
    -------
    DataFrame with one row per metric (Accuracy, Weighted F1, Macro F1, MCC,
    Brier) holding the point estimate, the 2.5th and 97.5th bootstrap
    percentiles, and the number of pooled rows. Brier is NaN when the
    probability columns are not all present.
    """
    y_true = preds_df["y_true"].astype(int).to_numpy()
    y_pred = preds_df["y_pred"].astype(int).to_numpy()

    # Pick probability columns by class label rather than by prefix/frame order,
    # so the Brier score cannot be computed against misaligned columns.
    proba_cols = [f"p_{c}" for c in labels_order]
    if all(col in preds_df.columns for col in proba_cols):
        proba = preds_df[proba_cols].to_numpy()
    else:
        proba = None

    n = len(y_true)
    rng = np.random.default_rng(seed)
    boot = []
    for _ in range(B):
        idx = mbb_indices(n, block_len=block_len, rng=rng)
        boot.append(metric_tuple(
            y_true[idx], y_pred[idx], None if proba is None else proba[idx], labels_order
        ))
    boot = np.array(boot)

    pt = np.array(metric_tuple(y_true, y_pred, proba, labels_order))
    lo = np.percentile(boot, 2.5, axis=0)
    hi = np.percentile(boot, 97.5, axis=0)
    return pd.DataFrame({
        "Metric": ["Accuracy","Weighted F1","Macro F1","MCC","Brier"],
        "Point": pt, "CI Lower (95%)": lo, "CI Upper (95%)": hi, "n_weeks": [n]*5
    })

In [13]:
# Random-Walk baseline predictions (Day 1). The comparison code below reads the
# "Week", "split_id", "y_true" and "y_pred" columns of this frame.
rw = pd.read_csv("Data/10_day_run/rw_predictions_all.csv")

In [15]:
def eval_vs_rw(model_preds_csv):
    """Compare saved model predictions with the Random-Walk baseline.

    The model CSV is inner-joined with the notebook-level ``rw`` frame on
    ``(Week, split_id, y_true)``; both tests run on the aligned rows only.

    Parameters
    ----------
    model_preds_csv : path of a predictions CSV with ``Week``, ``split_id``,
        ``y_pred`` and ``y_true`` columns.

    Returns
    -------
    (mcnemar, dm, n_aligned)
        mcnemar : dict with ``n01`` (RW wrong, model right), ``n10`` (RW right,
            model wrong), the continuity-corrected ``chi2`` and its ``p_value``.
        dm : dict with the Diebold–Mariano statistic on 0–1 loss, its two-sided
            normal ``p_value``, ``mean_diff`` (model loss minus RW loss) and ``T``.
        n_aligned : number of rows after alignment.
    """
    join_keys = ["Week", "split_id", "y_true"]
    model_df = pd.read_csv(model_preds_csv)

    # Join on a datetime key built on copies: a later cell rebinds `rw` with a
    # datetime "Week", and a string-vs-datetime merge would otherwise fail.
    rw_side = rw.rename(columns={"y_pred": "y_pred_rw"}).assign(
        Week=lambda frame: pd.to_datetime(frame["Week"], errors="coerce")
    )
    model_side = model_df[["Week", "split_id", "y_pred", "y_true"]].assign(
        Week=lambda frame: pd.to_datetime(frame["Week"], errors="coerce")
    )
    merged = pd.merge(rw_side, model_side, on=join_keys, how="inner").rename(
        columns={"y_pred": "y_pred_model"}
    )

    y_true = merged["y_true"].astype(int).to_numpy()
    y_rw   = merged["y_pred_rw"].astype(int).to_numpy()
    y_m    = merged["y_pred_model"].astype(int).to_numpy()

    # McNemar on the discordant pairs, with continuity correction.
    hit_rw = (y_rw == y_true).astype(int)
    hit_m  = (y_m  == y_true).astype(int)
    n01 = int(((hit_rw == 0) & (hit_m == 1)).sum())  # RW wrong, model right
    n10 = int(((hit_rw == 1) & (hit_m == 0)).sum())  # RW right, model wrong
    if (n01 + n10) == 0:
        mcnemar = {"n01": n01, "n10": n10, "chi2": 0.0, "p_value": 1.0}
    else:
        chi2_stat = (abs(n01 - n10) - 1)**2 / (n01 + n10)
        # sf() is the upper tail, computed without the cancellation of 1 - cdf().
        mcnemar = {"n01": n01, "n10": n10, "chi2": chi2_stat,
                   "p_value": float(chi2.sf(chi2_stat, 1))}

    # Diebold–Mariano on 0–1 loss, h=1 → lag=0 (no autocovariance terms in the variance)
    loss_m  = (y_m  != y_true).astype(int)
    loss_rw = (y_rw != y_true).astype(int)
    d = loss_m - loss_rw
    T = len(d)
    if T == 0:
        # Nothing aligned: same NaN result as before, without NumPy warnings.
        nan = float("nan")
        dm = {"dm_stat": nan, "p_value": nan, "mean_diff": nan, "T": 0}
    else:
        d_bar = d.mean()
        var = d.var(ddof=0) / T
        # The small epsilon keeps the ratio finite when both losses are identical.
        dm_stat = d_bar / (np.sqrt(var) + 1e-12)
        dm = {"dm_stat": float(dm_stat), "p_value": float(2 * norm.sf(abs(dm_stat))),
              "mean_diff": float(d_bar), "T": int(T)}
    return mcnemar, dm, len(merged)

## Model trainers and ablation runner (LR + HGB)

In [23]:
# ---------- Trainers ----------
def fit_predict_lr(X_tr, y_tr, X_te):
    """Tune, fit and apply a scaled multinomial logistic regression.

    ``C`` and ``class_weight`` are selected by grid search on a 3-fold
    ``TimeSeriesSplit`` scored with MCC; the best pipeline is refit on all of
    ``X_tr`` and applied to ``X_te``.

    Returns
    -------
    (y_hat, proba, best_params)
        y_hat : predicted labels in {-1, 0, 1}, the arg-max of ``proba``.
        proba : probabilities with columns ordered (-1, 0, 1).
        best_params : the grid-search winner.
    """
    # Uses the notebook-level imports and the module-level `mcc_scorer`; the
    # former function-scope imports and shadowing nested helper were redundant.
    target_order = (-1, 0, 1)
    pipe = Pipeline([
        ("scaler", StandardScaler()),
        # NOTE(review): `multi_class` is reported as deprecated in recent
        # scikit-learn releases — confirm against the pinned version.
        ("clf", LogisticRegression(max_iter=2000, multi_class="multinomial")),
    ])
    grid = {"clf__C": [0.3, 1.0, 3.0], "clf__class_weight": [None, "balanced"]}
    gs = GridSearchCV(pipe, grid, scoring=mcc_scorer(), cv=TimeSeriesSplit(n_splits=3),
                      n_jobs=-1, refit=True)
    gs.fit(X_tr, y_tr)

    best = gs.best_estimator_
    proba = reorder_proba(best.predict_proba(X_te), best.named_steps["clf"].classes_,
                          target_order=target_order)
    # Vectorised decode of the arg-max column back to its label.
    y_hat = np.asarray(target_order)[proba.argmax(axis=1)]
    return y_hat, proba, gs.best_params_

In [24]:
def fit_predict_hgb(X_tr, y_tr, X_te):
    """Tune, calibrate and apply a histogram gradient-boosting classifier.

    ``max_depth`` is selected by grid search on a 3-fold ``TimeSeriesSplit``
    scored with MCC (the other grid entries are single values). The winner is
    then wrapped in an isotonic ``CalibratedClassifierCV`` using a 3-fold
    ``TimeSeriesSplit``, fit on the training data and applied to ``X_te``.

    Returns
    -------
    (y_hat, proba, best_params)
        y_hat : predicted labels in {-1, 0, 1}, the arg-max of ``proba``.
        proba : calibrated probabilities with columns ordered (-1, 0, 1).
        best_params : the grid-search winner.
    """
    # Uses the notebook-level imports and the module-level `mcc_scorer`; the
    # former function-scope imports and shadowing nested helper were redundant.
    target_order = (-1, 0, 1)
    base = HistGradientBoostingClassifier(random_state=42)
    grid = {
        "max_depth": [3, None],
        "learning_rate": [0.1],
        "max_iter": [300],
        "min_samples_leaf": [20],
        "l2_regularization": [0.0],
    }
    gs = GridSearchCV(base, grid, scoring=mcc_scorer(), cv=TimeSeriesSplit(n_splits=3),
                      n_jobs=-1, refit=True)
    gs.fit(X_tr, y_tr)

    best = gs.best_estimator_
    calib = CalibratedClassifierCV(best, method="isotonic", cv=TimeSeriesSplit(n_splits=3))
    calib.fit(X_tr, y_tr)

    proba = reorder_proba(calib.predict_proba(X_te), calib.classes_, target_order=target_order)
    # Vectorised decode of the arg-max column back to its label.
    y_hat = np.asarray(target_order)[proba.argmax(axis=1)]
    return y_hat, proba, gs.best_params_

In [25]:
# ---------- Run ablation for a (model, feature_set) ----------
def run_feature_ablation(model_name, trainer, feature_name, X):
    """Run one (model, feature set) ablation over every rolling split.

    For each entry of the notebook-level ``splits`` the function slices ``X``
    and ``y``, drops rows with NaN features, calls ``trainer`` and records the
    per-split metrics, predictions and best hyper-parameters. Splits with
    fewer than 30 training rows or fewer than 5 test rows after the NaN drop
    are skipped.

    Parameters
    ----------
    model_name, feature_name : labels used in the result rows and file names.
    trainer : callable ``(X_tr, y_tr, X_te) -> (y_hat, proba, best_params)``.
    X : design matrix, row-aligned with ``y`` and ``df_feats``.

    Returns
    -------
    (mdf, pdf, bdf, pooled, mc, dm, n_aligned) : per-split metrics, pooled
    predictions, best parameters, pooled metrics with CIs, the McNemar and
    Diebold–Mariano dicts from ``eval_vs_rw`` and the aligned row count. The
    last four are empty / 0 when no split produced predictions.

    Side effects: writes ``day4_<model>_<feature>_{per_split,preds,best_params}.csv``
    and, when predictions exist, ``..._pooled_ci.csv`` under ``Data/10_day_run/``.
    """
    results, preds_records, best_rows = [], [], []
    for sid, tr0, tr1, te0, te1 in splits:
        # Split bounds are inclusive positions, hence the +1 on the slice ends.
        X_tr = X.iloc[tr0:tr1+1].copy(); y_tr = y.iloc[tr0:tr1+1].copy()
        X_te = X.iloc[te0:te1+1].copy(); y_te = y.iloc[te0:te1+1].copy()

        # Drop lag-induced NaNs
        tr_mask = X_tr.notna().all(axis=1)
        te_mask = X_te.notna().all(axis=1)
        X_tr, y_tr = X_tr[tr_mask], y_tr[tr_mask]
        X_te, y_te = X_te[te_mask], y_te[te_mask]
        if len(X_tr) < 30 or len(X_te) < 5:
            continue

        y_hat, proba, best_params = trainer(X_tr, y_tr, X_te)
        acc, f1w, f1m, mcc, bri = metric_tuple(y_te.values, y_hat, proba, labels_order)

        results.append({
            "model": model_name, "feature_set": feature_name, "split_id": sid, "test_size": len(y_te),
            "Accuracy": acc, "Weighted F1": f1w, "Macro F1": f1m, "MCC": mcc, "Brier": bri
        })

        # Look the weeks up by the surviving row labels on `df_feats` (the frame
        # X was built from). The global `df` is rebound by a later cell's
        # `for df in ...` loop, which would silently yield the wrong weeks.
        weeks = df_feats.loc[X_te.index, "Week"].to_numpy()
        tmp = pd.DataFrame({"Week": weeks,
                            "split_id": sid, "y_true": y_te.values, "y_pred": y_hat})
        for j, c in enumerate(labels_order):
            tmp[f"p_{c}"] = proba[:, j]
        preds_records.append(tmp)

        best_rows.append({"model": model_name, "feature_set": feature_name, "split_id": sid, **best_params})

    mdf = pd.DataFrame(results)
    pdf = pd.concat(preds_records, ignore_index=True) if preds_records else pd.DataFrame()
    bdf = pd.DataFrame(best_rows)

    base = f"Data/10_day_run/day4_{model_name}_{feature_name}"
    mdf.to_csv(base + "_per_split.csv", index=False)
    pdf.to_csv(base + "_preds.csv", index=False)
    bdf.to_csv(base + "_best_params.csv", index=False)

    # Pooled CIs + significance vs RW
    if not pdf.empty:
        pooled = pooled_with_ci(pdf, block_len=10, B=2000, seed=42)
        pooled.insert(0, "model", model_name)
        pooled.insert(1, "feature_set", feature_name)
        pooled.to_csv(base + "_pooled_ci.csv", index=False)

        # eval_vs_rw takes a path, so it re-reads the predictions file written above.
        mc, dm, n_aligned = eval_vs_rw(base + "_preds.csv")
    else:
        pooled = pd.DataFrame(); mc = {}; dm = {}; n_aligned = 0

    return mdf, pdf, bdf, pooled, mc, dm, n_aligned

## Run the ablations and produce compact summary tables

In [26]:
# ---------- Run: LR and HGB × 5 feature sets ----------
# Reuse the mapping defined earlier instead of repeating the literal, so the
# two can never drift apart; dict() keeps the same keys, values and order.
feature_mats = dict(feature_sets)

# Accumulators filled by the ablation loop in the next cell.
summary_rows = []
mcc_rows = []


In [None]:
# Every model is run on every feature set; each run appends one row to
# `summary_rows` and, when pooled metrics exist, one row to `mcc_rows`.
for model_name, trainer in (("LR", fit_predict_lr), ("HGB", fit_predict_hgb)):
    for feat_name, X in feature_mats.items():
        print(f"Running {model_name} × {feat_name} ...")
        mdf, pdf, bdf, pooled, mc, dm, n = run_feature_ablation(model_name, trainer, feat_name, X)

        # Significance summary (NaN where a test result is unavailable)
        summary_rows.append(dict(
            model=model_name,
            feature_set=feat_name,
            aligned_weeks=n,
            McNemar_n01=mc.get("n01", np.nan),
            McNemar_n10=mc.get("n10", np.nan),
            McNemar_p=mc.get("p_value", np.nan),
            DM_stat=dm.get("dm_stat", np.nan),
            DM_p=dm.get("p_value", np.nan),
            DM_mean_loss_diff=dm.get("mean_diff", np.nan),
        ))

        # MCC compact table
        if pooled.empty:
            continue
        m = pooled[pooled["Metric"]=="MCC"].iloc[0]
        mcc_rows.append({
            "model": model_name,
            "feature_set": feat_name,
            **{key: m[key] for key in ("Point", "CI Lower (95%)", "CI Upper (95%)", "n_weeks")},
        })

In [28]:
# Materialise the rows collected by the ablation loop.
summary_df = pd.DataFrame(summary_rows)
mcc_table  = pd.DataFrame(mcc_rows)

In [29]:
# Persist both summary tables and echo the paths that were written.
summary_df.to_csv("Data/10_day_run/day4_summary_significance.csv", index=False)
mcc_table.to_csv("Data/10_day_run/day4_mcc_comparison.csv", index=False)

print("Saved:")
print("Data/10_day_run/day4_summary_significance.csv")
print("Data/10_day_run/day4_mcc_comparison.csv")

Saved:
Data/10_day_run/day4_summary_significance.csv
Data/10_day_run/day4_mcc_comparison.csv


In [30]:
summary_df

Unnamed: 0,model,feature_set,aligned_weeks,McNemar_n01,McNemar_n10,McNemar_p,DM_stat,DM_p,DM_mean_loss_diff
0,LR,news_only,120,25,32,0.426777,0.930512,0.352106,0.058333
1,LR,filings_only,120,29,35,0.531971,0.751764,0.452193,0.05
2,LR,market_only,120,31,26,0.596242,-0.66348,0.507023,-0.041667
3,LR,news_plus_filings,120,28,36,0.381574,1.004193,0.315286,0.066667
4,LR,news_filings_volume,120,28,33,0.608548,0.64128,0.521341,0.041667
5,HGB,news_only,120,26,34,0.366157,1.037417,0.299542,0.066667
6,HGB,filings_only,120,23,34,0.185326,1.470046,0.141549,0.091667
7,HGB,market_only,120,25,36,0.200415,1.420193,0.155552,0.091667
8,HGB,news_plus_filings,120,27,35,0.374003,1.020399,0.307539,0.066667
9,HGB,news_filings_volume,120,24,31,0.418492,0.947403,0.343433,0.058333


In [31]:
mcc_table

Unnamed: 0,model,feature_set,Point,CI Lower (95%),CI Upper (95%),n_weeks
0,LR,news_only,-0.043054,-0.173905,0.094212,120
1,LR,filings_only,-0.02382,-0.145022,0.102527,120
2,LR,market_only,0.080077,-0.04482,0.192079,120
3,LR,news_plus_filings,-0.055214,-0.153734,0.040854,120
4,LR,news_filings_volume,-0.029851,-0.133759,0.078052,120
5,HGB,news_only,-0.085468,-0.19505,0.027178,120
6,HGB,filings_only,-0.149956,-0.251317,-0.034757,120
7,HGB,market_only,-0.11513,-0.233177,0.027814,120
8,HGB,news_plus_filings,-0.127649,-0.209639,-0.024097,120
9,HGB,news_filings_volume,-0.099405,-0.216193,0.018686,120


## Results

In [34]:
# Day 4 — Deep-dive on the two selected ablation rows (LR × market_only vs LR × news_filings_volume)
# We'll compute confusion matrices, align to Random-Walk, and build simple reliability tables.

import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, matthews_corrcoef

# Load inputs
mkt_pooled  = pd.read_csv("Data/10_day_run/day4_LR_market_only_pooled_ci.csv")
nfv_pooled  = pd.read_csv("Data/10_day_run/day4_LR_news_filings_volume_pooled_ci.csv")
mkt_preds   = pd.read_csv("Data/10_day_run/day4_LR_market_only_preds.csv")
nfv_preds   = pd.read_csv("Data/10_day_run/day4_LR_news_filings_volume_preds.csv")

# NOTE: this rebinds the notebook-level `rw`; after the loop below its "Week"
# column is datetime rather than the strings read from the CSV.
rw = pd.read_csv("Data/10_day_run/rw_predictions_all.csv")

# Ensure consistent dtypes.
# The loop variable is deliberately not called `df`: that name is the
# notebook's main dataset, which run_feature_ablation reads, and iterating
# with it would leave `df` bound to the Random-Walk frame.
for preds_frame in (mkt_preds, nfv_preds, rw):
    preds_frame["Week"] = pd.to_datetime(preds_frame["Week"], errors="coerce")
    for int_col in ("y_true", "y_pred"):
        if int_col in preds_frame.columns:
            preds_frame[int_col] = preds_frame[int_col].astype(int)

# Helper: pooled metrics recomputed straight from a predictions frame (sanity check)
labels_order = [-1,0,1]
def pooled_metrics_from_preds(df):
    """Accuracy, weighted/macro F1, MCC and row count of a predictions frame.

    Reads the ``y_true`` and ``y_pred`` columns of ``df``; both F1 scores are
    computed over ``labels_order`` with ``zero_division=0``. Returns a Series
    keyed ``Accuracy``, ``Weighted F1``, ``Macro F1``, ``MCC``, ``n``.
    """
    truth = df["y_true"].to_numpy()
    guess = df["y_pred"].to_numpy()
    f1_by_average = {
        average: f1_score(truth, guess, average=average, labels=labels_order, zero_division=0)
        for average in ("weighted", "macro")
    }
    return pd.Series({
        "Accuracy": accuracy_score(truth, guess),
        "Weighted F1": f1_by_average["weighted"],
        "Macro F1": f1_by_average["macro"],
        "MCC": matthews_corrcoef(truth, guess),
        "n": len(df),
    })

# Pooled metrics for the two selected LR runs.
mkt_pm = pooled_metrics_from_preds(mkt_preds)
nfv_pm = pooled_metrics_from_preds(nfv_preds)

# Confusion matrices
def cm_df(df, labels=(-1,0,1)):
    """Confusion matrix of ``df['y_true']`` vs ``df['y_pred']`` as a labelled frame.

    Rows are named ``true_<label>`` and columns ``pred_<label>``, both in the
    order given by ``labels``.
    """
    counts = confusion_matrix(df["y_true"], df["y_pred"], labels=labels)
    row_names = [f"true_{l}" for l in labels]
    col_names = [f"pred_{l}" for l in labels]
    return pd.DataFrame(counts, index=row_names, columns=col_names)

# One confusion matrix per selected run.
mkt_cm = cm_df(mkt_preds)
nfv_cm = cm_df(nfv_preds)

# Align each model to RW on the exact same weeks and compute RW metrics
def aligned_rw_metrics(model_preds):
    """Random-Walk metrics on the rows it shares with ``model_preds``.

    The notebook-level ``rw`` frame is inner-joined with ``model_preds`` on
    ``(Week, split_id, y_true)``. Returns a dict with ``n_aligned`` and the RW
    accuracy, weighted F1, macro F1 and MCC on the aligned rows.
    """
    join_keys = ["Week","split_id","y_true"]
    baseline = rw.rename(columns={"y_pred":"y_pred_rw"})
    merged = baseline.merge(model_preds[join_keys], on=join_keys, how="inner")

    truth = merged["y_true"].astype(int).to_numpy()
    rw_guess = merged["y_pred_rw"].astype(int).to_numpy()

    rw_stats = {"n_aligned": len(merged)}
    rw_stats["RW_Accuracy"] = accuracy_score(truth, rw_guess)
    rw_stats["RW_WeightedF1"] = f1_score(truth, rw_guess, average="weighted", labels=labels_order, zero_division=0)
    rw_stats["RW_MacroF1"] = f1_score(truth, rw_guess, average="macro", labels=labels_order, zero_division=0)
    rw_stats["RW_MCC"] = matthews_corrcoef(truth, rw_guess)
    return rw_stats

# Random-Walk metrics on the weeks covered by each selected run.
mkt_rw = aligned_rw_metrics(mkt_preds)
nfv_rw = aligned_rw_metrics(nfv_preds)

# Reliability tables: equal-width probability bins for one class's predicted probability
def reliability_table(df, cls, n_bins=10):
    """Binned reliability table for one class.

    Parameters
    ----------
    df : frame with a ``y_true`` column and a ``p_<cls>`` probability column.
    cls : class label whose predicted probability is assessed.
    n_bins : number of equal-width bins over [0, 1].

    Returns
    -------
    DataFrame with one row per bin: ``bin_mid``, ``n`` (rows in the bin),
    ``mean_pred`` (mean predicted probability) and ``emp_freq`` (share of rows
    whose true label is ``cls``). Empty bins have ``n == 0`` and NaN statistics.
    """
    p = df[f"p_{cls}"].to_numpy()
    y = (df["y_true"].to_numpy() == cls).astype(int)
    bins = np.linspace(0, 1, n_bins + 1)
    mids = 0.5 * (bins[:-1] + bins[1:])

    # digitize uses right-open bins, so p == 1.0 would get index n_bins and fall
    # outside every bin; fold the upper edge into the last bin instead.
    idx = np.digitize(p, bins) - 1
    idx[p == bins[-1]] = n_bins - 1

    rows = []
    for b in range(n_bins):
        mask = idx == b
        n = int(mask.sum())
        if n == 0:
            rows.append({"bin_mid": mids[b], "n": 0, "mean_pred": np.nan, "emp_freq": np.nan})
        else:
            rows.append({
                "bin_mid": mids[b],
                "n": n,
                "mean_pred": float(p[mask].mean()),
                "emp_freq": float(y[mask].mean()),
            })
    return pd.DataFrame(rows)

# One reliability table per class (up = 1, neutral = 0, down = -1) and per run.
mkt_rel_up   = reliability_table(mkt_preds, 1)
mkt_rel_neu  = reliability_table(mkt_preds, 0)
mkt_rel_down = reliability_table(mkt_preds, -1)

nfv_rel_up   = reliability_table(nfv_preds, 1)
nfv_rel_neu  = reliability_table(nfv_preds, 0)
nfv_rel_down = reliability_table(nfv_preds, -1)

# Package key summaries to display: one row per selected run, combining the
# model's pooled metrics with the Random-Walk metrics on the same weeks.
summary_rows = [
    {
        "model": "LR",
        "feature_set": feature_set,
        **model_stats.to_dict(),
        # The RW_* keys are carried over unchanged; n_aligned is reported separately.
        **{key: value for key, value in rw_stats.items() if key != "n_aligned"},
        "aligned_weeks": rw_stats["n_aligned"],
    }
    for feature_set, model_stats, rw_stats in (
        ("market_only", mkt_pm, mkt_rw),
        ("news_filings_volume", nfv_pm, nfv_rw),
    )
]
summary = pd.DataFrame(summary_rows)

# Save everything for the report
# Confusion matrices keep their index because it carries the true_<label> row names.
mkt_cm.to_csv("Data/10_day_run/day4_lr_market_only_confusion_matrix.csv", index=True)
nfv_cm.to_csv("Data/10_day_run/day4_lr_news_filings_volume_confusion_matrix.csv", index=True)

# Reliability tables: one file per class for the market-only run ...
mkt_rel_up.to_csv("Data/10_day_run/day4_lr_market_only_reliability_up.csv", index=False)
mkt_rel_neu.to_csv("Data/10_day_run/day4_lr_market_only_reliability_neutral.csv", index=False)
mkt_rel_down.to_csv("Data/10_day_run/day4_lr_market_only_reliability_down.csv", index=False)

# ... and for the news + filings + volume run.
nfv_rel_up.to_csv("Data/10_day_run/day4_lr_news_filings_volume_reliability_up.csv", index=False)
nfv_rel_neu.to_csv("Data/10_day_run/day4_lr_news_filings_volume_reliability_neutral.csv", index=False)
nfv_rel_down.to_csv("Data/10_day_run/day4_lr_news_filings_volume_reliability_down.csv", index=False)

# Cell output: pooled model metrics plus the aligned Random-Walk metrics for
# both runs, so the core numbers can be copied into Results.
{
 "LR_market_only": {"pooled": mkt_pm.to_dict(), **mkt_rw},
 "LR_news_filings_volume": {"pooled": nfv_pm.to_dict(), **nfv_rw}
}


{'LR_market_only': {'pooled': {'Accuracy': 0.4166666666666667,
   'Weighted F1': 0.37254765984063526,
   'Macro F1': 0.33772266188028616,
   'MCC': 0.0800769866871827,
   'n': 120.0},
  'n_aligned': 120,
  'RW_Accuracy': 0.375,
  'RW_WeightedF1': 0.37526197364191577,
  'RW_MacroF1': 0.37487022712817253,
  'RW_MCC': np.float64(0.04812781537265903)},
 'LR_news_filings_volume': {'pooled': {'Accuracy': 0.3333333333333333,
   'Weighted F1': 0.33045045514557714,
   'Macro F1': 0.3153243925601649,
   'MCC': -0.02985085940311298,
   'n': 120.0},
  'n_aligned': 120,
  'RW_Accuracy': 0.375,
  'RW_WeightedF1': 0.37526197364191577,
  'RW_MacroF1': 0.37487022712817253,
  'RW_MCC': np.float64(0.04812781537265903)}}