In [8]:
# ================================
# Minimal eval of HINT-trained models on CTOD and HINT
# ================================
from pathlib import Path
import joblib, numpy as np
from scipy import sparse as sp
import xgboost as xgb
from sklearn.metrics import average_precision_score, roc_auc_score, precision_recall_curve

# Phase sub-directory names; the evaluation loop below runs once per entry.
PHASES = ["phase_I", "phase_II", "phase_III"]
# Model root: load_hint_model() looks in MODEL_BASE/<phase> for the saved model file.
# NOTE(review): these are absolute, machine-specific paths — they will not resolve
# on another machine or runtime; consider a configurable base directory.
MODEL_BASE = Path("/Users/antoniocortes/Tese/MyModel(hybrid)/xgb_model_package")
# CTOD test artifacts: load_ctod_test() reads from CTOD_ARTIFACTS/<phase>.
CTOD_ARTIFACTS = Path("/Users/antoniocortes/Tese/MyModel(hybrid)/ctod_xgb_artifacts")
# HINT test artifacts: load_hint_test() reads from HINT_ARTIFACTS/<phase>, falling
# back to HINT_ARTIFACTS/phases_reduced/<phase>.
HINT_ARTIFACTS = Path("/Users/antoniocortes/Tese/MyModel(hybrid)/hint_xgb_artifacts")

def load_hint_model(phase):
    """Load the saved XGBoost model for one phase.

    Looks in ``MODEL_BASE / phase``. A ``hint_xgb_model.joblib`` file is
    preferred when present; otherwise the native ``xgb_model.json`` file is
    loaded into a fresh ``XGBClassifier``.

    Parameters
    ----------
    phase : str
        Phase sub-directory name (e.g. an entry of ``PHASES``).

    Returns
    -------
    object
        Whatever the joblib file contains, or an ``xgb.XGBClassifier`` restored
        from the JSON model file.
    """
    model_dir = MODEL_BASE / phase
    joblib_path = model_dir / "hint_xgb_model.joblib"
    if joblib_path.exists():
        # NOTE: joblib.load unpickles the file, which can execute arbitrary
        # code — only point this at model artifacts you produced yourself.
        return joblib.load(joblib_path)

    # Use the estimator's public loader instead of assigning the private
    # `_Booster` attribute, so the wrapper restores its own state from the file.
    classifier = xgb.XGBClassifier()
    classifier.load_model(str(model_dir / "xgb_model.json"))
    return classifier

def load_ctod_test(phase):
    """Read the CTOD test split stored for ``phase``.

    Returns a ``(X, y)`` pair: the sparse matrix saved as
    ``X_test_joined_reduced.npz`` and the array saved as
    ``y_test_joined.npy``, both under ``CTOD_ARTIFACTS / phase``.
    """
    phase_dir = CTOD_ARTIFACTS / phase
    features = sp.load_npz(phase_dir / "X_test_joined_reduced.npz")
    labels = np.load(phase_dir / "y_test_joined.npy")
    return features, labels

def load_hint_test(phase):
    """Read the HINT test split stored for ``phase``.

    Each file (``X_test_reduced.npz`` and ``y_test.npy``) is looked up
    independently: the copy under ``HINT_ARTIFACTS / phase`` is used when it
    exists, otherwise the one under ``HINT_ARTIFACTS / "phases_reduced" / phase``.

    Returns a ``(X, y)`` pair: the loaded sparse matrix and the loaded array.
    """
    primary_dir = HINT_ARTIFACTS / phase
    fallback_dir = HINT_ARTIFACTS / "phases_reduced" / phase

    def _locate(filename):
        # Prefer the file directly under the phase folder.
        direct = primary_dir / filename
        if direct.exists():
            return direct
        return fallback_dir / filename

    features_path = _locate("X_test_reduced.npz")
    labels_path = _locate("y_test.npy")
    return sp.load_npz(features_path), np.load(labels_path)

def best_f1_from_pr(y, p):
    """Return the largest F1 found along the precision-recall curve.

    Parameters
    ----------
    y : array-like
        Ground-truth labels, passed straight to ``precision_recall_curve``.
    p : array-like
        Scores for the positive class, passed straight to
        ``precision_recall_curve``.

    Returns
    -------
    float
        Maximum F1 over the curve points that have a threshold, or ``nan``
        when no such point exists.
    """
    prec, rec, _ = precision_recall_curve(y, p)
    # Small epsilon keeps the division defined where precision + recall == 0.
    f1 = 2 * prec * rec / (prec + rec + 1e-9)
    # The final curve point has no associated threshold, so it is excluded.
    candidates = f1[:-1]
    # Guard the array that is actually reduced: np.nanmax raises on empty input.
    if candidates.size == 0:
        return np.nan
    return float(np.nanmax(candidates))

def quick_report(tag, y, p):
    """Print a one-line summary of ranking metrics for one evaluation set.

    The line shows average precision, ROC AUC and the best F1 along the PR
    curve (all to four decimals), followed by the number of entries of ``y``
    equal to 1 and equal to 0.
    """
    avg_precision = average_precision_score(y, p)
    roc_auc = roc_auc_score(y, p)
    top_f1 = best_f1_from_pr(y, p)
    n_pos = int((y == 1).sum())
    n_neg = int((y == 0).sum())
    print(f"{tag}: AP={avg_precision:.4f} | ROC={roc_auc:.4f} | F1*={top_f1:.4f} | pos={n_pos} neg={n_neg}")

# Per-phase (labels, scores) pairs, one dict per evaluation dataset.
our_ctod = {}
our_hint = {}

for phase in PHASES:
    # --- CTOD test split ---
    Xc, yc = load_ctod_test(phase)
    mdl = load_hint_model(phase)
    pc = mdl.predict_proba(Xc)[:, 1]  # keep column 1 of predict_proba as the score
    our_ctod[phase] = (yc, pc)
    quick_report(f"CTOD/{phase}", yc, pc)

    # --- HINT test split (same model object as above) ---
    Xh, yh = load_hint_test(phase)
    ph = mdl.predict_proba(Xh)[:, 1]
    our_hint[phase] = (yh, ph)
    quick_report(f"HINT/{phase}", yh, ph)

print("Done.")


CTOD/phase_I: AP=0.9616 | ROC=0.8673 | F1*=0.9335 | pos=4146 neg=876
HINT/phase_I: AP=0.9653 | ROC=0.9227 | F1*=0.9418 | pos=897 neg=263
CTOD/phase_II: AP=0.9157 | ROC=0.8329 | F1*=0.9152 | pos=5140 neg=1598
HINT/phase_II: AP=0.8256 | ROC=0.8329 | F1*=0.8639 | pos=863 neg=586
CTOD/phase_III: AP=0.9378 | ROC=0.8030 | F1*=0.9392 | pos=3745 neg=720
HINT/phase_III: AP=0.8797 | ROC=0.7991 | F1*=0.9017 | pos=641 neg=252
Done.


In [1]:
# Mount Google Drive inside the Colab runtime at /content/drive.
# NOTE(review): the evaluation cell reads its artifacts from /Users/... paths,
# which are not under this mount point — confirm which environment this
# notebook is meant to run in.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [10]:
# ===============================================
# Minimal Unified Comparison (CTOD & HINT) in %
# No mean columns
# ===============================================
import numpy as np, pandas as pd
from IPython.display import display, Markdown
from sklearn.metrics import average_precision_score, roc_auc_score, precision_recall_curve

# Phase keys, in the order they appear as column groups in the tables below.
PHASES = ["phase_I", "phase_II", "phase_III"]
# Metric names; these are also the dict keys used by every per-phase result dict.
METRICS = ("PR (AP)", "F1 (Best)", "ROC AUC")

# Inputs expected from the evaluation cell (it must have been run first):
# our_ctod = {phase: (y, proba)}, our_hint = {phase: (y, proba)}

def best_f1(y, p):
    """Return the largest F1 found along the precision-recall curve.

    Parameters
    ----------
    y : array-like
        Ground-truth labels, passed straight to ``precision_recall_curve``.
    p : array-like
        Scores for the positive class, passed straight to
        ``precision_recall_curve``.

    Returns
    -------
    float
        Maximum F1 (as a fraction, not a percentage) over the curve points
        that have a threshold, or ``nan`` when no such point exists.
    """
    pr, rc, _ = precision_recall_curve(y, p)
    # Small epsilon keeps the division defined where precision + recall == 0.
    f1 = 2 * pr * rc / (pr + rc + 1e-9)
    # The final curve point has no associated threshold, so it is excluded.
    candidates = f1[:-1]
    # Guard the array that is actually reduced: np.nanmax raises on empty input.
    if candidates.size == 0:
        return np.nan
    return float(np.nanmax(candidates))

def pct_metrics(y, p):
    """Compute the three comparison metrics for one (labels, scores) pair.

    Returns a dict keyed ``"PR (AP)"``, ``"F1 (Best)"`` and ``"ROC AUC"``,
    each value being the corresponding score multiplied by 100.
    """
    ap_pct = 100 * average_precision_score(y, p)
    f1_pct = 100 * best_f1(y, p)
    roc_pct = 100 * roc_auc_score(y, p)
    return {"PR (AP)": ap_pct, "F1 (Best)": f1_pct, "ROC AUC": roc_pct}

def metrics_from_ours(phase_to_y_p):
    """Turn ``{phase: (y, proba)}`` into ``{phase: {metric: percent}}``.

    Only the phases listed in ``PHASES`` are read, in that order; each pair is
    unpacked into ``pct_metrics``.
    """
    per_phase = {}
    for phase in PHASES:
        labels, scores = phase_to_y_p[phase]
        per_phase[phase] = pct_metrics(labels, scores)
    return per_phase

def make_df(model_results):
    """Build the wide comparison table.

    ``model_results`` maps ``model_name -> phase -> metric -> value``. The
    result has one row per model and one column per ``(phase, metric)`` pair,
    ordered as the product of ``PHASES`` and ``METRICS``.
    """
    wanted = pd.MultiIndex.from_product([PHASES, METRICS])
    # Flatten each model's nested dict into {(phase, metric): value}.
    flat = {
        model: {(phase, metric): per_phase[phase][metric] for phase, metric in wanted}
        for model, per_phase in model_results.items()
    }
    table = pd.DataFrame.from_dict(flat, orient="index")
    return table[wanted]

def style_bold(df):
    """Return a Styler that prints two decimals and bolds each column's maximum.

    Every cell equal to its column's maximum gets ``font-weight:700``, so ties
    are all bolded.
    """
    def _highlight_column_max(column):
        top = column.max()
        css = []
        for value in column:
            css.append('font-weight:700' if value == top else '')
        return css

    formatted = df.style.format("{:.2f}")
    return formatted.apply(_highlight_column_max, axis=0)

def winners_only(df):
    """Name the top-scoring row for every (phase, metric) column of ``df``.

    Returns a frame indexed by ``METRICS`` with one column per entry of
    ``PHASES``; each cell holds the index label returned by ``idxmax`` for
    that column of ``df``.
    """
    table = pd.DataFrame(index=METRICS, columns=PHASES)
    pairs = [(phase, metric) for phase in PHASES for metric in METRICS]
    for phase, metric in pairs:
        leader = df[(phase, metric)].idxmax()
        table.loc[metric, phase] = leader
    return table

# --- Our Model (computed live) ---
# Requires our_ctod / our_hint ({phase: (y, proba)}) from the evaluation cell;
# this cell fails with NameError on a fresh kernel if that cell has not run.
our_ctod_metrics = metrics_from_ours(our_ctod)
our_hint_metrics = metrics_from_ours(our_hint)

# --- Carolina & Baselines (already in %) ---
# Hard-coded reference scores, keyed phase -> metric name, on a 0-100 scale.
# NOTE(review): the source of these numbers (paper, table or run) is not
# recorded in this cell — add a citation so they can be re-checked.

# Reference scores used in the HINT comparison table.
carolina_hint = {
    "phase_I":{"PR (AP)":86.88,"F1 (Best)":87.50,"ROC AUC":69.14},
    "phase_II":{"PR (AP)":68.22,"F1 (Best)":75.98,"ROC AUC":58.74},
    "phase_III":{"PR (AP)":77.75,"F1 (Best)":80.27,"ROC AUC":56.28},
}
baseline_hint_hint = {
    "phase_I":{"PR (AP)":79.70,"F1 (Best)":87.10,"ROC AUC":54.70},
    "phase_II":{"PR (AP)":61.20,"F1 (Best)":74.40,"ROC AUC":52.70},
    "phase_III":{"PR (AP)":74.80,"F1 (Best)":83.40,"ROC AUC":56.10},
}
baseline_lr_hint = {
    "phase_I":{"PR (AP)":78.10,"F1 (Best)":87.40,"ROC AUC":51.70},
    "phase_II":{"PR (AP)":60.30,"F1 (Best)":74.50,"ROC AUC":48.50},
    "phase_III":{"PR (AP)":72.40,"F1 (Best)":83.90,"ROC AUC":50.50},
}
baseline_mlp_hint = {
    "phase_I":{"PR (AP)":77.60,"F1 (Best)":87.20,"ROC AUC":51.80},
    "phase_II":{"PR (AP)":62.10,"F1 (Best)":74.50,"ROC AUC":52.30},
    "phase_III":{"PR (AP)":71.70,"F1 (Best)":83.50,"ROC AUC":49.60},
}
baseline_xgb_hint = {
    "phase_I":{"PR (AP)":77.40,"F1 (Best)":87.20,"ROC AUC":48.40},
    "phase_II":{"PR (AP)":58.70,"F1 (Best)":74.30,"ROC AUC":52.70},
    "phase_III":{"PR (AP)":72.10,"F1 (Best)":83.50,"ROC AUC":51.30},
}

# Reference scores used in the CTOD comparison table.
carolina_ctod = {
    "phase_I":{"PR (AP)":91.58,"F1 (Best)":86.01,"ROC AUC":77.52},
    "phase_II":{"PR (AP)":85.57,"F1 (Best)":82.18,"ROC AUC":63.31},
    "phase_III":{"PR (AP)":91.15,"F1 (Best)":90.30,"ROC AUC":72.91},
}
baseline_hint_ctod = {
    "phase_I":{"PR (AP)":83.10,"F1 (Best)":84.70,"ROC AUC":66.40},
    "phase_II":{"PR (AP)":77.20,"F1 (Best)":80.50,"ROC AUC":58.40},
    "phase_III":{"PR (AP)":83.00,"F1 (Best)":85.40,"ROC AUC":67.50},
}
baseline_lr_ctod = {
    "phase_I":{"PR (AP)":85.60,"F1 (Best)":83.90,"ROC AUC":70.10},
    "phase_II":{"PR (AP)":80.80,"F1 (Best)":80.70,"ROC AUC":61.00},
    "phase_III":{"PR (AP)":84.10,"F1 (Best)":85.20,"ROC AUC":69.30},
}
baseline_mlp_ctod = {
    "phase_I":{"PR (AP)":86.00,"F1 (Best)":85.50,"ROC AUC":70.30},
    "phase_II":{"PR (AP)":78.40,"F1 (Best)":81.90,"ROC AUC":61.80},
    "phase_III":{"PR (AP)":85.60,"F1 (Best)":88.30,"ROC AUC":71.50},
}
baseline_xgb_ctod = {
    "phase_I":{"PR (AP)":85.80,"F1 (Best)":84.20,"ROC AUC":74.50},
    "phase_II":{"PR (AP)":80.20,"F1 (Best)":82.60,"ROC AUC":61.90},
    "phase_III":{"PR (AP)":85.10,"F1 (Best)":88.90,"ROC AUC":72.40},
}

# Row label -> per-phase metric dict for the CTOD table; insertion order is
# the order in which the models are added to the table.
ctod_models = {
    "Our Model": our_ctod_metrics,
    "Carolina — LIFTED/MMCTO": carolina_ctod,
    "Baseline HINT": baseline_hint_ctod,
    "Baseline LR": baseline_lr_ctod,
    "Baseline MLP": baseline_mlp_ctod,
    "Baseline XGB": baseline_xgb_ctod,
}
# Same row labels for the HINT table, pointing at the HINT-side score dicts.
hint_models = {
    "Our Model": our_hint_metrics,
    "Carolina — LIFTED/MMCTO": carolina_hint,
    "Baseline HINT": baseline_hint_hint,
    "Baseline LR": baseline_lr_hint,
    "Baseline MLP": baseline_mlp_hint,
    "Baseline XGB": baseline_xgb_hint,
}

# Wide comparison tables: one row per model, one (phase, metric) column each.
df_ctod = make_df(ctod_models)
df_hint = make_df(hint_models)


def _show_comparison(metrics_heading, winners_heading, frame):
    """Render one dataset's section: heading, styled metrics, then the winners table."""
    display(Markdown(metrics_heading))
    display(style_bold(frame))
    display(Markdown(winners_heading))
    display(winners_only(frame))


_show_comparison("## CTOD — Metrics (%)", "#### CTOD — Winners", df_ctod)

# Horizontal rule between the two dataset sections.
display(Markdown("---"))

_show_comparison("## HINT — Metrics (%)", "#### HINT — Winners", df_hint)



## CTOD — Metrics (%)

Unnamed: 0_level_0,phase_I,phase_I,phase_I,phase_II,phase_II,phase_II,phase_III,phase_III,phase_III
Unnamed: 0_level_1,PR (AP),F1 (Best),ROC AUC,PR (AP),F1 (Best),ROC AUC,PR (AP),F1 (Best),ROC AUC
Our Model,96.16,93.35,86.73,91.57,91.52,83.29,93.78,93.92,80.3
Carolina — LIFTED/MMCTO,91.58,86.01,77.52,85.57,82.18,63.31,91.15,90.3,72.91
Baseline HINT,83.1,84.7,66.4,77.2,80.5,58.4,83.0,85.4,67.5
Baseline LR,85.6,83.9,70.1,80.8,80.7,61.0,84.1,85.2,69.3
Baseline MLP,86.0,85.5,70.3,78.4,81.9,61.8,85.6,88.3,71.5
Baseline XGB,85.8,84.2,74.5,80.2,82.6,61.9,85.1,88.9,72.4


#### CTOD — Winners

Unnamed: 0,phase_I,phase_II,phase_III
PR (AP),Our Model,Our Model,Our Model
F1 (Best),Our Model,Our Model,Our Model
ROC AUC,Our Model,Our Model,Our Model


---

## HINT — Metrics (%)

Unnamed: 0_level_0,phase_I,phase_I,phase_I,phase_II,phase_II,phase_II,phase_III,phase_III,phase_III
Unnamed: 0_level_1,PR (AP),F1 (Best),ROC AUC,PR (AP),F1 (Best),ROC AUC,PR (AP),F1 (Best),ROC AUC
Our Model,96.53,94.18,92.27,82.56,86.39,83.29,87.97,90.17,79.91
Carolina — LIFTED/MMCTO,86.88,87.5,69.14,68.22,75.98,58.74,77.75,80.27,56.28
Baseline HINT,79.7,87.1,54.7,61.2,74.4,52.7,74.8,83.4,56.1
Baseline LR,78.1,87.4,51.7,60.3,74.5,48.5,72.4,83.9,50.5
Baseline MLP,77.6,87.2,51.8,62.1,74.5,52.3,71.7,83.5,49.6
Baseline XGB,77.4,87.2,48.4,58.7,74.3,52.7,72.1,83.5,51.3


#### HINT — Winners

Unnamed: 0,phase_I,phase_II,phase_III
PR (AP),Our Model,Our Model,Our Model
F1 (Best),Our Model,Our Model,Our Model
ROC AUC,Our Model,Our Model,Our Model
