In [1]:
# P27 — release builder (v1.1)
from pathlib import Path
import json, os, shutil, hashlib, random
import numpy as np
import pandas as pd

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, average_precision_score, brier_score_loss

from google.colab import drive
drive.mount('/content/drive')
BASE = Path("/content/drive/MyDrive/CognitivaAI")
REL  = BASE/"p26_release"

random_state = 42
np.random.seed(random_state)
random.seed(random_state)

def sha256_of_file(p: Path):
    h = hashlib.sha256()
    with open(p, "rb") as f:
        for chunk in iter(lambda: f.read(8192), b""): h.update(chunk)
    return h.hexdigest()

def metrics(y, p):
    return dict(
        AUC=float(roc_auc_score(y,p)),
        PRAUC=float(average_precision_score(y,p)),
        Brier=float(brier_score_loss(y,p)),
    )

print("BASE:", BASE)


Mounted at /content/drive
BASE: /content/drive/MyDrive/CognitivaAI


In [2]:
# Estructura release
dirs = {
  "MODELS": REL/"MODELS",
  "CONFIG": REL/"CONFIG",
  "COLSIG": REL/"CONFIG/column_signatures",
  "DOCS":   REL/"DOCS",
  "QA":     REL/"QA",
  "META":   REL/"META",
  "TMP":    REL/"_tmp"
}
for d in dirs.values(): d.mkdir(parents=True, exist_ok=True)
print("OK → estructura creada en", REL)


OK → estructura creada en /content/drive/MyDrive/CognitivaAI/p26_release


In [3]:
# Comprobaciones mínimas
P24 = BASE/"p24_meta_simple"
P26 = BASE/"p26_intermodal"
CLN = BASE/"clinical"

assert (P24/"p24_model.pkl").exists() and (P24/"p24_platt.pkl").exists(), "Faltan p24_model.pkl o p24_platt.pkl"
assert (P24/"p24_val_preds.csv").exists() and (P24/"p24_test_preds.csv").exists(), "Faltan p24_val/test_preds.csv"
assert (P24/"p24_coefficients.csv").exists(), "Falta p24_coefficients.csv (para firma de columnas imagen)"
assert (P26/"p26_clinical_consolidado.csv").exists(), "Falta p26_clinical_consolidado.csv"
assert (CLN/"p3_clinical_probs.csv").exists(), "Falta p3_clinical_probs.csv (probas clínicas por patient_id)"

# Copiar P24 modelos al release
shutil.copy2(P24/"p24_model.pkl", dirs["MODELS"]/"p24_model.pkl")
shutil.copy2(P24/"p24_platt.pkl", dirs["MODELS"]/"p24_platt.pkl")
print("OK → copiados modelos de P24")


OK → copiados modelos de P24


In [4]:
# Cargamos consolidado clínico y probas clínicas (VAL/TEST)
df_clin = pd.read_csv(P26/"p26_clinical_consolidado.csv")  # 9 features + patient_id
clin_probs = pd.read_csv(CLN/"p3_clinical_probs.csv")      # patient_id, split, y_prob_clin

# Labels (desde P24 preds con y_true)
p24_val = pd.read_csv(P24/"p24_val_preds.csv")  # patient_id, cohort, y_true, y_prob
p24_tst = pd.read_csv(P24/"p24_test_preds.csv")

# Unimos para armar (VAL/TEST) con y_true
val_ids = p24_val[["patient_id","cohort","y_true"]].copy()
tst_ids = p24_tst[["patient_id","cohort","y_true"]].copy()

# Unir clínico con ids
val_clin = val_ids.merge(df_clin, on="patient_id", how="left")
tst_clin = tst_ids.merge(df_clin, on="patient_id", how="left")

# Esquema clínico esperado (9 columnas brutas)
clin_cols = ["Age","Sex","Education","SES","MMSE","eTIV","nWBV","ASF","Delay"]
missing_cols = [c for c in clin_cols if c not in val_clin.columns]
assert not missing_cols, f"Faltan columnas clínicas: {missing_cols}"

# Armar X,y (VAL) para entrenar modelo clínico
Xc_val = val_clin[clin_cols].copy()
yc_val = val_clin["y_true"].astype(int).values

# Pipeline clínico (imputer numérico + OHE Sex + scaler + LR)
num_cols = [c for c in clin_cols if c!="Sex"]
pre = [
    ("impute_num", SimpleImputer(strategy="median")),
    ("scale", StandardScaler(with_mean=True, with_std=True)),
]
# OHE Sex -> convertimos a binaria 'Sex_M' aproximando: M/F (si viniera 0/1 también funciona)
def sex_to_str(s):
    if pd.isna(s): return np.nan
    s = str(s).strip().upper()
    if s in ["M","MALE","H"]: return "M"
    if s in ["F","FEMALE","Mujer".upper()]: return "F"
    return s

for split_df in [Xc_val]:
    split_df["Sex"] = split_df["Sex"].apply(sex_to_str)

ohe = OneHotEncoder(handle_unknown="ignore", sparse_output=False)
# Construimos DataFrame num y cat por separado
X_num = Xc_val[num_cols]
X_sex = pd.DataFrame({"Sex": Xc_val["Sex"]})

num_pipe = Pipeline(pre)
X_num_tr = num_pipe.fit_transform(X_num)
sex_mat  = ohe.fit_transform(X_sex)

# Unimos y entrenamos LR
Xc_val_mat = np.hstack([X_num_tr, sex_mat])
clin_lr = LogisticRegression(max_iter=1000, class_weight=None, solver="lbfgs", random_state=random_state)
clin_lr.fit(Xc_val_mat, yc_val)

# Guardamos componentes del modelo clínico:
import pickle
with open(dirs["MODELS"]/"p26_clin_num_pipe.pkl","wb") as f: pickle.dump(num_pipe, f)
with open(dirs["MODELS"]/"p26_clin_ohe.pkl","wb") as f: pickle.dump(ohe, f)
with open(dirs["MODELS"]/"p26_clin_model.pkl","wb") as f: pickle.dump(clin_lr, f)

print("✅ Modelo clínico entrenado y guardado (VAL).")


✅ Modelo clínico entrenado y guardado (VAL).


In [5]:
# p_img: usar probas calibradas de P24 (y_prob)
val_img = p24_val[["patient_id","cohort","y_true","y_prob"]].rename(columns={"y_prob":"p_img"})
tst_img = p24_tst[["patient_id","cohort","y_true","y_prob"]].rename(columns={"y_prob":"p_img"})

# p_clin: de p3_clinical_probs.csv
val_clp = clin_probs[clin_probs["split"]=="VAL"][["patient_id","y_prob_clin"]].rename(columns={"y_prob_clin":"p_clin"})
tst_clp = clin_probs[clin_probs["split"]=="TEST"][["patient_id","y_prob_clin"]].rename(columns={"y_prob_clin":"p_clin"})

VAL = val_img.merge(val_clp, on="patient_id", how="inner")
TST = tst_img.merge(tst_clp, on="patient_id", how="inner")

print("VAL:", VAL.shape, "| TEST:", TST.shape)
assert VAL["patient_id"].is_unique and TST["patient_id"].is_unique
print(VAL.head(3))


VAL: (69, 5) | TEST: (70, 5)
  patient_id cohort  y_true     p_img    p_clin
0  OAS1_0003   OAS1       1  0.672718  0.422624
1  OAS1_0010   OAS1       0  0.433014  0.431642
2  OAS1_0016   OAS1       1  0.621040  0.451515


In [6]:
# Meta-LR sobre [p_img, p_clin]
X_val = VAL[["p_img","p_clin"]].values
y_val = VAL["y_true"].astype(int).values

meta = LogisticRegression(max_iter=1000, solver="lbfgs", random_state=random_state)
meta.fit(X_val, y_val)

# Predicción cruda
VAL["p_meta_raw"] = meta.predict_proba(X_val)[:,1]

# Evaluación cruda (pre-calibración)
m_val = metrics(y_val, VAL["p_meta_raw"].values)
print("LATE (raw) VAL:", m_val)

# Guardar meta
import pickle
with open(dirs["MODELS"]/"p26_meta_late.pkl","wb") as f: pickle.dump(meta, f)

# Predicción en TEST (cruda)
X_tst = TST[["p_img","p_clin"]].values
y_tst = TST["y_true"].astype(int).values
TST["p_meta_raw"] = meta.predict_proba(X_tst)[:,1]
m_tst = metrics(y_tst, TST["p_meta_raw"].values)
print("LATE (raw) TEST:", m_tst)


LATE (raw) VAL: {'AUC': 0.9040747028862479, 'PRAUC': 0.9251447096482674, 'Brier': 0.20186942272517908}
LATE (raw) TEST: {'AUC': 0.7360197368421053, 'PRAUC': 0.7285510809902204, 'Brier': 0.2286766661338769}


In [7]:
# Platt por cohorte = LR sobre score
def fit_platt(y, s):
    m = LogisticRegression(solver="lbfgs", random_state=random_state)
    m.fit(s.reshape(-1,1), y.astype(int))
    return m

coh_cal = {}
for coh in ["OAS1","OAS2"]:
    mask = VAL["cohort"]==coh
    m = fit_platt(VAL.loc[mask,"y_true"].values, VAL.loc[mask,"p_meta_raw"].values)
    coh_cal[coh] = m

# Guardar calibradores
import pickle
with open(dirs["MODELS"]/"p26b_platt_OAS1.pkl","wb") as f: pickle.dump(coh_cal["OAS1"], f)
with open(dirs["MODELS"]/"p26b_platt_OAS2.pkl","wb") as f: pickle.dump(coh_cal["OAS2"], f)

# Aplicar calibración a VAL y TEST
def platt_pred(m, s): return m.predict_proba(s.reshape(-1,1))[:,1]

VAL["p_cal"] = np.nan
TST["p_cal"] = np.nan
for coh in ["OAS1","OAS2"]:
    vm = VAL["cohort"]==coh
    tm = TST["cohort"]==coh
    VAL.loc[vm,"p_cal"] = platt_pred(coh_cal[coh], VAL.loc[vm,"p_meta_raw"].values)
    TST.loc[tm,"p_cal"] = platt_pred(coh_cal[coh], TST.loc[tm,"p_meta_raw"].values)

print("VAL P26b:", metrics(VAL["y_true"], VAL["p_cal"]))
print("TST P26b:", metrics(TST["y_true"], TST["p_cal"]))


VAL P26b: {'AUC': 0.7614601018675721, 'PRAUC': 0.7293917476096345, 'Brier': 0.23428651719818258}
TST P26b: {'AUC': 0.6842105263157895, 'PRAUC': 0.6595296459970565, 'Brier': 0.24072039406689189}


In [8]:
def best_cost_thr(y, p, C_FN=5.0, C_FP=1.0, grid=1001):
    thrs = np.linspace(0,1,grid)
    best = None
    for t in thrs:
        ypred = (p>=t).astype(int)
        TP = ((y==1)&(ypred==1)).sum()
        FP = ((y==0)&(ypred==1)).sum()
        FN = ((y==1)&(ypred==0)).sum()
        cost = C_FN*FN + C_FP*FP
        if (best is None) or (cost < best["Cost"]-1e-9):
            best = dict(Thr=float(t), Cost=float(cost), TP=int(TP), FP=int(FP), FN=int(FN))
    return best

thr = {}
rows=[]
for coh in ["OAS1","OAS2"]:
    m = VAL["cohort"]==coh
    b = best_cost_thr(VAL.loc[m,"y_true"].values, VAL.loc[m,"p_cal"].values, 5.0, 1.0)
    thr[coh] = b["Thr"]
    rows.append({"Cohort":coh, **b})

thr_json = {"policy":"single","cost_policy":"FN:FP=5:1","thresholds":thr}
with open(dirs["CONFIG"]/"deployment_config.json","w") as f:
    json.dump(thr_json, f, indent=2)

pd.DataFrame(rows).to_csv(dirs["TMP"]/"val_thresholds_cost5to1.csv", index=False)
print("Umbrales (VAL):", thr_json)


Umbrales (VAL): {'policy': 'single', 'cost_policy': 'FN:FP=5:1', 'thresholds': {'OAS1': 0.42, 'OAS2': 0.49}}


In [9]:
def confusion_report(df, thr):
    y = df["y_true"].values.astype(int)
    p = df["p_cal"].values.astype(float)
    ypred = (p>=thr).astype(int)
    TP = int(((y==1)&(ypred==1)).sum())
    FP = int(((y==0)&(ypred==1)).sum())
    TN = int(((y==0)&(ypred==0)).sum())
    FN = int(((y==1)&(ypred==0)).sum())
    prec = TP/(TP+FP) if (TP+FP)>0 else np.nan
    rec  = TP/(TP+FN) if (TP+FN)>0 else np.nan
    acc  = (TP+TN)/len(y)
    cost = 5*FN + 1*FP
    return dict(TP=TP,FP=FP,TN=TN,FN=FN,Precision=prec,Recall=rec,Acc=acc,Cost=cost)

rows=[]
for coh in ["OAS1","OAS2"]:
    d = TST[TST["cohort"]==coh]
    rep = confusion_report(d, thr[coh])
    rows.append(dict(Cohort=coh, Thr=thr[coh], **rep))

rep_df = pd.DataFrame(rows)
out_rep = REL/"QA/p26b_test_report_cost_5to1.csv"
rep_df.to_csv(out_rep, index=False)
print(rep_df)
print("Guardado:", out_rep)

# Guardar preds calibradas para golden set
VAL[["patient_id","cohort","y_true","p_cal"]].to_csv(REL/"QA/p26b_val_preds_calibrated.csv", index=False)
TST[["patient_id","cohort","y_true","p_cal"]].to_csv(REL/"QA/p26b_test_preds_calibrated.csv", index=False)


  Cohort   Thr  TP  FP  TN  FN  Precision  Recall       Acc  Cost
0   OAS1  0.42  14   9  18   6   0.608696     0.7  0.680851    39
1   OAS2  0.49  12  11   0   0   0.521739     1.0  0.521739    11
Guardado: /content/drive/MyDrive/CognitivaAI/p26_release/QA/p26b_test_report_cost_5to1.csv


In [10]:
# Clinical signature (nombres y tipos esperados)
clinical_signature = {
  "fields": [
    {"name":"Age","dtype":"float"},
    {"name":"Sex","dtype":"str","allowed":["M","F"]},
    {"name":"Education","dtype":"float"},
    {"name":"SES","dtype":"float"},
    {"name":"MMSE","dtype":"float"},
    {"name":"eTIV","dtype":"float"},
    {"name":"nWBV","dtype":"float"},
    {"name":"ASF","dtype":"float"},
    {"name":"Delay","dtype":"float"}
  ],
  "notes":"Sex se normaliza a {'M','F'} internamente; numéricos con imputación mediana + StandardScaler."
}
with open(dirs["COLSIG"]/"clinical_signature.json","w") as f:
    json.dump(clinical_signature, f, indent=2)

# Image signature: leemos p24_coefficients.csv (índice = features)
coef = pd.read_csv(P24/"p24_coefficients.csv")
feat_col = "Feature" if "Feature" in coef.columns else coef.columns[0]
img_features = coef[feat_col].tolist()
image_signature = {
  "features": img_features,
  "notes":"Orden/nombres deben coincidir con entrenamiento P24. Si se provee p_img directamente, esta firma puede no ser usada."
}
with open(dirs["COLSIG"]/"image_signature.json","w") as f:
    json.dump(image_signature, f, indent=2)

print("OK → firmas creadas.")


OK → firmas creadas.


In [11]:
model_card = f"""# MODEL CARD — CognitivaAI v1.1 (P26b single)

**Arquitectura:**
- Imagen (P24: modelo + Platt) → `p_img`
- Clínico (LR con imputación+scaler) → `p_clin`
- Fusión Late (meta-LR sobre `p_img`+`p_clin`)
- Calibración Platt **por cohorte** (P26b) → `proba_cal`
- Decisión por coste (FN:FP=5:1) con umbrales aprendidos en VAL:
  - OAS1 = {thr['OAS1']:.3f}
  - OAS2 = {thr['OAS2']:.3f}

**Métricas (TEST, probs calibradas):**
- ALL: AUC≈{roc_auc_score(TST['y_true'], TST['p_cal']):.3f} · PR-AUC≈{average_precision_score(TST['y_true'], TST['p_cal']):.3f} · Brier≈{brier_score_loss(TST['y_true'], TST['p_cal']):.3f}
- OAS1 / OAS2: ver `QA/p26b_test_report_cost_5to1.csv`.

**Suposiciones de entrada:**
- Clínico: 9 campos (ver `clinical_signature.json`).
- Imagen: usar pipeline P24 **o** aportar `p_img` directo.
- Cohorte: 'OAS1'/'OAS2' (para calibrador/umbral). Si se desconoce, usar 'OAS1' por defecto y monitorizar calibración.

**Riesgos & mitigaciones:**
- Descalibración en dominios nuevos → recalibrar Platt por sitio con ≥50–100 casos y actualizar umbral 5:1.
- Tamaño muestral reducido → reportar ICs y monitorizar ECE/MCE.

**Archivos clave:** ver `META/MANIFEST.json`.
"""
howto = """# HOW TO DEPLOY — CognitivaAI v1.1

## 1) Entorno
- Python 3.10+
- Instala dependencias de `META/ENVIRONMENT.txt` (bloqueado sklearn).

## 2) Inferencia por lote (CSV)
- Prepara CSV con columnas clínicas (ver `clinical_signature.json`) y/o `p_img`.
- Ejecuta `scripts/predict_batch.py` (opcional: crearlo) para leer CSV y generar `proba_cal` y `decision`.

## 3) API (opcional)
- Montar un endpoint `/predict` que:
  - Valide `clinical` y/o calcule `p_img` con P24.
  - Aplique fusión Late + Platt por cohorte.
  - Devuelva `proba_cal`, `decision` y `threshold_used`.

## 4) Monitorización
- Guardar `proba_cal` y decisión por cohorte.
- Semanal: ECE/MCE por cohorte; recall/precision cuando lleguen etiquetas.
- Alarmas: ECE>0.20 (OAS1) / >0.25 (OAS2); recall OAS2<0.65.
"""
(dirs["DOCS"]/"MODEL_CARD.md").write_text(model_card, encoding="utf-8")
(dirs["DOCS"]/"HOW_TO_DEPLOY.md").write_text(howto, encoding="utf-8")
print("OK → DOCS creados.")


OK → DOCS creados.


In [12]:
# Golden set: usamos TEST calibrado (subset estable)
gold = TST.sample(n=min(40, len(TST)), random_state=random_state)[["patient_id","cohort","y_true","p_img","p_clin","p_cal"]]
gold.to_csv(dirs["QA"]/"golden_set.csv", index=False)

# Checksums de modelos
chk = {}
for p in (dirs["MODELS"]).glob("*.pkl"):
    chk[p.name] = sha256_of_file(p)
(pd.Series(chk, name="sha256")
   .to_frame().to_csv(dirs["QA"]/"qa_checksums.csv"))

print("OK → golden_set y checksums listos.")


OK → golden_set y checksums listos.


In [13]:
manifest = {
  "version":"v1.1",
  "files":[]
}
for root, _, files in os.walk(REL):
    for fn in files:
        p = Path(root)/fn
        relp = p.relative_to(REL).as_posix()
        if "/_tmp/" in relp:
            continue
        manifest["files"].append({
            "path": relp,
            "sha256": sha256_of_file(p),
            "bytes": p.stat().st_size
        })
with open(dirs["META"]/"MANIFEST.json","w") as f:
    json.dump(manifest, f, indent=2)
print("OK → MANIFEST.json generado con", len(manifest["files"]), "ficheros.")


OK → MANIFEST.json generado con 19 ficheros.


In [14]:
# Guardamos pip freeze (para reproducibilidad)
import subprocess, sys
out = subprocess.check_output([sys.executable, "-m", "pip", "freeze"]).decode()
# Recomendación: fijar scikit-learn==1.7.1 si coincide con tus pickles
lines = out.strip().splitlines()
(dirs["META"]/"ENVIRONMENT.txt").write_text("\n".join(lines), encoding="utf-8")
print("OK → ENVIRONMENT.txt escrito.")


OK → ENVIRONMENT.txt escrito.


In [15]:
zip_path = REL.with_suffix(".zip")
if zip_path.exists(): zip_path.unlink()
shutil.make_archive(str(REL), "zip", root_dir=REL)
print("🎁 Release ZIP listo:", zip_path)


🎁 Release ZIP listo: /content/drive/MyDrive/CognitivaAI/p26_release.zip


In [19]:
# === S2.1: OAS2 con recall objetivo en VAL; OAS1 se mantiene 5:1 ===
from pathlib import Path
import json, numpy as np, pandas as pd

BASE = Path("/content/drive/MyDrive/CognitivaAI")
REL  = BASE/"p26_release"

val = pd.read_csv(REL/"QA/p26b_val_preds_calibrated.csv")   # cols: patient_id, cohort, y_true, p_cal
tst = pd.read_csv(REL/"QA/p26b_test_preds_calibrated.csv")

def thr_for_recall(y, p, target=0.85):
    """Devuelve el mayor umbral t tal que recall(t) >= target (conservador en FP)."""
    y = np.asarray(y).astype(int)
    p = np.asarray(p).astype(float)
    # Candidatos = valores únicos de p (y extremos 0,1)
    cand = np.unique(np.concatenate([p, [0.0, 1.0]]))
    best_t = 0.0
    found = False
    # Probamos de 1.0 -> 0.0 para quedarnos con el UMBRAL MÁS ALTO que cumpla recall>=target
    for t in sorted(cand, reverse=True):
        yhat = (p >= t).astype(int)
        TP = ((y==1)&(yhat==1)).sum()
        FN = ((y==1)&(yhat==0)).sum()
        rec = TP / (TP + FN) if (TP+FN)>0 else 0.0
        if rec >= target:
            best_t = float(t)
            found = True
            break
    # Si jamás alcanzamos el target, caemos a t=0.0 (todo positivo)
    return best_t, found

def confusion_at(df, t):
    y = df["y_true"].to_numpy(int)
    p = df["p_cal"].to_numpy(float)
    yhat = (p >= t).astype(int)
    TP = int(((y==1)&(yhat==1)).sum())
    FP = int(((y==0)&(yhat==1)).sum())
    TN = int(((y==0)&(yhat==0)).sum())
    FN = int(((y==1)&(yhat==0)).sum())
    prec = TP/(TP+FP) if (TP+FP)>0 else np.nan
    rec  = TP/(TP+FN) if (TP+FN)>0 else np.nan
    acc  = (TP+TN)/len(y) if len(y)>0 else np.nan
    cost = 5*FN + 1*FP
    return dict(TP=TP,FP=FP,TN=TN,FN=FN,Precision=prec,Recall=rec,Acc=acc,Cost=cost)

# 1) Leer deployment_config actual (tiene 5:1)
cfg_path = REL/"CONFIG/deployment_config.json"
cfg = json.loads(cfg_path.read_text())

# Guardamos copia de seguridad
(REL/"CONFIG/deployment_config.backup.json").write_text(json.dumps(cfg, indent=2))

# 2) Calcular umbral OAS2 @ recall objetivo en VAL
target_recall = 0.85
val_oas2 = val[val["cohort"]=="OAS2"].copy()
t_rec, ok = thr_for_recall(val_oas2["y_true"], val_oas2["p_cal"], target=target_recall)

# 3) Mantener OAS1 desde 5:1 del config actual (no tocamos)
thr_oas1_5to1 = float(cfg["thresholds"]["OAS1"])
thr_oas2_5to1 = float(cfg["thresholds"]["OAS2"])

# 4) Actualizamos config principal para usar S2 por defecto
cfg["policy"] = "single"
cfg["cost_policy"] = "FN:FP=5:1 (OAS1) + recall_target (OAS2)"
cfg["thresholds"] = {
    "OAS1": thr_oas1_5to1,
    "OAS2": float(t_rec)
}
# y preservamos umbrales 5:1 como alternativa explícita
cfg["thresholds_5to1"] = {"OAS1": thr_oas1_5to1, "OAS2": thr_oas2_5to1}
cfg["thresholds_recall_target"] = {"OAS2": {"target": target_recall, "thr_val": float(t_rec), "found": bool(ok)}}

cfg_path.write_text(json.dumps(cfg, indent=2))
print("✅ Config actualizada con S2:",
      json.dumps(cfg["thresholds"], indent=2),
      "\n(backup en CONFIG/deployment_config.backup.json)")

# 5) Report en TEST con los nuevos umbrales
rows = []
for coh, thr in [("OAS1", thr_oas1_5to1), ("OAS2", float(t_rec))]:
    d = tst[tst["cohort"]==coh].copy()
    rep = confusion_at(d, thr)
    rows.append(dict(Cohort=coh, Thr=thr, **rep))
rep_df = pd.DataFrame(rows)
out = REL/"QA/p26b_test_report_recall_target.csv"
rep_df.to_csv(out, index=False)
print("\nTEST @S2")
print(rep_df)
print("\n💾 Guardado:", out)


✅ Config actualizada con S2: {
  "OAS1": 0.42,
  "OAS2": 0.4928655287824083
} 
(backup en CONFIG/deployment_config.backup.json)

TEST @S2
  Cohort       Thr  TP  FP  TN  FN  Precision    Recall       Acc  Cost
0   OAS1  0.420000  14   9  18   6   0.608696  0.700000  0.680851    39
1   OAS2  0.492866  11   6   5   1   0.647059  0.916667  0.695652    11

💾 Guardado: /content/drive/MyDrive/CognitivaAI/p26_release/QA/p26b_test_report_recall_target.csv


In [18]:
# === S2.2: revertir a 5:1 en ambos cohortes (opcional) ===
from pathlib import Path
import json
REL = Path("/content/drive/MyDrive/CognitivaAI/p26_release")
cfg_path = REL/"CONFIG/deployment_config.json"
cfg = json.loads(cfg_path.read_text())

# Recuperamos los de 5:1 (guardados en thresholds_5to1)
t_5to1 = cfg.get("thresholds_5to1", cfg["thresholds"])  # por si no existe
cfg["policy"] = "single"
cfg["cost_policy"] = "FN:FP=5:1"
cfg["thresholds"] = {"OAS1": float(t_5to1["OAS1"]), "OAS2": float(t_5to1["OAS2"])}

cfg_path.write_text(json.dumps(cfg, indent=2))
print("↩️  Revertido a 5:1 puro:", cfg["thresholds"])


↩️  Revertido a 5:1 puro: {'OAS1': 0.42, 'OAS2': 0.49}


In [20]:
from pathlib import Path
import json, pandas as pd, numpy as np

REL = Path("/content/drive/MyDrive/CognitivaAI/p26_release")
cfg = json.loads((REL/"CONFIG/deployment_config.json").read_text())
thr = cfg["thresholds"]

tst = pd.read_csv(REL/"QA/p26b_test_preds_calibrated.csv")  # patient_id, cohort, y_true, p_cal
def report_for(df, t):
    y, p = df["y_true"].to_numpy(int), df["p_cal"].to_numpy(float)
    yhat = (p >= t).astype(int)
    TP = int(((y==1)&(yhat==1)).sum()); FP = int(((y==0)&(yhat==1)).sum())
    TN = int(((y==0)&(yhat==0)).sum()); FN = int(((y==1)&(yhat==0)).sum())
    prec = TP/(TP+FP) if (TP+FP)>0 else np.nan
    rec  = TP/(TP+FN) if (TP+FN)>0 else np.nan
    acc  = (TP+TN)/len(y)
    cost = 5*FN + 1*FP
    return dict(TP=TP,FP=FP,TN=TN,FN=FN,Precision=prec,Recall=rec,Acc=acc,Cost=cost)

rows=[]
for coh in ["OAS1","OAS2"]:
    d = tst[tst["cohort"]==coh].copy()
    rows.append(dict(Cohort=coh, Thr=thr[coh], **report_for(d, thr[coh])))
print(pd.DataFrame(rows))


  Cohort       Thr  TP  FP  TN  FN  Precision    Recall       Acc  Cost
0   OAS1  0.420000  14   9  18   6   0.608696  0.700000  0.680851    39
1   OAS2  0.492866  11   6   5   1   0.647059  0.916667  0.695652    11


In [21]:
# === P27 helper ===
# Actualiza documentación con la "Política activa (S2)", regenera MANIFEST y el ZIP del release.
from pathlib import Path
import json, os, hashlib, shutil, re, subprocess, sys
from datetime import datetime

# 0) Paths
REL = Path("/content/drive/MyDrive/CognitivaAI/p26_release")
DOCS = REL/"DOCS"
CFG  = REL/"CONFIG"/"deployment_config.json"
MANI = REL/"META"/"MANIFEST.json"
ENV  = REL/"META"/"ENVIRONMENT.txt"

assert REL.exists(), f"No existe release: {REL}"
assert CFG.exists(), f"No existe config: {CFG}"
DOCS.mkdir(parents=True, exist_ok=True)

# 1) Cargar política/umbrales desde config
cfg = json.loads(CFG.read_text())
thr = cfg["thresholds"]
policy_str = cfg.get("cost_policy", "FN:FP=5:1 (OAS1) + recall_target (OAS2)")
t_oas1 = float(thr["OAS1"])
t_oas2 = float(thr["OAS2"])
recall_target = cfg.get("thresholds_recall_target", {}).get("OAS2", {}).get("target", 0.85)

# 2) Utilidades
def sha256_of_file(p: Path):
    h = hashlib.sha256()
    with open(p, "rb") as f:
        for ch in iter(lambda: f.read(8192), b""): h.update(ch)
    return h.hexdigest()

def upsert_section(md_path: Path, title: str, body_md: str, level: int = 2):
    """
    Inserta o reemplaza una sección Markdown completa:
    - Busca '## title' (o nivel dado).
    - Reemplaza hasta el siguiente encabezado del mismo nivel o superior.
    - Si no existe, la añade al final con un separador.
    """
    if md_path.exists():
        text = md_path.read_text(encoding="utf-8")
    else:
        text = ""
    h = "#"*level
    pattern = rf"^{h}\s+{re.escape(title)}\s*$"
    lines = text.splitlines()
    start = None
    for i, line in enumerate(lines):
        if re.match(pattern, line.strip(), flags=re.IGNORECASE):
            start = i
            break
    new_section = f"{h} {title}\n\n{body_md.strip()}\n"
    if start is None:
        # append
        if text.strip():
            text = text.rstrip()+"\n\n"+new_section
        else:
            text = new_section
    else:
        # find end (next heading of same or higher level)
        end = len(lines)
        for j in range(start+1, len(lines)):
            if re.match(r"^#{1,%d}\s+" % level, lines[j]):
                end = j
                break
        text = "\n".join(lines[:start] + [new_section.rstrip()] + lines[end:])
    md_path.write_text(text, encoding="utf-8")

# 3) Construir bloque de política activa (S2)
policy_block = f"""**Política activa (S2)** — *single pipeline*
- **OAS1:** decisión por coste **FN:FP=5:1**, umbral **thr = {t_oas1:.6f}**
- **OAS2:** **umbral por objetivo de recall** (VAL), target = **{recall_target:.2f}**, umbral **thr = {t_oas2:.6f}**
- Alternativas disponibles:
  - **5:1 puro**: ver `thresholds_5to1` en `CONFIG/deployment_config.json`.
- Nota operativa:
  - Monitorizar **ECE/MCE** y **positivity rate** por cohorte; recalibrar y/o ajustar umbral si deriva el dominio.
"""

# 4) Marcar en MODEL_CARD.md y HOW_TO_DEPLOY.md
model_card = DOCS/"MODEL_CARD.md"
howto      = DOCS/"HOW_TO_DEPLOY.md"
upsert_section(model_card, "Política activa (S2)", policy_block, level=2)
upsert_section(howto, "Política activa (S2)", policy_block, level=2)

# 5) Regenerar MANIFEST (hash/bytes de todos los ficheros salvo _tmp)
manifest = {"version":"v1.1", "generated_at": datetime.utcnow().isoformat()+"Z", "files":[]}
for root, _, files in os.walk(REL):
    for fn in files:
        p = Path(root)/fn
        relp = p.relative_to(REL).as_posix()
        if "/_tmp/" in relp:
            continue
        manifest["files"].append({
            "path": relp,
            "sha256": sha256_of_file(p),
            "bytes": p.stat().st_size
        })
MANI.write_text(json.dumps(manifest, indent=2), encoding="utf-8")

# 6) (Opcional) refrescar ENVIRONMENT.txt si quieres congelar de nuevo
try:
    out = subprocess.check_output([sys.executable, "-m", "pip", "freeze"]).decode()
    ENV.write_text(out, encoding="utf-8")
except Exception as e:
    print("Aviso: no se pudo refrescar ENVIRONMENT.txt:", e)

# 7) Regenerar ZIP del release
zip_path = REL.with_suffix(".zip")
if zip_path.exists():
    zip_path.unlink()
shutil.make_archive(str(REL), "zip", root_dir=REL)

print("✅ Política S2 marcada en DOCS.")
print("📄 MODEL_CARD.md y HOW_TO_DEPLOY.md actualizados.")
print("🧾 MANIFEST.json regenerado con", len(manifest["files"]), "ficheros.")
print("🎁 ZIP listo:", zip_path)
print("→ Umbrales activos:", json.dumps(cfg['thresholds'], indent=2))


  manifest = {"version":"v1.1", "generated_at": datetime.utcnow().isoformat()+"Z", "files":[]}


✅ Política S2 marcada en DOCS.
📄 MODEL_CARD.md y HOW_TO_DEPLOY.md actualizados.
🧾 MANIFEST.json regenerado con 23 ficheros.
🎁 ZIP listo: /content/drive/MyDrive/CognitivaAI/p26_release.zip
→ Umbrales activos: {
  "OAS1": 0.42,
  "OAS2": 0.4928655287824083
}


In [1]:
# --- RESET & RESUME (Colab) ---
from pathlib import Path
import pandas as pd, numpy as np
import matplotlib.pyplot as plt

# 1) Montar Drive (fuerza remount por si quedó colgado)
try:
    from google.colab import drive
    drive.mount('/content/drive', force_remount=True)
except Exception as e:
    print("Drive ya montado o no estás en Colab:", e)

BASE = Path("/content/drive/MyDrive/CognitivaAI")
P25  = BASE/"p25_informe_final"
P26R = BASE/"p26_release"
OUT  = BASE/"p27_final"
OUT.mkdir(parents=True, exist_ok=True)

# 2) Resolver master table de forma robusta
mt_path = P25/"p25_master_table.csv"
if not mt_path.exists():
    hits = list(BASE.rglob("p25_master_table.csv"))
    if not hits:
        raise FileNotFoundError("No se encontró p25_master_table.csv en CognitivaAI/*")
    mt_path = hits[0]
print("✅ Master table:", mt_path)

# 3) Cargar y filtrar TEST
mt = pd.read_csv(mt_path)
keep = ["Pipeline","Split","Cohort","Method","AUC","PRAUC","Brier"]
mt_clean = mt[[c for c in keep if c in mt.columns]].copy()
if "Split" in mt_clean:
    mt_clean["Split"] = mt_clean["Split"].astype(str).str.upper()
    mt_clean = mt_clean[mt_clean["Split"]=="TEST"]

# 4) Figuras AUC/PR-AUC/Brier
def plot_bar(metric, cohort, fname):
    df = mt_clean[mt_clean["Cohort"]==cohort].copy()
    if df.empty or metric not in df: return
    df["_rank"] = df.groupby("Pipeline")[metric].transform(lambda s: s.rank(ascending=False, method="first"))
    top = df[df["_rank"]==1].sort_values(["Pipeline"]).copy()
    if top.empty: return
    plt.figure(figsize=(7,4))
    plt.bar(top["Pipeline"], top[metric])
    plt.title(f"{metric} — {cohort} (TEST)")
    plt.ylabel(metric)
    plt.ylim(0, 1 if metric!="Brier" else max(0.3, float(top[metric].max())*1.1))
    plt.grid(alpha=0.3, axis="y")
    plt.tight_layout()
    plt.savefig(OUT/fname, dpi=160)
    plt.close()

for coh in ["ALL","OAS1","OAS2"]:
    plot_bar("AUC",   coh, f"p27_auc_{coh}.png")
    plot_bar("PRAUC", coh, f"p27_prauc_{coh}.png")
    plot_bar("Brier", coh, f"p27_brier_{coh}.png")

print("🎨 Figuras guardadas en:", OUT)

# 5) Tabla de decisión S2 (si está el QA del release)
qa_candidates = list(P26R.rglob("p26b_test_report_recall_target.csv"))
if qa_candidates:
    qa = pd.read_csv(qa_candidates[0])[["Cohort","Thr","TP","FP","TN","FN","Precision","Recall","Acc","Cost"]]
    qa.to_csv(OUT/"p27_decision_S2_table.csv", index=False)
    print("✅ Tabla decisión S2:", qa_candidates[0])
else:
    print("ℹ️ No se encontró QA S2; omito tabla de decisión.")



Mounted at /content/drive
✅ Master table: /content/drive/MyDrive/CognitivaAI/p25_informe_final/p25_master_table.csv
🎨 Figuras guardadas en: /content/drive/MyDrive/CognitivaAI/p27_final
✅ Tabla decisión S2: /content/drive/MyDrive/CognitivaAI/p26_release/QA/p26b_test_report_recall_target.csv


In [2]:
# === Write MODEL_CARD.md (P27) ===
from pathlib import Path
from datetime import datetime, timezone

BASE = Path("/content/drive/MyDrive/CognitivaAI")
REL  = BASE/"p26_release"
REL.mkdir(parents=True, exist_ok=True)

mc_path = REL/"MODEL_CARD.md"
backup  = mc_path.with_suffix(".backup.md")

# 1) Backup si existe
if mc_path.exists():
    backup.write_text(mc_path.read_text(encoding="utf-8"), encoding="utf-8")

# 2) Contenido del Model Card (P27)
mc = """# Model Card — CognitivaAI Intermodal (P26/P27)

**Versión:** P27 (intermodal LATE + calibración por cohorte + política S2)
**Tarea:** Predicción binaria (0=Control, 1=Dementia/Converted) a nivel **paciente**.
**Entradas:**
- **Imagen → `p_img`** (probabilidad calibrada con Platt a partir de features por paciente; base P24).
- **Clínico → `p_clin`** (LR sobre variables tabulares estandarizadas).
**Fusión:** **LATE** (combinación sobre probabilidades calibradas).
**Cohortes:** OASIS-1 (cross-sectional) y OASIS-2 (longitudinal, 1 visita/paciente).

---

## 1) Uso previsto
Sistema de **cribado** para apoyar la decisión clínica en evaluación cognitiva, con especial énfasis en **sensibilidad** (minimizar FN) y **calibración** de probabilidades. No sustituye el juicio clínico; requiere validación local.

---

## 2) Datos y entrenamiento (resumen)
- **Imagen**: 20 *slices* axiales/volumen, normalización z-score (+CLAHE opc.), agregación por paciente; meta-modelo P24 (LR elastic-net + Platt).
- **Clínico**: columnas mínimas `Age, Sex, Education, SES, MMSE, eTIV, nWBV, ASF, Delay, patient_id` (imputación mediana/one-hot básico).
- **P26**: intermodal **LATE**; **P26b**: recalibración **Platt por cohorte**.
- **P27**: empaquetado reproducible, política de decisión **S2** y QA final.

---

## 3) Métricas de probabilidad (TEST)
**Intermodal LATE (P26):**
- **ALL:** AUC **0.736**, PR-AUC **0.729**, Brier **0.229**
- **OAS1:** AUC **0.754**, PR-AUC **0.736**, Brier **0.208**
- **OAS2:** AUC **0.652**, PR-AUC **0.728**, Brier **0.288**

> Fuente: `p25_informe_final/p25_master_table.csv` (filas P26) y figuras en `p27_final/`.

---

## 4) Política de decisión **S2** (activa)
**Objetivo:** maximizar sensibilidad sin colapsar en “todo positivo” en dominios tipo OAS2.

- **OAS1 → 5:1 (FN:FP)** con umbral aprendido en VAL → **thr = 0.42**
- **OAS2 → “recall objetivo” (VAL, target≈0.85; aplicado en TEST)** → **thr ≈ 0.4928655**

**Resultados TEST @S2 (confusiones y métricas):**
- **OAS1 (0.42):** TP=14, FP=9, TN=18, FN=6 → **Recall=0.700**, Precision=0.609, Acc=0.681, **Coste=39**
- **OAS2 (≈0.4929):** TP=11, FP=6, TN=5, FN=1 → **Recall=0.917**, Precision=0.647, Acc=0.696, **Coste=11**

**Dónde cambiar:** `p26_release/CONFIG/deployment_config.json`
- `thresholds = {"OAS1": 0.42, "OAS2": 0.4928655287824083}`
- `thresholds_5to1 = {"OAS1": 0.42, "OAS2": 0.49}` *(fallback 5:1 puro)*

---

## 5) Calibración y monitorización
- **ECE/MCE (TEST intermodal, P26):** ALL≈0.178 / OAS1≈0.150 / **OAS2≈0.313** → monitorizar y **recalibrar** por cohorte si **ECE>0.20** o hay drift (sitio/escáner/población).
- Recomendar telemetría de **TP/FP/TN/FN**, **tasa de positivos** y **ECE** por cohorte. Recalibración con ventana móvil (≥50–100 casos).

---

## 6) Limitaciones
- Tamaños por cohorte moderados → **IC amplios**.
- **Shift** entre OAS1/OAS2 → aplicar **umbrales por cohorte** y validación local antes de uso asistencial.
- El modelo **no** es un diagnóstico automático; es soporte a la decisión.

---

## 7) Cómo ejecutar (resumen)
1. **Imagen → `p_img`:** `compute_pimg_from_features.py` sobre las matrices de features por paciente (catálogo P11 + OAS2 p14).
2. **Clínico → `p_clin`:** CSV con columnas mínimas (arriba).
3. **Inferencia E2E:** `predict_end_to_end.py` → combina `p_img + p_clin` (LATE), **calibra por cohorte** (P26b) y **aplica S2**.
4. Salidas: CSV de probabilidades/calibradas, decisión (0/1), y **QA** con confusiones/Coste.

---

## 8) Versionado y reproducibilidad
- **Release:** `p26_release/` (zip con 23 ficheros).
- **Modelos:** `p24_model.pkl`, `p24_platt.pkl`, `p26_clinical_model.pkl`.
- **Config:** `CONFIG/deployment_config.json` (+ backups).
- **QA:** `QA/p26b_test_report_recall_target.csv`.
- **Trazas:** `MANIFEST.json`, `ENVIRONMENT.txt`.

---

## 9) Figuras (collage rápido)
<table>
<tr>
<td width="50%">
<b>AUC — ALL (TEST)</b><br>
<img src="../p27_final/p27_auc_ALL.png" alt="AUC ALL" />
</td>
<td width="50%">
<b>OAS2 · S2 vs 5:1 (TEST)</b><br>
<img src="../p27_final/p27_s2_vs_5to1_OAS2.png" alt="S2 vs 5:1 OAS2" />
</td>
</tr>
</table>

> Más figuras en `p27_final/`:
> - `p27_auc_*.png`, `p27_prauc_*.png`, `p27_brier_*.png`
> - (si existe) `p27_s2_vs_5to1_OAS2.png`

---
"""

# 3) Escribir nuevo contenido
mc_path.write_text(mc, encoding="utf-8")
print("✅ MODEL_CARD.md escrito en:", mc_path)
print("🕒", datetime.now(timezone.utc).isoformat())


✅ MODEL_CARD.md escrito en: /content/drive/MyDrive/CognitivaAI/p26_release/MODEL_CARD.md
🕒 2025-09-08T21:40:46.211414+00:00


In [3]:
# === Opcional: escribir HOW_TO_DEPLOY.md + README_RELEASE.md y reempaquetar ===
from pathlib import Path
from datetime import datetime, timezone
import hashlib, json, shutil

BASE = Path("/content/drive/MyDrive/CognitivaAI")
REL  = BASE/"p26_release"
REL.mkdir(parents=True, exist_ok=True)

howto = REL/"HOW_TO_DEPLOY.md"
readme_rel = REL/"README_RELEASE.md"

HOWTO_MD = r"""# HOW_TO_DEPLOY — CognitivaAI Intermodal (P26/P27)
[... pega aquí el bloque HOW_TO_DEPLOY.md de arriba si quieres editarlo a mano ...]
"""
README_REL_MD = r"""# README — Paquete de Release (P26/P27)
[... pega aquí el bloque README_RELEASE.md de arriba si quieres editarlo a mano ...]
"""

# 1) Escribir archivos
howto.write_text(HOWTO_MD.strip()+"\n", encoding="utf-8")
readme_rel.write_text(README_REL_MD.strip()+"\n", encoding="utf-8")
print("✅ HOW_TO_DEPLOY.md y README_RELEASE.md escritos.")

# 2) Regenerar MANIFEST.json
def sha256(p: Path) -> str:
    h = hashlib.sha256()
    with open(p, "rb") as f:
        for chunk in iter(lambda: f.read(1024*1024), b""):
            h.update(chunk)
    return h.hexdigest()

files = []
for p in REL.rglob("*"):
    if p.is_file() and p.name != "p26_release.zip":
        files.append({"path": str(p.relative_to(REL)), "sha256": sha256(p), "size": p.stat().st_size})

manifest = {
    "version": "v1.2",
    "generated_at": datetime.now(timezone.utc).isoformat(),
    "files": files
}
(REL/"MANIFEST.json").write_text(json.dumps(manifest, indent=2), encoding="utf-8")
print("🧾 MANIFEST.json regenerado.")

# 3) Reempaquetar ZIP
zip_path = BASE/"p26_release.zip"
if zip_path.exists():
    zip_path.unlink()
shutil.make_archive(str(zip_path.with_suffix("")), "zip", REL)
print("🎁 ZIP listo:", zip_path)


✅ HOW_TO_DEPLOY.md y README_RELEASE.md escritos.
🧾 MANIFEST.json regenerado.
🎁 ZIP listo: /content/drive/MyDrive/CognitivaAI/p26_release.zip
