In [4]:
# inferencia_local_openvino.py
#  INFERENCIA (4 VARIANTES DE PROMPT) ‚Äî MULTI-MODELO CL√çNICO (LOCAL, OpenVINO)
# Modelos ejemplo:
#   - microsoft/BioGPT
#   - microsoft/BioGPT-Large
# Backend: OpenVINO (Intel GPU si est√°, fallback CPU). Guarda CSVs localmente.

import os, re, json, random, warnings, sys
from pathlib import Path
import argparse
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
warnings.filterwarnings("ignore")

#  Dependencias de runtime 
# Recomendado (Windows, Py3.12):
#   python -m pip install "numpy<2.1" "transformers==4.46.2" "optimum-intel[openvino]==1.26.0" "openvino>=2025.1.0" "accelerate>=0.34" "huggingface_hub>=0.24"
from transformers import AutoTokenizer
from optimum.intel.openvino import OVModelForCausalLM

from huggingface_hub import HfApi
try:
    from huggingface_hub.utils import HfHubHTTPError
except Exception:
    try:
        from huggingface_hub.utils._errors import HfHubHTTPError
    except Exception:
        HfHubHTTPError = Exception

#  Config CLI (compatible con Jupyter/VSCode) 
def parse_args(argv=None):
    ap = argparse.ArgumentParser(add_help=True)
    ap.add_argument("--notes", default=r"C:\Users\hered\Desktop\TFM\TFM\TFM2\sample_notes_imc.csv", help="CSV con las 20 notas")
    ap.add_argument("--gt",    default=r"C:\Users\hered\Desktop\TFM\TFM\TFM2\gt_imc_final.csv",     help="CSV con GT")
    ap.add_argument("--outdir",default=r"C:\Users\hered\Desktop\TFM\TFM\TFM2\outputs",              help="Directorio de salida")
    ap.add_argument("--n", type=int, default=20, help="N√∫mero de notas a usar")
    ap.add_argument("--device_pref", default="GPU", choices=["GPU","CPU"], help="Preferencia de dispositivo OpenVINO")
    ap.add_argument("--models", nargs="*", default=[
        "microsoft/BioGPT",
        "microsoft/BioGPT-Large",
    ], help="Lista de modelos HF (IDs v√°lidos)")
    ap.add_argument("--hf_token", default=os.getenv("HUGGINGFACE_HUB_TOKEN", None), help="Token HF (si el repo es privado/gated).")
    ap.add_argument("--seed", type=int, default=7)
    ap.add_argument("--limit_windows", type=int, default=6)
    ap.add_argument("--attempts_per_window", type=int, default=3)

    if argv is None:
        argv = [] if "ipykernel" in sys.modules else None

    args, _unknown = ap.parse_known_args(argv)
    return args

args = parse_args()
random.seed(args.seed); np.random.seed(args.seed)

# RUTAS FIJAS (como pediste)
PATH_NOTES = r"C:\Users\hered\Desktop\TFM\TFM\TFM2\sample_notes_imc.csv"
PATH_GT    = r"C:\Users\hered\Desktop\TFM\TFM\TFM2\gt_imc_final.csv"
OUT_DIR    = r"C:\Users\hered\Desktop\TFM\TFM\TFM2\outputs"

N_NOTES    = args.n
Path(OUT_DIR).mkdir(parents=True, exist_ok=True)
SAMPLE_OUT = str(Path(OUT_DIR) / f"notes_sample_{N_NOTES}.csv")

#  Helpers HF 
def validate_model_or_fail(repo_id: str, token: str | None = None):
    api = HfApi()
    try:
        info = api.model_info(repo_id, token=token)
        if not info:
            raise SystemExit(f" No se pudo obtener info del repo '{repo_id}'.")
        print(f"‚úî Modelo v√°lido en HF: {repo_id}  (sha: {getattr(info, 'sha', 'n/a')[:7]})")
    except HfHubHTTPError as e:
        code = getattr(e.response, "status_code", None)
        if code == 404:
            raise SystemExit(f" Repo no encontrado en HF: {repo_id}\nCorrige el ID (p.ej. 'microsoft/BioGPT').")
        elif code == 401:
            raise SystemExit(
                f" 401 Unauthorized para {repo_id}.\n"
                f"- Ejecuta `hf auth login` o pasa --hf_token.\n"
                f"- Si es un repo ‚Äògated‚Äô, acepta t√©rminos en su p√°gina."
            )
        else:
            raise

#  Cargar datos 
assert Path(PATH_NOTES).exists(), f"Falta {PATH_NOTES}"
assert Path(PATH_GT).exists(), f"Falta {PATH_GT}"

def load_notes(csv_path):
    df = pd.read_csv(csv_path, dtype={"patient_id": str})
    cols = {c.lower().strip(): c for c in df.columns}
    pid = next((cols[c] for c in ["patient_id","id","pid","subject_id"] if c in cols), None)
    txt = next((cols[c] for c in ["patient","note_text","note","text"] if c in cols), None)
    assert pid and txt, "Necesito columnas ['patient_id', 'patient'/'note_text']"
    df = df.rename(columns={pid:"patient_id", txt:"patient"})[["patient_id","patient"]]
    df["patient_id"] = df["patient_id"].astype(str).str.strip()
    df["patient"]    = df["patient"].astype(str)
    return df

notes_full = load_notes(PATH_NOTES)
gt = pd.read_csv(PATH_GT, dtype={"patient_id": str})
gt["patient_id"] = gt["patient_id"].astype(str).str.strip()
for c in ["height_m_true","weight_kg_true","BMI_true"]:
    if c in gt.columns:
        gt[c] = pd.to_numeric(gt[c], errors="coerce")
gt["bmi_explicit_in_note"] = gt["bmi_explicit_in_note"].astype(bool) if "bmi_explicit_in_note" in gt.columns else False

inter = notes_full.merge(gt[["patient_id"]], on="patient_id", how="inner")
assert len(inter) >= N_NOTES, f"No hay suficientes IDs comunes para N={N_NOTES}."
TXT_COL = "patient" if "patient" in inter.columns else ("note_text" if "note_text" in inter.columns else None)
assert TXT_COL is not None, f"Falta columna de texto en {PATH_NOTES}: {inter.columns.tolist()}"
notes_20 = (
    inter[["patient_id", TXT_COL]]
    .rename(columns={TXT_COL: "patient"})
    .drop_duplicates("patient_id")
    .head(N_NOTES)
    .copy()
)
notes_20.to_csv(SAMPLE_OUT, index=False)
print(f"Notas totales: {len(notes_full)} | GT: {len(gt)} | Intersecci√≥n: {len(inter)} | Usadas: {len(notes_20)}")
print(f"‚úî Muestra guardada: {SAMPLE_OUT}")

#  Modelo: OpenVINO (GPU Intel si hay; si falla ‚Üí CPU) 
def get_ov_model_and_tokenizer(model_id: str, device_pref: str = "GPU", token: str | None = None):
    """
    Carga tokenizer y modelo OpenVINO. Intenta GPU Intel; si falla, cae a CPU.
    Devuelve: (tokenizer, llm_generate, apply_chat_template)
    """
    print(f"\nCargando tokenizer: {model_id}")
    tok = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, token=token)
    if tok.pad_token_id is None and tok.eos_token_id is not None:
        tok.pad_token = tok.eos_token

    device = "GPU" if device_pref.upper() == "GPU" else "CPU"
    print(f"Cargando modelo OpenVINO en {device} (exportando si es necesario)...")
    ov_model = OVModelForCausalLM.from_pretrained(
        model_id,
        export=True,
        device=device,
        compile=True,
        trust_remote_code=True,
        ov_config={"CACHE_DIR": str(Path(OUT_DIR) / "ov_cache")},
        token=token,
    )
    if getattr(ov_model.config, "pad_token_id", None) is None and tok.eos_token_id is not None:
        ov_model.config.pad_token_id = tok.eos_token_id

    # Probar inferencia m√≠nima; si falla en GPU, reintentar en CPU.
    try:
        test_ids = tok("ok", return_tensors="pt").input_ids
        _ = ov_model.generate(test_ids, max_new_tokens=1)
        print(f" Modelo operativo en {device}")
    except Exception as e:
        if device == "GPU":
            print(f" Fall√≥ en GPU ({e}). Reintentando en CPU‚Ä¶")
            ov_model = OVModelForCausalLM.from_pretrained(
                model_id,
                export=True,
                device="CPU",
                compile=True,
                trust_remote_code=True,
                ov_config={"CACHE_DIR": str(Path(OUT_DIR) / "ov_cache")},
                token=token,
            )
            print(" Modelo operativo en CPU")
        else:
            raise

    def llm_generate(prompt: str, max_new=160, temperature=0.8, top_p=0.95, do_sample=True):
        # Un poco m√°s "creativo" para evitar respuestas vac√≠as en modelos no-instruct
        inputs = tok(prompt, return_tensors="pt")
        out_ids = ov_model.generate(
            **inputs,
            max_new_tokens=max_new,
            min_new_tokens=8,
            do_sample=do_sample,
            temperature=float(temperature),
            top_p=float(top_p),
            repetition_penalty=1.05,
            eos_token_id=(tok.eos_token_id or ov_model.config.eos_token_id),
            pad_token_id=(ov_model.config.pad_token_id or tok.eos_token_id),
        )
        return tok.decode(out_ids[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True).strip()

    def apply_chat_template(system_text: str, user_text: str):
        """
        Si el tokenizer trae chat_template, √∫salo. Si no (p. ej., BioGPT),
        construye un prompt Q/A simple.
        """
        has_apply = hasattr(tok, "apply_chat_template")
        has_template = bool(getattr(tok, "chat_template", None))
        if has_apply and has_template:
            return tok.apply_chat_template(
                [
                    {"role": "system", "content": system_text},
                    {"role": "user",  "content": user_text},
                ],
                tokenize=False,
                add_generation_prompt=True,
            )
        # Fallback Q/A
        return (
            "Task:\n" + system_text.strip() + "\n\n" +
            "Input:\n" + user_text.strip() + "\n" +
            "Output:\n"
        )

    return tok, llm_generate, apply_chat_template

#  Helpers de ventana / plausibilidad nWIN, STRIDE = 1100, 800
UNIT_TOKENS = [" cm"," m","meter","metre","ft"," in","inch","kg"," lb","lbs","pound"," stone"," st","bmi","BMI","weight","height"]

def window_iter(text, win=WIN, stride=STRIDE):
    t = str(text); n = len(t)
    if n <= win:
        yield 0, t; return
    for i in range(0, n, stride):
        yield i, t[i:i+win]
        if i+win >= n: break

def has_unit_token(s: str):
    sl = (s or "").lower()
    return any(tok.strip().lower() in sl for tok in UNIT_TOKENS)

H_MIN, H_MAX = 1.2, 2.2
W_MIN, W_MAX = 30, 300
BMI_MIN, BMI_MAX = 10, 80
BMI_TOL = 0.5

def is_num(x):
    try:
        return np.isfinite(float(x))
    except:
        return False

def clip_plausible(h, w, b):
    try:
        if is_num(h) and not (H_MIN <= float(h) <= H_MAX): h = None
    except: h = None
    try:
        if is_num(w) and not (W_MIN <= float(w) <= W_MAX): w = None
    except: w = None
    try:
        if is_num(b) and not (BMI_MIN <= float(b) <= BMI_MAX): b = None
    except: b = None
    return h, w, b

def recompute_bmi(h, w):
    try:
        h = float(h); w = float(w)
        if h > 0: return round(w/(h*h), 2)
    except: pass
    return None

#  4 VARIANTES DE PROMPT n# 1) SIMPLE (tripleta)
SYSTEM_SIMPLE = (
    "You are a careful clinical extractor. From the GIVEN WINDOW ONLY, return STRICT JSON with normalized SI values:\n"
    "{ \"height_m\": <float|null>, \"weight_kg\": <float|null>, \"bmi\": <float|null> }\n"
    "Rules: Use ONLY numbers present; convert units to SI; if either height or weight is missing, bmi=null. Output JSON only."
)

# 2) ESTRICTO (unidades expl√≠citas + verificaci√≥n)
SYSTEM_STRICT = (
    "You are a clinical extractor and verifier. From the GIVEN WINDOW ONLY, return STRICT JSON:\n"
    "{ \"height_m\": <float|null>, \"weight_kg\": <float|null>, \"bmi\": <float|null> }\n"
    "Rules:\n"
    "‚Ä¢ Use ONLY numbers with explicit units (cm/m/ft-in ‚Üí m; kg/lb/stone ‚Üí kg).\n"
    "‚Ä¢ Plausibility: 1.20 ‚â§ height_m ‚â§ 2.20, 30 ‚â§ weight_kg ‚â§ 300, 10 ‚â§ bmi ‚â§ 80.\n"
    "‚Ä¢ If both H & W exist, compute bmi=kg/(m^2) (2 decimals) and prefer this over any conflicting BMI text.\n"
    "‚Ä¢ If inconsistent, set bmi=null. Output JSON only."
)

# 3) FEW-SHOT (ejemplos)
SYSTEM_FEWSHOT = SYSTEM_SIMPLE  # usamos mismas reglas, pero con ejemplos en el prompt builder
FEW_SHOTS = [
    ("A 60-year-old woman, height 165 cm and weight 68 kg.",
     "{\"height_m\": 1.65, \"weight_kg\": 68.0, \"bmi\": 24.98}"),
    ("Male, 1.80 m, 90 kg; BMI not explicitly stated in text.",
     "{\"height_m\": 1.80, \"weight_kg\": 90.0, \"bmi\": 27.78}"),
    ("Patient reports good energy. No numeric measurements present.",
     "{\"height_m\": null, \"weight_kg\": null, \"bmi\": null}"),
]

# 4) ENCADENADO (SPAN -> NORM -> JUEZ)
# (definido dentro de funciones run_chain_on_window / run_chain_on_note)

#  Utilidades de generaci√≥n / parsing 
def safe_json(text: str):
    if not text: return None
    s = text.strip()
    if s.startswith("```"):
        try:
            s = s.split("```", 1)[-1]
            if "```" in s: s = s.split("```",1)[0]
        except: pass
    a, b = s.find("{"), s.rfind("}")
    if a!=-1 and b!=-1 and b>a: s = s[a:b+1]
    s = s.replace("None","null").replace("NaN","null").replace(",}", "}")
    try:
        obj = json.loads(s)
        return obj if isinstance(obj, dict) else None
    except: return None

def gen_prompt(apply_chat_template, system_text, window_text):
    return apply_chat_template(system_text, "NOTE WINDOW:\n"+window_text+"\n\nJSON ONLY")

#  Ejecutores comunes 
ATTEMPTS_PER_WIN = args.attempts_per_window
N_WINDOWS_MAX = args.limit_windows

def run_triplet(note_text: str, system_text: str, llm_generate, apply_chat_template):
    wins = sorted([(s,c) for s,c in window_iter(note_text)], key=lambda x: int(not has_unit_token(x[1])))
    best={"h":None,"w":None,"b":None,"score":-1e9}
    for _, chunk in wins[:N_WINDOWS_MAX]:
        prompt = gen_prompt(apply_chat_template, system_text, chunk)
        for _ in range(ATTEMPTS_PER_WIN):
            obj = safe_json(llm_generate(prompt))
            if obj is None: continue
            h,w,b = obj.get("height_m"), obj.get("weight_kg"), obj.get("bmi")
            try: h=float(h)
            except: h=None
            try: w=float(w)
            except: w=None
            try: b=float(b)
            except: b=None
            h,w,b = clip_plausible(h,w,b)
            sc=0.0
            if is_num(h): sc+=1.0
            if is_num(w): sc+=1.0
            if is_num(h) and is_num(w):
                b2 = recompute_bmi(h,w)
                if is_num(b2): sc += 0.7
                if is_num(b) and is_num(b2) and abs(float(b)-float(b2))<=BMI_TOL: sc += 0.4
            elif is_num(b):
                sc += 0.2
            if sc>best["score"]: best={"h":h,"w":w,"b":b,"score":sc}
        if is_num(best["h"]) and is_num(best["w"]): break
    H = round(float(best["h"]),2) if is_num(best["h"]) else None
    W = round(float(best["w"]),1) if is_num(best["w"]) else None
    B_from = recompute_bmi(H,W) if (is_num(H) and is_num(W)) else None
    B = B_from if is_num(B_from) else (round(float(best["b"]),2) if is_num(best["b"]) else None)
    return H,W,B_from,B

def run_fewshot(note_text: str, system_text: str, llm_generate, apply_chat_template):
    """
    Igual que triplet, pero el prompt incluye ejemplos Q/A cuando no hay chat_template.
    """
    wins = sorted([(s,c) for s,c in window_iter(note_text)], key=lambda x: int(not has_unit_token(x[1])))
    best={"h":None,"w":None,"b":None,"score":-1e9}
    for _, chunk in wins[:N_WINDOWS_MAX]:
        # Construcci√≥n del prompt con ejemplos
        msgs = [{"role":"system","content":system_text}]
        for ex_in, ex_out in FEW_SHOTS:
            msgs += [{"role":"user","content":"NOTE WINDOW:\n"+ex_in+"\n\nJSON ONLY"},
                     {"role":"assistant","content":ex_out}]
        msgs += [{"role":"user","content":"NOTE WINDOW:\n"+chunk+"\n\nJSON ONLY"}]

        # Si hay plantilla, √∫sala; si no, ensamblamos Q/A manual con Output:
        prompt = None
        try:
            prompt = apply_chat_template(system_text, "NOTE WINDOW:\n"+chunk+"\n\nJSON ONLY")  # fallback m√≠nimo
            # Si queremos forzar los ejemplos en el prompt incluso con plantilla ausente:
            if "Output:" in prompt:
                pass
        except Exception:
            pass

        if prompt is None or ("Output:" not in prompt):
            # Ensamble few-shot Q/A manual
            few = []
            for ex_in, ex_out in FEW_SHOTS:
                few.append(
                    "Input:\n"+ex_in+"\nOutput:\n"+ex_out+"\n"
                )
            few_block = "\n".join(few)
            prompt = (
                "Task:\n"+system_text.strip()+"\n\n"+
                "Examples:\n"+few_block+"\n"+
                "Input:\n"+("NOTE WINDOW:\n"+chunk+"\n\nJSON ONLY")+"\n"+
                "Output:\n"
            )

        # Generar y puntuar
        for _ in range(ATTEMPTS_PER_WIN):
            obj = safe_json(llm_generate(prompt))
            if obj is None: continue
            h,w,b = obj.get("height_m"), obj.get("weight_kg"), obj.get("bmi")
            try: h=float(h)
            except: h=None
            try: w=float(w)
            except: w=None
            try: b=float(b)
            except: b=None
            h,w,b = clip_plausible(h,w,b)
            sc=0.0
            if is_num(h): sc+=1.0
            if is_num(w): sc+=1.0
            if is_num(h) and is_num(w):
                b2 = recompute_bmi(h,w)
                if is_num(b2): sc += 0.7
                if is_num(b) and is_num(b2) and abs(float(b)-float(b2))<=BMI_TOL: sc += 0.4
            elif is_num(b):
                sc += 0.2
            if sc>best["score"]: best={"h":h,"w":w,"b":b,"score":sc}
        if is_num(best["h"]) and is_num(best["w"]): break

    H = round(float(best["h"]),2) if is_num(best["h"]) else None
    W = round(float(best["w"]),1) if is_num(best["w"]) else None
    B_from = recompute_bmi(H,W) if (is_num(H) and is_num(W)) else None
    B = B_from if is_num(B_from) else (round(float(best["b"]),2) if is_num(best["b"]) else None)
    return H,W,B_from,B

#  Cadena (SPAN -> NORM -> JUEZ)
def chat_prompt(apply_chat_template, system, user):
    return apply_chat_template(system, user)

def run_chain_on_window(window_text: str, llm_generate, apply_chat_template):
    SYS_SPAN = (
        "You are a clinical span finder. From the NOTE WINDOW, pick ONLY the earliest sentence that "
        "contains tokens/units for height or weight or BMI, and return STRICT JSON:\n"
        "{ \"sentence\": <string>, \"height_span\": <string|null>, \"weight_span\": <string|null>, \"bmi_span\": <string|null> }\n"
        "Spans must be exact substrings and include units when applicable. JSON only."
    )
    SYS_NORM = (
        "You are a clinical normalizer and calculator. Given the chosen sentence and spans, return STRICT JSON:\n"
        "{ \"height_m\": <float|null>, \"weight_kg\": <float|null>, \"bmi\": <float|null>, "
        "\"bmi_source\": <\"from_text\"|\"from_hw\"|null>, \"check\": <\"ok\"|\"mismatch\"|\"insufficient\"> }\n"
        "Normalize units; if both H & W exist, COMPUTE bmi=kg/(m^2) (2 decimals). Prefer computed BMI if conflicting."
    )
    span_raw = llm_generate(chat_prompt(apply_chat_template, SYS_SPAN, f"NOTE WINDOW:\n{window_text}\n\nJSON ONLY"))
    span_obj = safe_json(span_raw) or {}
    norm_user = json.dumps({
        "sentence": span_obj.get("sentence",""),
        "height_span": span_obj.get("height_span"),
        "weight_span": span_obj.get("weight_span"),
        "bmi_span": span_obj.get("bmi_span")
    }, ensure_ascii=False)
    norm_raw = llm_generate(chat_prompt(apply_chat_template, SYS_NORM, norm_user + "\n\nJSON ONLY"))
    norm_obj = safe_json(norm_raw) or {}
    return {
        "sentence": span_obj.get("sentence",""),
        "height_m": norm_obj.get("height_m"),
        "weight_kg": norm_obj.get("weight_kg"),
        "bmi": norm_obj.get("bmi"),
        "bmi_source": norm_obj.get("bmi_source"),
        "check": norm_obj.get("check")
    }

def run_chain_on_note(note_text: str, llm_generate, apply_chat_template, attempts_per_win=2, n_windows_max=6):
    cands=[]
    for _, chunk in list(window_iter(note_text))[:n_windows_max]:
        for _ in range(attempts_per_win):
            c = run_chain_on_window(chunk, llm_generate, apply_chat_template)
            if isinstance(c, dict): cands.append(c)
    SYS_JUDGE = (
        "You are a strict clinical judge. You will receive a list of candidate JSON objects each with "
        "height_m, weight_kg, bmi, bmi_source, check, and the chosen sentence.\n"
        "Pick the single BEST candidate and return STRICT JSON with the same fields. Prefer check=\"ok\"; "
        "ties ‚Üí same sentence H/W with explicit units; ties ‚Üí clearer SI units."
    )
    judge_user = json.dumps({"candidates": cands}, ensure_ascii=False)
    judge_raw = llm_generate(chat_prompt(apply_chat_template, SYS_JUDGE, judge_user + "\n\nJSON ONLY"))
    jud = safe_json(judge_raw) or {}
    def to_float(x):
        try: return float(x)
        except: return None
    return {
        "height_m_pred": to_float(jud.get("height_m")),
        "weight_kg_pred": to_float(jud.get("weight_kg")),
        "BMI_pred_raw":   to_float(jud.get("bmi")),
        "bmi_source":     jud.get("bmi_source"),
        "check":          jud.get("check")
    }

#  Orquestaci√≥n por modelo 
notes_run = pd.read_csv(SAMPLE_OUT, dtype={"patient_id": str})
notes_run["patient_id"] = notes_run["patient_id"].astype(str).str.strip()

for ALT_MODEL_ID in args.models:
    validate_model_or_fail(ALT_MODEL_ID, token=args.hf_token)
    print("\n"+"="*88)
    print(f" Ejecutando experimento para: {ALT_MODEL_ID}")
    print("="*88)

    tokenizer, llm_generate, apply_chat_template = get_ov_model_and_tokenizer(ALT_MODEL_ID, args.device_pref, token=args.hf_token)

    ALL_PRED_PATHS = []

    def save_csv(rows, out_csv):
        Path(out_csv).parent.mkdir(parents=True, exist_ok=True)
        pd.DataFrame(rows).to_csv(out_csv, index=False)
        print(f" Guardado: {out_csv}")
        ALL_PRED_PATHS.append(out_csv)

    #  V1: SIMPLE 
    rows=[]
    for _, r in tqdm(notes_run.iterrows(), total=len(notes_run), desc="Inferencia v1_simple"):
        pid, note = r["patient_id"], str(r["patient"])
        H,W,B_from,B = run_triplet(note, SYSTEM_SIMPLE, llm_generate, apply_chat_template)
        rows.append({
            "patient_id": pid,
            "height_m_pred": H if H is not None else np.nan,
            "weight_kg_pred": W if W is not None else np.nan,
            "BMI_from_pred_hw": B_from if B_from is not None else np.nan,
            "BMI_pred_raw": B if B is not None else np.nan,
            "note_len": len(note),
            "prompt_id": "v1_simple",
            "model_used": ALT_MODEL_ID
        })
    save_csv(rows, f"{OUT_DIR}/pred_v1_simple_{ALT_MODEL_ID.split('/')[-1]}_n{len(notes_run)}.csv")

    #  V2: ESTRICTO 
    rows=[]
    for _, r in tqdm(notes_run.iterrows(), total=len(notes_run), desc="Inferencia v2_estricto"):
        pid, note = r["patient_id"], str(r["patient"])
        H,W,B_from,B = run_triplet(note, SYSTEM_STRICT, llm_generate, apply_chat_template)
        rows.append({
            "patient_id": pid,
            "height_m_pred": H if H is not None else np.nan,
            "weight_kg_pred": W if W is not None else np.nan,
            "BMI_from_pred_hw": B_from if B_from is not None else np.nan,
            "BMI_pred_raw": B if B is not None else np.nan,
            "note_len": len(note),
            "prompt_id": "v2_estricto",
            "model_used": ALT_MODEL_ID
        })
    save_csv(rows, f"{OUT_DIR}/pred_v2_estricto_{ALT_MODEL_ID.split('/')[-1]}_n{len(notes_run)}.csv")

    #  V3: FEW-SHOT 
    rows=[]
    for _, r in tqdm(notes_run.iterrows(), total=len(notes_run), desc="Inferencia v3_fewshot"):
        pid, note = r["patient_id"], str(r["patient"])
        H,W,B_from,B = run_fewshot(note, SYSTEM_FEWSHOT, llm_generate, apply_chat_template)
        rows.append({
            "patient_id": pid,
            "height_m_pred": H if H is not None else np.nan,
            "weight_kg_pred": W if W is not None else np.nan,
            "BMI_from_pred_hw": B_from if B_from is not None else np.nan,
            "BMI_pred_raw": B if B is not None else np.nan,
            "note_len": len(note),
            "prompt_id": "v3_fewshot",
            "model_used": ALT_MODEL_ID
        })
    save_csv(rows, f"{OUT_DIR}/pred_v3_fewshot_{ALT_MODEL_ID.split('/')[-1]}_n{len(notes_run)}.csv")

    #  V4: ENCADENADO (SPAN ‚Üí NORM ‚Üí JUEZ) 
    rows=[]
    for _, r in tqdm(notes_run.iterrows(), total=len(notes_run), desc="Inferencia v4_encadenado"):
        pid, note = r["patient_id"], str(r["patient"])
        out = run_chain_on_note(note, llm_generate, apply_chat_template, attempts_per_win=2, n_windows_max=6)
        rows.append({
            "patient_id": pid,
            "note_len": len(note),
            "prompt_id": "v4_encadenado",
            "model_used": ALT_MODEL_ID,
            "height_m_pred": (out["height_m_pred"] if out["height_m_pred"] is not None else np.nan),
            "weight_kg_pred": (out["weight_kg_pred"] if out["weight_kg_pred"] is not None else np.nan),
            "BMI_pred_raw":   (out["BMI_pred_raw"] if out["BMI_pred_raw"] is not None else np.nan),
            "BMI_from_pred_hw": np.nan,
            "bmi_source": out.get("bmi_source"),
            "check": out.get("check")
        })
    save_csv(rows, f"{OUT_DIR}/pred_v4_encadenado_{ALT_MODEL_ID.split('/')[-1]}_n{len(notes_run)}.csv")

    print("\n Ficheros de predicci√≥n generados para", ALT_MODEL_ID)
    for p in ALL_PRED_PATHS: print(" -", p)

print("\n Fin.")


Notas totales: 50 | GT: 50 | Intersecci√≥n: 50 | Usadas: 20
‚úî Muestra guardada: C:\Users\hered\Desktop\TFM\TFM\TFM2\outputs\notes_sample_20.csv
‚úî Modelo v√°lido en HF: microsoft/BioGPT  (sha: eb0d815)

 Ejecutando experimento para: microsoft/BioGPT

Cargando tokenizer: microsoft/BioGPT
Cargando modelo OpenVINO en GPU (exportando si es necesario)...
 Modelo operativo en GPU


Inferencia v1_simple: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [09:52<00:00, 29.60s/it]


 Guardado: C:\Users\hered\Desktop\TFM\TFM\TFM2\outputs/pred_v1_simple_BioGPT_n20.csv


Inferencia v2_estricto: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [09:50<00:00, 29.53s/it]


 Guardado: C:\Users\hered\Desktop\TFM\TFM\TFM2\outputs/pred_v2_estricto_BioGPT_n20.csv


Inferencia v3_fewshot: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [11:03<00:00, 33.18s/it]


 Guardado: C:\Users\hered\Desktop\TFM\TFM\TFM2\outputs/pred_v3_fewshot_BioGPT_n20.csv


Inferencia v4_encadenado: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [16:13<00:00, 48.67s/it]


Guardado: C:\Users\hered\Desktop\TFM\TFM\TFM2\outputs/pred_v4_encadenado_BioGPT_n20.csv

 Ficheros de predicci√≥n generados para microsoft/BioGPT
 - C:\Users\hered\Desktop\TFM\TFM\TFM2\outputs/pred_v1_simple_BioGPT_n20.csv
 - C:\Users\hered\Desktop\TFM\TFM\TFM2\outputs/pred_v2_estricto_BioGPT_n20.csv
 - C:\Users\hered\Desktop\TFM\TFM\TFM2\outputs/pred_v3_fewshot_BioGPT_n20.csv
 - C:\Users\hered\Desktop\TFM\TFM\TFM2\outputs/pred_v4_encadenado_BioGPT_n20.csv
 Modelo v√°lido en HF: microsoft/BioGPT-Large  (sha: c6a5136)

 Ejecutando experimento para: microsoft/BioGPT-Large

Cargando tokenizer: microsoft/BioGPT-Large
Cargando modelo OpenVINO en GPU (exportando si es necesario)...
INFO:nncf:Statistics of the bitwidth distribution:
+---------------------------+-----------------------------+----------------------------------------+
| Weight compression mode   | % all parameters (layers)   | % ratio-defining parameters (layers)   |
| int8_asym                 | 100% (290 / 290)            | 10

 Modelo operativo en GPU


Inferencia v1_simple:  35%|‚ñà‚ñà‚ñà‚ñå      | 7/20 [23:22<43:24, 200.38s/it]  


KeyboardInterrupt: 

In [5]:
# inferencia_local_openvino_biogpt_large.py
#  INFERENCIA (4 VARIANTES DE PROMPT) ‚Äî CL√çNICO (LOCAL, OpenVINO) 
# Modelo: microsoft/BioGPT-Large
# Backend: OpenVINO (Intel GPU si est√°; fallback CPU). Guarda CSVs localmente.

import os, sys, json, random, warnings, subprocess
from pathlib import Path
import argparse
warnings.filterwarnings("ignore")

#  AUTO-INSTALACI√ìN DE DEPENDENCIAS FALTANTES 
REQUIRED = [
    "numpy<2.1",
    "transformers==4.46.2",
    "optimum-intel[openvino]==1.26.0",
    "openvino>=2025.1.0",
    "accelerate>=0.34",
    "huggingface_hub>=0.24",
    "sacremoses",            # <- necesario para BioGPT tokenizer
    "tqdm",
    "pandas"
]

def ensure_packages(pkgs):
    def _is_installed(pkg_spec: str) -> bool:
        # Comprobaci√≥n simple por import principal
        name = pkg_spec.split("==")[0].split(">=")[0].split("<")[0]
        try:
            __import__(name.replace("-", "_"))
            return True
        except Exception:
            return False

    need = [p for p in pkgs if not _is_installed(p)]
    if need:
        print(" Instalando dependencias que faltan:\n  - " + "\n  - ".join(need))
        cmd = [sys.executable, "-m", "pip", "install", "--upgrade"] + need
        subprocess.check_call(cmd)
        print(" Dependencias listas. Reiniciando imports...")
ensure_packages(REQUIRED)

#  Imports tras asegurar paquetes 
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
from transformers import AutoTokenizer
from optimum.intel.openvino import OVModelForCausalLM
from huggingface_hub import HfApi
try:
    from huggingface_hub.utils import HfHubHTTPError
except Exception:
    try:
        from huggingface_hub.utils._errors import HfHubHTTPError
    except Exception:
        HfHubHTTPError = Exception

#  Config CLI (compatible con Jupyter/VSCode)
def parse_args(argv=None):
    ap = argparse.ArgumentParser(add_help=True)
    ap.add_argument("--notes", default=r"C:\Users\hered\Desktop\TFM\TFM\TFM2\sample_notes_imc.csv", help="CSV con las 20 notas")
    ap.add_argument("--gt",    default=r"C:\Users\hered\Desktop\TFM\TFM\TFM2\gt_imc_final.csv",     help="CSV con GT")
    ap.add_argument("--outdir",default=r"C:\Users\hered\Desktop\TFM\TFM\TFM2\outputs",              help="Directorio de salida")
    ap.add_argument("--n", type=int, default=20, help="N√∫mero de notas a usar")
    ap.add_argument("--device_pref", default="GPU", choices=["GPU","CPU"], help="Preferencia de dispositivo OpenVINO")
    ap.add_argument("--model", default="microsoft/BioGPT-Large", help="ID del modelo cl√≠nico en HF")
    ap.add_argument("--hf_token", default=os.getenv("HUGGINGFACE_HUB_TOKEN", None), help="Token HF si repo es privado/gated")
    ap.add_argument("--seed", type=int, default=7)
    ap.add_argument("--limit_windows", type=int, default=6)
    ap.add_argument("--attempts_per_window", type=int, default=3)
    # En Jupyter/VSCode, ignora args de ipykernel
    if argv is None:
        argv = [] if "ipykernel" in sys.modules else None
    args, _ = ap.parse_known_args(argv)
    return args

args = parse_args()
random.seed(args.seed); np.random.seed(args.seed)

# Rutas fijas (puedes cambiarlas si quieres)
PATH_NOTES = args.notes
PATH_GT    = args.gt
OUT_DIR    = args.outdir
N_NOTES    = args.n
Path(OUT_DIR).mkdir(parents=True, exist_ok=True)
SAMPLE_OUT = str(Path(OUT_DIR) / f"notes_sample_{N_NOTES}.csv")

# Helpers HF
def validate_model_or_fail(repo_id: str, token: str | None = None):
    api = HfApi()
    try:
        info = api.model_info(repo_id, token=token)
        if not info:
            raise SystemExit(f" No se pudo obtener info del repo '{repo_id}'.")
        print(f" Modelo v√°lido en HF: {repo_id}  (sha: {getattr(info, 'sha', 'n/a')[:7]})")
    except HfHubHTTPError as e:
        code = getattr(e.response, "status_code", None)
        if code == 404:
            raise SystemExit(f" Repo no encontrado en HF: {repo_id}")
        elif code == 401:
            raise SystemExit(
                f" 401 Unauthorized para {repo_id}.\n"
                f"- Ejecuta `hf auth login` o pasa --hf_token.\n"
                f"- Si es ‚Äògated‚Äô, acepta t√©rminos en su p√°gina."
            )
        else:
            raise

validate_model_or_fail(args.model, token=args.hf_token)

#  Cargar datos 
assert Path(PATH_NOTES).exists(), f"Falta {PATH_NOTES}"
assert Path(PATH_GT).exists(), f"Falta {PATH_GT}"

def load_notes(csv_path):
    df = pd.read_csv(csv_path, dtype={"patient_id": str})
    cols = {c.lower().strip(): c for c in df.columns}
    pid = next((cols[c] for c in ["patient_id","id","pid","subject_id"] if c in cols), None)
    txt = next((cols[c] for c in ["patient","note_text","note","text"] if c in cols), None)
    assert pid and txt, "Necesito columnas ['patient_id', 'patient'/'note_text']"
    df = df.rename(columns={pid:"patient_id", txt:"patient"})[["patient_id","patient"]]
    df["patient_id"] = df["patient_id"].astype(str).str.strip()
    df["patient"]    = df["patient"].astype(str)
    return df

notes_full = load_notes(PATH_NOTES)
gt = pd.read_csv(PATH_GT, dtype={"patient_id": str})
gt["patient_id"] = gt["patient_id"].astype(str).str.strip()
for c in ["height_m_true","weight_kg_true","BMI_true"]:
    if c in gt.columns:
        gt[c] = pd.to_numeric(gt[c], errors="coerce")
gt["bmi_explicit_in_note"] = gt["bmi_explicit_in_note"].astype(bool) if "bmi_explicit_in_note" in gt.columns else False

inter = notes_full.merge(gt[["patient_id"]], on="patient_id", how="inner")
assert len(inter) >= N_NOTES, f"No hay suficientes IDs comunes para N={N_NOTES}."
TXT_COL = "patient" if "patient" in inter.columns else ("note_text" if "note_text" in inter.columns else None)
assert TXT_COL is not None, f"Falta columna de texto en {PATH_NOTES}: {inter.columns.tolist()}"
notes_20 = (
    inter[["patient_id", TXT_COL]]
    .rename(columns={TXT_COL: "patient"})
    .drop_duplicates("patient_id")
    .head(N_NOTES)
    .copy()
)
notes_20.to_csv(SAMPLE_OUT, index=False)
print(f"Notas totales: {len(notes_full)} | GT: {len(gt)} | Intersecci√≥n: {len(inter)} | Usadas: {len(notes_20)}")
print(f"‚úî Muestra guardada: {SAMPLE_OUT}")

#  Modelo: OpenVINO (GPU Intel si hay; si falla ‚Üí CPU) 
def get_ov_model_and_tokenizer(model_id: str, device_pref: str = "GPU", token: str | None = None):
    """
    Carga tokenizer y modelo OpenVINO. Intenta GPU Intel; si falla, cae a CPU.
    Devuelve: (tokenizer, llm_generate, apply_chat_template)
    """
    print(f"\nCargando tokenizer: {model_id}")
    tok = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, token=token)
    if tok.pad_token_id is None and tok.eos_token_id is not None:
        tok.pad_token = tok.eos_token

    device = "GPU" if device_pref.upper() == "GPU" else "CPU"
    print(f"Cargando modelo OpenVINO en {device} (exportando si es necesario)...")
    ov_model = OVModelForCausalLM.from_pretrained(
        model_id,
        export=True,
        device=device,
        compile=True,
        trust_remote_code=True,
        ov_config={"CACHE_DIR": str(Path(OUT_DIR) / "ov_cache")},
        token=token,
    )
    if getattr(ov_model.config, "pad_token_id", None) is None and tok.eos_token_id is not None:
        ov_model.config.pad_token_id = tok.eos_token_id

    # Probar inferencia m√≠nima; si falla en GPU, reintentar en CPU.
    try:
        test_ids = tok("ok", return_tensors="pt").input_ids
        _ = ov_model.generate(test_ids, max_new_tokens=1)
        print(f" Modelo operativo en {device}")
    except Exception as e:
        if device == "GPU":
            print(f" Fall√≥ en GPU ({e}). Reintentando en CPU‚Ä¶")
            ov_model = OVModelForCausalLM.from_pretrained(
                model_id,
                export=True,
                device="CPU",
                compile=True,
                trust_remote_code=True,
                ov_config={"CACHE_DIR": str(Path(OUT_DIR) / "ov_cache")},
                token=token,
            )
            print(" Modelo operativo en CPU")
        else:
            raise

    def llm_generate(prompt: str, max_new=160, temperature=0.8, top_p=0.95, do_sample=True):
        # Algo m√°s "creativo" para evitar respuestas vac√≠as en modelos no-instruct
        inputs = tok(prompt, return_tensors="pt")
        out_ids = ov_model.generate(
            **inputs,
            max_new_tokens=max_new,
            min_new_tokens=8,
            do_sample=do_sample,
            temperature=float(temperature),
            top_p=float(top_p),
            repetition_penalty=1.05,
            eos_token_id=(tok.eos_token_id or ov_model.config.eos_token_id),
            pad_token_id=(ov_model.config.pad_token_id or tok.eos_token_id),
        )
        return tok.decode(out_ids[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True).strip()

    def apply_chat_template(system_text: str, user_text: str):
        """
        Si el tokenizer trae chat_template, √∫salo. Si no (p. ej., BioGPT),
        construye un prompt Q/A simple.
        """
        has_apply = hasattr(tok, "apply_chat_template")
        has_template = bool(getattr(tok, "chat_template", None))
        if has_apply and has_template:
            return tok.apply_chat_template(
                [
                    {"role": "system", "content": system_text},
                    {"role": "user",  "content": user_text},
                ],
                tokenize=False,
                add_generation_prompt=True,
            )
        # Fallback Q/A
        return (
            "Task:\n" + system_text.strip() + "\n\n" +
            "Input:\n" + user_text.strip() + "\n" +
            "Output:\n"
        )

    return tok, llm_generate, apply_chat_template

tokenizer, llm_generate, apply_chat_template = get_ov_model_and_tokenizer(args.model, args.device_pref, token=args.hf_token)

#  Helpers de ventana / plausibilidad 
WIN, STRIDE = 1100, 800
UNIT_TOKENS = [" cm"," m","meter","metre","ft"," in","inch","kg"," lb","lbs","pound"," stone"," st","bmi","BMI","weight","height"]

def window_iter(text, win=WIN, stride=STRIDE):
    t = str(text); n = len(t)
    if n <= win:
        yield 0, t; return
    for i in range(0, n, stride):
        yield i, t[i:i+win]
        if i+win >= n: break

def has_unit_token(s: str):
    sl = (s or "").lower()
    return any(tok.strip().lower() in sl for tok in UNIT_TOKENS)

H_MIN, H_MAX = 1.2, 2.2
W_MIN, W_MAX = 30, 300
BMI_MIN, BMI_MAX = 10, 80
BMI_TOL = 0.5

def is_num(x):
    try:
        return np.isfinite(float(x))
    except:
        return False

def clip_plausible(h, w, b):
    try:
        if is_num(h) and not (H_MIN <= float(h) <= H_MAX): h = None
    except: h = None
    try:
        if is_num(w) and not (W_MIN <= float(w) <= W_MAX): w = None
    except: w = None
    try:
        if is_num(b) and not (BMI_MIN <= float(b) <= BMI_MAX): b = None
    except: b = None
    return h, w, b

def recompute_bmi(h, w):
    try:
        h = float(h); w = float(w)
        if h > 0: return round(w/(h*h), 2)
    except: pass
    return None

#  4 VARIANTES DE PROMPT 
# 1) SIMPLE (tripleta)
SYSTEM_SIMPLE = (
    "You are a careful clinical extractor. From the GIVEN WINDOW ONLY, return STRICT JSON with normalized SI values:\n"
    "{ \"height_m\": <float|null>, \"weight_kg\": <float|null>, \"bmi\": <float|null> }\n"
    "Rules: Use ONLY numbers present; convert units to SI; if either height or weight is missing, bmi=null. Output JSON only."
)

# 2) ESTRICTO (unidades expl√≠citas + verificaci√≥n)
SYSTEM_STRICT = (
    "You are a clinical extractor and verifier. From the GIVEN WINDOW ONLY, return STRICT JSON:\n"
    "{ \"height_m\": <float|null>, \"weight_kg\": <float|null>, \"bmi\": <float|null> }\n"
    "Rules:\n"
    "‚Ä¢ Use ONLY numbers with explicit units (cm/m/ft-in ‚Üí m; kg/lb/stone ‚Üí kg).\n"
    "‚Ä¢ Plausibility: 1.20 ‚â§ height_m ‚â§ 2.20, 30 ‚â§ weight_kg ‚â§ 300, 10 ‚â§ bmi ‚â§ 80.\n"
    "‚Ä¢ If both H & W exist, compute bmi=kg/(m^2) (2 decimals) and prefer this over any conflicting BMI text.\n"
    "‚Ä¢ If inconsistent, set bmi=null. Output JSON only."
)

# 3) FEW-SHOT (ejemplos)
SYSTEM_FEWSHOT = SYSTEM_SIMPLE
FEW_SHOTS = [
    ("A 60-year-old woman, height 165 cm and weight 68 kg.",
     "{\"height_m\": 1.65, \"weight_kg\": 68.0, \"bmi\": 24.98}"),
    ("Male, 1.80 m, 90 kg; BMI not explicitly stated in text.",
     "{\"height_m\": 1.80, \"weight_kg\": 90.0, \"bmi\": 27.78}"),
    ("Patient reports good energy. No numeric measurements present.",
     "{\"height_m\": null, \"weight_kg\": null, \"bmi\": null}"),
]

# 4) ENCADENADO (SPAN -> NORM -> JUEZ) ‚Äî se define en funciones abajo

#  Utilidades de generaci√≥n / parsing ndef safe_json(text: str):
    if not text: return None
    s = text.strip()
    if s.startswith("```"):
        try:
            s = s.split("```", 1)[-1]
            if "```" in s: s = s.split("```",1)[0]
        except: pass
    a, b = s.find("{"), s.rfind("}")
    if a!=-1 and b!=-1 and b>a: s = s[a:b+1]
    s = s.replace("None","null").replace("NaN","null").replace(",}", "}")
    try:
        obj = json.loads(s)
        return obj if isinstance(obj, dict) else None
    except: return None

def gen_prompt(apply_chat_template, system_text, window_text):
    return apply_chat_template(system_text, "NOTE WINDOW:\n"+window_text+"\n\nJSON ONLY")

#  Ejecutores comunes
ATTEMPTS_PER_WIN = args.attempts_per_window
N_WINDOWS_MAX = args.limit_windows

def run_triplet(note_text: str, system_text: str, llm_generate, apply_chat_template):
    wins = sorted([(s,c) for s,c in window_iter(note_text)], key=lambda x: int(not has_unit_token(x[1])))
    best={"h":None,"w":None,"b":None,"score":-1e9}
    for _, chunk in wins[:N_WINDOWS_MAX]:
        prompt = gen_prompt(apply_chat_template, system_text, chunk)
        for _ in range(ATTEMPTS_PER_WIN):
            obj = safe_json(llm_generate(prompt))
            if obj is None: continue
            h,w,b = obj.get("height_m"), obj.get("weight_kg"), obj.get("bmi")
            try: h=float(h)
            except: h=None
            try: w=float(w)
            except: w=None
            try: b=float(b)
            except: b=None
            h,w,b = clip_plausible(h,w,b)
            sc=0.0
            if is_num(h): sc+=1.0
            if is_num(w): sc+=1.0
            if is_num(h) and is_num(w):
                b2 = recompute_bmi(h,w)
                if is_num(b2): sc += 0.7
                if is_num(b) and is_num(b2) and abs(float(b)-float(b2))<=BMI_TOL: sc += 0.4
            elif is_num(b):
                sc += 0.2
            if sc>best["score"]: best={"h":h,"w":w,"b":b,"score":sc}
        if is_num(best["h"]) and is_num(best["w"]): break
    H = round(float(best["h"]),2) if is_num(best["h"]) else None
    W = round(float(best["w"]),1) if is_num(best["w"]) else None
    B_from = recompute_bmi(H,W) if (is_num(H) and is_num(W)) else None
    B = B_from if is_num(B_from) else (round(float(best["b"]),2) if is_num(best["b"]) else None)
    return H,W,B_from,B

def run_fewshot(note_text: str, system_text: str, llm_generate, apply_chat_template):
    # Igual que triplet, pero con ejemplos cuando no hay chat_template
    wins = sorted([(s,c) for s,c in window_iter(note_text)], key=lambda x: int(not has_unit_token(x[1])))
    best={"h":None,"w":None,"b":None,"score":-1e9}
    for _, chunk in wins[:N_WINDOWS_MAX]:
        # Construir prompt con ejemplos (fallback estilo Q/A)
        few = []
        for ex_in, ex_out in FEW_SHOTS:
            few.append("Input:\n"+ex_in+"\nOutput:\n"+ex_out+"\n")
        few_block = "\n".join(few)
        prompt = (
            "Task:\n"+system_text.strip()+"\n\n"+
            "Examples:\n"+few_block+"\n"+
            "Input:\nNOTE WINDOW:\n"+chunk+"\n\nJSON ONLY\n"+
            "Output:\n"
        )
        for _ in range(ATTEMPTS_PER_WIN):
            obj = safe_json(llm_generate(prompt))
            if obj is None: continue
            h,w,b = obj.get("height_m"), obj.get("weight_kg"), obj.get("bmi")
            try: h=float(h)
            except: h=None
            try: w=float(w)
            except: w=None
            try: b=float(b)
            except: b=None
            h,w,b = clip_plausible(h,w,b)
            sc=0.0
            if is_num(h): sc+=1.0
            if is_num(w): sc+=1.0
            if is_num(h) and is_num(w):
                b2 = recompute_bmi(h,w)
                if is_num(b2): sc += 0.7
                if is_num(b) and is_num(b2) and abs(float(b)-float(b2))<=BMI_TOL: sc += 0.4
            elif is_num(b):
                sc += 0.2
            if sc>best["score"]: best={"h":h,"w":w,"b":b,"score":sc}
        if is_num(best["h"]) and is_num(best["w"]): break
    H = round(float(best["h"]),2) if is_num(best["h"]) else None
    W = round(float(best["w"]),1) if is_num(best["w"]) else None
    B_from = recompute_bmi(H,W) if (is_num(H) and is_num(W)) else None
    B = B_from if is_num(B_from) else (round(float(best["b"]),2) if is_num(best["b"]) else None)
    return H,W,B_from,B

#  Cadena (SPAN -> NORM -> JUEZ)
def chat_prompt(apply_chat_template, system, user):
    return apply_chat_template(system, user)

def run_chain_on_window(window_text: str, llm_generate, apply_chat_template):
    SYS_SPAN = (
        "You are a clinical span finder. From the NOTE WINDOW, pick ONLY the earliest sentence that "
        "contains tokens/units for height or weight or BMI, and return STRICT JSON:\n"
        "{ \"sentence\": <string>, \"height_span\": <string|null>, \"weight_span\": <string|null>, \"bmi_span\": <string|null> }\n"
        "Spans must be exact substrings and include units when applicable. JSON only."
    )
    SYS_NORM = (
        "You are a clinical normalizer and calculator. Given the chosen sentence and spans, return STRICT JSON:\n"
        "{ \"height_m\": <float|null>, \"weight_kg\": <float|null>, \"bmi\": <float|null>, "
        "\"bmi_source\": <\"from_text\"|\"from_hw\"|null>, \"check\": <\"ok\"|\"mismatch\"|\"insufficient\"> }\n"
        "Normalize units; if both H & W exist, COMPUTE bmi=kg/(m^2) (2 decimals). Prefer computed BMI if conflicting."
    )
    span_raw = llm_generate(chat_prompt(apply_chat_template, SYS_SPAN, f"NOTE WINDOW:\n{window_text}\n\nJSON ONLY"))
    span_obj = safe_json(span_raw) or {}
    norm_user = json.dumps({
        "sentence": span_obj.get("sentence",""),
        "height_span": span_obj.get("height_span"),
        "weight_span": span_obj.get("weight_span"),
        "bmi_span": span_obj.get("bmi_span")
    }, ensure_ascii=False)
    norm_raw = llm_generate(chat_prompt(apply_chat_template, SYS_NORM, norm_user + "\n\nJSON ONLY"))
    norm_obj = safe_json(norm_raw) or {}
    return {
        "sentence": span_obj.get("sentence",""),
        "height_m": norm_obj.get("height_m"),
        "weight_kg": norm_obj.get("weight_kg"),
        "bmi": norm_obj.get("bmi"),
        "bmi_source": norm_obj.get("bmi_source"),
        "check": norm_obj.get("check")
    }

def run_chain_on_note(note_text: str, llm_generate, apply_chat_template, attempts_per_win=2, n_windows_max=6):
    cands=[]
    for _, chunk in list(window_iter(note_text))[:n_windows_max]:
        for _ in range(attempts_per_win):
            c = run_chain_on_window(chunk, llm_generate, apply_chat_template)
            if isinstance(c, dict): cands.append(c)
    SYS_JUDGE = (
        "You are a strict clinical judge. You will receive a list of candidate JSON objects each with "
        "height_m, weight_kg, bmi, bmi_source, check, and the chosen sentence.\n"
        "Pick the single BEST candidate and return STRICT JSON with the same fields. Prefer check=\"ok\"; "
        "ties ‚Üí same sentence H/W with explicit units; ties ‚Üí clearer SI units."
    )
    judge_user = json.dumps({"candidates": cands}, ensure_ascii=False)
    judge_raw = llm_generate(chat_prompt(apply_chat_template, SYS_JUDGE, judge_user + "\n\nJSON ONLY"))
    jud = safe_json(judge_raw) or {}
    def to_float(x):
        try: return float(x)
        except: return None
    return {
        "height_m_pred": to_float(jud.get("height_m")),
        "weight_kg_pred": to_float(jud.get("weight_kg")),
        "BMI_pred_raw":   to_float(jud.get("bmi")),
        "bmi_source":     jud.get("bmi_source"),
        "check":          jud.get("check")
    }

#  Orquestaci√≥n ‚Äî 4 variantes 
notes_run = pd.read_csv(SAMPLE_OUT, dtype={"patient_id": str})
notes_run["patient_id"] = notes_run["patient_id"].astype(str).str.strip()

ALL_PRED_PATHS = []

def save_csv(rows, out_csv):
    Path(out_csv).parent.mkdir(parents=True, exist_ok=True)
    pd.DataFrame(rows).to_csv(out_csv, index=False)
    print(f"Guardado: {out_csv}")
    ALL_PRED_PATHS.append(out_csv)

# V1: SIMPLE
rows=[]
for _, r in tqdm(notes_run.iterrows(), total=len(notes_run), desc="Inferencia v1_simple"):
    pid, note = r["patient_id"], str(r["patient"])
    H,W,B_from,B = run_triplet(note, SYSTEM_SIMPLE, llm_generate, apply_chat_template)
    rows.append({
        "patient_id": pid,
        "height_m_pred": H if H is not None else np.nan,
        "weight_kg_pred": W if W is not None else np.nan,
        "BMI_from_pred_hw": B_from if B_from is not None else np.nan,
        "BMI_pred_raw": B if B is not None else np.nan,
        "note_len": len(note),
        "prompt_id": "v1_simple",
        "model_used": args.model
    })
save_csv(rows, f"{OUT_DIR}/pred_v1_simple_{args.model.split('/')[-1]}_n{len(notes_run)}.csv")

# V2: ESTRICTO
rows=[]
for _, r in tqdm(notes_run.iterrows(), total=len(notes_run), desc="Inferencia v2_estricto"):
    pid, note = r["patient_id"], str(r["patient"])
    H,W,B_from,B = run_triplet(note, SYSTEM_STRICT, llm_generate, apply_chat_template)
    rows.append({
        "patient_id": pid,
        "height_m_pred": H if H is not None else np.nan,
        "weight_kg_pred": W if W is not None else np.nan,
        "BMI_from_pred_hw": B_from if B_from is not None else np.nan,
        "BMI_pred_raw": B if B is not None else np.nan,
        "note_len": len(note),
        "prompt_id": "v2_estricto",
        "model_used": args.model
    })
save_csv(rows, f"{OUT_DIR}/pred_v2_estricto_{args.model.split('/')[-1]}_n{len(notes_run)}.csv")

# V3: FEW-SHOT
rows=[]
for _, r in tqdm(notes_run.iterrows(), total=len(notes_run), desc="Inferencia v3_fewshot"):
    pid, note = r["patient_id"], str(r["patient"])
    H,W,B_from,B = run_fewshot(note, SYSTEM_FEWSHOT, llm_generate, apply_chat_template)
    rows.append({
        "patient_id": pid,
        "height_m_pred": H if H is not None else np.nan,
        "weight_kg_pred": W if W is not None else np.nan,
        "BMI_from_pred_hw": B_from if B_from is not None else np.nan,
        "BMI_pred_raw": B if B is not None else np.nan,
        "note_len": len(note),
        "prompt_id": "v3_fewshot",
        "model_used": args.model
    })
save_csv(rows, f"{OUT_DIR}/pred_v3_fewshot_{args.model.split('/')[-1]}_n{len(notes_run)}.csv")

# V4: ENCADENADO (SPAN ‚Üí NORM ‚Üí JUEZ)
rows=[]
for _, r in tqdm(notes_run.iterrows(), total=len(notes_run), desc="Inferencia v4_encadenado"):
    pid, note = r["patient_id"], str(r["patient"])
    out = run_chain_on_note(note, llm_generate, apply_chat_template, attempts_per_win=2, n_windows_max=6)
    rows.append({
        "patient_id": pid,
        "note_len": len(note),
        "prompt_id": "v4_encadenado",
        "model_used": args.model,
        "height_m_pred": (out["height_m_pred"] if out["height_m_pred"] is not None else np.nan),
        "weight_kg_pred": (out["weight_kg_pred"] if out["weight_kg_pred"] is not None else np.nan),
        "BMI_pred_raw":   (out["BMI_pred_raw"] if out["BMI_pred_raw"] is not None else np.nan),
        "BMI_from_pred_hw": np.nan,
        "bmi_source": out.get("bmi_source"),
        "check": out.get("check")
    })
save_csv(rows, f"{OUT_DIR}/pred_v4_encadenado_{args.model.split('/')[-1]}_n{len(notes_run)}.csv")

print("\n Ficheros de predicci√≥n generados para", args.model)
for p in ALL_PRED_PATHS: print(" -", p)
print("\nFin.")


 Instalando dependencias que faltan:
  - optimum-intel[openvino]==1.26.0
 Dependencias listas. Reiniciando imports...
 Modelo v√°lido en HF: microsoft/BioGPT-Large  (sha: c6a5136)
Notas totales: 50 | GT: 50 | Intersecci√≥n: 50 | Usadas: 20
Muestra guardada: C:\Users\hered\Desktop\TFM\TFM\TFM2\outputs\notes_sample_20.csv

Cargando tokenizer: microsoft/BioGPT-Large
Cargando modelo OpenVINO en GPU (exportando si es necesario)...
INFO:nncf:Statistics of the bitwidth distribution:
+---------------------------+-----------------------------+----------------------------------------+
| Weight compression mode   | % all parameters (layers)   | % ratio-defining parameters (layers)   |
| int8_asym                 | 100% (290 / 290)            | 100% (290 / 290)                       |
+---------------------------+-----------------------------+----------------------------------------+


 Modelo operativo en GPU


Inferencia v1_simple: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [53:27<00:00, 160.37s/it]


 Guardado: C:\Users\hered\Desktop\TFM\TFM\TFM2\outputs/pred_v1_simple_BioGPT-Large_n20.csv


Inferencia v2_estricto: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [52:59<00:00, 158.97s/it]


 Guardado: C:\Users\hered\Desktop\TFM\TFM\TFM2\outputs/pred_v2_estricto_BioGPT-Large_n20.csv


Inferencia v3_fewshot: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [56:26<00:00, 169.34s/it]


 Guardado: C:\Users\hered\Desktop\TFM\TFM\TFM2\outputs/pred_v3_fewshot_BioGPT-Large_n20.csv


Inferencia v4_encadenado: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [1:16:23<00:00, 229.18s/it]

 Guardado: C:\Users\hered\Desktop\TFM\TFM\TFM2\outputs/pred_v4_encadenado_BioGPT-Large_n20.csv

 Ficheros de predicci√≥n generados para microsoft/BioGPT-Large
 - C:\Users\hered\Desktop\TFM\TFM\TFM2\outputs/pred_v1_simple_BioGPT-Large_n20.csv
 - C:\Users\hered\Desktop\TFM\TFM\TFM2\outputs/pred_v2_estricto_BioGPT-Large_n20.csv
 - C:\Users\hered\Desktop\TFM\TFM\TFM2\outputs/pred_v3_fewshot_BioGPT-Large_n20.csv
 - C:\Users\hered\Desktop\TFM\TFM\TFM2\outputs/pred_v4_encadenado_BioGPT-Large_n20.csv

 Fin.





In [8]:
!pip uninstall torch torchvision torchaudio
!pip install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 --index-url https://download.pytorch.org/whl/cpu

^C
Looking in indexes: https://download.pytorch.org/whl/cpu


"DOSKEY" no se reconoce como un comando interno o externo,
programa o archivo por lotes ejecutable.
ERROR: Could not find a version that satisfies the requirement torch==2.1.0 (from versions: 2.2.0+cpu, 2.2.1+cpu, 2.2.2+cpu, 2.3.0+cpu, 2.3.1+cpu, 2.4.0+cpu, 2.4.1+cpu, 2.5.0+cpu, 2.5.1+cpu, 2.6.0+cpu, 2.7.0+cpu, 2.7.1+cpu, 2.8.0+cpu, 2.9.0+cpu)
ERROR: No matching distribution found for torch==2.1.0


In [None]:
def get_ov_model_and_tokenizer(model_id: str, device_pref: str = "GPU", token: str | None = None):
    print(f"\nCargando tokenizer: {model_id}")
    tok = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, token=token)
    if tok.pad_token_id is None and tok.eos_token_id is not None:
        tok.pad_token = tok.eos_token

    device = "GPU" if device_pref.upper() == "GPU" else "CPU"
    print(f"Cargando modelo OpenVINO en {device}...")
    
    # Configuraci√≥n para evitar problemas de tracing
    ov_config_dict = {
        "CACHE_DIR": str(Path(OUT_DIR) / "ov_cache"),
        "PERFORMANCE_HINT": "LATENCY",
        "INFERENCE_PRECISION_HINT": "f32"  # Forzar precisi√≥n float32
    }
    
    try:
        # Intentar cargar sin exportar primero
        ov_model = OVModelForCausalLM.from_pretrained(
            model_id,
            export=False,  # No exportar, solo cargar
            device=device,
            compile=False,  # No compilar inmediatamente
            trust_remote_code=True,
            ov_config=ov_config_dict,
            token=token,
        )
    except Exception:
        # Si falla, intentar con exportaci√≥n pero con configuraciones m√°s conservadoras
        print("  Modelo no encontrado en formato OpenVINO. Exportando...")
        try:
            ov_model = OVModelForCausalLM.from_pretrained(
                model_id,
                export=True,
                device="CPU",  # Usar CPU para exportaci√≥n que es m√°s estable
                compile=False,
                trust_remote_code=True,
                ov_config=ov_config_dict,
                token=token,
                # Par√°metros espec√≠ficos para evitar problemas de tracing
                model_kwargs={
                    "torch_dtype": torch.float32,
                    "low_cpu_mem_usage": True
                }
            )
        except Exception as e:
            print(f" Error en exportaci√≥n: {e}")
            print(" Intentando enfoque alternativo...")
            # Enfoque alternativo: cargar el modelo PyTorch primero y luego convertirlo
            from transformers import AutoModelForCausalLM
            import torch
            
            print("Cargando modelo PyTorch primero...")
            pt_model = AutoModelForCausalLM.from_pretrained(
                model_id,
                torch_dtype=torch.float32,
                trust_remote_code=True,
                token=token
            )
            
            # Forzar el modelo a modo evaluaci√≥n y float32
            pt_model.eval()
            pt_model = pt_model.to(torch.float32)
            
            print("Exportando desde modelo PyTorch cargado...")
            ov_model = OVModelForCausalLM.from_pretrained(
                model_id,
                export=True,
                device="CPU",
                compile=False,
                trust_remote_code=True,
                ov_config=ov_config_dict,
                token=token,
                model_kwargs={"_from_torch": True}
            )

    # Configurar token de padding si es necesario
    if getattr(ov_model.config, "pad_token_id", None) is None and tok.eos_token_id is not None:
        ov_model.config.pad_token_id = tok.eos_token_id

    # Ahora compilar el modelo
    print("üîß Compilando modelo...")
    ov_model.compile()

    # Probar inferencia m√≠nima
    try:
        test_ids = tok("ok", return_tensors="pt").input_ids
        # Usar configuraci√≥n m√°s conservadora para la prueba
        _ = ov_model.generate(test_ids, max_new_tokens=1, do_sample=False)
        print(f" Modelo operativo en {device}")
    except Exception as e:
        print(f" Advertencia en prueba de inferencia: {e}")
        # Continuar de todos modos

    def llm_generate(prompt: str, max_new=180, temperature=0.9, top_p=0.95, do_sample=True):
        inputs = tok(prompt, return_tensors="pt")
        
        # Configuraci√≥n m√°s robusta para generaci√≥n
        generation_config = {
            "max_new_tokens": max_new,
            "min_new_tokens": 8,
            "do_sample": do_sample,
            "temperature": float(temperature),
            "top_p": float(top_p),
            "repetition_penalty": 1.05,
            "eos_token_id": (tok.eos_token_id or getattr(ov_model.config, "eos_token_id", None)),
            "pad_token_id": (getattr(ov_model.config, "pad_token_id", None) or tok.eos_token_id),
        }
        
        # Para primeros intentos, usar configuraci√≥n m√°s conservadora
        if not do_sample:
            generation_config.update({
                "num_beams": 1,
                "do_sample": False
            })
        
        try:
            out_ids = ov_model.generate(**inputs, **generation_config)
            return tok.decode(out_ids[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True).strip()
        except Exception as e:
            print(f" Error en generaci√≥n: {e}")
            # Fallback: intentar con configuraci√≥n m√≠nima
            try:
                out_ids = ov_model.generate(
                    **inputs,
                    max_new_tokens=50,
                    do_sample=False,
                    num_beams=1
                )
                return tok.decode(out_ids[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True).strip()
            except:
                return ""

    def apply_chat_template(system_text: str, user_text: str):
        """Aplicar plantilla de chat de manera segura"""
        try:
            has_apply = hasattr(tok, "apply_chat_template")
            has_template = bool(getattr(tok, "chat_template", None))
            if has_apply and has_template:
                return tok.apply_chat_template(
                    [
                        {"role": "system", "content": system_text},
                        {"role": "user",  "content": user_text},
                    ],
                    tokenize=False,
                    add_generation_prompt=True,
                )
        except Exception as e:
            print(f" Error aplicando plantilla de chat: {e}")
        
        # Fallback Q/A
        return (
            "Task:\n" + system_text.strip() + "\n\n" +
            "Input:\n" + user_text.strip() + "\n" +
            "Output:\n"
        )

    return tok, llm_generate, apply_chat_template

In [None]:
REQUIRED = [
    "numpy<2.1",
    "transformers==4.46.2",
    "optimum-intel[openvino]==1.26.0", 
    "openvino>=2025.1.0",
    "accelerate>=0.34",
    "huggingface_hub>=0.24",
    "sacremoses",
    "sentencepiece", 
    "tiktoken",
    "pandas",
    "tqdm",
    "torch==2.1.0",  # Versi√≥n espec√≠fica compatible
    "torchvision==0.16.0",
    "torchaudio==2.1.0"
]