In [2]:
import pandas as pd
import numpy as np

df = pd.read_csv("/Users/aryanb/aryan personal/code/datathon26/rl/data/data_v1_max_72_h.csv")  # your dataset
df["measure_time"] = pd.to_numeric(df["measure_time"], errors="coerce")
df = df.dropna(subset=["visit_occurrence_id", "measure_time"]).copy()
df = df.sort_values(["visit_occurrence_id", "measure_time"]).reset_index(drop=True)

# ---- carry-forward (LOCF) ----
VENT = ["peep_median", "peak_median"]
PHYS = ["map_median","sbp_median","dbp_median","temp_median",
        "wbc_median","hemoglobin_median","platelets_median",
        "sodium_median","potassium_median","chloride_median",
        "glucose_median","bun_median","creatinine_median","crp_median"]
VENT = [c for c in VENT if c in df.columns]
PHYS = [c for c in PHYS if c in df.columns]

for c in VENT + PHYS:
    df[f"{c}_missing"] = df[c].isna().astype(int)

# physiology: forward fill within visit
df[PHYS] = df.groupby("visit_occurrence_id")[PHYS].ffill()

# vent: (option 1) forward fill too, but keep missingness flags so model can learn
df[VENT] = df.groupby("visit_occurrence_id")[VENT].ffill()

# ---- build dp + transitions ----
df["dp_dyn"] = df["peak_median"] - df["peep_median"]

# action: PEEP bin or delta-PEEP (we'll do delta bins)
df["peep_t"] = df["peep_median"]
df["delta_peep"] = df.groupby("visit_occurrence_id")["peep_t"].diff()

# next-step labels
df["dp_dyn_next"] = df.groupby("visit_occurrence_id")["dp_dyn"].shift(-1)
df["map_next"] = df.groupby("visit_occurrence_id")["map_median"].shift(-1) if "map_median" in df.columns else np.nan

# terminal: last hour in each visit
df["is_terminal"] = df.groupby("visit_occurrence_id")["measure_time"].transform(lambda x: x == x.max()).astype(int)

# keep usable rows
rl = df[df["peep_t"].notna() & df["dp_dyn_next"].notna()].copy()
rl.reset_index(drop=True, inplace=True)

print("RL rows:", len(rl))
rl.head()


RL rows: 233791


Unnamed: 0,visit_occurrence_id,measure_time,person_id,gender,year_of_birth,visit_start_datetime,visit_end_datetime,visit_type_concept_name,admitted_from,discharged_to,...,glucose_median_missing,bun_median_missing,creatinine_median_missing,crp_median_missing,dp_dyn,peep_t,delta_peep,dp_dyn_next,map_next,is_terminal
0,1,-1,6656,MALE,1942,2017-01-01 00:00:00+00:00,2017-01-18 08:52:00+00:00,EHR encounter record,No matching concept,No matching concept,...,1,1,1,1,32.0,8.0,,32.15,69.0,0
1,1,0,6656,MALE,1942,2017-01-01 00:00:00+00:00,2017-01-18 08:52:00+00:00,EHR encounter record,No matching concept,No matching concept,...,1,1,0,0,32.15,7.85,-0.15,32.05,71.0,0
2,1,1,6656,MALE,1942,2017-01-01 00:00:00+00:00,2017-01-18 08:52:00+00:00,EHR encounter record,No matching concept,No matching concept,...,1,1,1,1,32.05,7.95,0.1,32.05,71.0,0
3,1,2,6656,MALE,1942,2017-01-01 00:00:00+00:00,2017-01-18 08:52:00+00:00,EHR encounter record,No matching concept,No matching concept,...,1,1,1,1,32.05,7.95,0.0,32.0,75.0,0
4,1,3,6656,MALE,1942,2017-01-01 00:00:00+00:00,2017-01-18 08:52:00+00:00,EHR encounter record,No matching concept,No matching concept,...,1,1,1,1,32.0,8.0,0.05,34.95,78.0,0


In [3]:
def bin_delta(dp):
    # clip extreme chart noise
    if pd.isna(dp): 
        return np.nan
    dp = float(np.clip(dp, -5, 5))
    # bins in cmH2O
    if dp <= -2: return -2
    if dp == -1: return -1
    if dp == 0:  return 0
    if dp == 1:  return 1
    return 2  # >= +2

rl["a_bin"] = rl["delta_peep"].apply(bin_delta)
rl = rl.dropna(subset=["a_bin"]).copy()
rl["a_bin"] = rl["a_bin"].astype(int)
rl["reward"] = -rl["dp_dyn_next"]


In [4]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

FEATURES = ["peep_t","peak_median","dp_dyn"] + PHYS
FEATURES = [c for c in FEATURES if c in rl.columns]

X = rl[FEATURES].fillna(-999)
y = rl["a_bin"]

Xtr, Xte, ytr, yte = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
    stratify=y
)

pi_b = LogisticRegression(max_iter=2000)
pi_b.fit(Xtr, ytr)

print(classification_report(yte, pi_b.predict(Xte)))


              precision    recall  f1-score   support

          -2       0.00      0.00      0.00      1020
          -1       0.00      0.00      0.00       355
           0       0.78      1.00      0.88     35657
           1       0.00      0.00      0.00       406
           2       0.54      0.01      0.01      8222

    accuracy                           0.78     45660
   macro avg       0.26      0.20      0.18     45660
weighted avg       0.71      0.78      0.69     45660



STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=2000).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [5]:
def peep_bin(peep):
    if pd.isna(peep): return np.nan
    p = float(peep)
    if p <= 5: return 5
    if p <= 8: return 8
    if p <= 10: return 10
    if p <= 12: return 12
    if p <= 15: return 15
    return 20  # 16+

rl["a_peep_bin"] = rl["peep_t"].apply(peep_bin).astype("Int64")
rl = rl.dropna(subset=["a_peep_bin"]).copy()


In [6]:
# ============================================================
# Precision Vent MVP (Offline RL on hourly-aggregated dataset)
# Dataset assumption:
#   - measure_time = hours from admission (can be -1 for pre-admission)
#   - rows are hourly bins with *_median/_min/_max/_mean columns
#
# Goal:
#   - Learn a conservative offline policy to choose a PEEP "level bin"
#   - Objective: minimize next-step dynamic driving pressure dp_dyn_next
#   - Safety gate: conformal lower bound on MAP_next >= MAP_MIN
#
# Output:
#   - rl: analysis-ready transition table
#   - behavior_policy (pi_b): PEEP-bin classifier
#   - q_model: offline Q approximator
#   - map_model + conformal quantile q: safety predictor + bound
#   - recommend_safe(row): safe recommendation with abstain option
# ============================================================

import numpy as np
import pandas as pd

# -----------------------------
# CONFIG
# -----------------------------
CSV_PATH = "/Users/aryanb/aryan personal/code/datathon26/rl/data/data_v1_max_72_h.csv"
ID_COL   = "visit_occurrence_id"
T_COL    = "measure_time"

MAP_MIN = 65         # safety threshold
ALPHA   = 0.10       # conformal miscoverage (0.10 -> ~90% bound)
RANDOM_SEED = 42

# Fake datetime (optional; RL only needs ordered time index)
MAKE_FAKE_TIMESTAMP = True
FAKE_ANCHOR = "2001-01-01 00:00:00"

# -----------------------------
# LOAD + SORT
# -----------------------------
df = pd.read_csv(CSV_PATH)

df[T_COL] = pd.to_numeric(df[T_COL], errors="coerce")
df = df.dropna(subset=[ID_COL, T_COL]).copy()
df = df.sort_values([ID_COL, T_COL]).reset_index(drop=True)

if MAKE_FAKE_TIMESTAMP:
    anchor = pd.Timestamp(FAKE_ANCHOR)
    df["hour_ts"] = anchor + pd.to_timedelta(df[T_COL], unit="h")
else:
    df["hour_ts"] = df[T_COL]

# -----------------------------
# FEATURE SETS
# -----------------------------
VENT_COLS = [c for c in ["peep_median", "peak_median"] if c in df.columns]

PHYS_COLS = [
    "map_median", "sbp_median", "dbp_median", "temp_median",
    "wbc_median", "hemoglobin_median", "platelets_median",
    "sodium_median", "potassium_median", "chloride_median",
    "glucose_median", "bun_median", "creatinine_median", "crp_median",
]
PHYS_COLS = [c for c in PHYS_COLS if c in df.columns]

STATIC_COLS = ["age", "gender", "year_of_birth"]
STATIC_COLS = [c for c in STATIC_COLS if c in df.columns]

if not ("peep_median" in df.columns and "peak_median" in df.columns):
    raise ValueError("Need peep_median and peak_median columns for dp_dyn surrogate.")

# -----------------------------
# MISSINGNESS FLAGS + CARRY FORWARD (LOCF)
# -----------------------------
for c in VENT_COLS + PHYS_COLS:
    df[f"{c}_missing"] = df[c].isna().astype(int)

# LOCF within visit
if PHYS_COLS:
    df[PHYS_COLS] = df.groupby(ID_COL)[PHYS_COLS].ffill()
if VENT_COLS:
    df[VENT_COLS] = df.groupby(ID_COL)[VENT_COLS].ffill()

# -----------------------------
# BUILD DP + TRANSITIONS
# dp_dyn = peak - peep (surrogate)
# -----------------------------
df["peep_t"] = df["peep_median"]
df["peak_t"] = df["peak_median"]
df["dp_dyn"] = df["peak_t"] - df["peep_t"]

df["dp_dyn_next"] = df.groupby(ID_COL)["dp_dyn"].shift(-1)
df["peep_next"]   = df.groupby(ID_COL)["peep_t"].shift(-1)
df["peak_next"]   = df.groupby(ID_COL)["peak_t"].shift(-1)

df["map_next"] = df.groupby(ID_COL)["map_median"].shift(-1) if "map_median" in df.columns else np.nan

df["is_terminal"] = df.groupby(ID_COL)[T_COL].transform(lambda x: (x == x.max())).astype(int)

# -----------------------------
# ACTION DEFINITION: PEEP LEVEL BINS
# -----------------------------
def peep_bin(peep):
    if pd.isna(peep):
        return np.nan
    p = float(peep)
    if p <= 5:  return 5
    if p <= 8:  return 8
    if p <= 10: return 10
    if p <= 12: return 12
    if p <= 15: return 15
    return 20  # 16+

df["a_peep_bin"] = df["peep_t"].apply(peep_bin)

# -----------------------------
# BUILD RL TABLE
# -----------------------------
rl = df[df["a_peep_bin"].notna() & df["dp_dyn_next"].notna()].copy()
rl["a_peep_bin"] = rl["a_peep_bin"].astype(int)

rl["reward"] = -rl["dp_dyn_next"]
rl["time_from_admit_h"] = rl[T_COL]

# -----------------------------
# SANITY CHECKS (do not skip)
# -----------------------------
print("Rows in RL:", len(rl))
print("\nPEEP-bin distribution:")
print(rl["a_peep_bin"].value_counts(normalize=True).sort_index())

print("\nDP sanity (dp_dyn):")
print(rl["dp_dyn"].describe())

# If dp looks insane, you can clip for MVP:
# rl["dp_dyn"] = rl["dp_dyn"].clip(-5, 50)
# rl["dp_dyn_next"] = rl["dp_dyn_next"].clip(-5, 50)
# rl["reward"] = -rl["dp_dyn_next"]

# -----------------------------
# DEFINE STATE FEATURES
# -----------------------------
FEATURES = (
    ["peep_t", "peak_t", "dp_dyn", "time_from_admit_h"]
    + STATIC_COLS
    + PHYS_COLS
)
FEATURES = [c for c in FEATURES if c in rl.columns]
print("\nUsing FEATURES:", FEATURES)

# ============================================================
# STEP 3: Behavior Policy pi_b(a|s) (PEEP-bin classifier)
# Handles categorical columns like gender via OneHotEncoder
# ============================================================
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

X = rl[FEATURES].copy()
y = rl["a_peep_bin"].copy()

# Identify numeric vs categorical columns
num_cols = X.select_dtypes(include=[np.number]).columns.tolist()
cat_cols = [c for c in X.columns if c not in num_cols]

print("Numeric cols:", num_cols)
print("Categorical cols:", cat_cols)

Xtr, Xte, ytr, yte = train_test_split(
    X, y,
    test_size=0.2,
    random_state=RANDOM_SEED,
    stratify=y
)

numeric_pipe = Pipeline([
    ("imputer", SimpleImputer(strategy="median")),
    ("scaler", StandardScaler())
])

categorical_pipe = Pipeline([
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("onehot", OneHotEncoder(handle_unknown="ignore"))
])

pre = ColumnTransformer(
    transformers=[
        ("num", numeric_pipe, num_cols),
        ("cat", categorical_pipe, cat_cols),
    ],
    remainder="drop"
)

pi_b = Pipeline([
    ("pre", pre),
    ("lr", LogisticRegression(
        max_iter=8000,
        class_weight="balanced",
        solver="lbfgs"
    ))
])

pi_b.fit(Xtr, ytr)
print("\n=== Behavior policy report (PEEP-bin) ===")
print(classification_report(yte, pi_b.predict(Xte)))

# ============================================================
# FIX: drop all-missing columns (bun_median) to remove warnings
# ============================================================
if "bun_median" in rl.columns:
    if rl["bun_median"].notna().sum() == 0:
        print("Dropping bun_median (all missing).")
        PHYS_COLS = [c for c in PHYS_COLS if c != "bun_median"]
        FEATURES = [c for c in FEATURES if c != "bun_median"]

# ============================================================
# Stabilize DP (your min -3244 is a data artifact)
# ============================================================
rl["dp_dyn"] = rl["dp_dyn"].clip(lower=0, upper=60)
rl["dp_dyn_next"] = rl["dp_dyn_next"].clip(lower=0, upper=60)
rl["reward"] = -rl["dp_dyn_next"]

ACTIONS = sorted(rl["a_peep_bin"].unique().tolist())
print("\nAction bins:", ACTIONS)

# ============================================================
# STEP 4: Q model with proper (state, action) separation
# We will train a model to predict reward from:
#   inputs = state FEATURES + candidate action (as categorical)
# IMPORTANT: do NOT include a_peep_bin inside FEATURES
# ============================================================
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

STATE_FEATURES = FEATURES[:]  # state only (includes gender etc.)
if "a_peep_bin" in STATE_FEATURES:
    STATE_FEATURES.remove("a_peep_bin")

# Build training frame: state + observed action as a separate column
X_sa = rl[STATE_FEATURES].copy()
X_sa["action_bin"] = rl["a_peep_bin"].astype(int)  # separate action column
y_q = rl["reward"].copy()

Xtr, Xte, ytr, yte = train_test_split(X_sa, y_q, test_size=0.2, random_state=RANDOM_SEED)

num_cols = Xtr.select_dtypes(include=[np.number]).columns.tolist()
cat_cols = [c for c in Xtr.columns if c not in num_cols]

print("Q-model numeric cols:", num_cols)
print("Q-model categorical cols:", cat_cols)

pre_q = ColumnTransformer(
    transformers=[
        ("num", Pipeline([("imputer", SimpleImputer(strategy="median"))]), num_cols),
        ("cat", Pipeline([
            ("imputer", SimpleImputer(strategy="most_frequent")),
            ("onehot", OneHotEncoder(handle_unknown="ignore"))
        ]), cat_cols),
    ],
    remainder="drop"
)

q_model = Pipeline([
    ("pre", pre_q),
    ("rf", RandomForestRegressor(
        n_estimators=500,
        random_state=RANDOM_SEED,
        n_jobs=-1,
        min_samples_leaf=50
    ))
])

q_model.fit(Xtr, ytr)
print("\n=== Q model holdout R^2 (sanity) ===")
print(q_model.score(Xte, yte))

def q_hat(row, a_bin: int) -> float:
    x = row[STATE_FEATURES].copy()
    x["action_bin"] = int(a_bin)
    return float(q_model.predict(pd.DataFrame([x]))[0])

def rank_actions_by_q(row):
    scores = [(a, q_hat(row, a)) for a in ACTIONS]
    scores.sort(key=lambda x: x[1], reverse=True)
    return scores

# ============================================================
# STEP 5 (REVISED): Safety model on ΔMAP = MAP_next - MAP_current
# Conformal lower bound on ΔMAP
# ============================================================
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import GradientBoostingRegressor

DELTA_MAP_MIN = -10   # allow up to 10 mmHg drop worst-case (tune: -5, -10, -15)
ALPHA = 0.10          # 90% conformal bound

# Build label only where both MAP_current and MAP_next exist
tmp = rl.copy()
tmp = tmp[tmp["map_median"].notna() & tmp["map_next"].notna()].copy()
tmp["delta_map"] = tmp["map_next"] - tmp["map_median"]

print("\nSafety training rows (delta_map):", len(tmp))

map_model = None
q_delta = None

if len(tmp) > 5000:  # basic guard
    idx = np.arange(len(tmp))
    tr_idx, cal_idx = train_test_split(idx, test_size=0.2, random_state=RANDOM_SEED)

    train = tmp.iloc[tr_idx].copy()
    calib = tmp.iloc[cal_idx].copy()

    X_train = train[STATE_FEATURES].copy()
    X_train["action_bin"] = train["a_peep_bin"].astype(int)
    y_train = train["delta_map"].copy()

    X_cal = calib[STATE_FEATURES].copy()
    X_cal["action_bin"] = calib["a_peep_bin"].astype(int)
    y_cal = calib["delta_map"].copy()

    num_cols = X_train.select_dtypes(include=[np.number]).columns.tolist()
    cat_cols = [c for c in X_train.columns if c not in num_cols]

    pre_map = ColumnTransformer(
        transformers=[
            ("num", Pipeline([("imputer", SimpleImputer(strategy="median"))]), num_cols),
            ("cat", Pipeline([
                ("imputer", SimpleImputer(strategy="most_frequent")),
                ("onehot", OneHotEncoder(handle_unknown="ignore"))
            ]), cat_cols),
        ],
        remainder="drop"
    )

    map_model = Pipeline([
        ("pre", pre_map),
        ("gbr", GradientBoostingRegressor(random_state=RANDOM_SEED))
    ])

    map_model.fit(X_train, y_train)

    pred_cal = map_model.predict(X_cal)
    resid = np.abs(y_cal - pred_cal)

    q_delta = float(np.quantile(resid, 1 - ALPHA))
    print("\n=== ΔMAP safety model ===")
    print("Conformal residual quantile q_delta:", q_delta)
else:
    print("\nNot enough rows with MAP_current & MAP_next. Safety gate will be OFF.")

def delta_map_pred_lb(row, a_bin: int):
    if map_model is None or q_delta is None:
        return None, None
    x = row[STATE_FEATURES].copy()
    x["action_bin"] = int(a_bin)
    pred = float(map_model.predict(pd.DataFrame([x]))[0])
    lb = pred - q_delta
    return pred, lb

# ============================================================
# STEP 6 (REVISED): Safe recommend using ΔMAP gate
# ============================================================
def recommend_safe(row):
    ranked = rank_actions_by_q(row)

    for a, val in ranked:
        if map_model is not None:
            pred, lb = delta_map_pred_lb(row, a)
            if lb < DELTA_MAP_MIN:
                continue
            return {"rec": a, "q_hat": val, "reason": "ok", "delta_map_pred": pred, "delta_map_lb": lb}

        return {"rec": a, "q_hat": val, "reason": "ok_no_safety", "delta_map_pred": None, "delta_map_lb": None}

    hold = int(row["a_peep_bin"])
    pred, lb = (None, None)
    if map_model is not None:
        pred, lb = delta_map_pred_lb(row, hold)

    return {"rec": hold, "q_hat": q_hat(row, hold), "reason": "abstain_hold", "delta_map_pred": pred, "delta_map_lb": lb}

print("\n=== Demo (first 10 rows) ===")
for i in range(min(10, len(rl))):
    out = recommend_safe(rl.iloc[i])
    print(i, "clin=", int(rl.iloc[i]["a_peep_bin"]), "rec=", out["rec"], "reason=", out["reason"], "ΔMAP_lb=", out["delta_map_lb"])

# ============================================================
# FAST STEP 7: vectorized evaluation (no per-row loops)
# ============================================================
N_EVAL = 20000  # 20k is plenty; change to 5000 if still slow
eval_df = rl.sample(min(N_EVAL, len(rl)), random_state=RANDOM_SEED).copy().reset_index(drop=True)

# Build base state frame once
X_base = eval_df[STATE_FEATURES].copy()

def batch_q_for_action(a):
    X = X_base.copy()
    X["action_bin"] = int(a)
    return q_model.predict(X)

def batch_map_lb_for_action(a):
    if map_model is None or q is None:
        return None
    X = X_base.copy()
    X["action_bin"] = int(a)
    mp = map_model.predict(X)
    lb = mp - q
    return lb

# Compute Q for each action in a matrix: shape (n, num_actions)
Q = np.column_stack([batch_q_for_action(a) for a in ACTIONS])

# If safety is enabled, compute MAP lower bounds for each action too
if map_model is not None and q is not None:
    MAP_LB = np.column_stack([batch_map_lb_for_action(a) for a in ACTIONS])
else:
    MAP_LB = None

# Choose best action by Q, but only among actions that pass safety
if MAP_LB is not None:
    safe_mask = (MAP_LB >= MAP_MIN)  # shape (n, num_actions)
    # set Q to -inf where unsafe so argmax ignores them
    Q_safe = np.where(safe_mask, Q, -np.inf)
    best_idx = np.argmax(Q_safe, axis=1)
    any_safe = np.any(safe_mask, axis=1)

    # If no action safe, abstain -> hold clinician action
    hold = eval_df["a_peep_bin"].values.astype(int)
    rec = np.where(any_safe, np.array(ACTIONS)[best_idx], hold)
    reasons = np.where(any_safe, "ok", "abstain_hold")
else:
    best_idx = np.argmax(Q, axis=1)
    rec = np.array(ACTIONS)[best_idx]
    reasons = np.array(["ok_no_safety"] * len(eval_df))

clin = eval_df["a_peep_bin"].values.astype(int)

# Metrics
abstain_rate = float(np.mean(reasons == "abstain_hold"))
exact_agree = float(np.mean(rec == clin))
within1 = float(np.mean(np.abs(
    np.vectorize(ACTIONS.index)(rec) - np.vectorize(ACTIONS.index)(clin)
) <= 1))

# ΔQ(rec - hold)
# compute Q for hold and rec in batch
hold_idx = np.vectorize(ACTIONS.index)(clin)
rec_idx  = np.vectorize(ACTIONS.index)(rec)
delta_q = Q[np.arange(len(eval_df)), rec_idx] - Q[np.arange(len(eval_df)), hold_idx]
mean_delta_q = float(np.mean(delta_q))

print("\n=== FAST Evaluation ===")
print("N:", len(eval_df))
print("Abstain rate:", abstain_rate)
print("Exact agreement:", exact_agree)
print("Within-1-bin:", within1)
print("Mean predicted ΔQ(rec - hold):", mean_delta_q)

# Optional: if safety enabled, report fraction of rows where ANY action passes
if MAP_LB is not None:
    print("Any-safe fraction:", float(np.mean(any_safe)))
    print("Median MAP_LB(best_safe):", float(np.median(MAP_LB[np.arange(len(eval_df)), best_idx])))


# ============================================================
# SAVE ARTIFACTS
# ============================================================
OUT_RL = "precision_vent_rl_table_mvp.csv"
keep_cols = [ID_COL, "person_id", "hour_ts", T_COL, "a_peep_bin", "reward", "dp_dyn", "dp_dyn_next"] + FEATURES
keep_cols = [c for c in keep_cols if c in rl.columns]
rl[keep_cols].to_csv(OUT_RL, index=False)
print("\nWrote:", OUT_RL)


Rows in RL: 233791

PEEP-bin distribution:
a_peep_bin
5     0.551933
8     0.332776
10    0.068412
12    0.019881
15    0.013131
20    0.013867
Name: proportion, dtype: float64

DP sanity (dp_dyn):
count    233449.000000
mean         17.986637
std          33.119781
min       -3244.350000
25%          11.000000
50%          18.000000
75%          25.200000
max          66.300000
Name: dp_dyn, dtype: float64

Using FEATURES: ['peep_t', 'peak_t', 'dp_dyn', 'time_from_admit_h', 'age', 'gender', 'year_of_birth', 'map_median', 'sbp_median', 'dbp_median', 'temp_median', 'wbc_median', 'hemoglobin_median', 'platelets_median', 'sodium_median', 'potassium_median', 'chloride_median', 'glucose_median', 'bun_median', 'creatinine_median', 'crp_median']
Numeric cols: ['peep_t', 'peak_t', 'dp_dyn', 'time_from_admit_h', 'age', 'year_of_birth', 'map_median', 'sbp_median', 'dbp_median', 'temp_median', 'wbc_median', 'hemoglobin_median', 'platelets_median', 'sodium_median', 'potassium_median', 'chloride_me




=== Behavior policy report (PEEP-bin) ===
              precision    recall  f1-score   support

           5       0.93      1.00      0.97     25808
           8       0.98      0.85      0.91     15560
          10       0.82      0.87      0.84      3199
          12       0.75      0.80      0.77       930
          15       0.90      0.82      0.86       614
          20       1.00      0.94      0.97       648

    accuracy                           0.93     46759
   macro avg       0.90      0.88      0.89     46759
weighted avg       0.94      0.93      0.93     46759

Dropping bun_median (all missing).

Action bins: [5, 8, 10, 12, 15, 20]




Q-model numeric cols: ['peep_t', 'peak_t', 'dp_dyn', 'time_from_admit_h', 'age', 'year_of_birth', 'map_median', 'sbp_median', 'dbp_median', 'temp_median', 'wbc_median', 'hemoglobin_median', 'platelets_median', 'sodium_median', 'potassium_median', 'chloride_median', 'glucose_median', 'creatinine_median', 'crp_median', 'action_bin']
Q-model categorical cols: ['gender']

=== Q model holdout R^2 (sanity) ===
0.9829980012144108

Safety training rows (delta_map): 233740

=== ΔMAP safety model ===
Conformal residual quantile q_delta: 16.118880841545767

=== Demo (first 10 rows) ===
0 clin= 8 rec= 8 reason= abstain_hold ΔMAP_lb= -10.003528285205153
1 clin= 8 rec= 8 reason= abstain_hold ΔMAP_lb= -13.264852218973123
2 clin= 8 rec= 8 reason= abstain_hold ΔMAP_lb= -13.934203502427568
3 clin= 8 rec= 8 reason= abstain_hold ΔMAP_lb= -13.934203502427568
4 clin= 8 rec= 8 reason= abstain_hold ΔMAP_lb= -15.079164201540944
5 clin= 8 rec= 8 reason= abstain_hold ΔMAP_lb= -16.32736198093209
6 clin= 8 rec= 8 

NameError: name 'q' is not defined

In [8]:
# ============================================================
# Safety diagnostic: is the conformal LB always < 65?
# ============================================================
diag = rl.sample(5000, random_state=RANDOM_SEED).copy().reset_index(drop=True)
Xdiag = diag[STATE_FEATURES].copy()
Xdiag["action_bin"] = diag["a_peep_bin"].astype(int)

mp = map_model.predict(Xdiag)
lb = mp - q

print("MAP_pred median:", float(np.median(mp)))
print("MAP_LB median:", float(np.median(lb)))
print("Frac LB >= 65:", float(np.mean(lb >= 65)))
print("Frac observed MAP_next >= 65:", float(np.mean(diag['map_next'] >= 65)))


NameError: name 'q' is not defined

In [11]:
from ventilation_rewards import friday_implementation

rl = pd.read_csv("/Users/aryanb/aryan personal/code/datathon26/rl/data/data_v3_max_72_h.csv")
rl = friday_implementation(rl, option=2)  # Option 2 = Hybrid Oxygenation (recommended)
rl['reward'] = rl['reward_new']  # Replace old rewards

# Now use rl['reward'] in your Q-learning training!


    ╔══════════════════════════════════════════════════════════════════════╗
    ║     PRECISION VENTILATION RL - IMPROVED REWARD IMPLEMENTATION        ║
    ╚══════════════════════════════════════════════════════════════════════╝
    

[STEP 1/5] Checking data requirements...
DATA REQUIREMENTS CHECK

REQUIRED: ✗ MISSING DATA
  Available: visit_occurrence_id, measure_time
  Missing:   is_terminal

OPTION1: ✓ READY
  Available: discharged_to (as outcome)

OPTION2: ✗ MISSING DATA
  Available: discharged_to (as outcome)
  Missing:   spo2

OPTION3: ✗ MISSING DATA
  Available: discharged_to (as outcome)
  Missing:   spo2, fio2, peep_t

RECOMMENDATION:
⚠ Only Option 1 available - need to add SpO2 for better results

[STEP 2/5] Preparing data...
Creating 'outcome' from 'discharged_to'...
Created 'outcome' column:
outcome
Unknown    3270431
Name: count, dtype: int64

[STEP 3/5] No old rewards to compare (or compare_old=False)

[STEP 4/5] Computing new rewards (Option 2)...
Using: Hybrid Oxyge

In [13]:
# Copy the entire contents of debug_data.py into a cell
# Or if the file is accessible, just run:
%run debug_data.py

STEP 1: Loading data
✓ Data loaded: 3,270,431 rows, 95 columns

First few rows:
   visit_occurrence_id  measure_time  person_id gender  year_of_birth  \
0                    0            -1      54818   MALE           1935   
1                    0             0      54818   MALE           1935   
2                    0             1      54818   MALE           1935   

        visit_start_datetime         visit_end_datetime  \
0  2010-01-01 00:00:00+00:00  2010-01-01 17:39:00+00:00   
1  2010-01-01 00:00:00+00:00  2010-01-01 17:39:00+00:00   
2  2010-01-01 00:00:00+00:00  2010-01-01 17:39:00+00:00   

  visit_type_concept_name        admitted_from        discharged_to  ...  \
0    EHR encounter record  No matching concept  No matching concept  ...   
1    EHR encounter record  No matching concept  No matching concept  ...   
2    EHR encounter record  No matching concept  No matching concept  ...   

  chloride_max  chloride_mean  creatinine_median  creatinine_min  \
0          NaN   

In [14]:
"""
═══════════════════════════════════════════════════════════════════════════
SINGLE CELL SOLUTION - COPY THIS ENTIRE CELL AND RUN IT
═══════════════════════════════════════════════════════════════════════════

This ONE cell does EVERYTHING:
1. Loads your data
2. Adds missing columns (is_terminal, outcome)  
3. Finds SpO2 data if it exists
4. Computes new rewards
5. Gives you a ready-to-use dataframe

Just update the file path and run!
"""

import pandas as pd
import numpy as np

# ============================================================================
# STEP 0: UPDATE THIS PATH TO YOUR FILE
# ============================================================================
FILE_PATH = "/Users/aryanb/aryan personal/code/datathon26/rl/data/data_v3_max_72_h.csv"

print("Loading data...")
rl = pd.read_csv(FILE_PATH)
print(f"✓ Loaded: {len(rl):,} rows")

# ============================================================================
# STEP 1: Add is_terminal column (marks last timestep for each visit)
# ============================================================================
print("\nAdding is_terminal column...")
rl = rl.sort_values(['visit_occurrence_id', 'measure_time'])
rl['is_terminal'] = 0
last_rows = rl.groupby('visit_occurrence_id').tail(1).index
rl.loc[last_rows, 'is_terminal'] = 1
print(f"✓ Added is_terminal: {(rl['is_terminal']==1).sum():,} terminal states")

# ============================================================================
# STEP 2: Create outcome column from discharged_to
# ============================================================================
print("\nCreating outcome column...")

def map_outcome(val):
    if pd.isna(val) or val == 'No matching concept':
        return None  # Use None for non-terminal rows
    val_lower = str(val).lower()
    
    if any(term in val_lower for term in ['home', 'alive', 'survived', 'discharge', 'rehab']):
        return 'Survived'
    elif any(term in val_lower for term in ['died', 'death', 'deceased', 'expired']):
        return 'Died'
    return 'Unknown'

rl['outcome'] = rl['discharged_to'].apply(map_outcome)
# Only keep outcome for terminal rows
rl.loc[rl['is_terminal'] == 0, 'outcome'] = np.nan

print("Outcome distribution:")
print(rl[rl['is_terminal']==1]['outcome'].value_counts())

# ============================================================================
# STEP 3: Check for SpO2 data
# ============================================================================
print("\nChecking for SpO2...")

# Look for SpO2 in any column name
spo2_cols = [col for col in rl.columns if 'spo2' in col.lower()]

if spo2_cols:
    print(f"Found SpO2 columns: {spo2_cols}")
    # Use median if available, otherwise first match
    if any('median' in col.lower() for col in spo2_cols):
        source = [col for col in spo2_cols if 'median' in col.lower()][0]
    else:
        source = spo2_cols[0]
    
    rl['spo2'] = rl[source]
    print(f"✓ Using '{source}' as spo2: {rl['spo2'].notna().sum():,} values")
    HAS_SPO2 = True
else:
    print("✗ No SpO2 data found")
    HAS_SPO2 = False

# ============================================================================
# STEP 4: Compute rewards
# ============================================================================
print("\n" + "="*70)
print("COMPUTING REWARDS")
print("="*70)

if HAS_SPO2:
    print("\nUsing Option 2: Hybrid Oxygenation (with SpO2)")
    
    # Carry-forward imputation for SpO2
    print("Applying carry-forward imputation...")
    rl['spo2'] = rl.groupby('visit_occurrence_id')['spo2'].ffill()
    
    # Compute hybrid rewards
    rewards = np.zeros(len(rl))
    
    for idx in range(len(rl)):
        row = rl.iloc[idx]
        reward = 0.0
        
        # Intermediate rewards from SpO2
        if idx + 1 < len(rl):
            next_row = rl.iloc[idx + 1]
            if row['visit_occurrence_id'] == next_row['visit_occurrence_id']:
                # Delta SpO2
                if pd.notna(row['spo2']) and pd.notna(next_row['spo2']):
                    delta_spo2 = next_row['spo2'] - row['spo2']
                    reward += 0.1 * delta_spo2
                
                # Target range bonus (92-96%)
                if pd.notna(next_row['spo2']):
                    if 92 <= next_row['spo2'] <= 96:
                        reward += 0.5
        
        # Terminal reward
        if row['is_terminal'] == 1:
            if row['outcome'] == 'Survived':
                reward += 100.0
            elif row['outcome'] == 'Died':
                reward -= 100.0
        
        rewards[idx] = reward
    
    rl['reward'] = rewards
    
else:
    print("\nUsing Option 1: Pure Terminal (no SpO2)")
    
    # Pure terminal rewards
    rewards = np.zeros(len(rl))
    
    for idx in range(len(rl)):
        row = rl.iloc[idx]
        if row['is_terminal'] == 1:
            if row['outcome'] == 'Survived':
                rewards[idx] = 100.0
            elif row['outcome'] == 'Died':
                rewards[idx] = -100.0
    
    rl['reward'] = rewards

# ============================================================================
# STEP 5: Validate results
# ============================================================================
print("\n" + "="*70)
print("RESULTS")
print("="*70)

print(f"\nReward statistics:")
print(f"  Total rows:     {len(rl):,}")
print(f"  Non-zero:       {(rl['reward'] != 0).sum():,} ({(rl['reward'] != 0).mean()*100:.1f}%)")
print(f"  Mean:           {rl['reward'].mean():.4f}")
print(f"  Std:            {rl['reward'].std():.4f}")
print(f"  Range:          [{rl['reward'].min():.2f}, {rl['reward'].max():.2f}]")

print(f"\nDistribution:")
print(f"  Positive:       {(rl['reward'] > 0).sum():,}")
print(f"  Negative:       {(rl['reward'] < 0).sum():,}")
print(f"  Zero:           {(rl['reward'] == 0).sum():,}")

print("\n" + "="*70)
print("✓ COMPLETE!")
print("="*70)
print("\nYour 'rl' dataframe now has a 'reward' column.")
print("You can use it in your Q-learning training!")

if not HAS_SPO2:
    print("\n⚠ NOTE: Rewards are sparse because no SpO2 data was found.")
    print("Consider adding SpO2 data for better results (Option 2).")

print("\nNext: Continue with your Q-learning code using rl['reward']")

Loading data...
✓ Loaded: 3,270,431 rows

Adding is_terminal column...
✓ Added is_terminal: 74,186 terminal states

Creating outcome column...
Outcome distribution:
Series([], Name: count, dtype: int64)

Checking for SpO2...
Found SpO2 columns: ['spo2_median', 'spo2_min', 'spo2_max', 'spo2_mean']
✓ Using 'spo2_median' as spo2: 939,292 values

COMPUTING REWARDS

Using Option 2: Hybrid Oxygenation (with SpO2)
Applying carry-forward imputation...

RESULTS

Reward statistics:
  Total rows:     3,270,431
  Non-zero:       937,754 (28.7%)
  Mean:           0.0797
  Std:            0.3165
  Range:          [-106.27, 106.30]

Distribution:
  Positive:       751,373
  Negative:       186,381
  Zero:           2,332,677

✓ COMPLETE!

Your 'rl' dataframe now has a 'reward' column.
You can use it in your Q-learning training!

Next: Continue with your Q-learning code using rl['reward']
