In [1]:
# global_forecast_true_global.py
# =====================================================================================
# TRUE GLOBAL FORECAST (Binary) from stressLevelPred
#
# - TRUE GLOBAL: userID TIDAK PERNAH dipakai sebagai fitur (hanya grouping & split).
# - 1 model untuk semua user (cold-start global).
#
# Target:
#   y_bin(t) = 1 jika stressLevelPred(t) >= 1, else 0
#
# Baselines:
#   L1) Persistence: y(t)=y(t-1)
#   L2) Markov GLOBAL: P(high_t | prev_high, dow) + threshold tuning via pooled time-CV
#
# Models (global, without user identity):
#   - LogisticRegression
#   - DecisionTree
#   - RandomForest
#   - ExtraTrees
#   - HistGradientBoosting
#   - GradientBoosting
#   - AdaBoost
#   - BaggingTree
#   - LinearSVC + CalibratedClassifierCV (cv adaptif per fold)
#
# Optional upgrade:
#   - BLEND: p = alpha*p_ml + (1-alpha)*p_markov
#     alpha & thr ditune via pooled CV (no-leak)
#
# Split:
#   - time-based per user
#   - TEST = last TEST_LEN per user
#   - CV = windows di train_pool tiap user (pooled across users)
#
# Output:
#   ../models/global_forecast.joblib   (lihat MODEL_OUT di bagian CONFIG)
# =====================================================================================

import numpy as np
import pandas as pd
from pathlib import Path
import joblib

from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import ParameterGrid

from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import (
    RandomForestClassifier,
    ExtraTreesClassifier,
    HistGradientBoostingClassifier,
    GradientBoostingClassifier,
    AdaBoostClassifier,
    BaggingClassifier,
)
from sklearn.svm import LinearSVC
from sklearn.calibration import CalibratedClassifierCV


# =========================
# 0) CONFIG
# =========================
# Locations probed (in order) for the input CSV; the first existing one wins.
CANDIDATE_PATHS = [
    Path("../datasets/global_dataset_pred.csv"),
]
DATA_PATH = next((p for p in CANDIDATE_PATHS if p.exists()), None)
if DATA_PATH is None:
    raise FileNotFoundError("global_dataset_pred.csv tidak ditemukan. Cek CANDIDATE_PATHS / DATA_PATH.")

# Where the selected model artifact is written with joblib at the end of the script.
MODEL_OUT = Path("../models/global_forecast.joblib")

DATE_COL   = "date"
USER_COL   = "userID"             # used only for grouping/splitting (never as a feature)
TARGET_COL = "stressLevelPred"    # validated after loading to lie in 0..2

# WINDOW: number of target lags and size of the rolling-history window.
# TEST_LEN: number of trailing rows per user held out as the TEST block.
WINDOW   = 3
TEST_LEN = 12

# CV windows (positions relative to each user's train pool), end exclusive
VAL_WINDOWS = [(12, 24), (18, 30)]

# Candidate thresholds for turning a probability into a 0/1 decision
THRESHOLDS = np.linspace(0.05, 0.95, 19)

# BLEND config (ML + Markov)
USE_BLEND = True
ALPHAS = np.linspace(0.0, 1.0, 11)   # 0.0 = pure Markov, 1.0 = pure ML

RANDOM_STATE = 26

# Add lag-1 behavior features when hour-like numeric columns exist in the data
USE_BEHAVIOR_LAG1 = True


# =========================
# Utilities
# =========================
def header(title: str):
    """Print *title* framed by two 80-character ``=`` rules, preceded by a blank line."""
    rule = "=" * 80
    print(f"\n{rule}")
    print(title)
    print(rule)

def eval_bin(y_true, y_pred):
    """Score binary predictions.

    Returns a dict with plain-float ``"acc"`` (accuracy) and ``"f1"``
    (F1 score, reported as 0 when it is undefined).
    """
    accuracy = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred, zero_division=0)
    return {"acc": float(accuracy), "f1": float(f1)}

def tune_thr_from_proba(y_true, p_high, thresholds=THRESHOLDS):
    """Pick the decision threshold that maximises F1.

    Parameters
    ----------
    y_true : array-like of 0/1 labels.
    p_high : array-like of probabilities for class 1 (any sequence accepted).
    thresholds : array-like of candidate cut-offs; a prediction is 1 when
        ``p_high >= threshold``.

    Returns
    -------
    (best_threshold, best_f1) as plain floats. On ties the earliest
    threshold in ``thresholds`` is kept.

    Raises
    ------
    ValueError
        If ``thresholds`` is empty (there is nothing to choose from).
    """
    # Coerce so that plain lists work with the vectorised comparison below.
    p_high = np.asarray(p_high, dtype=float)
    thresholds = np.asarray(thresholds, dtype=float).ravel()
    if thresholds.size == 0:
        raise ValueError("thresholds must contain at least one candidate value")

    best_thr, best_f1 = None, -1.0
    for thr in thresholds:
        pred = (p_high >= thr).astype(int)
        f1 = float(f1_score(y_true, pred, zero_division=0))
        # Strict '>' keeps the first threshold that reaches the maximum.
        if f1 > best_f1:
            best_f1, best_thr = f1, thr
    return float(best_thr), float(best_f1)

def pick_existing_behavior_cols(df: pd.DataFrame):
    """
    Return the numeric columns of *df* that look like "hours" measurements.

    A column qualifies when it is numeric, is not the date/user/target column,
    and either has "hour" in its (case-insensitive) name or is one of the
    known behavior columns listed below. Name-matched columns come first in
    DataFrame order, followed by any known column not already selected.

    Note: using these columns stays leak-free because the feature engineering
    step only consumes them through shift(1).
    """
    reserved = {DATE_COL, USER_COL, TARGET_COL}
    numeric_cols = [
        col for col in df.columns
        if col not in reserved and pd.api.types.is_numeric_dtype(df[col])
    ]

    # "hours" always contains "hour", so a single substring test is enough.
    selected = [col for col in numeric_cols if "hour" in col.lower()]

    known_cols = (
        "studyHourPerDay",
        "sleepHourPerDay",
        "socialHourPerDay",
        "physicalActivityHourPerDay",
        "extracurricularHourPerDay",
    )
    selected += [
        col for col in known_cols
        if col in numeric_cols and col not in selected
    ]
    return selected

def safe_class_counts(y: np.ndarray):
    """Count labels 0 and 1 in *y*; both keys are always present in the result."""
    labels = np.asarray(y).astype(int)
    return {cls: int(np.count_nonzero(labels == cls)) for cls in (0, 1)}


# =========================
# 1) LOAD
# =========================
header("1) LOAD DATA")
df = pd.read_csv(DATA_PATH)

# Fail fast if any structural column is missing (checked in date, user, target order).
for required_col in (DATE_COL, USER_COL, TARGET_COL):
    if required_col not in df.columns:
        raise KeyError(f"Kolom {required_col} tidak ditemukan di dataset.")

# Parse dates strictly, then order rows chronologically within each user.
df[DATE_COL] = pd.to_datetime(df[DATE_COL], errors="raise")
df = df.sort_values([USER_COL, DATE_COL]).reset_index(drop=True)

# Missing targets are ignored here; every present value must lie in 0..2.
if not df[TARGET_COL].dropna().between(0, 2).all():
    raise ValueError(f"{TARGET_COL} harus berada pada range 0..2")

BEHAVIOR_COLS = []
if USE_BEHAVIOR_LAG1:
    BEHAVIOR_COLS = pick_existing_behavior_cols(df)

first_day = df[DATE_COL].min().date()
last_day = df[DATE_COL].max().date()

print("DATA_PATH       :", str(DATA_PATH))
print("ROWS            :", len(df))
print("USERS           :", df[USER_COL].nunique())
print("DATE_RANGE      :", str(first_day), "->", str(last_day))
print("TARGET_COL      :", TARGET_COL)
print("BEHAVIOR_COLS   :", BEHAVIOR_COLS)


# =========================
# 2) FEATURE ENGINEERING (no leak)
# =========================
header("2) FEATURE ENGINEERING (NO-LEAK)")
# Build per-user features for predicting the target at row t using only
# information from rows strictly before t (everything goes through shift()).
rows = []
for uid, g in df.groupby(USER_COL):
    g = g.sort_values(DATE_COL).reset_index(drop=True)

    # Calendar features of the row being predicted.
    g["dow"] = g[DATE_COL].dt.dayofweek.astype(int)   # 0..6
    g["is_weekend"] = (g["dow"] >= 5).astype(int)

    # Lagged target values (t-1 .. t-WINDOW).
    for k in range(1, WINDOW + 1):
        g[f"lag_sp_{k}"] = g[TARGET_COL].shift(k)

    # Lag-1 behavior columns (no-op when BEHAVIOR_COLS is empty).
    for c in BEHAVIOR_COLS:
        g[f"lag1_{c}"] = g[c].shift(1)

    # Rolling statistics over the history window that ends at t-1.
    sp_shift = g[TARGET_COL].shift(1)

    g["sp_mean"] = sp_shift.rolling(WINDOW).mean()
    g["sp_std"]  = sp_shift.rolling(WINDOW).std().fillna(0.0)
    g["sp_min"]  = sp_shift.rolling(WINDOW).min()
    g["sp_max"]  = sp_shift.rolling(WINDOW).max()

    g["count_high"] = (sp_shift >= 1).rolling(WINDOW).sum()
    g["count_low"]  = (sp_shift == 0).rolling(WINDOW).sum()

    # Length of the current run of consecutive "high" values ending at t-1.
    # A comparison against NaN is False, so missing history breaks the run.
    high = (sp_shift >= 1).astype(int).tolist()
    streak, cur = [], 0
    for v in high:
        cur = cur + 1 if v == 1 else 0
        streak.append(cur)
    g["streak_high"] = streak

    # Number of value changes inside the history window (<= t-1).
    # NOTE(review): NaN != x evaluates True, so on a user's first row with
    # complete lags the comparison against the missing earlier value is
    # counted as a transition. Left unchanged to keep feature values stable.
    diff = (sp_shift != sp_shift.shift(1)).astype(int)
    g["transitions"] = diff.rolling(WINDOW).sum()

    rows.append(g)

feat = pd.concat(rows, ignore_index=True)

# TRUE GLOBAL: the feature list must never include USER_COL.
feature_cols = (
    ["dow", "is_weekend"]
    + [f"lag_sp_{k}" for k in range(1, WINDOW + 1)]
    + [
        "sp_mean", "sp_std", "sp_min", "sp_max",
        "count_high", "count_low",
        "streak_high", "transitions",
    ]
)
if len(BEHAVIOR_COLS) > 0:
    feature_cols += [f"lag1_{c}" for c in BEHAVIOR_COLS]

# Drop rows without complete history AND rows whose target is missing.
# The binary label is derived only afterwards: `NaN >= 1` is False, so
# computing it first would silently turn a missing target into label 0
# that dropna() could no longer detect.
feat = feat.dropna(subset=feature_cols + [TARGET_COL]).reset_index(drop=True)
feat["y_bin"] = (feat[TARGET_COL] >= 1).astype(int)

print("ROWS_FEAT        :", len(feat))
print("USERS_FEAT       :", feat[USER_COL].nunique())
print("WINDOW           :", WINDOW)
print("TEST_LEN         :", TEST_LEN)
print("FEATURES_COUNT   :", len(feature_cols))
print("BINARY_DIST      :", feat["y_bin"].value_counts().to_dict())


# =========================
# 3) SPLIT: time-based per user (TEST = last TEST_LEN)
# =========================
header("3) SPLIT (TIME-BASED PER USER)")
per_user_train_pool = {}
train_idx = []
test_idx = []

for uid, user_rows in feat.groupby(USER_COL):
    # reset_index() keeps each row's original `feat` label in an "index" column.
    ordered = user_rows.sort_values(DATE_COL).reset_index()
    n = len(ordered)
    cut = n - TEST_LEN
    if cut <= 20:
        raise ValueError(f"User {uid} data terlalu sedikit untuk split + CV windows. n={n}, TEST_LEN={TEST_LEN}")

    # Everything before the cut is the train pool; the last TEST_LEN rows are TEST.
    pool_part = ordered.iloc[:cut]
    test_part = ordered.iloc[cut:]

    per_user_train_pool[uid] = pool_part
    train_idx.extend(pool_part["index"].tolist())
    test_idx.extend(test_part["index"].tolist())

train_pool_df = feat.loc[train_idx].copy()
test_df = feat.loc[test_idx].copy()

print("TRAINPOOL_ROWS   :", len(train_pool_df))
print("TEST_ROWS        :", len(test_df))
print("TEST_DIST        :", test_df["y_bin"].value_counts().to_dict())

# Build CV folds by pooling the same positional window across all users.
cv_splits = []
for v0, v1 in VAL_WINDOWS:
    # A window is usable only if every user has at least v1 train-pool rows.
    if any(len(pool) < v1 for pool in per_user_train_pool.values()):
        continue

    fold_train, fold_val = [], []
    for pool in per_user_train_pool.values():
        labels = pool["index"]
        fold_train.extend(labels.iloc[:v0].tolist())     # rows before the window
        fold_val.extend(labels.iloc[v0:v1].tolist())     # the window itself
    cv_splits.append((fold_train, fold_val))

if not cv_splits:
    raise ValueError("CV windows gagal terbentuk. Kecilkan TEST_LEN atau VAL_WINDOWS.")

print("CV_FOLDS         :", len(cv_splits))
print("VAL_WINDOWS      :", VAL_WINDOWS)

X_trainpool = train_pool_df[feature_cols].copy()
y_trainpool = train_pool_df["y_bin"].astype(int).values

X_test = test_df[feature_cols].copy()
y_test = test_df["y_bin"].astype(int).values


# =========================
# 4) BASELINE L1: Persistence
# =========================
header("4) BASELINE L1: PERSISTENCE (y(t) = y(t-1))")
# Predict "high" whenever the previous observation was high.
prev_was_high = test_df["lag_sp_1"] >= 1
pred_persist = prev_was_high.astype(int).values
persist_metrics = eval_bin(y_test, pred_persist)
for label, key in (("TEST_ACC         :", "acc"), ("TEST_F1          :", "f1")):
    print(label, persist_metrics[key])


# =========================
# 5) BASELINE L2: Markov GLOBAL(prev_high, dow) + thr tuning (fair)
# =========================
header("5) BASELINE L2: MARKOV GLOBAL (prev_high, dow) + THR TUNING (POOLED CV)")

def train_markov_global(df_train):
    """Estimate P(y | previous-high flag, day of week) with add-one smoothing.

    Reads the ``lag_sp_1``, ``dow`` and ``y_bin`` columns of *df_train* and
    returns an array of shape (2, 7, 2) indexed as
    ``[prev_high, dow, y]`` whose last axis sums to 1.
    """
    prev_high = (df_train["lag_sp_1"] >= 1).astype(int).to_numpy()
    weekday = df_train["dow"].astype(int).to_numpy()
    label = df_train["y_bin"].astype(int).to_numpy()

    # Unbuffered accumulation so repeated (prev, dow, y) triples are all counted.
    tally = np.zeros((2, 7, 2), dtype=int)
    np.add.at(tally, (prev_high, weekday, label), 1)

    # Laplace smoothing: +1 per outcome, +2 on the per-cell total.
    cell_totals = tally.sum(axis=2, keepdims=True)
    return (tally + 1) / (cell_totals + 2)

def markov_proba(probs, df_eval):
    """Look up P(y=1) for each row of *df_eval* in a (2, 7, 2) Markov table.

    Rows are keyed by whether ``lag_sp_1 >= 1`` and by ``dow``; the result is
    a 1-D float array with one probability per row.
    """
    prev_high = (df_eval["lag_sp_1"] >= 1).astype(int).to_numpy()
    weekday = df_eval["dow"].astype(int).to_numpy()
    # Vectorised lookup: one (prev_high, weekday) pair per row, outcome y=1.
    return np.asarray(probs[prev_high, weekday, 1], dtype=float)

# Tune the Markov decision threshold on validation predictions pooled over all folds.
fold_truth, fold_scores = [], []
for tr_idx, va_idx in cv_splits:
    va_df = feat.loc[va_idx]
    # The table is estimated on the fold's training rows only.
    fold_table = train_markov_global(feat.loc[tr_idx])
    fold_truth.append(va_df["y_bin"].astype(int).values)
    fold_scores.append(markov_proba(fold_table, va_df))

cv_true = np.concatenate(fold_truth)
cv_phigh = np.concatenate(fold_scores)
thr_mk, cv_f1_mk = tune_thr_from_proba(cv_true, cv_phigh)

# Refit on the whole train pool, then score TEST with the CV-tuned threshold.
probs_full = train_markov_global(train_pool_df)
p_test_mk = markov_proba(probs_full, test_df)
pred_test_mk = (p_test_mk >= thr_mk).astype(int)
markov_metrics = eval_bin(y_test, pred_test_mk)

for label, value in (
    ("CV_POOLED_DIST   :", safe_class_counts(cv_true)),
    ("BEST_THR_MARKOV  :", thr_mk),
    ("CV_POOLED_F1     :", cv_f1_mk),
    ("TEST_ACC_MARKOV  :", markov_metrics["acc"]),
    ("TEST_F1_MARKOV   :", markov_metrics["f1"]),
):
    print(label, value)


# =========================
# 6) PREPROCESS (TRUE GLOBAL: tanpa userID)
# =========================
header("6) PREPROCESS (TRUE GLOBAL)")
cat_cols = ["dow"]
num_cols = [name for name in feature_cols if name not in cat_cols]

# Day of week is one-hot encoded (unseen categories are ignored at predict
# time); every other feature is median-imputed. Unlisted columns are dropped.
categorical_step = ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols)
numeric_step = ("num", Pipeline([("imp", SimpleImputer(strategy="median"))]), num_cols)

preprocess = ColumnTransformer(
    transformers=[categorical_step, numeric_step],
    remainder="drop",
)


# =========================
# 7) CANDIDATE MODELS (grid)
# =========================
header("7) CANDIDATE MODELS")

# Maps a model name to (estimator, parameter grid). Grid keys carry the
# "clf__" prefix because the estimator is installed as the "clf" pipeline step.
# Note:
# - For LinearSVC_Calibrated the calibration cv is chosen adaptively per fold
#   (see pooled_cv_best); the instance below is only a placeholder.
CANDIDATES = {
    "LogReg": (
        LogisticRegression(max_iter=5000, class_weight="balanced", random_state=RANDOM_STATE),
        {"clf__C": [0.03, 0.1, 0.3, 1.0, 3.0], "clf__solver": ["liblinear"]},
    ),
    "DecisionTree": (
        DecisionTreeClassifier(class_weight="balanced", random_state=RANDOM_STATE),
        {"clf__max_depth": [2, 3, 4, 6, None], "clf__min_samples_leaf": [1, 2, 4, 8]},
    ),
    "RandomForest": (
        RandomForestClassifier(class_weight="balanced", random_state=RANDOM_STATE, n_jobs=1),
        {"clf__n_estimators": [200, 400, 800], "clf__max_depth": [None, 6, 10],
         "clf__min_samples_leaf": [1, 2, 4], "clf__max_features": ["sqrt"]},
    ),
    "ExtraTrees": (
        ExtraTreesClassifier(class_weight="balanced", random_state=RANDOM_STATE, n_jobs=1),
        {"clf__n_estimators": [200, 400, 800], "clf__max_depth": [None, 6, 10],
         "clf__min_samples_leaf": [1, 2, 4], "clf__max_features": ["sqrt"]},
    ),
    "HistGB": (
        HistGradientBoostingClassifier(random_state=RANDOM_STATE),
        {"clf__learning_rate": [0.03, 0.05, 0.1], "clf__max_depth": [2, 3], "clf__max_leaf_nodes": [15, 31, 63]},
    ),
    "GradBoost": (
        GradientBoostingClassifier(random_state=RANDOM_STATE),
        {"clf__learning_rate": [0.03, 0.05, 0.1], "clf__n_estimators": [100, 200, 400], "clf__max_depth": [2, 3]},
    ),
    "AdaBoost": (
        AdaBoostClassifier(random_state=RANDOM_STATE),
        {"clf__learning_rate": [0.03, 0.05, 0.1, 0.3], "clf__n_estimators": [50, 100, 200, 400]},
    ),
    "BaggingTree": (
        BaggingClassifier(
            estimator=DecisionTreeClassifier(random_state=RANDOM_STATE),
            random_state=RANDOM_STATE,
            n_jobs=1,
        ),
        {"clf__n_estimators": [50, 100, 200],
         "clf__estimator__max_depth": [2, 3, 4, None],
         "clf__estimator__min_samples_leaf": [1, 2, 4]},
    ),
    "LinearSVC_Calibrated": (
        # placeholder; replaced by an adaptively configured instance per fold
        CalibratedClassifierCV(
            estimator=LinearSVC(class_weight="balanced", random_state=RANDOM_STATE),
            method="sigmoid",
            cv=3,
        ),
        {"clf__estimator__C": [0.03, 0.1, 0.3, 1.0, 3.0]},
    ),
}

print("MODELS           :", list(CANDIDATES.keys()))
print("USE_BLEND        :", USE_BLEND)
print("ALPHAS           :", ALPHAS.tolist())
print("THRESHOLDS       :", THRESHOLDS.tolist())


# =========================
# 8) FAIR TUNING: pooled CV + threshold tuning (+ optional BLEND)
# =========================
header("8) TRAIN + TUNE (FAIR POOLED CV)")

def make_pipe(clf):
    """Build an independent preprocessing + classifier pipeline.

    Both the module-level ``preprocess`` transformer and *clf* are cloned.
    ``Pipeline.fit`` fits its steps in place, so without cloning every
    pipeline would share one ``preprocess`` object (and the caller's
    classifier instance): any later fit, e.g. on a CV fold, would overwrite
    the fitted state of pipelines that were already trained and kept for
    saving.

    Parameters
    ----------
    clf : scikit-learn estimator used as the ``"clf"`` step (left untouched).

    Returns
    -------
    Pipeline with steps ``"prep"`` and ``"clf"``, both unfitted copies.
    """
    # Local import keeps this block self-contained; scikit-learn is already
    # a dependency of this file.
    from sklearn.base import clone

    return Pipeline([("prep", clone(preprocess)), ("clf", clone(clf))])

def make_calibrated_svc(cv_k: int):
    """Return a sigmoid-calibrated, class-balanced LinearSVC using *cv_k* calibration folds."""
    svc = LinearSVC(class_weight="balanced", random_state=RANDOM_STATE)
    return CalibratedClassifierCV(estimator=svc, method="sigmoid", cv=cv_k)

def pooled_cv_best(model_name: str, base_clf, grid):
    """
    Find the best hyper-parameters for one candidate via pooled CV.

    For every parameter combination in *grid*:
    - fit on each usable fold and collect class-1 probabilities on its
      validation rows,
    - pool the validation predictions of all folds,
    - tune the decision threshold that maximises F1,
    - if USE_BLEND: also tune alpha in ``alpha*p_ml + (1-alpha)*p_markov``.

    A fold is unusable when its training labels contain a single class or,
    for "LinearSVC_Calibrated", when the minority class is too small for at
    least 2 calibration folds. A parameter combination is discarded as a
    whole (and the reason printed) if fitting/predicting fails on any fold.

    Parameters
    ----------
    model_name : key of the candidate; "LinearSVC_Calibrated" triggers the
        adaptive calibration-cv path instead of using *base_clf*.
    base_clf : estimator placed in the pipeline for all other models.
    grid : parameter grid accepted by ``ParameterGrid`` (keys prefixed "clf__").

    Returns
    -------
    dict with keys "params", "alpha", "thr", "cv_f1", "valid_folds" for the
    best combination, or None when no combination could be evaluated.
    """
    is_svc = model_name == "LinearSVC_Calibrated"

    # Fold data and the fold-specific Markov probabilities do not depend on
    # the hyper-parameters, so they are prepared once instead of per params.
    folds = []
    for tr_idx, va_idx in cv_splits:
        tr_df = feat.loc[tr_idx]
        va_df = feat.loc[va_idx]
        ytr = tr_df["y_bin"].astype(int).values

        # A classifier cannot be fitted on a single-class training fold.
        if len(np.unique(ytr)) < 2:
            continue

        cv_k = None
        if is_svc:
            # Adaptive calibration cv: at most 3, limited by the minority class size.
            cv_k = int(min(3, min(safe_class_counts(ytr).values())))
            if cv_k < 2:
                continue

        folds.append({
            "Xtr": tr_df[feature_cols].copy(),
            "ytr": ytr,
            "Xva": va_df[feature_cols].copy(),
            "yva": va_df["y_bin"].astype(int).values,
            # Markov table fitted on this fold's training rows only (no leak).
            "p_mk": markov_proba(train_markov_global(tr_df), va_df),
            "cv_k": cv_k,
        })

    if not folds:
        return None

    y_all = np.concatenate([fold["yva"] for fold in folds])
    pmk_all = np.concatenate([fold["p_mk"] for fold in folds])
    valid_folds = len(folds)

    best = None
    for params in ParameterGrid(grid):
        pml_list = []
        try:
            for fold in folds:
                clf = make_calibrated_svc(cv_k=fold["cv_k"]) if is_svc else base_clf
                pipe = make_pipe(clf)
                pipe.set_params(**params)
                pipe.fit(fold["Xtr"], fold["ytr"])
                pml_list.append(pipe.predict_proba(fold["Xva"])[:, 1])
        except Exception as exc:
            # Best-effort search: a combination failing on any fold is treated
            # as invalid, but the reason is reported instead of being hidden.
            print("SKIP_PARAMS      :", model_name, params, f"({type(exc).__name__}: {exc})")
            continue

        pml_all = np.concatenate(pml_list)

        if USE_BLEND:
            local_best = None
            for alpha in ALPHAS:
                p_blend = alpha * pml_all + (1.0 - alpha) * pmk_all
                thr, cv_f1 = tune_thr_from_proba(y_all, p_blend)
                # Strict '>' keeps the smallest alpha among equally good ones.
                if (local_best is None) or (cv_f1 > local_best["cv_f1"]):
                    local_best = {"alpha": float(alpha), "thr": float(thr), "cv_f1": float(cv_f1)}
            record = {"params": params, **local_best, "valid_folds": int(valid_folds)}
        else:
            thr, cv_f1 = tune_thr_from_proba(y_all, pml_all)
            record = {"params": params, "alpha": 1.0, "thr": float(thr), "cv_f1": float(cv_f1), "valid_folds": int(valid_folds)}

        if (best is None) or (record["cv_f1"] > best["cv_f1"]):
            best = record

    return best

# Markov probabilities on TEST come from the train-pool table and are the
# same for every candidate, so they are computed once up front.
p_test_mk_full = markov_proba(probs_full, test_df)

# One leaderboard record per successfully trained candidate.
rows = []
for model_name, (clf, grid) in CANDIDATES.items():
    best = pooled_cv_best(model_name, clf, grid)
    if best is None:
        print("SKIP_MODEL       :", model_name, "(no valid params/folds)")
        continue

    # Refit the winning configuration on the full train pool.
    try:
        if model_name == "LinearSVC_Calibrated":
            # Calibration cv is derived from the train-pool class distribution.
            counts = safe_class_counts(y_trainpool)
            min_class = min(counts.values())
            cv_k = int(min(3, min_class))
            if cv_k < 2:
                raise ValueError("TrainPool tidak cukup untuk calibrated SVC (cv_k < 2).")
            final_clf = make_calibrated_svc(cv_k=cv_k)
        else:
            final_clf = clf

        final_pipe = make_pipe(final_clf)
        final_pipe.set_params(**best["params"])
        final_pipe.fit(X_trainpool, y_trainpool)
        p_test_ml = final_pipe.predict_proba(X_test)[:, 1]
    except Exception as exc:
        # Best-effort: skip the candidate, but say why instead of hiding it.
        print("SKIP_MODEL       :", model_name, f"(failed final training: {type(exc).__name__}: {exc})")
        continue

    # Final prediction: blend with the CV-tuned alpha, cut at the CV-tuned threshold.
    alpha = float(best["alpha"])
    p_test_final = alpha * p_test_ml + (1.0 - alpha) * p_test_mk_full
    pred_test_final = (p_test_final >= best["thr"]).astype(int)

    test_metrics = eval_bin(y_test, pred_test_final)

    rows.append({
        "model": model_name,
        "cv_f1": float(best["cv_f1"]),
        "alpha": float(best["alpha"]),
        "thr": float(best["thr"]),
        "valid_folds": int(best["valid_folds"]),
        "test_f1": float(test_metrics["f1"]),
        "test_acc": float(test_metrics["acc"]),
        "params": dict(best["params"]),
        "pipe": final_pipe,
    })

    print("MODEL            :", model_name)
    print("  CV_F1          :", float(best["cv_f1"]))
    print("  VALID_FOLDS    :", int(best["valid_folds"]))
    print("  ALPHA          :", float(best["alpha"]))
    print("  THR            :", float(best["thr"]))
    print("  TEST_F1        :", float(test_metrics["f1"]))
    print("  TEST_ACC       :", float(test_metrics["acc"]))
    print("  PARAMS         :", dict(best["params"]))


# =========================
# 9) LEADERBOARD + SELECT BEST
# =========================
header("9) LEADERBOARD (SORT BY TEST_F1)")

base_rows = [
    {"model": "Baseline-Persist", "cv_f1": np.nan, "alpha": np.nan, "thr": np.nan,
     "test_f1": persist_metrics["f1"], "test_acc": persist_metrics["acc"], "params": None},
    {"model": "Baseline-Markov",  "cv_f1": cv_f1_mk, "alpha": 0.0, "thr": thr_mk,
     "test_f1": markov_metrics["f1"], "test_acc": markov_metrics["acc"], "params": {"markov": "prev_high+dow"}},
]

# Fitted pipelines are left out of the printed leaderboard.
all_rows = base_rows + [{k: v for k, v in r.items() if k != "pipe"} for r in rows]
all_sorted = sorted(all_rows, key=lambda r: r["test_f1"], reverse=True)

for r in all_sorted:
    print(
        "MODEL=", r["model"],
        "| CV_F1=", r["cv_f1"],
        "| TEST_F1=", r["test_f1"],
        "| TEST_ACC=", r["test_acc"],
        "| ALPHA=", r["alpha"],
        "| THR=", r["thr"],
        "| PARAMS=", r["params"]
    )


def _artifact_meta(note):
    """Return the run-configuration metadata stored in every saved artifact."""
    return {
        "note": note,
        "target": "y_bin=(stressLevelPred>=1)",
        "date_col": DATE_COL,
        "user_col": USER_COL,
        "target_col": TARGET_COL,
        "window": WINDOW,
        "test_len": TEST_LEN,
        "val_windows": VAL_WINDOWS,
        "thresholds": THRESHOLDS.tolist(),
        "use_blend": USE_BLEND,
        "alphas": ALPHAS.tolist(),
        "behavior_cols": BEHAVIOR_COLS,
        "feature_cols": feature_cols,
    }


def _markov_artifact(note):
    """Return the artifact for the Markov baseline (train-pool table + tuned threshold)."""
    return {
        "type": "true_global_markov",
        "thr": float(thr_mk),
        "probs": probs_full,
        "meta": _artifact_meta(note),
    }


# NOTE(review): the winner is chosen by TEST F1, so the TEST score of the
# selected artifact is an optimistic estimate. Selecting on cv_f1 would keep
# TEST untouched; behavior is left as is here.
if len(rows) == 0:
    header("RESULT")
    print("Tidak ada model ML yang valid. Hanya baseline tersedia.")
    best_name = "MarkovGlobal"
    best_obj = _markov_artifact("No ML model succeeded; Markov saved as best")
else:
    # max() returns the first maximal record, matching a stable descending sort.
    best_ml = max(rows, key=lambda r: r["test_f1"])

    header("RESULT")
    print("BEST_ML_MODEL    :", best_ml["model"])
    print("BEST_ML_TEST_F1  :", best_ml["test_f1"])
    print("BEST_ML_TEST_ACC :", best_ml["test_acc"])
    print("BEST_ML_ALPHA    :", best_ml["alpha"])
    print("BEST_ML_THR      :", best_ml["thr"])
    print("BEST_ML_PARAMS   :", best_ml["params"])

    if best_ml["test_f1"] > markov_metrics["f1"]:
        best_name = best_ml["model"]
        best_obj = {
            "type": "true_global_blend_model" if USE_BLEND else "true_global_ml_model",
            "pipe": best_ml["pipe"],               # sklearn pipeline
            "alpha": float(best_ml["alpha"]),
            "thr": float(best_ml["thr"]),
            "markov_probs": probs_full,            # needed at runtime for p_markov
            "meta": _artifact_meta(
                "TRUE GLOBAL (no userID). Uses p = alpha*p_ml + (1-alpha)*p_markov" if USE_BLEND else "TRUE GLOBAL (no userID). Uses ML prob only"
            ),
        }
        print("SELECTED_BEST    : ML/BLEND (beats Markov on TEST)")
    else:
        best_name = "MarkovGlobal"
        best_obj = _markov_artifact("Markov remains best on TEST for this dataset")
        print("SELECTED_BEST    : MARKOV (still best on TEST)")

header("10) SAVE MODEL ARTIFACT")
# Create the output directory if needed, then persist the selected artifact.
out_dir = MODEL_OUT.parent
out_dir.mkdir(parents=True, exist_ok=True)
joblib.dump(best_obj, MODEL_OUT)
for label, value in (("SAVED_TO         :", str(MODEL_OUT)), ("BEST_NAME        :", best_name)):
    print(label, value)



1) LOAD DATA
DATA_PATH       : ..\datasets\global_dataset_pred.csv
ROWS            : 275
USERS           : 5
DATE_RANGE      : 2025-11-21 -> 2026-01-14
TARGET_COL      : stressLevelPred
BEHAVIOR_COLS   : ['extracurricularHourPerDay', 'physicalActivityHourPerDay', 'sleepHourPerDay', 'studyHourPerDay', 'socialHourPerDay']

2) FEATURE ENGINEERING (NO-LEAK)
ROWS_FEAT        : 260
USERS_FEAT       : 5
WINDOW           : 3
TEST_LEN         : 12
FEATURES_COUNT   : 18
BINARY_DIST      : {1: 146, 0: 114}

3) SPLIT (TIME-BASED PER USER)
TRAINPOOL_ROWS   : 200
TEST_ROWS        : 60
TEST_DIST        : {1: 38, 0: 22}
CV_FOLDS         : 2
VAL_WINDOWS      : [(12, 24), (18, 30)]

4) BASELINE L1: PERSISTENCE (y(t) = y(t-1))
TEST_ACC         : 0.7166666666666667
TEST_F1          : 0.7671232876712328

5) BASELINE L2: MARKOV GLOBAL (prev_high, dow) + THR TUNING (POOLED CV)
CV_POOLED_DIST   : {0: 34, 1: 86}
BEST_THR_MARKOV  : 0.35
CV_POOLED_F1     : 0.8522727272727273
TEST_ACC_MARKOV  : 0.85
TEST_F1_MARK