In [None]:
import os
import numpy as np
import optuna
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import KFold
from darts import TimeSeries
from darts.models import LightGBMModel
from darts.metrics import mae, rmse, mape, smape

# Hide every CUDA device from this process so all libraries run on the CPU.
# This is unconditional: it applies whether or not a GPU is present.
os.environ["CUDA_VISIBLE_DEVICES"] = ""
# Forecast horizon shared by the functions below. Keep a value that was already
# defined earlier in the session (e.g. in a previous notebook cell); otherwise
# fall back to a one-step horizon instead of failing with a NameError.
prediction_length = globals().get("prediction_length", 1)


def get_train_test_split(series_list, prediction_length):
    """Split every series into a leading history and a held-out tail.

    Args:
        series_list: Iterable of sliceable sequences (the callers in this file
            pass darts ``TimeSeries`` objects).
        prediction_length: Number of trailing points to hold out of each
            series. Must be at least 1.

    Returns:
        A tuple ``(train_series, test_series)`` of two lists aligned with
        ``series_list``: everything except the last ``prediction_length``
        points, and the last ``prediction_length`` points.

    Raises:
        ValueError: If ``prediction_length`` is smaller than 1.
    """
    if prediction_length < 1:
        # s[:-0] is empty and s[-0:] is the whole series, so a zero (or
        # negative) horizon would silently produce an inverted split.
        raise ValueError(
            f"prediction_length must be >= 1, got {prediction_length}"
        )
    train_series = [s[:-prediction_length] for s in series_list]
    test_series = [s[-prediction_length:] for s in series_list]
    return train_series, test_series


def bootstrap_metrics(actual, predicted, n_bootstraps=1000, alpha=0.05, seed=42):
    """Compute point-forecast metrics with percentile-bootstrap intervals.

    Observation pairs are resampled with replacement ``n_bootstraps`` times;
    for each metric the ``alpha / 2`` and ``1 - alpha / 2`` percentiles of the
    resampled values are reported as the interval bounds.

    Args:
        actual: 1-D array-like of observed values.
        predicted: 1-D array-like of predictions, aligned with ``actual``.
        n_bootstraps: Number of bootstrap resamples.
        alpha: Significance level of the interval (0.05 gives a 95% interval).
        seed: Seed for the resampling generator; the default reproduces the
            previously hard-coded value.

    Returns:
        Dict with the keys ``mae``, ``rmse``, ``mse``, ``mape`` and ``smape``
        plus ``<metric>_ci_lower`` / ``<metric>_ci_upper`` for each of them.
        ``mse`` is computed with NumPy; the other metrics come from darts.

    Raises:
        ValueError: If the inputs are empty or differ in length.
    """
    actual = np.asarray(actual)
    predicted = np.asarray(predicted)
    n = len(actual)
    if n == 0:
        raise ValueError("bootstrap_metrics requires at least one observation.")
    if len(predicted) != n:
        raise ValueError(
            f"actual and predicted must have the same length, got {n} and {len(predicted)}."
        )
    rng = np.random.default_rng(seed)

    def compute_metrics(a, p):
        # Build each TimeSeries once and share it across the darts metrics
        # instead of re-creating the same two objects for every metric.
        a_ts = TimeSeries.from_values(a)
        p_ts = TimeSeries.from_values(p)
        return {
            "mae": mae(a_ts, p_ts),
            "rmse": rmse(a_ts, p_ts),
            "mse": np.mean((a - p) ** 2),
            "mape": mape(a_ts, p_ts),
            "smape": smape(a_ts, p_ts),
        }

    base = compute_metrics(actual, predicted)
    keys = list(base)

    boot = []
    for _ in range(n_bootstraps):
        # Resample observation pairs with replacement.
        idx = rng.integers(0, n, n)
        resampled = compute_metrics(actual[idx], predicted[idx])
        boot.append([resampled[k] for k in keys])
    boot = np.array(boot)

    lower = np.percentile(boot, 100 * (alpha / 2), axis=0)
    upper = np.percentile(boot, 100 * (1 - alpha / 2), axis=0)

    result = dict(base)
    result.update({f"{k}_ci_lower": lo for k, lo in zip(keys, lower)})
    result.update({f"{k}_ci_upper": hi for k, hi in zip(keys, upper)})
    return result


def inner_objective(trial, train_series_list):
    """Optuna objective: mean 3-fold validation MAE of a LightGBM forecaster.

    The folds are formed across series, not across time. For each fold the
    model is fitted on the training series and then, for every validation
    series, forecasts the last ``prediction_length`` points from the points
    that precede them. The forecast is scored against that held-out tail.

    Args:
        trial: Optuna trial used to sample the hyperparameters.
        train_series_list: List of series available for the inner CV.

    Returns:
        Mean MAE over the three inner folds.

    Raises:
        optuna.TrialPruned: If any series is too short for the sampled
            ``lags`` plus the module-level ``prediction_length``.
    """
    # Search space (the order of the suggest calls is kept stable so seeded
    # samplers reproduce the same sequence of trials).
    lags = trial.suggest_int("lags", 3, 42)
    min_data_in_leaf = trial.suggest_int("min_data_in_leaf", 5, 50)
    num_leaves = trial.suggest_int("num_leaves", 8, 64)
    feature_fraction = trial.suggest_float("feature_fraction", 0.5, 1.0)
    bagging_fraction = trial.suggest_float("bagging_fraction", 0.5, 1.0)
    max_depth = trial.suggest_int("max_depth", 3, 15)
    learning_rate = trial.suggest_float("learning_rate", 1e-3, 1e-1, log=True)
    lambda_l1 = trial.suggest_float("lambda_l1", 0.0, 1.0)
    lambda_l2 = trial.suggest_float("lambda_l2", 0.0, 1.0)
    n_estimators = trial.suggest_int("n_estimators", 100, 1000)

    kf_inner = KFold(n_splits=3, shuffle=True, random_state=42)
    inner_scores = []

    # Every series ends up in either the training or the validation part of
    # each fold, so the length requirement is the same for all folds and is
    # checked once. A validation series must still have more than `lags`
    # points left after its tail is held out.
    min_req = lags + prediction_length
    if any(len(s) <= min_req for s in train_series_list):
        raise optuna.TrialPruned()

    for train_idx, val_idx in kf_inner.split(train_series_list):
        fold_train = [train_series_list[i] for i in train_idx]
        fold_val = [train_series_list[i] for i in val_idx]

        model = LightGBMModel(
            lags=lags,
            lags_past_covariates=None,
            output_chunk_length=prediction_length,
            random_state=42,
            verbose=-1,
            n_estimators=n_estimators,
            learning_rate=learning_rate,
            num_leaves=num_leaves,
            feature_fraction=feature_fraction,
            bagging_fraction=bagging_fraction,
            max_depth=max_depth,
            lambda_l1=lambda_l1,
            lambda_l2=lambda_l2,
            min_data_in_leaf=min_data_in_leaf,
        )

        model.fit(fold_train, verbose=False)

        # Hold out the tail of each validation series and forecast it from the
        # preceding history, so predictions and actuals cover the same steps.
        val_inputs = [s[:-prediction_length] for s in fold_val]
        val_targets = [s[-prediction_length:] for s in fold_val]
        preds = model.predict(n=prediction_length, series=val_inputs)

        actual = np.concatenate([t.values().flatten() for t in val_targets])
        pred = np.concatenate([p.values().flatten() for p in preds])
        score = mae(TimeSeries.from_values(actual), TimeSeries.from_values(pred))
        inner_scores.append(score)

    return np.mean(inner_scores)


def nested_cross_validation(full_series_dict, n_outer_folds=5, n_trials=20):
    """Run nested cross-validation across series for the LightGBM forecaster.

    The last ``prediction_length`` points (module-level setting) of every
    series are held out as targets. Outer folds partition the series; for each
    outer fold a MinMaxScaler is fitted on the outer-training values, an
    Optuna study tunes hyperparameters via ``inner_objective``, and a model
    refitted with the best parameters forecasts the targets of the outer-test
    series. Metrics are computed on the original (inverse-scaled) values.

    Args:
        full_series_dict: Mapping of identifier to darts ``TimeSeries``.
        n_outer_folds: Number of outer K-fold splits.
        n_trials: Optuna trials per outer fold.

    Returns:
        Tuple ``(final_metrics, best_params)``: bootstrap metrics over the
        pooled outer-fold predictions, and the hyperparameters of the outer
        fold with the lowest MAE.

    Raises:
        ValueError: If no outer fold produced a completed Optuna trial.
    """
    all_series_list = list(full_series_dict.values())
    train_input_series, test_target_series = get_train_test_split(all_series_list, prediction_length)
    kf_outer = KFold(n_splits=n_outer_folds, shuffle=True, random_state=42)

    aggregated_actuals, aggregated_predictions = [], []
    outer_results = []

    print(f"Starting Nested {n_outer_folds}-Fold Cross-Validation...")

    for fold_num, (train_idx, test_idx) in enumerate(kf_outer.split(train_input_series)):
        print(f"\n--- Outer Fold {fold_num+1}/{n_outer_folds} ---")

        outer_train = [train_input_series[i] for i in train_idx]
        outer_test = [train_input_series[i] for i in test_idx]
        outer_targets = [test_target_series[i] for i in test_idx]

        # Scale based on outer training data only, so nothing from the
        # outer-test series leaks into the scaler.
        all_train_vals = np.concatenate([s.values().flatten() for s in outer_train])
        scaler = MinMaxScaler((0, 1))
        scaler.fit(all_train_vals.reshape(-1, 1))
        outer_train_scaled = [TimeSeries.from_values(scaler.transform(s.values())) for s in outer_train]
        outer_test_scaled = [TimeSeries.from_values(scaler.transform(s.values())) for s in outer_test]
        outer_targets_scaled = [TimeSeries.from_values(scaler.transform(s.values())) for s in outer_targets]

        # Inner optimization
        study = optuna.create_study(direction="minimize", sampler=optuna.samplers.TPESampler(seed=42))
        study.optimize(
            lambda trial: inner_objective(trial, outer_train_scaled),
            n_trials=n_trials,
            show_progress_bar=False,
        )

        # `study.best_trial` raises ValueError (it does not return None) when
        # no trial completed, e.g. because every trial was pruned.
        try:
            best_params = study.best_trial.params
        except ValueError:
            print(f"[Warning] No valid trials in Fold {fold_num+1}. Skipping...")
            continue

        print(f"Best hyperparameters (Fold {fold_num+1}): {best_params}")

        # Train final fold model
        model = LightGBMModel(
            lags=best_params["lags"],
            output_chunk_length=prediction_length,
            random_state=42,
            verbose=-1,
            n_estimators=best_params["n_estimators"],
            learning_rate=best_params["learning_rate"],
            num_leaves=best_params["num_leaves"],
            feature_fraction=best_params["feature_fraction"],
            bagging_fraction=best_params["bagging_fraction"],
            max_depth=best_params["max_depth"],
            lambda_l1=best_params["lambda_l1"],
            lambda_l2=best_params["lambda_l2"],
            min_data_in_leaf=best_params["min_data_in_leaf"],
        )

        model.fit(series=outer_train_scaled, verbose=False)
        preds_scaled = model.predict(n=prediction_length, series=outer_test_scaled)

        # Score every forecast step, not just the first one; for a one-step
        # horizon this yields exactly one value per series.
        actuals_fold, preds_fold = [], []
        for p, t in zip(preds_scaled, outer_targets_scaled):
            y_pred = scaler.inverse_transform(p.values().reshape(-1, 1)).ravel()
            y_true = scaler.inverse_transform(t.values().reshape(-1, 1)).ravel()
            preds_fold.extend(y_pred)
            actuals_fold.extend(y_true)

        metrics = bootstrap_metrics(np.array(actuals_fold), np.array(preds_fold))
        fold_mae = metrics["mae"]
        print(f"Outer Fold {fold_num+1} MAE: {fold_mae:.4f}")
        outer_results.append({"fold": fold_num + 1, "mae": fold_mae, "params": best_params})

        aggregated_actuals.extend(actuals_fold)
        aggregated_predictions.extend(preds_fold)

    if not outer_results:
        raise ValueError(
            "No outer fold produced a completed Optuna trial; the series may be "
            "too short for the searched lag range."
        )

    final_metrics = bootstrap_metrics(np.array(aggregated_actuals), np.array(aggregated_predictions))
    # NOTE(review): the returned parameters are picked by outer-fold (test)
    # MAE; `final_metrics` is the pooled estimate across all outer folds.
    best_overall = min(outer_results, key=lambda x: x["mae"])
    best_params = best_overall["params"]

    print("Best Overall Parameters (Lowest Outer-Fold MAE):")
    for k, v in best_params.items():
        print(f"  {k}: {v}")
    print(f"Best outer fold: {best_overall['fold']} (MAE={best_overall['mae']:.4f})")

    return final_metrics, best_params

def train_final_and_evaluate_external(internal_series_dict, external_series_dict,
                                      best_params, prediction_length=1):
    """Refit the model on all internal series and score it on external series.

    A MinMaxScaler fitted on the internal values only is applied to both
    datasets. The model is trained on the complete internal series; for each
    external series the last ``prediction_length`` points are held out and
    forecast from the preceding points. Metrics are computed on the original
    (inverse-scaled) values and printed with bootstrap confidence intervals.

    Args:
        internal_series_dict: Mapping of identifier to darts ``TimeSeries``
            used for training.
        external_series_dict: Mapping of identifier to darts ``TimeSeries``
            used only for evaluation.
        best_params: Hyperparameter dict with the keys sampled by
            ``inner_objective``.
        prediction_length: Forecast horizon, also used as the model's
            ``output_chunk_length``.

    Returns:
        Tuple ``(model, metrics)``: the fitted model and the dict returned by
        ``bootstrap_metrics``.
    """
    print("Retraining final LightGBM model using best overall parameters")
    internal_series = list(internal_series_dict.values())
    external_series = list(external_series_dict.values())

    # Fit global scaler on internal data only
    all_vals = np.concatenate([s.values().flatten() for s in internal_series])
    scaler = MinMaxScaler((0, 1))
    scaler.fit(all_vals.reshape(-1, 1))
    internal_scaled = [TimeSeries.from_values(scaler.transform(s.values())) for s in internal_series]
    external_scaled = [TimeSeries.from_values(scaler.transform(s.values())) for s in external_series]

    model = LightGBMModel(
        lags=best_params["lags"],
        output_chunk_length=prediction_length,
        random_state=42,
        verbose=-1,
        n_estimators=best_params["n_estimators"],
        learning_rate=best_params["learning_rate"],
        num_leaves=best_params["num_leaves"],
        feature_fraction=best_params["feature_fraction"],
        bagging_fraction=best_params["bagging_fraction"],
        max_depth=best_params["max_depth"],
        lambda_l1=best_params["lambda_l1"],
        lambda_l2=best_params["lambda_l2"],
        min_data_in_leaf=best_params["min_data_in_leaf"],
    )

    model.fit(series=internal_scaled, verbose=True)

    ext_input, ext_target = get_train_test_split(external_scaled, prediction_length)
    preds_scaled = model.predict(n=prediction_length, series=ext_input)

    # Score every forecast step, not just the first one; for a one-step
    # horizon this yields exactly one value per series.
    preds, actuals = [], []
    for p, t in zip(preds_scaled, ext_target):
        y_pred = scaler.inverse_transform(p.values().reshape(-1, 1)).ravel()
        y_true = scaler.inverse_transform(t.values().reshape(-1, 1)).ravel()
        preds.extend(y_pred)
        actuals.extend(y_true)

    metrics = bootstrap_metrics(np.array(actuals), np.array(preds))
    print("Final External Evaluation:")
    print(f"MAE:   {metrics['mae']:.4f} (95% CI: [{metrics['mae_ci_lower']:.4f}, {metrics['mae_ci_upper']:.4f}])")
    print(f"RMSE:  {metrics['rmse']:.4f} (95% CI: [{metrics['rmse_ci_lower']:.4f}, {metrics['rmse_ci_upper']:.4f}])")
    print(f"MSE:   {metrics['mse']:.4f} (95% CI: [{metrics['mse_ci_lower']:.4f}, {metrics['mse_ci_upper']:.4f}])")
    print(f"MAPE:  {metrics['mape']:.2f}% (95% CI: [{metrics['mape_ci_lower']:.2f}%, {metrics['mape_ci_upper']:.2f}%])")
    print(f"sMAPE: {metrics['smape']:.2f}% (95% CI: [{metrics['smape_ci_lower']:.2f}%, {metrics['smape_ci_upper']:.2f}%])")
    return model, metrics


if __name__ == "__main__":
    print("Generating dummy internal and external datasets for test. Replace with your data.")

    # Internal cohort: 30 random single-column series of 20 points in [0, 50).
    valid_series_dict_full = {}
    for i in range(30):
        internal_values = np.random.rand(20, 1) * 50
        valid_series_dict_full[f"Encounter/{i}"] = TimeSeries.from_values(internal_values)

    # External cohort: 10 random series generated the same way.
    external_series_dict = {}
    for i in range(10):
        external_values = np.random.rand(20, 1) * 50
        external_series_dict[f"HospId/{i}"] = TimeSeries.from_values(external_values)






In [None]:
    # Step 1: Nested CV on the internal series (10 outer folds, 50 Optuna
    # trials per fold). Returns the pooled outer-fold metrics and the
    # hyperparameters of the outer fold with the lowest MAE.
    nested_metrics, best_params = nested_cross_validation(
        full_series_dict=valid_series_dict_full,
        n_outer_folds=10,
        n_trials=50
    )
  

In [None]:
    # Step 2: Retrain final model on all internal series with the parameters
    # from Step 1 and evaluate on external data, using the same module-level
    # forecast horizon as the nested CV.
    final_model, external_metrics = train_final_and_evaluate_external(
        internal_series_dict=valid_series_dict_full,
        external_series_dict=external_series_dict,
        best_params=best_params,
        prediction_length=prediction_length
    )