# Evaluation of Tuned Models on Validation Set

This notebook loads the best hyperparameters found during tuning, trains each model on the training set only, and evaluates it on the validation set.


In [None]:
import os
import sys
import pickle
import pandas as pd
import numpy as np
import xgboost as xgb
import lightgbm as lgb
import warnings

warnings.filterwarnings("ignore")

# Add paths for imports
sys.path.append("training")
sys.path.append("tuning")

# Import existing functions
from split_data import load_splits, rescale_data, print_results
from tune_xgb import get_base_xgb_params
from tune_lgb import get_base_lgb_params

In [None]:
# Notebook-wide settings: input/output locations, random seed, GPU switch,
# and the list of models to process.
config = dict(
    tuning_results_dir="tuning/results",
    output_dir="outputs",
    splits_dir="training/splits",
    seed=42,
    enable_gpu=True,
    models=["xgb", "lgb"],
)

# Create the output directory up front; an existing directory is not an error.
os.makedirs(config["output_dir"], exist_ok=True)

# Echo every setting so the run is self-describing in the notebook output.
print("Configuration:")
for setting, setting_value in config.items():
    print(f"  {setting}: {setting_value}")

## Load Data and Tuning Results

Load the data splits and the best hyperparameters from Optuna studies.


In [None]:
# Load the three data splits together with the feature and target column lists.
print("Loading data splits...")
train_set, val_set, test_set, X_cols, y_cols = load_splits(config["splits_dir"])

# Report the shape of each split, then the feature count and the target column(s).
print("Data loaded successfully:")
for split_label, split_frame in (
    ("Train set", train_set),
    ("Validation set", val_set),
    ("Test set", test_set),
):
    print(f"  {split_label}: {split_frame.shape}")
print(f"  Features: {len(X_cols)}")
print(f"  Target: {y_cols}")

In [None]:
def load_tuning_results(model_name, tuning_dir):
    """Read the pickled Optuna study saved for ``model_name``.

    The study is looked up at ``<tuning_dir>/<model_name>_optuna_study.pkl``.
    When the file exists it is unpickled and a short summary (best value,
    number of trials, best parameters) is printed.

    Returns the unpickled study object, or ``None`` (after printing a
    warning) when the file does not exist.
    """
    study_path = "{}/{}_optuna_study.pkl".format(tuning_dir, model_name)

    if os.path.exists(study_path):
        # NOTE: pickle executes arbitrary code on load; only point this at
        # study files produced by your own tuning runs.
        with open(study_path, "rb") as handle:
            loaded = pickle.load(handle)

        print(f"Loaded tuning results for {model_name.upper()}:")
        print(f"  Best MAE: {loaded.best_value:.4f}")
        print(f"  Number of trials: {len(loaded.trials)}")
        print(f"  Best parameters: {loaded.best_params}")
        return loaded

    print(f"Warning: No tuning results found for {model_name} at {study_path}")
    return None


# Collect the Optuna study of every configured model; models for which
# load_tuning_results returns None are left out of the mapping.
tuning_results = {}
banner = "=" * 50
for model_name in config["models"]:
    print(f"\n{banner}")
    print(f"Loading tuning results for {model_name.upper()}")
    print(banner)

    study = load_tuning_results(model_name, config["tuning_results_dir"])
    if study is None:
        continue
    tuning_results[model_name] = study

## Prepare Model Parameters

Combine base parameters with the best hyperparameters from tuning.


In [None]:
def get_final_params(model_name, best_params, seed, enable_gpu):
    """Merge a model's base parameters with its tuned hyperparameters.

    Base parameters come from ``get_base_xgb_params`` for ``"xgb"`` and from
    ``get_base_lgb_params`` for ``"lgb"``; both are called with ``seed`` and
    ``enable_gpu``. On a key collision the value from ``best_params`` wins.

    Returns a new dict with the combined parameters.
    Raises ValueError for any other ``model_name``.
    """
    # Reject unknown model names before calling any parameter factory.
    if model_name not in ("xgb", "lgb"):
        raise ValueError(f"Unsupported model: {model_name}")

    if model_name == "xgb":
        defaults = get_base_xgb_params(seed, enable_gpu)
    else:
        defaults = get_base_lgb_params(seed, enable_gpu)

    # Later unpacking overrides earlier keys, so tuned values take precedence.
    return {**defaults, **best_params}


# Build the final parameter set for every model that has tuning results.
model_params = {}
for model_name, study in tuning_results.items():
    print(f"\nPreparing parameters for {model_name.upper()}:")

    final_params = get_final_params(
        model_name=model_name,
        best_params=study.best_params,
        seed=config["seed"],
        enable_gpu=config["enable_gpu"],
    )
    model_params[model_name] = final_params

    print(f"  Final parameters: {final_params}")
    # The study's best objective value is reported as the expected MAE.
    print(f"  Expected validation MAE: {study.best_value:.4f}")

## Train Models and Evaluate on Validation Set

Train each model on the training set only and evaluate it on the validation set, following the same workflow as `train_ml.py`.


In [None]:
def create_tuned_model(model_name, params):
    """Instantiate the regressor for ``model_name`` with ``params``.

    ``"xgb"`` yields an ``xgb.XGBRegressor`` and ``"lgb"`` an
    ``lgb.LGBMRegressor``; ``params`` is forwarded as keyword arguments.

    Raises ValueError for any other ``model_name``.
    """
    # Fail fast on unknown names, before either library is touched.
    if model_name not in ("xgb", "lgb"):
        raise ValueError(f"Unsupported model: {model_name}")

    regressor_cls = xgb.XGBRegressor if model_name == "xgb" else lgb.LGBMRegressor
    return regressor_cls(**params)


def evaluate_and_save_tuned_results(
    model, model_name, val_set, X_cols, y_cols, output_dir
):
    """Predict on the validation set, report the results, and write them to CSV.

    Steps:
      1. Predict from ``val_set[X_cols]`` and store the output as ``y_pred``
         on a copy of ``val_set`` (the caller's frame is not modified).
      2. Rename the last column named in ``y_cols`` to ``target``.
      3. Pass ``target`` and ``y_pred`` through ``rescale_data``
         (presumably back to original units -- confirm in split_data).
      4. Keep only the Timestamp, Patient_ID, bgClass, target and y_pred
         columns, hand them to ``print_results``, and save them as
         ``<output_dir>/<model_name>_tuned_output.csv`` without the index.

    Returns the DataFrame that was written to disk.
    """
    # assign() works on a copy, so val_set itself stays untouched.
    scored = val_set.assign(y_pred=model.predict(val_set[X_cols]))

    # Same preparation as train_ml.py: uniform target name, then rescale.
    target_column = y_cols[-1]
    scored = scored.rename(columns={target_column: "target"})
    scored = rescale_data(scored, ["target", "y_pred"])

    results = scored[["Timestamp", "Patient_ID", "bgClass", "target", "y_pred"]]

    print(f"\nEvaluation results for {model_name.upper()} (tuned):")
    print_results(results)

    # The "tuned" suffix keeps these files apart from the standard models' output.
    output_file = "{}/{}_tuned_output.csv".format(output_dir, model_name)
    results.to_csv(output_file, index=False)
    print(f"\nResults saved to: {output_file}")

    return results


def save_tuned_model(model, model_name, output_dir):
    """Pickle ``model`` to ``<output_dir>/<model_name>_tuned.pickle``.

    The highest pickle protocol supported by the running interpreter is used.
    Prints the destination and returns it as a string.
    """
    destination = "{}/{}_tuned.pickle".format(output_dir, model_name)
    with open(destination, mode="wb") as sink:
        pickle.dump(model, sink, protocol=pickle.HIGHEST_PROTOCOL)
    print(f"Tuned model saved to: {destination}")
    return destination

In [None]:
# Fit, persist, and validate every tuned model; keep the artefacts for the summary.
results_summary = {}
rule = "=" * 60

for model_name, tuned_params in model_params.items():
    display_name = model_name.upper()

    print(f"\n{rule}")
    print(f"TRAINING AND EVALUATION - {display_name} (TUNED)")
    print(rule)

    # Instantiate the estimator with the merged base + tuned parameters.
    print(f"Creating {display_name} model with tuned parameters...")
    model = create_tuned_model(model_name, tuned_params)

    # Fit on the train split only (not train+val like in final tuning evaluation).
    print(f"Training {display_name} on train set...")
    print(f"Train set size: {len(train_set)}")

    target_column = y_cols[-1]
    model.fit(train_set[X_cols], train_set[target_column])
    print(f"Training completed for {display_name}")

    # Persist the fitted model before evaluating it.
    model_path = save_tuned_model(model, model_name, config["output_dir"])

    # Score the validation split and write the per-row results to CSV.
    print(f"\nEvaluating {display_name} on validation set...")
    print(f"Validation set size: {len(val_set)}")

    results = evaluate_and_save_tuned_results(
        model, model_name, val_set, X_cols, y_cols, config["output_dir"]
    )

    # Keep everything the summary cell needs, including the tuning-time score.
    results_summary[model_name] = dict(
        model=model,
        results=results,
        model_path=model_path,
        expected_mae=tuning_results[model_name].best_value,
    )

    print(f"\n{display_name} processing completed!")

## Summary and Comparison

Compare the results with expected performance from tuning.


In [None]:
# Calculate actual MAE for comparison with tuning expectations
def calculate_patient_mae(results_df):
    """Return the patient-averaged MAE of ``y_pred`` against ``target``.

    The absolute error is averaged within each ``Patient_ID`` first, and the
    per-patient means are then averaged, so every patient carries the same
    weight regardless of how many rows it has.

    Args:
        results_df: DataFrame with ``Patient_ID``, ``target`` and ``y_pred``
            columns.

    Returns:
        The mean of the per-patient MAEs, or NaN when no row belongs to a
        patient (for example an empty frame).

    Notes:
        Rows whose ``Patient_ID`` is missing are not assigned to any patient
        and do not contribute; previously a single such row turned the whole
        result into NaN. Grouping is done in one pass instead of re-filtering
        the full frame once per patient.
    """
    abs_error = (results_df["target"] - results_df["y_pred"]).abs()

    # Group by the raw ID values: sort=False keeps first-appearance order, and
    # passing an array avoids index alignment and unobserved categorical levels.
    patient_ids = results_df["Patient_ID"].to_numpy()
    per_patient = abs_error.groupby(patient_ids, sort=False).mean()

    if per_patient.empty:
        # No patient groups at all: report NaN without a NumPy empty-slice warning.
        return np.nan

    # np.mean on the raw array propagates a NaN per-patient MAE, as before.
    return np.mean(per_patient.to_numpy())


rule = "=" * 80
print(f"\n{rule}")
print("TUNED MODELS EVALUATION SUMMARY")
print(rule)

# One row per evaluated model: tuning-time MAE vs. MAE measured on validation.
summary_data = []
for model_name, entry in results_summary.items():
    expected_mae = entry["expected_mae"]
    actual_mae = calculate_patient_mae(entry["results"])
    # Positive difference: the validation MAE is higher than the tuning value.
    difference = actual_mae - expected_mae

    output_name = f"{model_name}_tuned_output.csv"
    pickle_name = f"{model_name}_tuned.pickle"

    row = {
        "Model": f"{model_name.upper()} (tuned)",
        "Expected MAE": f"{expected_mae:.4f}",
        "Actual MAE": f"{actual_mae:.4f}",
        "Difference": f"{difference:+.4f}",
        "Output File": output_name,
        "Model File": pickle_name,
    }
    summary_data.append(row)

    print(f"\n{model_name.upper()} (TUNED):")
    print(f"  Expected MAE (from tuning): {expected_mae:.4f}")
    print(f"  Actual MAE (train->val): {actual_mae:.4f}")
    print(f"  Difference: {difference:+.4f}")
    print(f"  Results saved: {config['output_dir']}/{output_name}")
    print(f"  Model saved: {config['output_dir']}/{pickle_name}")

# Tabulate the rows and show them without the DataFrame index.
summary_df = pd.DataFrame(summary_data)
print(f"\n{rule}")
print("SUMMARY TABLE:")
print(rule)
print(summary_df.to_string(index=False))

# Persist the table next to the other outputs.
summary_path = "{}/tuned_models_summary.csv".format(config["output_dir"])
summary_df.to_csv(summary_path, index=False)
print(f"\nSummary saved to: {summary_path}")

In [None]:
# Verification: confirm that every expected artefact exists in the output directory.
rule = "=" * 60
print(f"\n{rule}")
print("OUTPUT FILES VERIFICATION")
print(rule)

# Expected files: one CSV per configured model, then one pickle per configured
# model, then the summary table.
output_files = [
    f"{model_name}{suffix}"
    for suffix in ("_tuned_output.csv", "_tuned.pickle")
    for model_name in config["models"]
]
output_files.append("tuned_models_summary.csv")

for filename in output_files:
    filepath = "{}/{}".format(config["output_dir"], filename)
    if not os.path.exists(filepath):
        print(f"✗ {filename} - NOT FOUND")
        continue
    print(f"✓ {filename} - {os.path.getsize(filepath):,} bytes")

print("\nAll tuned models evaluation completed!")
print(f"Results are saved in: {config['output_dir']}")
print("Files with '_tuned' suffix distinguish them from standard models.")