# In [None]:
# ============================================================
# 🛡️ λ-Guard Multi-Model Experiment - Python Notebook
# ============================================================

# This notebook demonstrates the usage of the λ-Guard package
# for detecting structural overfitting in Gradient Boosting
# and other tree-based models. It leverages the package's `ofi.py`
# and `cusum.py` modules for computing the overfitting index
# (OFI) and detecting the change point where the model
# starts memorizing structure rather than learning signal.
# ============================================================

# -----------------------------
# Imports
# -----------------------------
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from itertools import product
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.datasets import make_regression
from sklearn.ensemble import GradientBoostingRegressor
import xgboost as xgb
import lightgbm as lgb
from catboost import CatBoostRegressor
import seaborn as sns

# Import functions from your lambdaguard package
from lambdaguard.ofi import (
    generalization_index,     # Computes alignment (A), complexity (C) and generalization index (GI)
    instability_index,        # Measures sensitivity of predictions to small input perturbations
    create_model,             # Factory function to create GBR, XGB, LGBM, or CAT models
    run_experiment_multi_model,# Runs full experiment across multiple hyperparameter combinations
    regression_test           # Optional: performs regression analysis (Gap vs OFI)
)
from lambdaguard.cusum import lambda_detect  # Detects structural overfitting change points using OFI

# -----------------------------
# Generate Synthetic Dataset
# -----------------------------
# Toy regression problem used to demonstrate multi-model overfitting
# detection: 1000 samples with 10 features, 8 of which carry signal,
# generated at noise level 10.0. The seed is fixed so that re-running
# the notebook produces the same data.
X, y = make_regression(
    random_state=42,
    noise=10.0,
    n_informative=8,
    n_features=10,
    n_samples=1000,
)

# -----------------------------
# Run Multi-Model Experiment
# -----------------------------
# Evaluate the selected tree-based models (GBR, LGBM, CAT) across
# different hyperparameters with `run_experiment_multi_model`.
# Quantities computed by the experiment:
#   A   - alignment: signal captured by the model
#   C   - structural complexity: how fragmented the feature space is
#   S   - instability: sensitivity to input perturbations
#   OFI - overfitting index, OFI = (C / (A + C)) * S
df_multi = run_experiment_multi_model(
    X,
    y,
    model_names=["GBR", "LGBM", "CAT"],
    dataset_name="Synthetic Database",
)

# Quick look at the head of the experiment table.
print("Experiment dataframe preview:")
display(df_multi.head())

# -----------------------------
# Detect Structural Overfitting
# -----------------------------
# Every model type present in the experiment table is processed in turn.
# λ-Guard's `lambda_detect` is used to identify the point where the model
# starts to overfit structurally, based on the normalized OFI and the
# cumulative sum (CUSUM) method.

list_model = df_multi["model"].unique()
for model_name in list_model:
    # Optional diagnostic: regression analysis on this model's rows only.
    is_current_model = df_multi["model"] == model_name
    regression_test(df_multi.loc[is_current_model])

    # Change-point detection; receives the full table plus the model to examine.
    result = lambda_detect(df_multi, model_name=model_name)