In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, KFold

In [2]:
import numpy as np
import lightgbm as lgb
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import cross_val_score, cross_validate
from ray import train, tune, init
from ray.tune.schedulers.hb_bohb import HyperBandForBOHB
from ray.tune.search.bohb import TuneBOHB
from functools import partial
from typing import Tuple, Union

# Load a sample dataset (you can replace this with your own dataset)
data = load_breast_cancer()
X, y = data.data, data.target

MAX_T = 2

# TODO: Model and data and everything need to be somehow predefined
# Like in a constant script or something like that.
# Maybe we can turn this whole thing into a class and it will be easier? We can save model etc... But so many params


def evaluate(model, X, y, cv: int = 5, metric: str = "accuracy") -> dict:
    cv_scores = cross_validate(
        model, X, y, cv=cv, scoring=metric, n_jobs=-1, return_train_score=True
    )

    # Calculate mean and standard deviation of cross-validation scores
    metrics = {
        "mean_score_test": np.mean(cv_scores["test_score"]),
        "mean_score_train": np.mean(cv_scores["train_score"]),
        "std_score_test": np.std(cv_scores["test_score"]),
        "std_score_train": np.std(cv_scores["train_score"]),
        "std_score_all": np.std(
            np.hstack((cv_scores["train_score"], cv_scores["test_score"]))
        ),
    }
    return metrics


# Define the training function
def train_lightgbm(
    config: dict = {}, loss: str = "std_overfit_train", baseline_mean: float = 0.0
) -> float:
    model = lgb.LGBMClassifier(**config)

    metrics = evaluate(model, X, y, cv=5, metric="accuracy")

    # TODO: Somehow incorporate baseline as well !
    if loss == "std_overfit_train":
        score = (
            np.abs(metrics["mean_score_train"] - metrics["mean_score_test"])
            + metrics["std_score_all"]
        )
    elif loss == "std_overfit":
        score = (
            np.abs(metrics["mean_score_train"] - metrics["mean_score_test"])
            + metrics["std_score_test"]
        )
    elif loss == "std_train":
        score = metrics["std_score_all"]
    elif loss == "hybrid_std":
        pass
    elif loss == "hybrid_std_overfit":
        score = (
            (baseline_mean - metrics["mean_score_test"])  # Minimize
            + np.abs(
                metrics["mean_score_train"] - metrics["mean_score_test"]
            )  # Minimize
            + metrics["std_score_all"]  # Minimize
        )
    elif loss == "baseline_metric":
        score = baseline_mean - metrics["mean_score_test"]  # Minimize
    elif loss == "metric":
        score = metrics["mean_score_test"]
    elif loss == "hybrid_metric_overfit":
        score = (
            np.abs(metrics["mean_score_train"] - metrics["mean_score_test"])
            + metrics["mean_score_test"]
        )

    # Use both mean and std as the metric to minimize
    # tune.report(mean_accuracy=mean_score, std_accuracy=std_score)
    train.report(score=score)
    return score


def tune_model(loss: str = "baseline_metric") -> Tuple[dict, float]:
    
    init(num_cpus=6)
    
    # Define the search space for hyperparameters
    config_space = {
        "num_leaves": tune.choice([20, 30, 40, 50]),
        "learning_rate": tune.loguniform(1e-4, 1e-1),
        "subsample": tune.uniform(0.5, 1.0),
        "colsample_bytree": tune.uniform(0.5, 1.0),
        "reg_alpha": tune.loguniform(1e-4, 1e2),
        "reg_lambda": tune.loguniform(1e-4, 1e2),
    }

    # Define the BOHB scheduler
    bohb_hyperband = HyperBandForBOHB(
        time_attr="training_iteration", max_t=MAX_T, reduction_factor=2, stop_last_trials=True
    )

    # Baseline:
    # TODO: incorporate baseline loss for every loss function. So that we know what it is like without the baseline and with baseline etc..
    baseline_loss = train_lightgbm(loss=loss)
    baseline_mean = evaluate(model=lgb.LGBMClassifier(), X=X, y=y)

    # Set up the experiment configuration
    bohb_search = TuneBOHB(space=config_space, mode="min", metric="score")
    bohb_search = tune.search.ConcurrencyLimiter(bohb_search, max_concurrent=4)
    fmin_objective = partial(train_lightgbm, loss=loss, baseline_mean=baseline_mean)
    analysis = tune.run(
        fmin_objective,
        # config=config_space,
        num_samples=10,  # Number of hyperparameter samples
        metric="score",  # Metric to minimize
        mode="min",
        resources_per_trial={
            "cpu": 1
        },  # Change this to whatever resources you have available
        search_alg=bohb_search,
        scheduler=bohb_hyperband,
        local_dir="experiments",
    )

    # Get the best hyperparameters
    best_config = analysis.get_best_config(metric="score", mode="min")
    best_loss = analysis.get_best_logdir(metric="score")
    print("Best Hyperparameters:", best_config)
    return best_config, best_loss


def tune_exp() -> pd.DataFrame:
    best_configs = []
    best_losses = []
    df_metric = pd.DataFrame(
        {
            "mean_test": [],
            "mean_train": [],
            "std_test": [],
            "std_train": [],
            "loss_func": [],
            "loss": [],
        }
    )

    losses = [
        "std_overfit_train",
        "hybrid_std_overfit",
        "baseline_metric",
        "hybrid_metric_overfit",
    ]

    for loss in losses:
        best_config, best_loss = tune_model(loss=loss)
        best_configs.append(best_config)
        best_losses.append(best_loss)
    # Not the most elegant solution.
    # Log baseline scores
    best_configs.append({})
    best_losses.append("N/A")
    losses.append("BASELINE")

    for idx, config in enumerate(best_configs):
        model = lgb.LGBMClassifier(**config)
        metrics = evaluate(model, X, y)

        metrics["loss_func"] = losses[idx]
        metrics["loss"] = best_losses[idx]
        del metrics["std_all"]
        df_metric = df_metric.append(metrics, ignore_index=True)

    return df_metric

In [7]:
df_metric = tune_exp()
df_metric

2024-01-02 02:23:47,956	INFO tune.py:583 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


0,1
Current time:,2024-01-02 02:24:55
Running for:,00:01:07.60
Memory:,13.7/13.9 GiB

Trial name,# failures,error file
train_lightgbm_36c4ea15,1,"C:/Users/chris/ray_results/train_lightgbm_2024-01-02_02-23-47/train_lightgbm_36c4ea15_1_colsample_bytree=0.9643,learning_rate=0.0127,num_leaves=30,reg_alpha=0.0067,reg_lambda=0.8009,subsample=_2024-01-02_02-23-47\error.txt"
train_lightgbm_3e09bda0,1,"C:/Users/chris/ray_results/train_lightgbm_2024-01-02_02-23-47/train_lightgbm_3e09bda0_2_colsample_bytree=0.7441,learning_rate=0.0019,num_leaves=30,reg_alpha=0.0009,reg_lambda=0.0090,subsample=_2024-01-02_02-23-52\error.txt"
train_lightgbm_c1542caf,1,"C:/Users/chris/ray_results/train_lightgbm_2024-01-02_02-23-47/train_lightgbm_c1542caf_3_colsample_bytree=0.8591,learning_rate=0.0002,num_leaves=20,reg_alpha=0.0051,reg_lambda=0.0004,subsample=_2024-01-02_02-23-58\error.txt"
train_lightgbm_1e2c6265,1,"C:/Users/chris/ray_results/train_lightgbm_2024-01-02_02-23-47/train_lightgbm_1e2c6265_4_colsample_bytree=0.7882,learning_rate=0.0161,num_leaves=20,reg_alpha=7.1416,reg_lambda=0.0185,subsample=_2024-01-02_02-24-03\error.txt"
train_lightgbm_fa218678,1,"C:/Users/chris/ray_results/train_lightgbm_2024-01-02_02-23-47/train_lightgbm_fa218678_5_colsample_bytree=0.9247,learning_rate=0.0086,num_leaves=50,reg_alpha=0.0003,reg_lambda=0.1644,subsample=_2024-01-02_02-24-11\error.txt"
train_lightgbm_02597fae,1,"C:/Users/chris/ray_results/train_lightgbm_2024-01-02_02-23-47/train_lightgbm_02597fae_6_colsample_bytree=0.7557,learning_rate=0.0058,num_leaves=20,reg_alpha=0.0639,reg_lambda=0.0001,subsample=_2024-01-02_02-24-17\error.txt"
train_lightgbm_aae3cc9b,1,"C:/Users/chris/ray_results/train_lightgbm_2024-01-02_02-23-47/train_lightgbm_aae3cc9b_7_colsample_bytree=0.7808,learning_rate=0.0032,num_leaves=30,reg_alpha=0.0002,reg_lambda=1.3518,subsample=_2024-01-02_02-24-24\error.txt"
train_lightgbm_306f018b,1,"C:/Users/chris/ray_results/train_lightgbm_2024-01-02_02-23-47/train_lightgbm_306f018b_8_colsample_bytree=0.5748,learning_rate=0.0129,num_leaves=30,reg_alpha=0.0024,reg_lambda=0.0169,subsample=_2024-01-02_02-24-30\error.txt"
train_lightgbm_7348efee,1,"C:/Users/chris/ray_results/train_lightgbm_2024-01-02_02-23-47/train_lightgbm_7348efee_9_colsample_bytree=0.5223,learning_rate=0.0143,num_leaves=40,reg_alpha=0.7834,reg_lambda=4.3045,subsample=_2024-01-02_02-24-37\error.txt"
train_lightgbm_6b0447a6,1,"C:/Users/chris/ray_results/train_lightgbm_2024-01-02_02-23-47/train_lightgbm_6b0447a6_10_colsample_bytree=0.6807,learning_rate=0.0331,num_leaves=30,reg_alpha=0.0008,reg_lambda=64.8547,subsampl_2024-01-02_02-24-44\error.txt"

Trial name,status,loc,colsample_bytree,learning_rate,num_leaves,reg_alpha,reg_lambda,subsample
train_lightgbm_36c4ea15,ERROR,127.0.0.1:24604,0.964325,0.0126502,30,0.0067327,0.800902,0.730139
train_lightgbm_3e09bda0,ERROR,127.0.0.1:35620,0.744076,0.00188466,30,0.000944387,0.00903944,0.744011
train_lightgbm_c1542caf,ERROR,127.0.0.1:13792,0.85912,0.000231575,20,0.0050646,0.000420352,0.947184
train_lightgbm_1e2c6265,ERROR,127.0.0.1:27736,0.788193,0.0161023,20,7.14156,0.0185497,0.589273
train_lightgbm_fa218678,ERROR,127.0.0.1:20364,0.924693,0.0085792,50,0.000328604,0.164364,0.713718
train_lightgbm_02597fae,ERROR,127.0.0.1:38240,0.755655,0.00580353,20,0.0638635,0.000127723,0.692384
train_lightgbm_aae3cc9b,ERROR,127.0.0.1:15172,0.780786,0.00316731,30,0.000153809,1.35183,0.786215
train_lightgbm_306f018b,ERROR,127.0.0.1:26812,0.574762,0.012899,30,0.00241715,0.0168756,0.526597
train_lightgbm_7348efee,ERROR,127.0.0.1:16908,0.522286,0.0142903,40,0.783393,4.30455,0.828653
train_lightgbm_6b0447a6,ERROR,127.0.0.1:40504,0.680746,0.0330798,30,0.000849314,64.8547,0.857485


2024-01-02 02:23:56,208	ERROR tune_controller.py:1374 -- Trial task failed for trial train_lightgbm_36c4ea15
Traceback (most recent call last):
  File "c:\Users\chris\Anaconda3\envs\hedno_v2\lib\site-packages\ray\air\execution\_internal\event_manager.py", line 110, in resolve_future
    result = ray.get(future)
  File "c:\Users\chris\Anaconda3\envs\hedno_v2\lib\site-packages\ray\_private\auto_init_hook.py", line 22, in auto_init_wrapper
    return fn(*args, **kwargs)
  File "c:\Users\chris\Anaconda3\envs\hedno_v2\lib\site-packages\ray\_private\client_mode_hook.py", line 103, in wrapper
    return func(*args, **kwargs)
  File "c:\Users\chris\Anaconda3\envs\hedno_v2\lib\site-packages\ray\_private\worker.py", line 2624, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(TypeError): [36mray::ImplicitFunc.train()[39m (pid=24604, ip=127.0.0.1, actor_id=633246410d2e42f2b375ee6501000000, repr=func)
  File "python\ray\_raylet.pyx", line 1813, in ray._raylet.execute_task


Trial name
train_lightgbm_02597fae
train_lightgbm_1e2c6265
train_lightgbm_306f018b
train_lightgbm_36c4ea15
train_lightgbm_3e09bda0
train_lightgbm_6b0447a6
train_lightgbm_7348efee
train_lightgbm_aae3cc9b
train_lightgbm_c1542caf
train_lightgbm_fa218678


2024-01-02 02:24:02,043	ERROR tune_controller.py:1374 -- Trial task failed for trial train_lightgbm_3e09bda0
Traceback (most recent call last):
  File "c:\Users\chris\Anaconda3\envs\hedno_v2\lib\site-packages\ray\air\execution\_internal\event_manager.py", line 110, in resolve_future
    result = ray.get(future)
  File "c:\Users\chris\Anaconda3\envs\hedno_v2\lib\site-packages\ray\_private\auto_init_hook.py", line 22, in auto_init_wrapper
    return fn(*args, **kwargs)
  File "c:\Users\chris\Anaconda3\envs\hedno_v2\lib\site-packages\ray\_private\client_mode_hook.py", line 103, in wrapper
    return func(*args, **kwargs)
  File "c:\Users\chris\Anaconda3\envs\hedno_v2\lib\site-packages\ray\_private\worker.py", line 2624, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(TypeError): [36mray::ImplicitFunc.train()[39m (pid=35620, ip=127.0.0.1, actor_id=1078422fb3c093feac812b1801000000, repr=func)
  File "python\ray\_raylet.pyx", line 1813, in ray._raylet.execute_task


TuneError: ('Trials did not complete', [train_lightgbm_36c4ea15, train_lightgbm_3e09bda0, train_lightgbm_c1542caf, train_lightgbm_1e2c6265, train_lightgbm_fa218678, train_lightgbm_02597fae, train_lightgbm_aae3cc9b, train_lightgbm_306f018b, train_lightgbm_7348efee, train_lightgbm_6b0447a6])

In [27]:
test_dict = {
    "a" : [1,2,3],
    "b" : [2,4,5],
    "c" : [23,4,5]
}

del test_dict['a']
test_dict

{'b': [2, 4, 5], 'c': [23, 4, 5]}

In [26]:
data = {"a": 1, "b": 2}

data.pop("a", None)

1

In [24]:
train_lightgbm(config={})

start
1.0
0.9701443875174661
0.019240524003817126
end
0.049096136486351


0.049096136486351

In [3]:
import numpy as np
import lightgbm as lgb
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import cross_val_score, cross_validate
from ray import tune
from ray.tune.schedulers import HyperBandForBOHB
from functools import partial

data = load_breast_cancer()
X, y = data.data, data.target

model = lgb.LGBMClassifier()

cv_scores = cross_validate(
    model, X, y, cv=5, scoring="accuracy", n_jobs=-1, return_train_score=True
)

In [4]:
cv_scores

{'fit_time': array([0.12932324, 0.13533711, 0.1248138 , 0.1248138 , 0.13232923]),
 'score_time': array([0.00201416, 0.00200152, 0.00300169, 0.00200009, 0.00300789]),
 'test_score': array([0.93859649, 0.96491228, 0.98245614, 0.98245614, 0.98230088]),
 'train_score': array([1., 1., 1., 1., 1.])}

In [6]:
cv_scores['train_score']

array([1., 1., 1., 1., 1.])

In [None]:
np.abs(cv_scores['train_score'] - cv_scores['test_score'])

In [5]:
cv_scores['test_score']

array([0.93859649, 0.96491228, 0.98245614, 0.98245614, 0.98230088])

In [9]:
np.mean(np.hstack((cv_scores["train_score"], cv_scores["test_score"])))

0.985072193758733

Get baseline score both for simple scoring metric and for the loss that we are going to create and try to beat that.

Compare multiple loss functions and check accuracy but as well as stability and debugging etc..

Compare and explain SOTA hyperparameter tuning algorithms and cite sources. Neptune AI explains why but we can cite their sources

For model debugging : Morris sensitivity analysis + Partial dependence plots + Permutation Importance + SHAP + Feature importance + DICE

Partial dependence plots to debug certain features and also to gain insights. We can also plot 2 features to extract insights and also add distributions or histograms or something to show the datapoins and their distribution. Although PDPs have disadvantages, check this article out https://christophm.github.io/interpretable-ml-book/pdp.html

Protodash for explainability

Stacking with H20, article etc etc.

Using drift score and prediction drift stuff when training and test splitting to determine best split of data or something like that

https://docs.deepchecks.com/stable/tabular/auto_checks/model_evaluation/plot_weak_segments_performance.html#tabular-weak-segments-performance

Deepchecks with Weak Segments Performance check

 - PiML for model stability with Robustness with feature permutation and metrics change https://selfexplainml.github.io/PiML-Toolbox/_build/html/guides/testing/robustness.html
 - PiML for reliability https://selfexplainml.github.io/PiML-Toolbox/_build/html/guides/testing/reliability.html
 - PiML for resilience

Brier Score is used to calculate calibration. There might vbe a better alternative