In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, KFold

In [None]:
import numpy as np
import lightgbm as lgb
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import cross_val_score, cross_validate
from ray import tune
from ray.tune.schedulers import HyperBandForBOHB
from functools import partial

# Sample dataset used for the tuning demo (swap in your own data here)
data = load_breast_cancer()
X = data.data
y = data.target


# Define the training function
def train_lightgbm(config, loss='std'):
    """Cross-validate a LightGBM classifier for one Tune trial and report its scores.

    The model is built from ``config`` and evaluated with 5-fold
    cross-validation (accuracy) on the module-level ``X`` / ``y``.

    Args:
        config: Keyword arguments forwarded to ``lgb.LGBMClassifier``.
        loss: Selects which statistic is reported under the ``score`` key:
            ``'std'``       - standard deviation of the test-fold accuracies;
            ``'std_train'`` - standard deviation of the train-fold accuracies;
            ``'metric'``    - mean of the test-fold accuracies;
            ``'hybrid'``    - currently the same as ``'metric'``.

    Raises:
        ValueError: If ``loss`` is not one of the names listed above.

    Reports:
        ``mean_accuracy`` and ``std_accuracy`` (mean / std of the test-fold
        accuracies) plus ``score`` (the statistic selected by ``loss``).
    """
    valid_losses = ('std_train', 'std', 'hybrid', 'metric')
    # Fail before the expensive cross-validation if the loss name is wrong.
    if loss not in valid_losses:
        raise ValueError(
            "Unknown loss {!r}; expected one of {}".format(loss, valid_losses)
        )

    model = lgb.LGBMClassifier(**config)

    cv_results = cross_validate(model, X, y, cv=5, scoring="accuracy", n_jobs=-1, return_train_score=True)

    # cross_validate returns a dict of per-fold arrays ('test_score',
    # 'train_score', 'fit_time', ...), so the statistics must be taken from the
    # score arrays; np.mean / np.std on the dict itself raises a TypeError.
    test_scores = cv_results["test_score"]
    train_scores = cv_results["train_score"]

    # Plain floats keep the reported values trivially serialisable.
    mean_score = float(np.mean(test_scores))
    std_score = float(np.std(test_scores))

    if loss == 'std_train':
        score = float(np.std(train_scores))
    elif loss == 'std':
        score = std_score
    else:
        # 'hybrid' and 'metric' both resolve to the mean test accuracy, as in
        # the original branch table.
        # NOTE(review): 'hybrid' has no combined definition yet - confirm intent.
        score = mean_score

    # Report the selected score alongside the mean/std so `loss` has an effect.
    tune.report(mean_accuracy=mean_score, std_accuracy=std_score, score=score)


# Define the search space for hyperparameters
config_space = {
    "num_leaves": tune.choice([20, 30, 40, 50]),
    "learning_rate": tune.loguniform(1e-4, 1e-1),
    "subsample": tune.uniform(0.5, 1.0),
    "colsample_bytree": tune.uniform(0.5, 1.0),
    "reg_alpha": tune.loguniform(1e-4, 1e2),
    "reg_lambda": tune.loguniform(1e-4, 1e2),
}

# The BOHB searcher is `TuneBOHB`; there is no `tune.bohb.BOHB`. It lives in
# `ray.tune.search` on Ray >= 2.0 and in `ray.tune.suggest` on Ray 1.x.
try:
    from ray.tune.search import ConcurrencyLimiter
    from ray.tune.search.bohb import TuneBOHB
except ImportError:
    from ray.tune.suggest import ConcurrencyLimiter
    from ray.tune.suggest.bohb import TuneBOHB

# Accuracy is a higher-is-better metric, so it has to be maximised both while
# tuning and when picking the best configuration afterwards.
TARGET_METRIC = "mean_accuracy"
TARGET_MODE = "max"

# Define the BOHB scheduler. HyperBandForBOHB takes no `grace_period`
# argument (that belongs to the ASHA scheduler), so it is not passed here.
bohb_hyperband = HyperBandForBOHB(
    time_attr="training_iteration", max_t=100, reduction_factor=2
)

# The search space is handed over once, through `tune.run(config=...)`;
# the searcher is therefore created without its own space. Concurrency is
# capped with the ConcurrencyLimiter wrapper.
bohb_search = ConcurrencyLimiter(TuneBOHB(), max_concurrent=4)

# Bind the loss variant for this experiment and actually use the bound objective.
fmin_objective = partial(train_lightgbm, loss='std')

# Set up the experiment configuration
analysis = tune.run(
    fmin_objective,
    config=config_space,
    num_samples=10,  # Number of hyperparameter samples
    metric=TARGET_METRIC,  # Metric to optimise
    mode=TARGET_MODE,
    resources_per_trial={"cpu": 1},
    search_alg=bohb_search,
    scheduler=bohb_hyperband,
)

# Get the best hyperparameters
best_config = analysis.get_best_config(metric=TARGET_METRIC, mode=TARGET_MODE)
print("Best Hyperparameters:", best_config)

In [3]:
import numpy as np
import lightgbm as lgb
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import cross_val_score, cross_validate
from ray import tune
from ray.tune.schedulers import HyperBandForBOHB
from functools import partial

# Baseline: default LightGBM classifier scored with 5-fold cross-validation
data = load_breast_cancer()
X = data.data
y = data.target

model = lgb.LGBMClassifier()

cv_scores = cross_validate(
    estimator=model,
    X=X,
    y=y,
    scoring="accuracy",
    cv=5,
    return_train_score=True,
    n_jobs=-1,
)

In [4]:
# Show the full cross_validate result: fit/score timings plus per-fold test and train scores
cv_scores

{'fit_time': array([0.12932324, 0.13533711, 0.1248138 , 0.1248138 , 0.13232923]),
 'score_time': array([0.00201416, 0.00200152, 0.00300169, 0.00200009, 0.00300789]),
 'test_score': array([0.93859649, 0.96491228, 0.98245614, 0.98245614, 0.98230088]),
 'train_score': array([1., 1., 1., 1., 1.])}

In [6]:
# Per-fold accuracy on the training splits
cv_scores['train_score']

array([1., 1., 1., 1., 1.])

In [5]:
# Per-fold accuracy on the held-out splits
cv_scores['test_score']

array([0.93859649, 0.96491228, 0.98245614, 0.98245614, 0.98230088])

In [9]:
# Mean accuracy over the train-fold and test-fold scores pooled together
np.concatenate([cv_scores["train_score"], cv_scores["test_score"]]).mean()

0.985072193758733

Get a baseline score for both the simple scoring metric and the custom loss that we are going to create, then try to beat that baseline.

Compare multiple loss functions, checking not only accuracy but also stability, ease of debugging, etc.

Compare and explain state-of-the-art (SOTA) hyperparameter tuning algorithms and cite sources. Neptune AI explains why, but we can cite their original sources.

For model debugging: Morris sensitivity analysis + Partial dependence plots + Permutation importance + SHAP + Feature importance + DiCE

Use partial dependence plots (PDPs) to debug specific features and to gain insights. We can also plot two features together to extract insights, and add distributions or histograms to show the data points and how they are distributed. Note that PDPs have disadvantages; see this article: https://christophm.github.io/interpretable-ml-book/pdp.html