<a href="https://colab.research.google.com/github/ZEROYON04/myWorks/blob/main/Untitled0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install optuna
!pip install tabpfn




In [2]:
!git clone https://github.com/PriorLabs/tabpfn-community
!pip install -e tabpfn-community[post_hoc_ensembles,interpretability,hpo]

fatal: destination path 'tabpfn-community' already exists and is not an empty directory.
Obtaining file:///content/tabpfn-community
  Installing build dependencies ... [?25l[?25hdone
  Checking if build backend supports build_editable ... [?25l[?25hdone
  Getting requirements to build editable ... [?25l[?25hdone
  Preparing editable metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: tabpfn-community
  Building editable for tabpfn-community (pyproject.toml) ... [?25l[?25hdone
  Created wheel for tabpfn-community: filename=tabpfn_community-0.0.4-0.editable-py3-none-any.whl size=11203 sha256=9fd67e56d72a3a471891a1282ba8973d4de2dfd15458f4e39de8598921088dbe
  Stored in directory: /tmp/pip-ephem-wheel-cache-5jqnplnq/wheels/4e/f8/2f/351b619acaed74cddeeec6d18bfba157b9630ff1a92ff10806
Successfully built tabpfn-community
Installing collected packages: tabpfn-community
  Attempting uninstall: tabpfn-community
    Found existing installation: tabpfn-comm

In [7]:
from pyexpat import model
import optuna
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from lightgbm import LGBMClassifier, LGBMRegressor
from sklearn.metrics import f1_score, mean_absolute_error
from tabpfn import TabPFNClassifier, TabPFNRegressor
from tabpfn_extensions.post_hoc_ensembles.sklearn_interface import AutoTabPFNClassifier

device = "cuda" if torch.cuda.is_available() else "cpu"


In [14]:

class ParamPredictor_Classifier:
    def __init__(
        self,
        X,
        y,
        test_size,
        n_trials,
        random_state=4,
    ):
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=random_state
        )
        model_mapping = {
            "TabPFN": TabPFNClassifier,
            "RandomForest": RandomForestClassifier,
            "LightGBM": LGBMClassifier,
        }
        best_f1 = 0
        for model_name in model_mapping.keys():

            def objective(trial):

                if model_name == "RandomForest":
                    # RandomForestのハイパーパラメータ
                    n_estimators = trial.suggest_int("n_estimators", 200, 800)
                    max_depth = trial.suggest_int("max_depth", 150, 500)
                    max_featrues = trial.suggest_categorical(
                        "max_features", ["sqrt", "log2"]
                    )
                    min_samples_split = trial.suggest_int("min_samples_split", 2, 10)
                    min_samples_leaf = trial.suggest_int("min_samples_leaf", 1, 10)
                    criterion = trial.suggest_categorical(
                        "criterion", ["gini", "entropy","log_loss"]
                    )

                    clf = RandomForestClassifier(
                        n_estimators=n_estimators,
                        max_depth=max_depth,
                        max_features=max_featrues,
                        min_samples_split=min_samples_split,
                        min_samples_leaf=min_samples_leaf,
                        criterion=criterion,
                        random_state=random_state,
                    )

                elif model_name == "LightGBM":

                    # LightGBMのハイパーパラメータ
                    boosting_type = trial.suggest_categorical(
                        "boosting_type", ["gbdt", "dart"]
                    )
                    reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-9, 1.0)
                    reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-9, 1.0)
                    num_leaves = trial.suggest_int("num_leaves", 2, 256)
                    colsample_bytree = trial.suggest_loguniform(
                        "colsample_bytree", 0.4, 1.0
                    )
                    subsample = trial.suggest_loguniform("subsample", 0.4, 1.0)
                    subsample_freq = trial.suggest_int("subsample_freq", 1, 10)
                    min_child_samples = trial.suggest_int("min_child_samples", 5, 100)
                    max_depth = trial.suggest_int("max_depth", 3, 20)
                    learning_rate = trial.suggest_loguniform("learning_rate", 1e-8, 1.0)
                    n_estimators = trial.suggest_int("n_estimators", 10, 1000)

                    clf = LGBMClassifier(
                        boosting_type=boosting_type,
                        reg_alpha=reg_alpha,
                        reg_lambda=reg_lambda,
                        num_leaves=num_leaves,
                        colsample_bytree=colsample_bytree,
                        subsample=subsample,
                        subsample_freq=subsample_freq,
                        min_child_samples=min_child_samples,
                        max_depth=max_depth,
                        learning_rate=learning_rate,
                        n_estimators=n_estimators,
                        random_state=random_state,
                        n_jobs=-1,
                    )

                elif model_name == "TabPFN":
                    clf = TabPFNClassifier()
                elif model_name == "AutoTabPFN":
                    clf = AutoTabPFNClassifier(max_time=120)


                else:
                    raise ValueError("Invalid classifier name")

                clf.fit(X_train, y_train)
                y_pred = clf.predict(X_test)
                f1 = f1_score(y_test, y_pred, average="micro")
                return f1

            # Studyの作成と最適化の実行
            study = optuna.create_study(direction="maximize")
            study.optimize(objective, n_trials=(n_trials if model_name != "TabPFN" else 1))

            best_params = study.best_trial.params
            if best_f1 < study.best_trial.value:
                self.study = study
                self.model_name = model_name

        if model_name in model_mapping:
            self.model = model_mapping[model_name](**best_params)
            self.model.fit(X_train, y_train)
        else:
            raise ValueError(f"Invalid classifier name {model_name}")

        # 可視化 (必要に応じて)
        optuna.visualization.plot_optimization_history(self.study).show()
        optuna.visualization.plot_param_importances(self.study).show()

    def predict(self, X):
        return self.model.predict(X)


class ParamPredictor_Regressor:
    def __init__(self, X, y, test_size, n_trials, random_state=4):
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=random_state
        )
        model_mapping = {
            "RandomForest": RandomForestRegressor,
            "LightGBM": LGBMRegressor,

        }
        best_mae = 1000
        for model_name in model_mapping.keys():

            def objective(trial):

                if model_name == "RandomForest":
                    # RandomForestのハイパーパラメータ
                    n_estimators = trial.suggest_int("n_estimators", 50, 400)
                    max_depth = trial.suggest_int("max_depth", 3, 300)
                    min_samples_split = trial.suggest_int("min_samples_split", 2, 10)
                    min_samples_leaf = trial.suggest_int("min_samples_leaf", 1, 10)
                    max_featrues = trial.suggest_categorical(
                        "max_features", ["sqrt", "log2"]
                    )
                    criterion = trial.suggest_categorical(
                        "criterion", ["squared_error", "absolute_error", "poisson","friedman_mse"]
                    )

                    clf = RandomForestRegressor(
                        n_estimators=n_estimators,
                        max_depth=max_depth,
                        min_samples_split=min_samples_split,
                        min_samples_leaf=min_samples_leaf,
                        max_features=max_featrues,
                        criterion=criterion,
                        random_state=random_state,
                        n_jobs=-1,
                    )

                elif model_name == "LightGBM":

                    # LightGBMのハイパーパラメータ
                    boosting_type = trial.suggest_categorical(
                        "boosting_type", ["gbdt", "dart"]
                    )
                    reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-9, 1.0)
                    reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-9, 1.0)
                    num_leaves = trial.suggest_int("num_leaves", 2, 256)
                    colsample_bytree = trial.suggest_loguniform(
                        "colsample_bytree", 0.4, 1.0
                    )
                    subsample = trial.suggest_loguniform("subsample", 0.4, 1.0)
                    subsample_freq = trial.suggest_int("subsample_freq", 1, 10)
                    min_child_samples = trial.suggest_int("min_child_samples", 5, 100)
                    max_depth = trial.suggest_int("max_depth", 3, 20)
                    learning_rate = trial.suggest_loguniform("learning_rate", 1e-8, 1.0)
                    n_estimators = trial.suggest_int("n_estimators", 10, 1000)

                    clf = LGBMRegressor(
                        boosting_type=boosting_type,
                        reg_alpha=reg_alpha,
                        reg_lambda=reg_lambda,
                        num_leaves=num_leaves,
                        colsample_bytree=colsample_bytree,
                        subsample=subsample,
                        subsample_freq=subsample_freq,
                        min_child_samples=min_child_samples,
                        max_depth=max_depth,
                        learning_rate=learning_rate,
                        n_estimators=n_estimators,
                        random_state=random_state,
                        n_jobs=-1,
                    )
                elif model_name == "TabPFN":
                    clf = TabPFNRegressor()

                else:
                    raise ValueError("Invalid classifier name")

                clf.fit(X_train, y_train)
                y_pred = clf.predict(X_test)
                mae = mean_absolute_error(y_test, y_pred,)
                return mae

            # Studyの作成と最適化の実行
            study = optuna.create_study(direction="minimize")
            study.optimize(objective, n_trials=n_trials)

            best_params = study.best_trial.params
            if best_mae > study.best_trial.value:
                self.study = study
                self.model_name = model_name

        if model_name in model_mapping:
            self.model = model_mapping[model_name](**best_params)
            self.model.fit(X_train, y_train)
        else:
            raise ValueError(f"Invalid classifier name {model_name}")

        # 可視化 (必要に応じて)
        optuna.visualization.plot_optimization_history(self.study).show()
        optuna.visualization.plot_param_importances(self.study).show()

    def predict(self, X):
        return self.model.predict(X)



In [None]:
def main():
    x_file = "./X.npy"
    x_data = np.load(x_file, allow_pickle=True)
    x_data = np.array(x_data)

    y_file = (
        "./Y_thread.npy"
    )
    y_data = np.load(y_file, allow_pickle=True)
    y_data = np.array(y_data)

    is_classifier = True

    x_train, x_test, y_train, y_test = train_test_split(
        x_data, y_data, test_size=0.3, random_state=42
    )

    if is_classifier:
        param_predictor = ParamPredictor_Classifier(
            x_train,
            y_train,
            n_trials=50,
            test_size=0.3,
            random_state=42,
        )
    else:
        param_predictor = ParamPredictor_Regressor(
            x_train,
            y_train,
            n_trials=50,
            test_size=0.3,
            random_state=42,
        )

    # 結果の表示
    print("Best trial:")
    trial = param_predictor.study.best_trial
    print(f"  Model: {param_predictor.model_name}")
    print(f"  Value: {trial.value}")

    print("  Params: ")
    for key, value in trial.params.items():
        print(f"    {key}: {value}")

    x_pred = param_predictor.predict(x_test)
    if is_classifier:
        print("F1: ", f1_score(y_test, x_pred, average="macro"))
    else:
        print("MAE: ", mean_absolute_error(y_test, x_pred))


if __name__ == "__main__":
    main()


[I 2025-02-06 22:18:08,441] A new study created in memory with name: no-name-258149ba-e526-444c-9c6b-effaf7bbdc5c
[I 2025-02-06 22:18:11,850] Trial 0 finished with value: 0.544973544973545 and parameters: {}. Best is trial 0 with value: 0.544973544973545.
[I 2025-02-06 22:18:11,853] A new study created in memory with name: no-name-5923939d-7a96-4059-85f7-0d09aed1d221
[I 2025-02-06 22:18:19,123] Trial 0 finished with value: 0.5105820105820106 and parameters: {'n_estimators': 516, 'max_depth': 159, 'max_features': 'sqrt', 'min_samples_split': 8, 'min_samples_leaf': 6, 'criterion': 'entropy'}. Best is trial 0 with value: 0.5105820105820106.
[I 2025-02-06 22:18:22,398] Trial 1 finished with value: 0.5072751322751323 and parameters: {'n_estimators': 359, 'max_depth': 263, 'max_features': 'log2', 'min_samples_split': 4, 'min_samples_leaf': 9, 'criterion': 'log_loss'}. Best is trial 0 with value: 0.5105820105820106.
[I 2025-02-06 22:18:32,963] Trial 2 finished with value: 0.5132275132275133 a

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000914 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3737
[LightGBM] [Info] Number of data points in the train set: 3528, number of used features: 23
[LightGBM] [Info] Start training from score -1.102019
[LightGBM] [Info] Start training from score -1.075913
[LightGBM] [Info] Start training from score -1.118364


[I 2025-02-06 22:25:17,681] Trial 0 finished with value: 0.49074074074074076 and parameters: {'boosting_type': 'gbdt', 'reg_alpha': 0.7659118347711323, 'reg_lambda': 0.1458230505948919, 'num_leaves': 189, 'colsample_bytree': 0.7178078728980054, 'subsample': 0.5923082311132516, 'subsample_freq': 6, 'min_child_samples': 16, 'max_depth': 10, 'learning_rate': 0.6142597598222344, 'n_estimators': 856}. Best is trial 0 with value: 0.49074074074074076.
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-9, 1.0)
  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-9, 1.0)
  colsample_bytree = trial.suggest_loguniform(
  subsample = trial.suggest_loguniform("subsample", 0.4, 1.0)
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-8, 1.0)


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000851 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3732
[LightGBM] [Info] Number of data points in the train set: 3528, number of used features: 21
[LightGBM] [Info] Start training from score -1.102019
[LightGBM] [Info] Start training from score -1.075913
[LightGBM] [Info] Start training from score -1.118364


[I 2025-02-06 22:25:23,099] Trial 1 finished with value: 0.4973544973544973 and parameters: {'boosting_type': 'dart', 'reg_alpha': 3.9748687776459264e-08, 'reg_lambda': 0.7512349358542776, 'num_leaves': 198, 'colsample_bytree': 0.6106182450453663, 'subsample': 0.41005883093105405, 'subsample_freq': 10, 'min_child_samples': 85, 'max_depth': 16, 'learning_rate': 0.6724801193820071, 'n_estimators': 404}. Best is trial 1 with value: 0.4973544973544973.
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-9, 1.0)
  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-9, 1.0)
  colsample_bytree = trial.suggest_loguniform(
  subsample = trial.suggest_loguniform("subsample", 0.4, 1.0)
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-8, 1.0)


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000937 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3734
[LightGBM] [Info] Number of data points in the train set: 3528, number of used features: 22
[LightGBM] [Info] Start training from score -1.102019
[LightGBM] [Info] Start training from score -1.075913
[LightGBM] [Info] Start training from score -1.118364


[I 2025-02-06 22:25:24,388] Trial 2 finished with value: 0.3082010582010582 and parameters: {'boosting_type': 'gbdt', 'reg_alpha': 3.6438060083991495e-07, 'reg_lambda': 9.638529269205977e-06, 'num_leaves': 235, 'colsample_bytree': 0.8085232780891898, 'subsample': 0.7789232077102847, 'subsample_freq': 9, 'min_child_samples': 54, 'max_depth': 12, 'learning_rate': 1.1362744627421886e-05, 'n_estimators': 167}. Best is trial 1 with value: 0.4973544973544973.
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-9, 1.0)
  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-9, 1.0)
  colsample_bytree = trial.suggest_loguniform(
  subsample = trial.suggest_loguniform("subsample", 0.4, 1.0)
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-8, 1.0)


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000922 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3737
[LightGBM] [Info] Number of data points in the train set: 3528, number of used features: 23
[LightGBM] [Info] Start training from score -1.102019
[LightGBM] [Info] Start training from score -1.075913
[LightGBM] [Info] Start training from score -1.118364


[I 2025-02-06 22:25:25,281] Trial 3 finished with value: 0.3082010582010582 and parameters: {'boosting_type': 'dart', 'reg_alpha': 0.009343701708825234, 'reg_lambda': 0.011021917743600508, 'num_leaves': 147, 'colsample_bytree': 0.919860197069008, 'subsample': 0.4092303084725091, 'subsample_freq': 4, 'min_child_samples': 12, 'max_depth': 11, 'learning_rate': 7.82026465633499e-08, 'n_estimators': 59}. Best is trial 1 with value: 0.4973544973544973.
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-9, 1.0)
  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-9, 1.0)
  colsample_bytree = trial.suggest_loguniform(
  subsample = trial.suggest_loguniform("subsample", 0.4, 1.0)
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-8, 1.0)


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000811 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3734
[LightGBM] [Info] Number of data points in the train set: 3528, number of used features: 22
[LightGBM] [Info] Start training from score -1.102019
[LightGBM] [Info] Start training from score -1.075913
[LightGBM] [Info] Start training from score -1.118364


[I 2025-02-06 22:25:32,983] Trial 4 finished with value: 0.43783068783068785 and parameters: {'boosting_type': 'gbdt', 'reg_alpha': 6.072224175200672e-05, 'reg_lambda': 0.003222081071254861, 'num_leaves': 208, 'colsample_bytree': 0.4040531239379014, 'subsample': 0.6390694476012969, 'subsample_freq': 3, 'min_child_samples': 58, 'max_depth': 20, 'learning_rate': 9.687985585466605e-05, 'n_estimators': 987}. Best is trial 1 with value: 0.4973544973544973.
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-9, 1.0)
  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-9, 1.0)
  colsample_bytree = trial.suggest_loguniform(
  subsample = trial.suggest_loguniform("subsample", 0.4, 1.0)
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-8, 1.0)


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000873 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3734
[LightGBM] [Info] Number of data points in the train set: 3528, number of used features: 22
[LightGBM] [Info] Start training from score -1.102019
[LightGBM] [Info] Start training from score -1.075913
[LightGBM] [Info] Start training from score -1.118364


[I 2025-02-06 22:25:33,267] Trial 5 finished with value: 0.3082010582010582 and parameters: {'boosting_type': 'dart', 'reg_alpha': 0.00022311559314698004, 'reg_lambda': 1.1647976767080318e-05, 'num_leaves': 214, 'colsample_bytree': 0.673657082631799, 'subsample': 0.5965266980220781, 'subsample_freq': 5, 'min_child_samples': 50, 'max_depth': 19, 'learning_rate': 0.0002569712716045798, 'n_estimators': 30}. Best is trial 1 with value: 0.4973544973544973.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000874 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3737
[LightGBM] [Info] Number of data points in the train set: 3528, number of used features: 23
[LightGBM] [Info] Start training from score -1.102019
[LightGBM] [Info] Start training from score -1.075913
[LightGBM] [Info] Start training from score -1.118364


  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-9, 1.0)
  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-9, 1.0)
  colsample_bytree = trial.suggest_loguniform(
  subsample = trial.suggest_loguniform("subsample", 0.4, 1.0)
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-8, 1.0)
[I 2025-02-06 22:25:35,917] Trial 6 finished with value: 0.5231481481481481 and parameters: {'boosting_type': 'gbdt', 'reg_alpha': 0.011536686446326173, 'reg_lambda': 7.534410799166829e-05, 'num_leaves': 27, 'colsample_bytree': 0.4987046841051214, 'subsample': 0.4095172970809031, 'subsample_freq': 1, 'min_child_samples': 10, 'max_depth': 18, 'learning_rate': 0.013333559139572798, 'n_estimators': 584}. Best is trial 6 with value: 0.5231481481481481.
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-9, 1.0)
  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-9, 1.0)
  colsample_bytree = trial.suggest_loguniform(
  subsample = trial.suggest_loguniform("subsample", 0.4, 1.0)
  

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000950 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3734
[LightGBM] [Info] Number of data points in the train set: 3528, number of used features: 22
[LightGBM] [Info] Start training from score -1.102019
[LightGBM] [Info] Start training from score -1.075913
[LightGBM] [Info] Start training from score -1.118364


[I 2025-02-06 22:25:38,820] Trial 7 finished with value: 0.3941798941798942 and parameters: {'boosting_type': 'gbdt', 'reg_alpha': 4.9248728022371526e-06, 'reg_lambda': 0.0002416576350378141, 'num_leaves': 221, 'colsample_bytree': 0.6171306421238308, 'subsample': 0.47270554718617286, 'subsample_freq': 10, 'min_child_samples': 41, 'max_depth': 18, 'learning_rate': 9.103269951082703e-05, 'n_estimators': 458}. Best is trial 6 with value: 0.5231481481481481.
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-9, 1.0)
  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-9, 1.0)
  colsample_bytree = trial.suggest_loguniform(
  subsample = trial.suggest_loguniform("subsample", 0.4, 1.0)
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-8, 1.0)


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000910 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3734
[LightGBM] [Info] Number of data points in the train set: 3528, number of used features: 22
[LightGBM] [Info] Start training from score -1.102019
[LightGBM] [Info] Start training from score -1.075913
[LightGBM] [Info] Start training from score -1.118364


[I 2025-02-06 22:25:39,613] Trial 8 finished with value: 0.3082010582010582 and parameters: {'boosting_type': 'gbdt', 'reg_alpha': 1.8260710802061326e-05, 'reg_lambda': 8.071169311091562e-09, 'num_leaves': 5, 'colsample_bytree': 0.8310460045758788, 'subsample': 0.6480140566020962, 'subsample_freq': 5, 'min_child_samples': 33, 'max_depth': 18, 'learning_rate': 6.312786660469033e-06, 'n_estimators': 411}. Best is trial 6 with value: 0.5231481481481481.
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-9, 1.0)
  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-9, 1.0)
  colsample_bytree = trial.suggest_loguniform(
  subsample = trial.suggest_loguniform("subsample", 0.4, 1.0)
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-8, 1.0)


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000802 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3734
[LightGBM] [Info] Number of data points in the train set: 3528, number of used features: 22
[LightGBM] [Info] Start training from score -1.102019
[LightGBM] [Info] Start training from score -1.075913
[LightGBM] [Info] Start training from score -1.118364


[I 2025-02-06 22:25:47,101] Trial 9 finished with value: 0.4351851851851852 and parameters: {'boosting_type': 'gbdt', 'reg_alpha': 6.880944220388367e-05, 'reg_lambda': 0.002653997271792803, 'num_leaves': 195, 'colsample_bytree': 0.4363433651357774, 'subsample': 0.7337241490934471, 'subsample_freq': 8, 'min_child_samples': 52, 'max_depth': 10, 'learning_rate': 9.366304769059442e-05, 'n_estimators': 814}. Best is trial 6 with value: 0.5231481481481481.
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-9, 1.0)
  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-9, 1.0)
  colsample_bytree = trial.suggest_loguniform(
  subsample = trial.suggest_loguniform("subsample", 0.4, 1.0)
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-8, 1.0)


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000834 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3732
[LightGBM] [Info] Number of data points in the train set: 3528, number of used features: 21
[LightGBM] [Info] Start training from score -1.102019
[LightGBM] [Info] Start training from score -1.075913
[LightGBM] [Info] Start training from score -1.118364


[I 2025-02-06 22:25:56,863] Trial 10 finished with value: 0.4675925925925926 and parameters: {'boosting_type': 'dart', 'reg_alpha': 0.028991033119812302, 'reg_lambda': 2.6281935090182912e-09, 'num_leaves': 26, 'colsample_bytree': 0.5120100360475703, 'subsample': 0.9571398137214778, 'subsample_freq': 1, 'min_child_samples': 100, 'max_depth': 3, 'learning_rate': 0.012597711099135368, 'n_estimators': 657}. Best is trial 6 with value: 0.5231481481481481.
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-9, 1.0)
  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-9, 1.0)
  colsample_bytree = trial.suggest_loguniform(
  subsample = trial.suggest_loguniform("subsample", 0.4, 1.0)
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-8, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000387 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3732
[LightGBM] [Info] Number of data points in the train set: 3528, number of used features: 21
[LightGBM] [Info] Start training from score -1.102019
[LightGBM] [Info] Start training from score -1.075913
[LightGBM] [Info] Start training from score -1.118364


[I 2025-02-06 22:26:00,025] Trial 11 finished with value: 0.47883597883597884 and parameters: {'boosting_type': 'dart', 'reg_alpha': 2.6335545005486454e-09, 'reg_lambda': 8.076456931313516e-07, 'num_leaves': 71, 'colsample_bytree': 0.5483334185222012, 'subsample': 0.415850715430054, 'subsample_freq': 1, 'min_child_samples': 95, 'max_depth': 14, 'learning_rate': 0.8298659319131758, 'n_estimators': 290}. Best is trial 6 with value: 0.5231481481481481.
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-9, 1.0)
  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-9, 1.0)
  colsample_bytree = trial.suggest_loguniform(
  subsample = trial.suggest_loguniform("subsample", 0.4, 1.0)
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-8, 1.0)


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000840 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3734
[LightGBM] [Info] Number of data points in the train set: 3528, number of used features: 22
[LightGBM] [Info] Start training from score -1.102019
[LightGBM] [Info] Start training from score -1.075913
[LightGBM] [Info] Start training from score -1.118364


[I 2025-02-06 22:26:14,207] Trial 12 finished with value: 0.49867724867724866 and parameters: {'boosting_type': 'dart', 'reg_alpha': 1.7699829012078039e-09, 'reg_lambda': 0.7355057476546504, 'num_leaves': 105, 'colsample_bytree': 0.49959923735323614, 'subsample': 0.4959831081965208, 'subsample_freq': 7, 'min_child_samples': 76, 'max_depth': 16, 'learning_rate': 0.01517472796707371, 'n_estimators': 546}. Best is trial 6 with value: 0.5231481481481481.
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-9, 1.0)
  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-9, 1.0)
  colsample_bytree = trial.suggest_loguniform(
  subsample = trial.suggest_loguniform("subsample", 0.4, 1.0)
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-8, 1.0)


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000959 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3734
[LightGBM] [Info] Number of data points in the train set: 3528, number of used features: 22
[LightGBM] [Info] Start training from score -1.102019
[LightGBM] [Info] Start training from score -1.075913
[LightGBM] [Info] Start training from score -1.118364


[I 2025-02-06 22:26:29,019] Trial 13 finished with value: 0.5033068783068783 and parameters: {'boosting_type': 'dart', 'reg_alpha': 0.0035236298746479672, 'reg_lambda': 1.411396546536545e-06, 'num_leaves': 94, 'colsample_bytree': 0.4737226129843456, 'subsample': 0.5180045724967324, 'subsample_freq': 7, 'min_child_samples': 73, 'max_depth': 15, 'learning_rate': 0.008222680232318551, 'n_estimators': 614}. Best is trial 6 with value: 0.5231481481481481.
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-9, 1.0)
  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-9, 1.0)
  colsample_bytree = trial.suggest_loguniform(
  subsample = trial.suggest_loguniform("subsample", 0.4, 1.0)
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-8, 1.0)


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000935 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3734
[LightGBM] [Info] Number of data points in the train set: 3528, number of used features: 22
[LightGBM] [Info] Start training from score -1.102019
[LightGBM] [Info] Start training from score -1.075913
[LightGBM] [Info] Start training from score -1.118364


[I 2025-02-06 22:26:32,492] Trial 14 finished with value: 0.49603174603174605 and parameters: {'boosting_type': 'gbdt', 'reg_alpha': 0.004375342057755451, 'reg_lambda': 7.784663902485193e-08, 'num_leaves': 57, 'colsample_bytree': 0.4642517332100551, 'subsample': 0.4990851500710868, 'subsample_freq': 3, 'min_child_samples': 70, 'max_depth': 15, 'learning_rate': 0.004984625565862234, 'n_estimators': 618}. Best is trial 6 with value: 0.5231481481481481.
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-9, 1.0)
  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-9, 1.0)
  colsample_bytree = trial.suggest_loguniform(
  subsample = trial.suggest_loguniform("subsample", 0.4, 1.0)
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-8, 1.0)


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000903 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3734
[LightGBM] [Info] Number of data points in the train set: 3528, number of used features: 22
[LightGBM] [Info] Start training from score -1.102019
[LightGBM] [Info] Start training from score -1.075913
[LightGBM] [Info] Start training from score -1.118364


[I 2025-02-06 22:26:55,288] Trial 15 finished with value: 0.5191798941798942 and parameters: {'boosting_type': 'dart', 'reg_alpha': 0.5258048378739865, 'reg_lambda': 3.85214820163176e-07, 'num_leaves': 126, 'colsample_bytree': 0.5448628961897902, 'subsample': 0.526478591504251, 'subsample_freq': 7, 'min_child_samples': 27, 'max_depth': 7, 'learning_rate': 0.04757766165980778, 'n_estimators': 726}. Best is trial 6 with value: 0.5231481481481481.
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-9, 1.0)
  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-9, 1.0)
  colsample_bytree = trial.suggest_loguniform(
  subsample = trial.suggest_loguniform("subsample", 0.4, 1.0)
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-8, 1.0)


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000929 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3737
[LightGBM] [Info] Number of data points in the train set: 3528, number of used features: 23
[LightGBM] [Info] Start training from score -1.102019
[LightGBM] [Info] Start training from score -1.075913
[LightGBM] [Info] Start training from score -1.118364


[I 2025-02-06 22:27:16,386] Trial 16 finished with value: 0.5185185185185185 and parameters: {'boosting_type': 'dart', 'reg_alpha': 0.2819692706558899, 'reg_lambda': 0.00018333852903196655, 'num_leaves': 154, 'colsample_bytree': 0.5508862981472956, 'subsample': 0.4526304046904091, 'subsample_freq': 2, 'min_child_samples': 23, 'max_depth': 6, 'learning_rate': 0.12915963815333775, 'n_estimators': 741}. Best is trial 6 with value: 0.5231481481481481.
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-9, 1.0)
  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-9, 1.0)
  colsample_bytree = trial.suggest_loguniform(
  subsample = trial.suggest_loguniform("subsample", 0.4, 1.0)
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-8, 1.0)


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000922 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3737
[LightGBM] [Info] Number of data points in the train set: 3528, number of used features: 23
[LightGBM] [Info] Start training from score -1.102019
[LightGBM] [Info] Start training from score -1.075913
[LightGBM] [Info] Start training from score -1.118364


[I 2025-02-06 22:27:23,588] Trial 17 finished with value: 0.49272486772486773 and parameters: {'boosting_type': 'gbdt', 'reg_alpha': 0.06579897054050253, 'reg_lambda': 9.428084539067791e-08, 'num_leaves': 45, 'colsample_bytree': 0.563267819453352, 'subsample': 0.5466783481500712, 'subsample_freq': 7, 'min_child_samples': 8, 'max_depth': 7, 'learning_rate': 0.0008862741066458629, 'n_estimators': 991}. Best is trial 6 with value: 0.5231481481481481.
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-9, 1.0)
  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-9, 1.0)
  colsample_bytree = trial.suggest_loguniform(
  subsample = trial.suggest_loguniform("subsample", 0.4, 1.0)
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-8, 1.0)


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000928 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3737
[LightGBM] [Info] Number of data points in the train set: 3528, number of used features: 23
[LightGBM] [Info] Start training from score -1.102019
[LightGBM] [Info] Start training from score -1.075913
[LightGBM] [Info] Start training from score -1.118364


[I 2025-02-06 22:27:48,032] Trial 18 finished with value: 0.5158730158730159 and parameters: {'boosting_type': 'dart', 'reg_alpha': 0.0005175492125581612, 'reg_lambda': 0.00011910569815347852, 'num_leaves': 119, 'colsample_bytree': 0.7083562574611044, 'subsample': 0.46158903877450824, 'subsample_freq': 4, 'min_child_samples': 25, 'max_depth': 7, 'learning_rate': 0.0723531584252656, 'n_estimators': 758}. Best is trial 6 with value: 0.5231481481481481.
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-9, 1.0)
  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-9, 1.0)
  colsample_bytree = trial.suggest_loguniform(
  subsample = trial.suggest_loguniform("subsample", 0.4, 1.0)
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-8, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000340 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3734
[LightGBM] [Info] Number of data points in the train set: 3528, number of used features: 22
[LightGBM] [Info] Start training from score -1.102019
[LightGBM] [Info] Start training from score -1.075913
[LightGBM] [Info] Start training from score -1.118364


[I 2025-02-06 22:27:48,981] Trial 19 finished with value: 0.44312169312169314 and parameters: {'boosting_type': 'gbdt', 'reg_alpha': 0.09557301324888008, 'reg_lambda': 8.551133880483123e-07, 'num_leaves': 254, 'colsample_bytree': 0.40184733500825115, 'subsample': 0.5564962326324442, 'subsample_freq': 6, 'min_child_samples': 35, 'max_depth': 3, 'learning_rate': 0.0013392218707051742, 'n_estimators': 266}. Best is trial 6 with value: 0.5231481481481481.
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-9, 1.0)
  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-9, 1.0)
  colsample_bytree = trial.suggest_loguniform(
  subsample = trial.suggest_loguniform("subsample", 0.4, 1.0)
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-8, 1.0)


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000953 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3737
[LightGBM] [Info] Number of data points in the train set: 3528, number of used features: 23
[LightGBM] [Info] Start training from score -1.102019
[LightGBM] [Info] Start training from score -1.075913
[LightGBM] [Info] Start training from score -1.118364


[I 2025-02-06 22:27:58,012] Trial 20 finished with value: 0.5079365079365079 and parameters: {'boosting_type': 'gbdt', 'reg_alpha': 0.0011992772901747236, 'reg_lambda': 6.332368202071655e-08, 'num_leaves': 166, 'colsample_bytree': 0.588701522817942, 'subsample': 0.7118535659275811, 'subsample_freq': 8, 'min_child_samples': 6, 'max_depth': 9, 'learning_rate': 0.056899916762053124, 'n_estimators': 532}. Best is trial 6 with value: 0.5231481481481481.
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-9, 1.0)
  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-9, 1.0)
  colsample_bytree = trial.suggest_loguniform(
  subsample = trial.suggest_loguniform("subsample", 0.4, 1.0)
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-8, 1.0)


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000923 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3737
[LightGBM] [Info] Number of data points in the train set: 3528, number of used features: 23
[LightGBM] [Info] Start training from score -1.102019
[LightGBM] [Info] Start training from score -1.075913
[LightGBM] [Info] Start training from score -1.118364


[I 2025-02-06 22:28:16,691] Trial 21 finished with value: 0.5105820105820106 and parameters: {'boosting_type': 'dart', 'reg_alpha': 0.33586383026723327, 'reg_lambda': 0.0001690009590957551, 'num_leaves': 141, 'colsample_bytree': 0.5344100867861828, 'subsample': 0.45125785962431103, 'subsample_freq': 2, 'min_child_samples': 21, 'max_depth': 6, 'learning_rate': 0.13162627749785694, 'n_estimators': 715}. Best is trial 6 with value: 0.5231481481481481.
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-9, 1.0)
  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-9, 1.0)
  colsample_bytree = trial.suggest_loguniform(
  subsample = trial.suggest_loguniform("subsample", 0.4, 1.0)
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-8, 1.0)


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000821 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3734
[LightGBM] [Info] Number of data points in the train set: 3528, number of used features: 22
[LightGBM] [Info] Start training from score -1.102019
[LightGBM] [Info] Start training from score -1.075913
[LightGBM] [Info] Start training from score -1.118364


[I 2025-02-06 22:28:35,738] Trial 22 finished with value: 0.5105820105820106 and parameters: {'boosting_type': 'dart', 'reg_alpha': 0.7427658279597547, 'reg_lambda': 3.3006945761731234e-05, 'num_leaves': 88, 'colsample_bytree': 0.481428517955798, 'subsample': 0.4423234252710256, 'subsample_freq': 2, 'min_child_samples': 28, 'max_depth': 4, 'learning_rate': 0.11336802107679542, 'n_estimators': 870}. Best is trial 6 with value: 0.5231481481481481.
  reg_alpha = trial.suggest_loguniform("reg_alpha", 1e-9, 1.0)
  reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-9, 1.0)
  colsample_bytree = trial.suggest_loguniform(
  subsample = trial.suggest_loguniform("subsample", 0.4, 1.0)
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-8, 1.0)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000415 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3737
[LightGBM] [Info] Number of data points in the train set: 3528, number of used features: 23
[LightGBM] [Info] Start training from score -1.102019
[LightGBM] [Info] Start training from score -1.075913
[LightGBM] [Info] Start training from score -1.118364


In [None]:
%pwd