In [1]:
import re
import tempfile

import lightgbm as lgb
import matplotlib.font_manager as fm
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import mlflow
import numpy as np
import pandas as pd
import seaborn as sns
from hyperopt import STATUS_OK, SparkTrials, Trials, fmin, hp, tpe
from hyperopt.pyll.base import scope
from mlflow.models.signature import ModelSignature
from mlflow.types.schema import ColSpec, Schema
from pyspark.sql import SparkSession
from sklearn.compose import ColumnTransformer
from sklearn.metrics import (
    accuracy_score,
    auc,
    confusion_matrix,
    f1_score,
    log_loss,
    precision_score,
    recall_score,
    roc_auc_score,
    roc_curve,
)
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sqlalchemy import create_engine

from JapanHorseRaceAnalytics.models.features_20240120_v1 import Features
from JapanHorseRaceAnalytics.utilities.base import get_data_dir
from JapanHorseRaceAnalytics.utilities.metrics import (
    calculate_binary_classifier_statistics,
)
from JapanHorseRaceAnalytics.utilities.mlflow import get_colspecs
from JapanHorseRaceAnalytics.utilities.structured_logger import logger

pd.set_option("display.max_rows", 100)

# Load data

In [2]:
def read_sql_table(table_name, schema, use_cache=True):
    save_path = get_data_dir() / "sql_tables" / f"{table_name}.snappy.parquet"
    save_path.parent.mkdir(exist_ok=True, parents=True)
    if save_path.exists() and use_cache:
        logger.info(f"Read from parquet {save_path}")
        return pd.read_parquet(save_path)
    logger.info(f"Read from sql {schema}.{table_name}")
    engine = create_engine("postgresql://admin:admin@localhost:5432/jrdb")
    data = pd.read_sql_table(table_name, engine, schema=schema)
    data.to_parquet(save_path, index=False, compression="snappy")
    return data


data = read_sql_table("features_20240120_v1", schema="jrdb_curated").astype(
    Features.get_pandas_dtypes()
)

{"event": "Read from parquet /Users/hankehly/Projects/JapanHorseRaceAnalytics/data/sql_tables/features_20240120_v1.snappy.parquet", "level": "info", "timestamp": "2024-01-31T02:10:07.783982Z", "logger": "__main__"}


# Train/test split

In [3]:
X = data.drop(Features.get_label(), axis="columns")
y = data[Features.get_label()]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
print(f"X_train: {X_train.shape}")
print(f"X_test: {X_test.shape}")
print(f"y_train: {y_train.shape}")
print(f"y_test: {y_test.shape}")

X_train: (886532, 288)
X_test: (221634, 288)
y_train: (886532,)
y_test: (221634,)


# Define objective function

In [4]:
def create_objective_fn(
    X_train: pd.DataFrame,
    y_train: pd.Series,
    X_test: pd.DataFrame,
    y_test: pd.Series,
    df_payout: pd.DataFrame,
    experiment_name: str,
):
    """
    df_payout should align with X_test and y_test and have the following columns:
    * レースキー
    * 馬番
    * 距離
    * 年月日
    * 複勝払戻金
    * 年齢
    * レース条件_グレード
    * 場コード
    * payoff - amount won if betting 100 yen.
    """

    def train(params):
        def profit_loss(row, payoff_column_name, bet_amount=100):
            if row["pred"] and row["actual"]:
                payoff = row[payoff_column_name] * (bet_amount / 100)
                return payoff - bet_amount
            elif row["pred"] and not row["actual"]:
                return -bet_amount
            else:
                return 0

        mlflow.set_experiment(experiment_name=experiment_name)
        with mlflow.start_run():
            mlflow.log_params(params)

            numeric_features = X_train.select_dtypes("number").columns.tolist()
            categorical_features = X_train.select_dtypes("category").columns.tolist()
            preprocessor = ColumnTransformer(
                transformers=[
                    ("num", StandardScaler(), numeric_features),
                    (
                        "cat",
                        OneHotEncoder(handle_unknown="ignore"),
                        categorical_features,
                    ),
                ]
            )
            model = Pipeline(
                steps=[
                    ("preprocessor", preprocessor),
                    ("classifier", lgb.LGBMClassifier(**params)),
                ]
            )

            Xy_train = pd.concat((X_train, y_train), axis=1)
            dataset = mlflow.data.from_pandas(Xy_train, targets=y_train.name)
            mlflow.log_input(dataset, context="train")

            input_schema = Schema(get_colspecs(X_train))
            output_schema = Schema([ColSpec("double", y_train.name)])
            signature = ModelSignature(inputs=input_schema, outputs=output_schema)
            input_example = X_train.iloc[:25]
            model.fit(X_train, y_train)
            mlflow.sklearn.log_model(
                sk_model=model,
                signature=signature,
                input_example=input_example,
                artifact_path="model",
            )

            y_pred_proba = model.predict_proba(X_test)[:, 1]
            y_pred = model.predict(X_test)

            metrics = {
                "loss": log_loss(y_test, y_pred_proba),
                "accuracy": accuracy_score(y_test, y_pred),
                "precision": precision_score(y_test, y_pred),
                "recall": recall_score(y_test, y_pred),
                "f1": f1_score(y_test, y_pred),
                "roc_auc": roc_auc_score(y_test, y_pred),
            }
            mlflow.log_metrics(metrics)


            # Calculate payoff rates by group
            results = pd.concat([
                df_payout,
                pd.DataFrame(np.c_[y_test, y_pred, y_pred_proba], columns=["actual", "pred", "pred_proba_true"]),
            ], axis=1)
            payoff_all = calculate_binary_classifier_statistics(results, group_by=None, payoff_column_name="payoff")
            payoff_month = calculate_binary_classifier_statistics(results, group_by=results["年月日"].dt.month, payoff_column_name="payoff")
            payoff_distance = calculate_binary_classifier_statistics(results, group_by=pd.cut(results["距離"], bins=[0, 1400, 1800, 10000]), payoff_column_name="payoff")
            payoff_season = calculate_binary_classifier_statistics(results, group_by=results["年月日"].dt.month % 12 // 3, payoff_column_name="payoff")
            payoff_year = calculate_binary_classifier_statistics(results, group_by=results["年月日"].dt.year, payoff_column_name="payoff")
            payoff_age = calculate_binary_classifier_statistics(results, group_by=pd.cut(results["年齢"], bins=[0, 3, 6, 100]), payoff_column_name="payoff")
            payoff_grade = calculate_binary_classifier_statistics(results, group_by=results["レース条件_グレード"], payoff_column_name="payoff")
            payoff_racetrack = calculate_binary_classifier_statistics(results, group_by=results["場コード"], payoff_column_name="payoff")
            payoff = pd.concat(
                [
                    pd.DataFrame(payoff_all).T.assign(group="all"),
                    pd.DataFrame(payoff_month).T.assign(group="month"),
                    pd.DataFrame(payoff_distance).T.assign(group="distance"),
                    pd.DataFrame(payoff_season).T.assign(group="season"),
                    pd.DataFrame(payoff_year).T.assign(group="year"),
                    pd.DataFrame(payoff_age).T.assign(group="horse_age"),
                    pd.DataFrame(payoff_grade).T.assign(group="grade"),
                    pd.DataFrame(payoff_racetrack).T.assign(group="racetrack"),
                ],
                axis=0,
            ).rename_axis(index="part").reset_index()
            # Move "group" and "part" columns to the first position in this dataframe
            payoff = payoff[["group", "part"] + [c for c in payoff.columns if c not in ["group", "part"]]]

            # Save payoff rates as csv
            with tempfile.NamedTemporaryFile(prefix="payoff_rate_", suffix=".csv") as f:
                payoff.to_csv(f.name, index=False)
                mlflow.log_artifact(f.name)

            # Log payoff rates as metrics
            payoff_metrics = {}
            for group_name, group in payoff.groupby("group"):
                for i, row in group.iterrows():
                    key = re.sub(r"\W", "_", f"payoff_rate_{group_name}_{row['part']}")
                    payoff_metrics[key] = row["payoff_rate"]
            mlflow.log_metrics(payoff_metrics)

            # Plot payoff rates by group
            sns.set(style="whitegrid")
            fig, axes = plt.subplots(2, 4, figsize=(20, 10))
            for (group, df), ax in zip(payoff.groupby("group"), axes.flatten()):
                sns.barplot(x="part", y="payoff_rate", data=df, ax=ax)
                ax.set_title(group)
                ax.set_ylim(0, 150)
                ax.set_xticklabels(ax.get_xticklabels(), rotation=90)
                ax.set_ylabel("Payoff rate")
                ax.set_xlabel("")
                ax.yaxis.set_major_formatter(ticker.PercentFormatter())
            with tempfile.NamedTemporaryFile(prefix="payoff_rate_", suffix=".png") as f:
                plt.tight_layout()
                plt.savefig(f.name)
                plt.close()
                mlflow.log_artifact(f.name)


            # Plot bank balance over time
            results["profit_loss"] = results.apply(profit_loss, args=("payoff", 100), axis=1)
            daily_profit_loss = results.groupby("年月日")["profit_loss"].sum()
            bank_balance = daily_profit_loss.cumsum()
            plt.figure(figsize=(10, 10))
            ax = plt.subplot(1, 1, 1)
            ax.plot(bank_balance.index, bank_balance.values)
            ax.set_title("Bank Balance")
            ax.set_xlabel("Date")
            ax.set_ylabel("Bank Balance")
            ax.grid(True)
            ax.yaxis.set_major_formatter(ticker.StrMethodFormatter("{x:,.0f}"))
            with tempfile.NamedTemporaryFile(prefix="bank_balance_", suffix=".png") as f:
                plt.tight_layout()
                plt.savefig(f.name)
                plt.close()
                mlflow.log_artifact(f.name)


            # Confusion Matrix
            conf_matrix = confusion_matrix(y_test, y_pred)
            _, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))
            sns.heatmap(conf_matrix, annot=True, fmt="g", cmap="Blues", ax=ax1)
            ax1.set_xlabel("Predicted")
            ax1.set_ylabel("Actual")
            ax1.set_title("Confusion Matrix")
            sns.heatmap(
                conf_matrix / conf_matrix.sum(axis=1)[:, None],
                annot=True,
                fmt=".2%",
                cmap="Blues",
                ax=ax2,
            )
            ax2.set_xlabel("Predicted")
            ax2.set_ylabel("Actual")
            ax2.set_title("Normalized Confusion Matrix")
            with tempfile.NamedTemporaryFile(
                prefix="confusion_matrix_", suffix=".png"
            ) as f:
                plt.tight_layout()
                plt.savefig(f.name)
                plt.close()
                mlflow.log_artifact(f.name)


            # ROC Curve
            fpr, tpr, _ = roc_curve(y_test, y_pred)
            roc_auc = auc(fpr, tpr)
            _, ax = plt.subplots(figsize=(10, 10))
            ax.plot(fpr, tpr, color="darkorange", lw=2, label="ROC curve (area = %0.2f)" % roc_auc)
            ax.plot([0, 1], [0, 1], color="navy", lw=2, linestyle="--")
            ax.set_xlim([0.0, 1.0])
            ax.set_ylim([0.0, 1.0])
            ax.set_xlabel("False Positive Rate")
            ax.set_ylabel("True Positive Rate")
            ax.set_title("Receiver Operating Characteristic")
            ax.legend(loc="lower right")
            with tempfile.NamedTemporaryFile(prefix="roc_curve_", suffix=".png") as f:
                plt.tight_layout()
                plt.savefig(f.name)
                plt.close()
                mlflow.log_artifact(f.name)


            # Feature Importances
            feature_importances = zip(
                model.named_steps["preprocessor"].get_feature_names_out(),
                model.named_steps["classifier"].feature_importances_,
            )
            feature_importances_df = (
                pd.DataFrame(
                    data=feature_importances, columns=["feature", "importance"]
                )
                .sort_values("importance", ascending=False)
                .reset_index(drop=True)
            )
            with tempfile.NamedTemporaryFile(
                prefix="feature_importance_", suffix=".csv"
            ) as f:
                feature_importances_df.to_csv(f.name, index=False)
                mlflow.log_artifact(f.name)

            font_properties = fm.FontProperties(
                fname="/System/Library/Fonts/ヒラギノ角ゴシック W3.ttc"
            )
            sns.set(style="whitegrid")
            plt.figure(figsize=(10, 12))
            ax = sns.barplot(
                x="importance", y="feature", data=feature_importances_df.iloc[:50]
            )
            ax.set_title("Feature Importances (Top 50)", fontproperties=font_properties)
            ax.set_xlabel("Importance", fontproperties=font_properties)
            ax.set_ylabel("Features", fontproperties=font_properties)
            for label in ax.get_yticklabels():
                label.set_fontproperties(font_properties)
            with tempfile.NamedTemporaryFile(
                prefix="feature_importance_", suffix=".png"
            ) as f:
                plt.tight_layout()
                plt.savefig(f.name)
                plt.close()
                mlflow.log_artifact(f.name)

            return {"status": STATUS_OK, "params": params, "model": model, **metrics}

    return train

# Train classifiers

In [6]:
space = {
    "boosting_type": hp.choice("boosting_type", ["gbdt", "dart", "goss"]),
    "learning_rate": hp.loguniform("learning_rate", -5, 0),  # between e^-5 and 1
    "n_estimators": scope.int(hp.quniform("n_estimators", 100, 1000, 1)),
    "max_depth": scope.int(hp.quniform("max_depth", 3, 10, 1)),
    "num_leaves": scope.int(hp.quniform("num_leaves", 20, 150, 1)),
    "min_child_samples": scope.int(hp.quniform("min_child_samples", 20, 500, 1)),
    "feature_fraction": hp.uniform("feature_fraction", 0.5, 1.0),
    "lambda_l1": hp.uniform("lambda_l1", 0, 5),
    "lambda_l2": hp.uniform("lambda_l2", 0, 5),
    "min_split_gain": hp.uniform("min_split_gain", 0, 1),
    "min_child_weight": hp.uniform("min_child_weight", 0.001, 10),
    "subsample": hp.uniform("subsample", 0.5, 1),
    "colsample_bytree": hp.uniform("colsample_bytree", 0.5, 1),
    "reg_alpha": hp.uniform("reg_alpha", 0.0, 1.0),
    "reg_lambda": hp.uniform("reg_lambda", 0.0, 1.0),
    "objective": "binary",
    "class_weight": "balanced",
    "verbose": -1,
    "seed": 80,
}

In [7]:
# http://localhost:4040
spark = (
    SparkSession.builder
    .config("spark.driver.memory", "21g")
    .getOrCreate()
)

24/01/31 11:10:38 WARN Utils: Your hostname, Hanks-MacBook-Pro.local resolves to a loopback address: 127.0.0.1; using 192.168.40.105 instead (on interface en0)
24/01/31 11:10:38 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
24/01/31 11:10:39 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


## Turf

In [7]:
X_train_turf = X_train[X_train["トラック種別"] == "芝"][Features.get_feature_names_by_tag("芝")]
y_train_turf = y_train[X_train["トラック種別"] == "芝"]
X_test_turf = X_test[X_test["トラック種別"] == "芝"][Features.get_feature_names_by_tag("芝")]
y_test_turf = y_test[X_test["トラック種別"] == "芝"]

df_payout_turf = data.iloc[X_test_turf.index].reset_index(drop=True)[
    ["レースキー", "馬番", "距離", "年月日", "複勝払戻金", "年齢", "レース条件_グレード", "場コード"]
].rename(columns={"複勝払戻金": "payoff"})

fn_turf = create_objective_fn(
    X_train_turf,
    y_train_turf,
    X_test_turf,
    y_test_turf,
    df_payout=df_payout_turf,
    experiment_name="20240126_binary_weather__turf",
)

In [8]:
trials_turf = SparkTrials(parallelism=3, spark_session=spark)
fmin(fn=fn_turf, space=space, algo=tpe.suggest, max_evals=50, trials=trials_turf)

  0%|          | 0/50 [00:00<?, ?trial/s, best loss=?]

build_posterior_wrapper took 0.002211 seconds
TPE using 0 trials
build_posterior_wrapper took 0.001867 seconds
TPE using 1/1 trials with best loss inf
build_posterior_wrapper took 0.001949 seconds
TPE using 2/2 trials with best loss inf
build_posterior_wrapper took 0.001930 seconds
TPE using 3/3 trials with best loss inf
{'boosting_type': 'gbdt', 'class_weight': 'balanced', 'colsample_bytree': 0.7904726659658795, 'feature_fraction': 0.5019532468167057, 'lambda_l1': 2.3786721315951813, 'lambda_l2': 3.2628428213185967, 'learning_rate': 0.029719259373175457, 'max_depth': 4, 'min_child_samples': 132, 'min_child_weight': 9.570406959400078, 'min_split_gain': 0.027299036166272717, 'n_estimators': 516, 'num_leaves': 112, 'objective': 'binary', 'reg_alpha': 0.5720272070067735, 'reg_lambda': 0.7662373586369929, 'seed': 80, 'subsample': 0.7876113451617477, 'verbose': -1}
{'boosting_type': 'goss', 'class_weight': 'balanced', 'colsample_bytree': 0.5580550539253586, 'feature_fraction': 0.71645093184

  2%|▏         | 1/50 [01:09<56:34, 69.28s/trial, best loss: 0.4841962467879715]

build_posterior_wrapper took 0.001718 seconds
TPE using 4/4 trials with best loss 0.484196
Closing down clientserver connection                                            
[Stage 1:>                                                          (0 + 1) / 1]

  4%|▍         | 2/50 [01:10<23:17, 29.12s/trial, best loss: 0.47867083257749593]

build_posterior_wrapper took 0.001691 seconds
TPE using 5/5 trials with best loss 0.478671
{'boosting_type': 'goss', 'class_weight': 'balanced', 'colsample_bytree': 0.5853108223579935, 'feature_fraction': 0.8922071016423263, 'lambda_l1': 1.146013937399073, 'lambda_l2': 2.1351993926504313, 'learning_rate': 0.006749143218245603, 'max_depth': 10, 'min_child_samples': 268, 'min_child_weight': 9.794635569537697, 'min_split_gain': 0.07444145431403859, 'n_estimators': 949, 'num_leaves': 115, 'objective': 'binary', 'reg_alpha': 0.8745835036595828, 'reg_lambda': 0.0575594813968161, 'seed': 80, 'subsample': 0.971569578132058, 'verbose': -1}
{'boosting_type': 'gbdt', 'class_weight': 'balanced', 'colsample_bytree': 0.8864485856805864, 'feature_fraction': 0.7768132673528465, 'lambda_l1': 4.139463264632079, 'lambda_l2': 1.1255864011791195, 'learning_rate': 0.04181543028232509, 'max_depth': 5, 'min_child_samples': 280, 'min_child_weight': 1.6334596418113934, 'min_split_gain': 0.8738919950129121, 'n_e

  6%|▌         | 3/50 [01:31<19:56, 25.46s/trial, best loss: 0.46271864532393175]

build_posterior_wrapper took 0.015003 secondse 4:>                  (0 + 1) / 1]
TPE using 6/6 trials with best loss 0.462719
{'boosting_type': 'dart', 'class_weight': 'balanced', 'colsample_bytree': 0.8929936855766141, 'feature_fraction': 0.8658417769262625, 'lambda_l1': 4.779664298566313, 'lambda_l2': 2.7973608070254343, 'learning_rate': 0.01164147899857619, 'max_depth': 8, 'min_child_samples': 166, 'min_child_weight': 5.404625541513295, 'min_split_gain': 0.07773507909727462, 'n_estimators': 502, 'num_leaves': 89, 'objective': 'binary', 'reg_alpha': 0.9347861615287069, 'reg_lambda': 0.32821012808380023, 'seed': 80, 'subsample': 0.8841669939264953, 'verbose': -1}
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


  8%|▊         | 4/50 [02:34<31:03, 40.51s/trial, best loss: 0.46271864532393175]

build_posterior_wrapper took 0.001827 secondse 5:>                  (0 + 1) / 1]
TPE using 7/7 trials with best loss 0.462719
{'boosting_type': 'dart', 'class_weight': 'balanced', 'colsample_bytree': 0.9613034033936487, 'feature_fraction': 0.9851504005572174, 'lambda_l1': 3.0436100916362, 'lambda_l2': 0.32649189285143654, 'learning_rate': 0.01876423897076063, 'max_depth': 3, 'min_child_samples': 321, 'min_child_weight': 7.941888461228998, 'min_split_gain': 0.048620316290217236, 'n_estimators': 301, 'num_leaves': 47, 'objective': 'binary', 'reg_alpha': 0.8583607747911524, 'reg_lambda': 0.4881342611288777, 'seed': 80, 'subsample': 0.8746828928934344, 'verbose': -1}
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            
[Stage 3:>                  (0 + 1) / 1][Stage 5:>                  (0 + 1) / 1]

 10%|█         | 5/50 [03:43<37:53, 50.53s/trial, best loss: 0.46271864532393175]

build_posterior_wrapper took 0.001686 seconds
TPE using 8/8 trials with best loss 0.462719
{'boosting_type': 'goss', 'class_weight': 'balanced', 'colsample_bytree': 0.7987126072664537, 'feature_fraction': 0.5125755955107701, 'lambda_l1': 0.577282990571994, 'lambda_l2': 0.010619973451780895, 'learning_rate': 0.010457094793571563, 'max_depth': 3, 'min_child_samples': 246, 'min_child_weight': 9.136041405898444, 'min_split_gain': 0.725452992594342, 'n_estimators': 365, 'num_leaves': 30, 'objective': 'binary', 'reg_alpha': 0.24463657800749783, 'reg_lambda': 0.21809426802656662, 'seed': 80, 'subsample': 0.8731234117101018, 'verbose': -1}
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            
[Stage 3:>                  (0 + 1) / 1][Stage 5:>                  (0 + 1) / 1]

 12%|█▏        | 6/50 [04:38<38:14, 52.14s/trial, best loss: 0.46271864532393175]

build_posterior_wrapper took 0.001821 seconds
TPE using 9/9 trials with best loss 0.462719
{'boosting_type': 'gbdt', 'class_weight': 'balanced', 'colsample_bytree': 0.8680579398142589, 'feature_fraction': 0.5564523622118337, 'lambda_l1': 3.6392550367142102, 'lambda_l2': 1.385392778731609, 'learning_rate': 0.9024085384386732, 'max_depth': 9, 'min_child_samples': 64, 'min_child_weight': 3.3575052246542003, 'min_split_gain': 0.0977130921379501, 'n_estimators': 579, 'num_leaves': 143, 'objective': 'binary', 'reg_alpha': 0.3461685725445287, 'reg_lambda': 0.6411327050303577, 'seed': 80, 'subsample': 0.815428392698144, 'verbose': -1}
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 14%|█▍        | 7/50 [04:48<27:30, 38.38s/trial, best loss: 0.46271864532393175]

build_posterior_wrapper took 0.002581 secondse 8:>                  (0 + 1) / 1]
TPE using 10/10 trials with best loss 0.462719
{'boosting_type': 'gbdt', 'class_weight': 'balanced', 'colsample_bytree': 0.8877034586248438, 'feature_fraction': 0.6168860692853875, 'lambda_l1': 4.251811997885999, 'lambda_l2': 3.838737623497755, 'learning_rate': 0.013537988963605434, 'max_depth': 6, 'min_child_samples': 425, 'min_child_weight': 0.828521410713625, 'min_split_gain': 0.4767512648768536, 'n_estimators': 546, 'num_leaves': 67, 'objective': 'binary', 'reg_alpha': 0.02865813931821881, 'reg_lambda': 0.2715579382424472, 'seed': 80, 'subsample': 0.6774905721888246, 'verbose': -1}
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            
[Stage 8:>                  (0 + 1) / 1][Stage 9:>                  (0 + 1) / 1]

 16%|█▌        | 8/50 [05:05<22:07, 31.60s/trial, best loss: 0.46271864532393175]

build_posterior_wrapper took 0.001956 seconds
TPE using 11/11 trials with best loss 0.462719
{'boosting_type': 'dart', 'class_weight': 'balanced', 'colsample_bytree': 0.7294612342131842, 'feature_fraction': 0.9849383703780609, 'lambda_l1': 0.19636813386159913, 'lambda_l2': 2.9007350363189843, 'learning_rate': 0.00884221039842917, 'max_depth': 9, 'min_child_samples': 238, 'min_child_weight': 2.4341590759670733, 'min_split_gain': 0.5307197032993004, 'n_estimators': 830, 'num_leaves': 30, 'objective': 'binary', 'reg_alpha': 0.7207376197601789, 'reg_lambda': 0.7183741427218695, 'seed': 80, 'subsample': 0.9175765770742261, 'verbose': -1}
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 18%|█▊        | 9/50 [06:22<31:09, 45.59s/trial, best loss: 0.46271864532393175]

build_posterior_wrapper took 0.001963 secondse 10:>                 (0 + 1) / 1]
TPE using 12/12 trials with best loss 0.462719
{'boosting_type': 'dart', 'class_weight': 'balanced', 'colsample_bytree': 0.702416744794118, 'feature_fraction': 0.6624219248860517, 'lambda_l1': 2.860741320554983, 'lambda_l2': 4.705838903999382, 'learning_rate': 0.868181853546259, 'max_depth': 4, 'min_child_samples': 344, 'min_child_weight': 8.785753822725564, 'min_split_gain': 0.25978537778450694, 'n_estimators': 341, 'num_leaves': 124, 'objective': 'binary', 'reg_alpha': 0.022812477971287826, 'reg_lambda': 0.7702366343658242, 'seed': 80, 'subsample': 0.6755151780913133, 'verbose': -1}
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 20%|██        | 10/50 [07:11<31:08, 46.71s/trial, best loss: 0.46271864532393175]

build_posterior_wrapper took 0.001994 secondse 11:>                 (0 + 1) / 1]
TPE using 13/13 trials with best loss 0.462719
{'boosting_type': 'gbdt', 'class_weight': 'balanced', 'colsample_bytree': 0.7873482547515422, 'feature_fraction': 0.7213047331754678, 'lambda_l1': 4.983538749354361, 'lambda_l2': 4.413216159946537, 'learning_rate': 0.16022846455760315, 'max_depth': 8, 'min_child_samples': 149, 'min_child_weight': 0.23924690648964003, 'min_split_gain': 0.36951118322134113, 'n_estimators': 838, 'num_leaves': 93, 'objective': 'binary', 'reg_alpha': 0.9138215117630123, 'reg_lambda': 0.6966360144452923, 'seed': 80, 'subsample': 0.8905382340354766, 'verbose': -1}
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 22%|██▏       | 11/50 [07:41<27:03, 41.64s/trial, best loss: 0.46271864532393175]

build_posterior_wrapper took 0.002773 secondse 12:>                 (0 + 1) / 1]
TPE using 14/14 trials with best loss 0.462719
{'boosting_type': 'dart', 'class_weight': 'balanced', 'colsample_bytree': 0.5689941751594574, 'feature_fraction': 0.8851835658340381, 'lambda_l1': 4.459766705874408, 'lambda_l2': 3.819998545471604, 'learning_rate': 0.30678226133776787, 'max_depth': 7, 'min_child_samples': 100, 'min_child_weight': 7.209623331139664, 'min_split_gain': 0.24393771954138366, 'n_estimators': 255, 'num_leaves': 119, 'objective': 'binary', 'reg_alpha': 0.957106536677059, 'reg_lambda': 0.8567923190178491, 'seed': 80, 'subsample': 0.738561240050267, 'verbose': -1}
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 24%|██▍       | 12/50 [09:17<36:55, 58.31s/trial, best loss: 0.46271864532393175]

build_posterior_wrapper took 0.005428 secondse 13:>                 (0 + 1) / 1]
TPE using 15/15 trials with best loss 0.462719
Closing down clientserver connection                                            
{'boosting_type': 'gbdt', 'class_weight': 'balanced', 'colsample_bytree': 0.6674818151335365, 'feature_fraction': 0.662210460438191, 'lambda_l1': 4.268147411940093, 'lambda_l2': 1.9295130965324114, 'learning_rate': 0.01720169617516588, 'max_depth': 5, 'min_child_samples': 315, 'min_child_weight': 1.397070530385169, 'min_split_gain': 0.6630207397566381, 'n_estimators': 697, 'num_leaves': 72, 'objective': 'binary', 'reg_alpha': 0.9952258156222533, 'reg_lambda': 0.4221035702554543, 'seed': 80, 'subsample': 0.7626306956883547, 'verbose': -1}


 26%|██▌       | 13/50 [09:22<26:00, 42.17s/trial, best loss: 0.4613880364105149] 

  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
build_posterior_wrapper took 0.001664 seconds
TPE using 16/16 trials with best loss 0.461388
{'boosting_type': 'goss', 'class_weight': 'balanced', 'colsample_bytree': 0.9611056330822032, 'feature_fraction': 0.6929991556186283, 'lambda_l1': 3.999555060263136, 'lambda_l2': 1.5150982526567403, 'learning_rate': 0.20282683089168824, 'max_depth': 5, 'min_child_samples': 175, 'min_child_weight': 4.242476691731571, 'min_split_gain': 0.09923824916133472, 'n_estimators': 297, 'num_leaves': 71, 'objective': 'binary', 'reg_alpha': 0.6427375097454283, 'reg_lambda': 0.25675784657590095, 'seed': 80, 'subsample': 0.5394698203166565, 'verbose': -1}
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            
[Stage 14:>                 (0 + 1) / 1][Stage 15:>                 (0 + 1) / 1]

 28%|██▊       | 14/50 [09:33<19:39, 32.77s/trial, best loss: 0.4354805139895824]

build_posterior_wrapper took 0.001702 seconds
TPE using 17/17 trials with best loss 0.435481
{'boosting_type': 'dart', 'class_weight': 'balanced', 'colsample_bytree': 0.9476756436898445, 'feature_fraction': 0.9204579807037468, 'lambda_l1': 1.4723237567456944, 'lambda_l2': 0.4898977719343123, 'learning_rate': 0.0444175825165788, 'max_depth': 9, 'min_child_samples': 483, 'min_child_weight': 1.5898077739493601, 'min_split_gain': 0.9255536736431571, 'n_estimators': 188, 'num_leaves': 125, 'objective': 'binary', 'reg_alpha': 0.26819219889552415, 'reg_lambda': 0.11231686493159931, 'seed': 80, 'subsample': 0.5671692861738452, 'verbose': -1}
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 30%|███       | 15/50 [10:19<21:18, 36.52s/trial, best loss: 0.4354805139895824]

build_posterior_wrapper took 0.002151 secondse 16:>                 (0 + 1) / 1]
TPE using 18/18 trials with best loss 0.435481
{'boosting_type': 'gbdt', 'class_weight': 'balanced', 'colsample_bytree': 0.5379898919602251, 'feature_fraction': 0.5628927686741144, 'lambda_l1': 1.4901388626452683, 'lambda_l2': 1.6768746113569544, 'learning_rate': 0.012012874845256443, 'max_depth': 5, 'min_child_samples': 405, 'min_child_weight': 0.4952274259604197, 'min_split_gain': 0.3785458263227518, 'n_estimators': 763, 'num_leaves': 26, 'objective': 'binary', 'reg_alpha': 0.8702170883588163, 'reg_lambda': 0.23876074526130497, 'seed': 80, 'subsample': 0.8577115445462388, 'verbose': -1}
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 32%|███▏      | 16/50 [10:40<18:03, 31.88s/trial, best loss: 0.4354805139895824]

build_posterior_wrapper took 0.001736 secondse 17:>                 (0 + 1) / 1]
TPE using 19/19 trials with best loss 0.435481
{'boosting_type': 'goss', 'class_weight': 'balanced', 'colsample_bytree': 0.8067196380007391, 'feature_fraction': 0.7388384824606553, 'lambda_l1': 0.6530482695252071, 'lambda_l2': 4.70752332089435, 'learning_rate': 0.12228156145333145, 'max_depth': 6, 'min_child_samples': 321, 'min_child_weight': 7.052768139371294, 'min_split_gain': 0.15454103397749042, 'n_estimators': 354, 'num_leaves': 139, 'objective': 'binary', 'reg_alpha': 0.2061763202173632, 'reg_lambda': 0.19155849716576556, 'seed': 80, 'subsample': 0.9758306695181256, 'verbose': -1}
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 34%|███▍      | 17/50 [11:04<16:14, 29.54s/trial, best loss: 0.4354805139895824]

build_posterior_wrapper took 0.003163 secondse 18:>                 (0 + 1) / 1]
TPE using 20/20 trials with best loss 0.435481
{'boosting_type': 'goss', 'class_weight': 'balanced', 'colsample_bytree': 0.5339284068199746, 'feature_fraction': 0.5117939673641019, 'lambda_l1': 1.980847182316165, 'lambda_l2': 0.39253715404161615, 'learning_rate': 0.009764043453157935, 'max_depth': 6, 'min_child_samples': 187, 'min_child_weight': 6.116755286834556, 'min_split_gain': 0.7662008425695781, 'n_estimators': 649, 'num_leaves': 130, 'objective': 'binary', 'reg_alpha': 0.22790613783806724, 'reg_lambda': 0.17569480375324287, 'seed': 80, 'subsample': 0.9101620797425627, 'verbose': -1}
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            
Closing down clientserver connection


 38%|███▊      | 19/50 [11:41<12:39, 24.49s/trial, best loss: 0.4354805139895824]

build_posterior_wrapper took 0.001913 seconds                       (0 + 1) / 1]
TPE using 21/21 trials with best loss 0.435481
build_posterior_wrapper took 0.001681 seconds
TPE using 22/22 trials with best loss 0.435481
{'boosting_type': 'dart', 'class_weight': 'balanced', 'colsample_bytree': 0.6285810846784619, 'feature_fraction': 0.8110642580325372, 'lambda_l1': 4.902749707071153, 'lambda_l2': 4.0302603018571785, 'learning_rate': 0.39796505495609613, 'max_depth': 7, 'min_child_samples': 24, 'min_child_weight': 7.787085196767387, 'min_split_gain': 0.24664977514257164, 'n_estimators': 219, 'num_leaves': 100, 'objective': 'binary', 'reg_alpha': 0.9919563078282967, 'reg_lambda': 0.9527697861397324, 'seed': 80, 'subsample': 0.6128704386005981, 'verbose': -1}
{'boosting_type': 'gbdt', 'class_weight': 'balanced', 'colsample_bytree': 0.6306455453328172, 'feature_fraction': 0.826478946417056, 'lambda_l1': 3.4297527078898815, 'lambda_l2': 4.095521552971976, 'learning_rate': 0.3840313508708587

 40%|████      | 20/50 [12:34<15:48, 31.63s/trial, best loss: 0.4354805139895824]

build_posterior_wrapper took 0.002034 seconds
TPE using 23/23 trials with best loss 0.435481
{'boosting_type': 'dart', 'class_weight': 'balanced', 'colsample_bytree': 0.843805768134387, 'feature_fraction': 0.7782271111222382, 'lambda_l1': 4.935096485360809, 'lambda_l2': 4.988446883178382, 'learning_rate': 0.3487979018002433, 'max_depth': 8, 'min_child_samples': 26, 'min_child_weight': 5.686697581833384, 'min_split_gain': 0.3478026499129396, 'n_estimators': 995, 'num_leaves': 98, 'objective': 'binary', 'reg_alpha': 0.4499580717466838, 'reg_lambda': 0.8704382125075835, 'seed': 80, 'subsample': 0.6744778351264757, 'verbose': -1}
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            
[Stage 21:>                 (0 + 1) / 1][Stage 22:>                 (0 + 1) / 1]

 42%|████▏     | 21/50 [13:12<16:07, 33.35s/trial, best loss: 0.4354805139895824]

build_posterior_wrapper took 0.001754 seconds
TPE using 24/24 trials with best loss 0.435481
{'boosting_type': 'gbdt', 'class_weight': 'balanced', 'colsample_bytree': 0.5026137994210316, 'feature_fraction': 0.930467656901991, 'lambda_l1': 3.5999851009301964, 'lambda_l2': 3.3962237645218227, 'learning_rate': 0.0813966022700543, 'max_depth': 7, 'min_child_samples': 121, 'min_child_weight': 3.3083419464075154, 'min_split_gain': 0.4941577675264467, 'n_estimators': 439, 'num_leaves': 53, 'objective': 'binary', 'reg_alpha': 0.7208951824411926, 'reg_lambda': 0.6120452764244098, 'seed': 80, 'subsample': 0.8031582716604928, 'verbose': -1}
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 44%|████▍     | 22/50 [13:28<13:14, 28.38s/trial, best loss: 0.4354805139895824]

build_posterior_wrapper took 0.001880 secondse 23:>                 (0 + 1) / 1]
TPE using 25/25 trials with best loss 0.435481
{'boosting_type': 'dart', 'class_weight': 'balanced', 'colsample_bytree': 0.6314929476653863, 'feature_fraction': 0.8071630114441903, 'lambda_l1': 4.823163485805343, 'lambda_l2': 4.282014202049706, 'learning_rate': 0.5589044240328516, 'max_depth': 10, 'min_child_samples': 33, 'min_child_weight': 8.27495974671147, 'min_split_gain': 0.21389155078336972, 'n_estimators': 635, 'num_leaves': 103, 'objective': 'binary', 'reg_alpha': 0.9817736251100619, 'reg_lambda': 0.986017544434064, 'seed': 80, 'subsample': 0.5965108793091208, 'verbose': -1}
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            
[Stage 22:>                 (0 + 1) / 1][Stage 24:>                 (0 + 1) / 1]

 46%|████▌     | 23/50 [14:38<18:03, 40.13s/trial, best loss: 0.4354805139895824]

build_posterior_wrapper took 0.001857 seconds
TPE using 26/26 trials with best loss 0.435481
{'boosting_type': 'gbdt', 'class_weight': 'balanced', 'colsample_bytree': 0.6805836629282799, 'feature_fraction': 0.8127772657710398, 'lambda_l1': 3.49483653982332, 'lambda_l2': 4.49090021229913, 'learning_rate': 0.20798141965478883, 'max_depth': 8, 'min_child_samples': 202, 'min_child_weight': 4.48034917630471, 'min_split_gain': 0.4012724301072693, 'n_estimators': 910, 'num_leaves': 80, 'objective': 'binary', 'reg_alpha': 0.7576890778527182, 'reg_lambda': 0.852479930490424, 'seed': 80, 'subsample': 0.7122227344299017, 'verbose': -1}
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 48%|████▊     | 24/50 [16:48<28:35, 65.96s/trial, best loss: 0.43532813899715767]

build_posterior_wrapper took 0.001928 secondse 24:>                 (0 + 1) / 1]
TPE using 27/27 trials with best loss 0.435328
{'boosting_type': 'gbdt', 'class_weight': 'balanced', 'colsample_bytree': 0.766463104819285, 'feature_fraction': 0.8607257204203294, 'lambda_l1': 3.100070197827928, 'lambda_l2': 4.959392203425274, 'learning_rate': 0.5424853718202938, 'max_depth': 7, 'min_child_samples': 74, 'min_child_weight': 3.272044240050347, 'min_split_gain': 0.5650016949064407, 'n_estimators': 885, 'num_leaves': 86, 'objective': 'binary', 'reg_alpha': 0.5634533988410516, 'reg_lambda': 0.9205280152549611, 'seed': 80, 'subsample': 0.8341284899778506, 'verbose': -1}
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 50%|█████     | 25/50 [18:17<30:11, 72.47s/trial, best loss: 0.43532813899715767]

build_posterior_wrapper took 0.001698 secondse 24:>                 (0 + 1) / 1]
TPE using 28/28 trials with best loss 0.435328
{'boosting_type': 'gbdt', 'class_weight': 'balanced', 'colsample_bytree': 0.6913229938073665, 'feature_fraction': 0.744082650921905, 'lambda_l1': 2.3987794378153477, 'lambda_l2': 4.497350810306454, 'learning_rate': 0.20015612484325063, 'max_depth': 8, 'min_child_samples': 230, 'min_child_weight': 0.012998673770242775, 'min_split_gain': 0.4420211664356464, 'n_estimators': 987, 'num_leaves': 47, 'objective': 'binary', 'reg_alpha': 0.7906131452851602, 'reg_lambda': 0.6474749457383249, 'seed': 80, 'subsample': 0.9384555908469187, 'verbose': -1}
Closing down clientserver connection                                            
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]


 52%|█████▏    | 26/50 [18:22<21:05, 52.73s/trial, best loss: 0.43532813899715767]

build_posterior_wrapper took 0.001674 seconds
TPE using 29/29 trials with best loss 0.435328
{'boosting_type': 'gbdt', 'class_weight': 'balanced', 'colsample_bytree': 0.8283787116097534, 'feature_fraction': 0.6934864190176482, 'lambda_l1': 3.7805101258930067, 'lambda_l2': 3.4131840224404804, 'learning_rate': 0.07605511170502853, 'max_depth': 8, 'min_child_samples': 197, 'min_child_weight': 4.047456238226003, 'min_split_gain': 0.3200438073169177, 'n_estimators': 742, 'num_leaves': 59, 'objective': 'binary', 'reg_alpha': 0.6306927542785307, 'reg_lambda': 0.7787405969665895, 'seed': 80, 'subsample': 0.7765233035207197, 'verbose': -1}
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 54%|█████▍    | 27/50 [20:06<25:58, 67.74s/trial, best loss: 0.43532813899715767]

build_posterior_wrapper took 0.005810 seconds
TPE using 30/30 trials with best loss 0.435328
Closing down clientserver connection                                            
{'boosting_type': 'gbdt', 'class_weight': 'balanced', 'colsample_bytree': 0.7681047889805136, 'feature_fraction': 0.7759261541920912, 'lambda_l1': 3.2792501191388768, 'lambda_l2': 2.3979284898973408, 'learning_rate': 0.19696460807260854, 'max_depth': 10, 'min_child_samples': 147, 'min_child_weight': 2.7906694558117255, 'min_split_gain': 0.42290948740515816, 'n_estimators': 881, 'num_leaves': 76, 'objective': 'binary', 'reg_alpha': 0.5403230156400805, 'reg_lambda': 0.8363099133153092, 'seed': 80, 'subsample': 0.6373530776274022, 'verbose': -1}
[Stage 27:>                 (0 + 1) / 1][Stage 29:>                 (0 + 1) / 1]

 56%|█████▌    | 28/50 [20:11<18:02, 49.19s/trial, best loss: 0.43532813899715767]

build_posterior_wrapper took 0.001737 seconds
TPE using 31/31 trials with best loss 0.435328
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
{'boosting_type': 'gbdt', 'class_weight': 'balanced', 'colsample_bytree': 0.6604852208190072, 'feature_fraction': 0.6333631481184456, 'lambda_l1': 2.724049616629838, 'lambda_l2': 3.113070276958827, 'learning_rate': 0.11973058901268174, 'max_depth': 9, 'min_child_samples': 211, 'min_child_weight': 0.013459183262305302, 'min_split_gain': 0.16613771226452528, 'n_estimators': 820, 'num_leaves': 108, 'objective': 'binary', 'reg_alpha': 0.45542133352012426, 'reg_lambda': 0.5597805344706341, 'seed': 80, 'subsample': 0.7079427865163092, 'verbose': -1}
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 58%|█████▊    | 29/50 [20:23<13:21, 38.16s/trial, best loss: 0.43532813899715767]

build_posterior_wrapper took 0.001825 secondse 30:>                 (0 + 1) / 1]
TPE using 32/32 trials with best loss 0.435328
{'boosting_type': 'gbdt', 'class_weight': 'balanced', 'colsample_bytree': 0.7211398439118566, 'feature_fraction': 0.7162541877346603, 'lambda_l1': 1.9717498090388752, 'lambda_l2': 3.6612014501563586, 'learning_rate': 0.05403673819663576, 'max_depth': 8, 'min_child_samples': 134, 'min_child_weight': 5.219394395849746, 'min_split_gain': 0.5839705134837191, 'n_estimators': 761, 'num_leaves': 61, 'objective': 'binary', 'reg_alpha': 0.6570776125057138, 'reg_lambda': 0.7027104207558045, 'seed': 80, 'subsample': 0.7968332145962282, 'verbose': -1}
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            
[Stage 29:>                 (0 + 1) / 1][Stage 30:>                 (0 + 1) / 1]

 60%|██████    | 30/50 [21:57<18:14, 54.73s/trial, best loss: 0.43532813899715767]

build_posterior_wrapper took 0.001899 seconds
TPE using 33/33 trials with best loss 0.435328
{'boosting_type': 'gbdt', 'class_weight': 'balanced', 'colsample_bytree': 0.6044299764252568, 'feature_fraction': 0.5776534738401967, 'lambda_l1': 4.544303469492055, 'lambda_l2': 4.451043822493469, 'learning_rate': 0.16968794104274562, 'max_depth': 9, 'min_child_samples': 284, 'min_child_weight': 0.9944888525526654, 'min_split_gain': 0.00028505503410253263, 'n_estimators': 970, 'num_leaves': 95, 'objective': 'binary', 'reg_alpha': 0.9089125252189225, 'reg_lambda': 0.7135903041435163, 'seed': 80, 'subsample': 0.8314527073332705, 'verbose': -1}
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 62%|██████▏   | 31/50 [22:20<14:20, 45.31s/trial, best loss: 0.43193733304133497]

build_posterior_wrapper took 0.001952 secondse 32:>                 (0 + 1) / 1]
TPE using 34/34 trials with best loss 0.431937
{'boosting_type': 'gbdt', 'class_weight': 'balanced', 'colsample_bytree': 0.9955740034912712, 'feature_fraction': 0.8360609336271964, 'lambda_l1': 3.9449235607413575, 'lambda_l2': 4.810874744086842, 'learning_rate': 0.028641954236639955, 'max_depth': 10, 'min_child_samples': 386, 'min_child_weight': 2.2927343446183013, 'min_split_gain': 0.6636439560636352, 'n_estimators': 888, 'num_leaves': 80, 'objective': 'binary', 'reg_alpha': 0.8015408091106311, 'reg_lambda': 0.8125052474267079, 'seed': 80, 'subsample': 0.7008382399767011, 'verbose': -1}
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 64%|██████▍   | 32/50 [22:36<10:58, 36.59s/trial, best loss: 0.4296883308644132] 

build_posterior_wrapper took 0.002416 secondse 33:>                 (0 + 1) / 1]
TPE using 35/35 trials with best loss 0.429688
{'boosting_type': 'gbdt', 'class_weight': 'balanced', 'colsample_bytree': 0.7641984742782131, 'feature_fraction': 0.7692480967896649, 'lambda_l1': 3.2888346193881492, 'lambda_l2': 2.379444054864621, 'learning_rate': 0.24988725464707923, 'max_depth': 10, 'min_child_samples': 156, 'min_child_weight': 2.782752489204091, 'min_split_gain': 0.43072687360819084, 'n_estimators': 465, 'num_leaves': 79, 'objective': 'binary', 'reg_alpha': 0.364972407740482, 'reg_lambda': 0.9010877430750017, 'seed': 80, 'subsample': 0.6396842336871774, 'verbose': -1}
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 66%|██████▌   | 33/50 [23:57<14:06, 49.79s/trial, best loss: 0.4296883308644132]

build_posterior_wrapper took 0.002030 secondse 33:>                 (0 + 1) / 1]
TPE using 36/36 trials with best loss 0.429688
{'boosting_type': 'gbdt', 'class_weight': 'balanced', 'colsample_bytree': 0.6506063254341224, 'feature_fraction': 0.6166845309748348, 'lambda_l1': 2.721306349847701, 'lambda_l2': 2.944961259761126, 'learning_rate': 0.12883185836541747, 'max_depth': 10, 'min_child_samples': 208, 'min_child_weight': 3.75528684773529, 'min_split_gain': 0.16879847633031778, 'n_estimators': 614, 'num_leaves': 111, 'objective': 'binary', 'reg_alpha': 0.15123903470849115, 'reg_lambda': 0.5675942761937514, 'seed': 80, 'subsample': 0.511597411949451, 'verbose': -1}
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 68%|██████▊   | 34/50 [24:33<12:12, 45.76s/trial, best loss: 0.42327127087647803]

build_posterior_wrapper took 0.002026 secondse 35:>                 (0 + 1) / 1]
TPE using 37/37 trials with best loss 0.423271
{'boosting_type': 'goss', 'class_weight': 'balanced', 'colsample_bytree': 0.9138221966649583, 'feature_fraction': 0.6503452522554583, 'lambda_l1': 2.1234923490711592, 'lambda_l2': 2.411855463776733, 'learning_rate': 0.10818380930954277, 'max_depth': 10, 'min_child_samples': 263, 'min_child_weight': 2.1177553526644615, 'min_split_gain': 0.31263418887438255, 'n_estimators': 673, 'num_leaves': 149, 'objective': 'binary', 'reg_alpha': 0.4925603372488791, 'reg_lambda': 0.515180454628012, 'seed': 80, 'subsample': 0.6321533870125992, 'verbose': -1}
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 70%|███████   | 35/50 [24:47<09:03, 36.27s/trial, best loss: 0.42327127087647803]

build_posterior_wrapper took 0.001804 secondse 36:>                 (0 + 1) / 1]
TPE using 38/38 trials with best loss 0.423271
{'boosting_type': 'gbdt', 'class_weight': 'balanced', 'colsample_bytree': 0.5964977555212246, 'feature_fraction': 0.5489336758255299, 'lambda_l1': 4.51308476860476, 'lambda_l2': 3.0392560925116214, 'learning_rate': 0.0678789597541911, 'max_depth': 9, 'min_child_samples': 285, 'min_child_weight': 1.033176948363262, 'min_split_gain': 0.011889904318948605, 'n_estimators': 994, 'num_leaves': 107, 'objective': 'binary', 'reg_alpha': 0.40340342445166216, 'reg_lambda': 0.36183248102070237, 'seed': 80, 'subsample': 0.8409772771890719, 'verbose': -1}
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 72%|███████▏  | 36/50 [25:52<10:26, 44.75s/trial, best loss: 0.42327127087647803]

build_posterior_wrapper took 0.001983 secondse 37:>                 (0 + 1) / 1]
TPE using 39/39 trials with best loss 0.423271
{'boosting_type': 'gbdt', 'class_weight': 'balanced', 'colsample_bytree': 0.596114211026241, 'feature_fraction': 0.5802422948821309, 'lambda_l1': 0.011152649771711154, 'lambda_l2': 3.1416866534019094, 'learning_rate': 0.6216047249279809, 'max_depth': 9, 'min_child_samples': 356, 'min_child_weight': 0.8873289198214215, 'min_split_gain': 0.02146223930115676, 'n_estimators': 803, 'num_leaves': 132, 'objective': 'binary', 'reg_alpha': 0.11824130157505802, 'reg_lambda': 0.44238963061838205, 'seed': 80, 'subsample': 0.7666995350571719, 'verbose': -1}
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 74%|███████▍  | 37/50 [26:47<10:23, 47.96s/trial, best loss: 0.42327127087647803]

build_posterior_wrapper took 0.002181 secondse 38:>                 (0 + 1) / 1]
TPE using 40/40 trials with best loss 0.423271
{'boosting_type': 'goss', 'class_weight': 'balanced', 'colsample_bytree': 0.5076602690297195, 'feature_fraction': 0.5941606239748867, 'lambda_l1': 2.6667973698285548, 'lambda_l2': 0.8962174758520853, 'learning_rate': 0.03468660215287333, 'max_depth': 9, 'min_child_samples': 450, 'min_child_weight': 0.234453725713471, 'min_split_gain': 0.1632782986223128, 'n_estimators': 948, 'num_leaves': 93, 'objective': 'binary', 'reg_alpha': 0.47995675444379976, 'reg_lambda': 0.5502005447275198, 'seed': 80, 'subsample': 0.9991373954458656, 'verbose': -1}
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 76%|███████▌  | 38/50 [27:18<08:35, 42.97s/trial, best loss: 0.42327127087647803]

build_posterior_wrapper took 0.004626 secondse 39:>                 (0 + 1) / 1]
TPE using 41/41 trials with best loss 0.423271
{'boosting_type': 'gbdt', 'class_weight': 'balanced', 'colsample_bytree': 0.5638656098142945, 'feature_fraction': 0.534098247400467, 'lambda_l1': 1.2055231000497941, 'lambda_l2': 2.651964956856497, 'learning_rate': 0.10900875453303421, 'max_depth': 9, 'min_child_samples': 372, 'min_child_weight': 1.7932085679358807, 'min_split_gain': 0.12018068291192363, 'n_estimators': 734, 'num_leaves': 117, 'objective': 'binary', 'reg_alpha': 0.3202428022860184, 'reg_lambda': 0.008648400607394247, 'seed': 80, 'subsample': 0.9540906936532196, 'verbose': -1}
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 78%|███████▊  | 39/50 [28:04<08:00, 43.71s/trial, best loss: 0.42327127087647803]

build_posterior_wrapper took 0.002144 secondse 40:>                 (0 + 1) / 1]
TPE using 42/42 trials with best loss 0.423271
{'boosting_type': 'gbdt', 'class_weight': 'balanced', 'colsample_bytree': 0.60627047057157, 'feature_fraction': 0.6364357218735145, 'lambda_l1': 4.554047712189081, 'lambda_l2': 2.071393677887818, 'learning_rate': 0.05909703903449289, 'max_depth': 9, 'min_child_samples': 293, 'min_child_weight': 0.7962354505823473, 'min_split_gain': 0.9960953638646249, 'n_estimators': 528, 'num_leaves': 137, 'objective': 'binary', 'reg_alpha': 0.41246015842860234, 'reg_lambda': 0.33241558626271833, 'seed': 80, 'subsample': 0.8142299032270565, 'verbose': -1}
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            
[Stage 40:>                 (0 + 1) / 1][Stage 41:>                 (0 + 1) / 1]

 80%|████████  | 40/50 [29:04<08:06, 48.68s/trial, best loss: 0.42327127087647803]

build_posterior_wrapper took 0.002238 seconds
TPE using 43/43 trials with best loss 0.423271
{'boosting_type': 'gbdt', 'class_weight': 'balanced', 'colsample_bytree': 0.5385190547769023, 'feature_fraction': 0.5279056062205134, 'lambda_l1': 1.6335919111555761, 'lambda_l2': 3.5713578431503956, 'learning_rate': 0.15375671362709784, 'max_depth': 10, 'min_child_samples': 224, 'min_child_weight': 1.5219026610547979, 'min_split_gain': 0.05150642001994109, 'n_estimators': 107, 'num_leaves': 110, 'objective': 'binary', 'reg_alpha': 0.6108337504091514, 'reg_lambda': 0.46495597110814335, 'seed': 80, 'subsample': 0.7517776345167264, 'verbose': -1}
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 82%|████████▏ | 41/50 [29:20<05:50, 38.91s/trial, best loss: 0.42327127087647803]

build_posterior_wrapper took 0.001743 secondse 42:>                 (0 + 1) / 1]
TPE using 44/44 trials with best loss 0.423271
{'boosting_type': 'goss', 'class_weight': 'balanced', 'colsample_bytree': 0.7149394392133364, 'feature_fraction': 0.5955173833632523, 'lambda_l1': 0.7949017630401836, 'lambda_l2': 2.732910342139041, 'learning_rate': 0.7128326800418877, 'max_depth': 4, 'min_child_samples': 252, 'min_child_weight': 0.02437160249967984, 'min_split_gain': 0.006695563681899441, 'n_estimators': 581, 'num_leaves': 39, 'objective': 'binary', 'reg_alpha': 0.5259854056937321, 'reg_lambda': 0.6614314155305413, 'seed': 80, 'subsample': 0.7005996379501644, 'verbose': -1}
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            
[Stage 42:>                 (0 + 1) / 1][Stage 43:>                 (0 + 1) / 1]

 84%|████████▍ | 42/50 [29:39<04:23, 32.97s/trial, best loss: 0.42327127087647803]

build_posterior_wrapper took 0.001945 seconds
TPE using 45/45 trials with best loss 0.423271
{'boosting_type': 'gbdt', 'class_weight': 'balanced', 'colsample_bytree': 0.6563329428388023, 'feature_fraction': 0.6814938913731096, 'lambda_l1': 2.2526735586864266, 'lambda_l2': 3.271572666753756, 'learning_rate': 0.09718331357187818, 'max_depth': 6, 'min_child_samples': 303, 'min_child_weight': 9.772942590165538, 'min_split_gain': 0.0864965661135268, 'n_estimators': 849, 'num_leaves': 148, 'objective': 'binary', 'reg_alpha': 0.90527778671653, 'reg_lambda': 0.38469803362900323, 'seed': 80, 'subsample': 0.6667899534271904, 'verbose': -1}
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 86%|████████▌ | 43/50 [30:01<03:27, 29.71s/trial, best loss: 0.42327127087647803]

build_posterior_wrapper took 0.006913 secondse 44:>                 (0 + 1) / 1]
TPE using 46/46 trials with best loss 0.423271
{'boosting_type': 'gbdt', 'class_weight': 'balanced', 'colsample_bytree': 0.7428360301567729, 'feature_fraction': 0.6419560179246602, 'lambda_l1': 0.32924750201032005, 'lambda_l2': 4.145792809564453, 'learning_rate': 0.2907080994661053, 'max_depth': 8, 'min_child_samples': 271, 'min_child_weight': 0.4631113428593622, 'min_split_gain': 0.2134644305842467, 'n_estimators': 958, 'num_leaves': 105, 'objective': 'binary', 'reg_alpha': 0.3149521993245591, 'reg_lambda': 0.5820754002296569, 'seed': 80, 'subsample': 0.8371230728231799, 'verbose': -1}
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 88%|████████▊ | 44/50 [30:19<02:35, 25.93s/trial, best loss: 0.42327127087647803]

build_posterior_wrapper took 0.002137 secondse 45:>                 (0 + 1) / 1]
TPE using 47/47 trials with best loss 0.423271
{'boosting_type': 'dart', 'class_weight': 'balanced', 'colsample_bytree': 0.5724707110142917, 'feature_fraction': 0.5722847268588326, 'lambda_l1': 1.7267151964196423, 'lambda_l2': 3.8842647219680093, 'learning_rate': 0.46588774290311014, 'max_depth': 9, 'min_child_samples': 450, 'min_child_weight': 2.7898123016180016, 'min_split_gain': 0.1904994674398312, 'n_estimators': 795, 'num_leaves': 94, 'objective': 'binary', 'reg_alpha': 0.08651463773861362, 'reg_lambda': 0.502590609462088, 'seed': 80, 'subsample': 0.5754991365630028, 'verbose': -1}
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 90%|█████████ | 45/50 [31:08<02:44, 32.93s/trial, best loss: 0.42327127087647803]

build_posterior_wrapper took 0.005034 secondse 46:>                 (0 + 1) / 1]
TPE using 48/48 trials with best loss 0.423271
{'boosting_type': 'gbdt', 'class_weight': 'balanced', 'colsample_bytree': 0.5169124815443892, 'feature_fraction': 0.5014709880238885, 'lambda_l1': 3.0215014991302014, 'lambda_l2': 1.2771954103188083, 'learning_rate': 0.022872956285499346, 'max_depth': 3, 'min_child_samples': 340, 'min_child_weight': 1.310230428432404, 'min_split_gain': 0.8334737109203264, 'n_estimators': 954, 'num_leaves': 123, 'objective': 'binary', 'reg_alpha': 0.6855686109982567, 'reg_lambda': 0.7395675964972824, 'seed': 80, 'subsample': 0.9019912018797706, 'verbose': -1}
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 92%|█████████▏| 46/50 [32:13<02:50, 42.64s/trial, best loss: 0.42327127087647803]

build_posterior_wrapper took 0.002531 secondse 46:>                 (0 + 1) / 1]
TPE using 49/49 trials with best loss 0.423271
{'boosting_type': 'goss', 'class_weight': 'balanced', 'colsample_bytree': 0.676633731494638, 'feature_fraction': 0.6028281962628526, 'lambda_l1': 1.2124231622742547, 'lambda_l2': 0.7526764679959683, 'learning_rate': 0.04504027818965552, 'max_depth': 7, 'min_child_samples': 178, 'min_child_weight': 6.358718543666708, 'min_split_gain': 0.12807106217164116, 'n_estimators': 704, 'num_leaves': 114, 'objective': 'binary', 'reg_alpha': 0.6053977120887823, 'reg_lambda': 0.2959895559797405, 'seed': 80, 'subsample': 0.7421294831631704, 'verbose': -1}
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 94%|█████████▍| 47/50 [32:32<01:46, 35.58s/trial, best loss: 0.42327127087647803]

{'boosting_type': 'gbdt', 'class_weight': 'balanced', 'colsample_bytree': 0.7053734961585213, 'feature_fraction': 0.6761611350208424, 'lambda_l1': 2.5682050113261248, 'lambda_l2': 2.207463967211427, 'learning_rate': 0.1556879977781491, 'max_depth': 9, 'min_child_samples': 494, 'min_child_weight': 2.008951830459337, 'min_split_gain': 0.2890264220782229, 'n_estimators': 853, 'num_leaves': 63, 'objective': 'binary', 'reg_alpha': 0.1876486567244402, 'reg_lambda': 0.6112506044742235, 'seed': 80, 'subsample': 0.7845573235373587, 'verbose': -1}
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 96%|█████████▌| 48/50 [33:59<01:41, 50.82s/trial, best loss: 0.42327127087647803]

Closing down clientserver connection                                            


 98%|█████████▊| 49/50 [34:27<00:44, 44.04s/trial, best loss: 0.42327127087647803]

Closing down clientserver connection                                            


100%|██████████| 50/50 [35:41<00:00, 42.84s/trial, best loss: 0.42327127087647803]

Queue empty, exiting run.





Closing down clientserver connection
Total Trials: 50: 50 succeeded, 0 failed, 0 cancelled.
Total Trials: 50: 50 succeeded, 0 failed, 0 cancelled.


{'boosting_type': 0,
 'colsample_bytree': 0.6044299764252568,
 'feature_fraction': 0.5776534738401967,
 'lambda_l1': 4.544303469492055,
 'lambda_l2': 4.451043822493469,
 'learning_rate': 0.16968794104274562,
 'max_depth': 9.0,
 'min_child_samples': 284.0,
 'min_child_weight': 0.9944888525526654,
 'min_split_gain': 0.00028505503410253263,
 'n_estimators': 970.0,
 'num_leaves': 95.0,
 'reg_alpha': 0.9089125252189225,
 'reg_lambda': 0.7135903041435163,
 'subsample': 0.8314527073332705}

## Dirt

In [8]:
X_train_dirt = X_train[X_train["トラック種別"] == "ダート"][Features.get_feature_names_by_tag("ダート")]
y_train_dirt = y_train[X_train["トラック種別"] == "ダート"]
X_test_dirt = X_test[X_test["トラック種別"] == "ダート"][Features.get_feature_names_by_tag("ダート")]
y_test_dirt = y_test[X_test["トラック種別"] == "ダート"]

df_payout_dirt = data.iloc[X_test_dirt.index].reset_index(drop=True)[
    ["レースキー", "馬番", "距離", "年月日", "複勝払戻金", "年齢", "レース条件_グレード", "場コード"]
].rename(columns={"複勝払戻金": "payoff"})

fn_dirt = create_objective_fn(
    X_train_dirt,
    y_train_dirt,
    X_test_dirt,
    y_test_dirt,
    df_payout=df_payout_dirt,
    experiment_name="20240126_binary_weather__dirt",
)

In [9]:
trials_dirt = SparkTrials(parallelism=3, spark_session=spark)
fmin(fn=fn_dirt, space=space, algo=tpe.suggest, max_evals=50, trials=trials_dirt)

  0%|          | 0/50 [00:00<?, ?trial/s, best loss=?]

build_posterior_wrapper took 0.001833 seconds
TPE using 0 trials
build_posterior_wrapper took 0.001685 seconds
TPE using 1/1 trials with best loss inf
build_posterior_wrapper took 0.062542 seconds
TPE using 2/2 trials with best loss inf
build_posterior_wrapper took 0.004135 seconds
TPE using 3/3 trials with best loss inf
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            
[Stage 1:>                  (0 + 1) / 1][Stage 2:>                  (0 + 1) / 1]

  2%|▏         | 1/50 [01:20<1:05:41, 80.44s/trial, best loss: 0.4602567205211563]

build_posterior_wrapper took 0.002014 seconds
TPE using 4/4 trials with best loss 0.460257
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


  4%|▍         | 2/50 [02:34<1:01:28, 76.84s/trial, best loss: 0.4602567205211563]

build_posterior_wrapper took 0.002307 seconds
TPE using 5/5 trials with best loss 0.460257
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


  6%|▌         | 3/50 [02:52<39:10, 50.02s/trial, best loss: 0.4602567205211563]  

build_posterior_wrapper took 0.010345 seconds
TPE using 6/6 trials with best loss 0.460257
Closing down clientserver connection                                            


  8%|▊         | 4/50 [02:56<24:25, 31.86s/trial, best loss: 0.4602567205211563]

  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
build_posterior_wrapper took 0.002253 secondse 5:>                  (0 + 1) / 1]
TPE using 7/7 trials with best loss 0.460257
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 10%|█         | 5/50 [03:42<27:30, 36.67s/trial, best loss: 0.4602567205211563]

build_posterior_wrapper took 0.001936 secondse 6:>                  (0 + 1) / 1]
TPE using 8/8 trials with best loss 0.460257
Closing down clientserver connection                                            


 12%|█▏        | 6/50 [03:46<18:45, 25.57s/trial, best loss: 0.4602567205211563]

build_posterior_wrapper took 0.001724 seconds
TPE using 9/9 trials with best loss 0.460257
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 14%|█▍        | 7/50 [04:19<20:06, 28.05s/trial, best loss: 0.4602567205211563]

build_posterior_wrapper took 0.003165 seconds
TPE using 10/10 trials with best loss 0.460257
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 16%|█▌        | 8/50 [04:52<20:46, 29.68s/trial, best loss: 0.4602567205211563]

build_posterior_wrapper took 0.001876 secondse 9:>                  (0 + 1) / 1]
TPE using 11/11 trials with best loss 0.460257
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 18%|█▊        | 9/50 [05:08<17:22, 25.43s/trial, best loss: 0.4602567205211563]

build_posterior_wrapper took 0.001723 secondse 10:>                 (0 + 1) / 1]
TPE using 12/12 trials with best loss 0.460257
Closing down clientserver connection                                            


 20%|██        | 10/50 [05:12<12:32, 18.82s/trial, best loss: 0.4602567205211563]

  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
build_posterior_wrapper took 0.001881 secondse 11:>                 (0 + 1) / 1]
TPE using 13/13 trials with best loss 0.460257
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 22%|██▏       | 11/50 [06:35<25:04, 38.58s/trial, best loss: 0.4602567205211563]

build_posterior_wrapper took 0.001893 secondse 11:>                 (0 + 1) / 1]
TPE using 14/14 trials with best loss 0.460257
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 24%|██▍       | 12/50 [07:17<24:56, 39.37s/trial, best loss: 0.4602567205211563]

build_posterior_wrapper took 0.001774 seconds
TPE using 15/15 trials with best loss 0.460257
Closing down clientserver connection                                            
[Stage 13:>                 (0 + 1) / 1][Stage 14:>                 (0 + 1) / 1]

 26%|██▌       | 13/50 [07:20<17:29, 28.36s/trial, best loss: 0.4602567205211563]

build_posterior_wrapper took 0.001899 seconds
TPE using 16/16 trials with best loss 0.460257
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            
[Stage 13:>                 (0 + 1) / 1][Stage 14:>                 (0 + 1) / 1]

 28%|██▊       | 14/50 [08:32<24:58, 41.64s/trial, best loss: 0.4602567205211563]

build_posterior_wrapper took 0.001725 seconds
TPE using 17/17 trials with best loss 0.460257
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 30%|███       | 15/50 [09:53<31:16, 53.62s/trial, best loss: 0.4602567205211563]

build_posterior_wrapper took 0.001700 seconds
TPE using 18/18 trials with best loss 0.460257
Closing down clientserver connection                                            


 32%|███▏      | 16/50 [09:54<21:24, 37.78s/trial, best loss: 0.4602567205211563]

build_posterior_wrapper took 0.010425 seconds                       (0 + 1) / 1]
TPE using 19/19 trials with best loss 0.460257
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 34%|███▍      | 17/50 [10:40<22:02, 40.06s/trial, best loss: 0.4586761366837478]

build_posterior_wrapper took 0.009186 secondse 18:>                 (0 + 1) / 1]
TPE using 20/20 trials with best loss 0.458676
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 36%|███▌      | 18/50 [11:09<19:38, 36.82s/trial, best loss: 0.4586761366837478]

Closing down clientserver connection                                            


 38%|███▊      | 19/50 [11:10<13:28, 26.07s/trial, best loss: 0.4586761366837478]

build_posterior_wrapper took 0.001723 seconds
TPE using 21/21 trials with best loss 0.458676
build_posterior_wrapper took 0.001826 seconds                       (0 + 1) / 1]
TPE using 22/22 trials with best loss 0.458676
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 40%|████      | 20/50 [12:12<18:21, 36.72s/trial, best loss: 0.4586761366837478]

build_posterior_wrapper took 0.001879 secondse 20:>                 (0 + 1) / 1]
TPE using 23/23 trials with best loss 0.458676
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            
[Stage 19:>                 (0 + 1) / 1][Stage 22:>                 (0 + 1) / 1]

 42%|████▏     | 21/50 [12:25<14:19, 29.64s/trial, best loss: 0.4586761366837478]

build_posterior_wrapper took 0.001849 seconds
TPE using 24/24 trials with best loss 0.458676
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 44%|████▍     | 22/50 [12:41<11:56, 25.59s/trial, best loss: 0.4586761366837478]

build_posterior_wrapper took 0.002026 secondse 23:>                 (0 + 1) / 1]
TPE using 25/25 trials with best loss 0.458676
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 46%|████▌     | 23/50 [13:25<14:03, 31.24s/trial, best loss: 0.4586761366837478]

build_posterior_wrapper took 0.001830 secondse 24:>                 (0 + 1) / 1]
TPE using 26/26 trials with best loss 0.458676
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            
[Stage 22:>                 (0 + 1) / 1][Stage 24:>                 (0 + 1) / 1]

 48%|████▊     | 24/50 [14:54<21:00, 48.49s/trial, best loss: 0.4586761366837478]

build_posterior_wrapper took 0.001807 seconds
TPE using 27/27 trials with best loss 0.458676
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            
[Stage 24:>                 (0 + 1) / 1][Stage 26:>                 (0 + 1) / 1]

 50%|█████     | 25/50 [16:11<23:43, 56.94s/trial, best loss: 0.4586761366837478]

build_posterior_wrapper took 0.001773 seconds
TPE using 28/28 trials with best loss 0.458676
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 52%|█████▏    | 26/50 [17:07<22:43, 56.80s/trial, best loss: 0.4586761366837478]

build_posterior_wrapper took 0.002014 secondse 26:>                 (0 + 1) / 1]
TPE using 29/29 trials with best loss 0.458676
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 54%|█████▍    | 27/50 [18:48<26:50, 70.02s/trial, best loss: 0.4549970172745504]

build_posterior_wrapper took 0.002214 secondse 28:>                 (0 + 1) / 1]
TPE using 30/30 trials with best loss 0.454997
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            
[Stage 26:>                 (0 + 1) / 1][Stage 29:>                 (0 + 1) / 1]

 56%|█████▌    | 28/50 [19:50<24:50, 67.77s/trial, best loss: 0.4549970172745504]

build_posterior_wrapper took 0.005203 seconds
TPE using 31/31 trials with best loss 0.454997
Closing down clientserver connection                                            


 58%|█████▊    | 29/50 [19:56<17:08, 48.96s/trial, best loss: 0.4549970172745504]

  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
build_posterior_wrapper took 0.001660 secondse 30:>                 (0 + 1) / 1]
TPE using 32/32 trials with best loss 0.454997
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 60%|██████    | 30/50 [20:52<17:04, 51.21s/trial, best loss: 0.4549970172745504]

build_posterior_wrapper took 0.001810 secondse 31:>                 (0 + 1) / 1]
TPE using 33/33 trials with best loss 0.454997
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 62%|██████▏   | 31/50 [23:22<25:37, 80.92s/trial, best loss: 0.4549970172745504]

build_posterior_wrapper took 0.002786 secondse 32:>                 (0 + 1) / 1]
TPE using 34/34 trials with best loss 0.454997
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            
[Stage 31:>                 (0 + 1) / 1][Stage 32:>                 (0 + 1) / 1]

 64%|██████▍   | 32/50 [24:47<24:37, 82.06s/trial, best loss: 0.4549970172745504]

build_posterior_wrapper took 0.002728 seconds
TPE using 35/35 trials with best loss 0.454997
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 66%|██████▌   | 33/50 [25:17<18:50, 66.52s/trial, best loss: 0.4549970172745504]

build_posterior_wrapper took 0.005930 seconds
TPE using 36/36 trials with best loss 0.454997
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 68%|██████▊   | 34/50 [25:59<15:44, 59.01s/trial, best loss: 0.4549970172745504]

build_posterior_wrapper took 0.003326 seconds
TPE using 37/37 trials with best loss 0.454997
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 70%|███████   | 35/50 [27:35<17:31, 70.07s/trial, best loss: 0.4549970172745504]

build_posterior_wrapper took 0.001801 secondse 35:>                 (0 + 1) / 1]
TPE using 38/38 trials with best loss 0.454997
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            
[Stage 35:>                 (0 + 1) / 1][Stage 37:>                 (0 + 1) / 1]

 72%|███████▏  | 36/50 [30:49<25:03, 107.42s/trial, best loss: 0.4549970172745504]

build_posterior_wrapper took 0.003467 seconds
TPE using 39/39 trials with best loss 0.454997
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 74%|███████▍  | 37/50 [31:05<17:20, 80.04s/trial, best loss: 0.4549970172745504] 

build_posterior_wrapper took 0.001941 secondse 38:>                 (0 + 1) / 1]
TPE using 40/40 trials with best loss 0.454997
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 76%|███████▌  | 38/50 [31:29<12:35, 62.99s/trial, best loss: 0.4549970172745504]

build_posterior_wrapper took 0.002204 secondse 39:>                 (0 + 1) / 1]
TPE using 41/41 trials with best loss 0.454997
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 78%|███████▊  | 39/50 [36:01<23:03, 125.74s/trial, best loss: 0.4549970172745504]

build_posterior_wrapper took 0.001759 seconds
TPE using 42/42 trials with best loss 0.454997
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            
[Stage 39:>                 (0 + 1) / 1][Stage 41:>                 (0 + 1) / 1]

 80%|████████  | 40/50 [36:46<16:56, 101.63s/trial, best loss: 0.4549970172745504]

build_posterior_wrapper took 0.005715 seconds
TPE using 43/43 trials with best loss 0.454997
Closing down clientserver connection                                            


 82%|████████▏ | 41/50 [36:49<10:48, 72.05s/trial, best loss: 0.4549970172745504] 

build_posterior_wrapper took 0.001620 secondse 42:>                 (0 + 1) / 1]
TPE using 44/44 trials with best loss 0.454997
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 84%|████████▍ | 42/50 [39:07<12:15, 91.89s/trial, best loss: 0.4549970172745504]

build_posterior_wrapper took 0.001889 secondse 42:>                 (0 + 1) / 1]
TPE using 45/45 trials with best loss 0.454997
Closing down clientserver connection                                            
[Stage 42:>                                                         (0 + 1) / 1]

 86%|████████▌ | 43/50 [39:10<07:36, 65.23s/trial, best loss: 0.4549970172745504]

build_posterior_wrapper took 0.001672 seconds
TPE using 46/46 trials with best loss 0.454997
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 88%|████████▊ | 44/50 [41:27<08:40, 86.81s/trial, best loss: 0.4549970172745504]

build_posterior_wrapper took 0.001856 secondse 45:>                 (0 + 1) / 1]
TPE using 47/47 trials with best loss 0.454997
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 90%|█████████ | 45/50 [43:27<08:02, 96.49s/trial, best loss: 0.4549970172745504]

build_posterior_wrapper took 0.002010 secondse 45:>                 (0 + 1) / 1]
TPE using 48/48 trials with best loss 0.454997
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            
[Stage 44:>                 (0 + 1) / 1][Stage 47:>                 (0 + 1) / 1]

 92%|█████████▏| 46/50 [44:58<06:20, 95.08s/trial, best loss: 0.4549970172745504]

build_posterior_wrapper took 0.002108 seconds
TPE using 49/49 trials with best loss 0.454997
  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            
[Stage 44:>                 (0 + 1) / 1][Stage 47:>                 (0 + 1) / 1]

 94%|█████████▍| 47/50 [46:59<05:08, 102.88s/trial, best loss: 0.4549970172745504]

  string_columns = trimmed_df.columns[(df.applymap(type) == str).all(0)]
Closing down clientserver connection                                            


 96%|█████████▌| 48/50 [49:39<03:59, 119.82s/trial, best loss: 0.4549970172745504]

Closing down clientserver connection                                            


 98%|█████████▊| 49/50 [50:08<01:32, 92.66s/trial, best loss: 0.4500801935471009] 

Closing down clientserver connection                                            


100%|██████████| 50/50 [51:38<00:00, 61.97s/trial, best loss: 0.4500801935471009]

Queue empty, exiting run.





Closing down clientserver connection
Total Trials: 50: 50 succeeded, 0 failed, 0 cancelled.
Total Trials: 50: 50 succeeded, 0 failed, 0 cancelled.


{'boosting_type': 1,
 'colsample_bytree': 0.8903844711073379,
 'feature_fraction': 0.6538208911205116,
 'lambda_l1': 2.6061090788957655,
 'lambda_l2': 3.5614591431149867,
 'learning_rate': 0.1280990621940118,
 'max_depth': 9.0,
 'min_child_samples': 401.0,
 'min_child_weight': 0.0636944166777571,
 'min_split_gain': 0.2757051791113606,
 'n_estimators': 924.0,
 'num_leaves': 119.0,
 'reg_alpha': 0.7691782560727092,
 'reg_lambda': 0.25396168261862184,
 'subsample': 0.5745954998267446}