In [1]:
import sys
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import optuna
import xgboost as xgb
import lightgbm as lgbm
import statistics
from sklearn.linear_model import Ridge
from sklearn import model_selection
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler, PowerTransformer
from functools import partial
from openfe import OpenFE, transform
import warnings

warnings.filterwarnings('ignore')


In [2]:
sys.path.append(os.path.abspath("/home/bk_anupam/code/ML/ML_UTILS/"))

In [3]:
import train_tabular_utils as tt
import cv_split_utils
import enums
import data_utils

In [4]:
class Config:
    RUN_MODE = "LOCAL"
    RANDOM_SEED = 42
    NUM_FOLDS = 5
    TARGET_COL_NAME = "Rings"    
    SKEW_THRESHOLD = 0.5
    EARLY_STOPPING = 500
    RESULTS_FILE = "model_execution_results.pkl"
    MODEL_TYPE = enums.ModelName.XGBoost
    REMOVE_OUTLIERS = True
    POWER_TRANSFORM = False
    NORMALIZE_DATA = True
    SCALER = enums.Scaler.StandardScaler
    METRIC = enums.Metrics.RMSLE
    NUM_TUNING_TRIALS = 25
    TUNE_ON_SINGLE_FOLD = True
    GENERATE_AUTO_FEATURES = True
    NUM_NEW_FEATURES = 20

COLS_TO_LEAVE = ["Rings", "kfold"]
CPU_COUNT = os.cpu_count()

DATA_READPATH = "./data/"
DATA_WRITEPATH = "./data/"
if Config.RUN_MODE == "KAGGLE":
    DATA_READPATH = "/kaggle/input/playground-series-s4e4/"
    DATA_WRITEPATH = "/kaggle/working/"

In [5]:
# import train dataset locally from data folder
df_train = pd.read_csv(DATA_READPATH + "train.csv")
# import test dataset locally from data folder
df_test = pd.read_csv(DATA_READPATH + "test.csv")
# drop id column
df_train = df_train.drop("id", axis=1)
df_test = df_test.drop("id", axis=1)
# keep a copy of original train and test data for later use
df_train_orig = df_train.copy()
df_test_orig = df_test.copy()

In [6]:
feature_cols_for_fe = [x for x in df_train.columns if x not in COLS_TO_LEAVE]

In [7]:
def generate_new_features(df_train, df_test, feature_cols, NUM_NEW_FEATURES=10):
    train_X = df_train[feature_cols] 
    test_X = df_test[feature_cols]   
    train_y = df_train[Config.TARGET_COL_NAME]
    ofe = OpenFE()
    features = ofe.fit(data=train_X, label=train_y, n_jobs=CPU_COUNT, verbose=False)  # generate new features    
    # OpenFE recommends a list of new features. We include the top 10
    # generated features to see how they influence the model performance
    train_X, test_X = transform(train_X, test_X, ofe.new_features_list[:NUM_NEW_FEATURES], n_jobs=CPU_COUNT)        
    return train_X, test_X, features

In [8]:
df_train, df_test, new_features = generate_new_features(df_train, df_test, feature_cols_for_fe, Config.NUM_NEW_FEATURES)  
df_train_labels = df_train_orig[[Config.TARGET_COL_NAME]]
# Add the label data to the dataframe
df_train = pd.concat([df_train, df_train_labels], axis=1)
# save the new train and test data with openfe features to csv files for later use
df_train.to_csv(DATA_WRITEPATH + "train_openfe.csv", index=False)
df_test.to_csv(DATA_WRITEPATH + "test_openfe.csv", index=False)

  0%|          | 0/30 [00:00<?, ?it/s]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000155 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 125
[LightGBM] [Info] Number of data points in the train set: 9061, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000170 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 9061, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000183 seconds.
You can set `force_col_wise=true` to remove the overhead.[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000144 seconds.
You can set `force_col_wise=true` to remove the overhead.

[LightGBM] [Info] Total Bins 255[LightGBM] [Info] Total Bins 47
[LightGBM] [Info] Number of data points 

  3%|▎         | 1/30 [00:02<01:06,  2.29s/it]






































You can set `force_col_wise=true` to remove the overhead.








[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000180 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 9061, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000155 seconds.

You can set `force_col_wise=true` to remove the overhead.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000168 seconds.
You can set `force_col_wise=true` to remove the overhead.[LightGBM] [Info] Number of data points in the train set: 9061, number of used features: 1

[LightGBM] [Info] Total Bins 255

[LightGBM] [Info] Total Bins 4









You can set `force_col_wise=true` to remove the overhead.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, t

 10%|█         | 3/30 [00:03<00:24,  1.08it/s]



You can set `force_col_wise=true` to remove the overhead.






[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000206 seconds.


[LightGBM] [Info] Total Bins 254


[LightGBM] [Info] Number of data points in the train set: 9061, number of used features: 1

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000182 seconds.


















You can set `force_col_wise=true` to remove the overhead.









You can set `force_col_wise=true` to remove the overhead.




















[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000161 seconds.









[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000160 seconds.
















[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000159 seconds.








 13%|█▎        | 4/30 [00:03<00:22,  1.17it/s]








 17%|█▋        | 5/30 [00:04<00:15,  1.61it/s]

























[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000157 seconds.
You can set `force_col_wise=true` to remove the overhead.[LightGBM] [Info] Number of data points in the train set: 9061, number of used features: 0



[LightGBM] [Info] Number of data points in the train set: 9061, number of used features: 1







[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000164 seconds.
You can set `force_col_wise=true` to remove the overhead.[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000148 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255

[LightGBM] [Info] Number of data points in the train set: 9061, number of used features: 1



[LightGBM] [Info] Number of data points in the train set: 9061, number of used features: 1


You can set `force_col_wise=true` to remove the overhead.


[LightGBM] [Info] A

 20%|██        | 6/30 [00:05<00:18,  1.31it/s]

[LightGBM] [Info] Total Bins 46

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000173 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 45[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000169 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000162 seconds.
You can set `force_col_wise=true` to remove the overhead.

[LightGBM] [Info] Total Bins 4
[LightGBM] [Info] Number of data points in the train set: 9061, number of used features: 1



[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000484 seconds.

[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 9061, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000179 

 23%|██▎       | 7/30 [00:05<00:13,  1.67it/s]


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000065 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 9061, number of used features: 1


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000206 seconds.
You can set `force_col_wise=true` to remove the overhead.[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000165 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 87

[LightGBM] [Info] Total Bins 100


You can set `force_col_wise=true` to remove the overhead.



 27%|██▋       | 8/30 [00:05<00:10,  2.15it/s]


[LightGBM] [Info] Number of data points in the train set: 9061, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000154 seconds.

[LightGBM] [Info] Number of data points in the train set: 9061, number of used features: 1[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000203 seconds.
You can set `force_col_wise=true` to remove the overhead.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002961 seconds.
You can set `force_col_wise=true` to remove the overhead.

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000152 seconds.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000205 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Number of data points in the train set: 9061, number of used features: 1
[LightGBM] [Info] Total Bin

 30%|███       | 9/30 [00:05<00:09,  2.23it/s]


You can set `force_row_wise=true` to remove the overhead.

[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000238 seconds.
You can set `force_col_wise=true` to remove the overhead.


[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 9061, number of used features: 0




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.


You can set `force_col_wise=true` to remove the overhead.




[LightGBM] [Info] Total Bins 4
[LightGBM] [Info] Number of data points in the train set: 9061, number of used features: 1


 33%|███▎      | 10/30 [00:06<00:07,  2.71it/s]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003760 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000135 seconds.
You can set `force_col_wise=true` to remove the overhead.








You can set `force_col_wise=true` to remove the overhead.





[LightGBM] [Info] Total Bins 255








[LightGBM] [Info] Number of data points in the train set: 9061, number of used features: 1














You can set `force_col_wise=true` to remove the overhead.







































You can set `force_col_wise=true` to remove the overhead.[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000183 seconds.
You can set `force_col_wise=true` to remove the overhead.




[LightGBM] [Info] Total Bins 4































[LightGBM] [Info] Number of data points in the train set: 9061, number of us

 37%|███▋      | 11/30 [00:08<00:19,  1.05s/it]




[LightGBM] [Info] Number of data points in the train set: 9061, number of used features: 1












You can set `force_col_wise=true` to remove the overhead.


[LightGBM] [Info] Total Bins 255










You can set `force_col_wise=true` to remove the overhead.

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000152 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255




[LightGBM] [Info] Number of data points in the train set: 9061, number of used features: 1



[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000172 seconds.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000149 seconds.

[LightGBM] [Info] Total Bins 255[LightGBM] [Info] Total Bins 4


[LightGBM] [Info] Number of data points in the train set: 9061, number of used features: 1



[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead 

 40%|████      | 12/30 [00:09<00:16,  1.07it/s]


[LightGBM] [Info] Total Bins 125




[LightGBM] [Info] Total Bins 255














[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000204 seconds.




















 43%|████▎     | 13/30 [00:09<00:12,  1.38it/s]








[LightGBM] [Info] Total Bins 175








You can set `force_col_wise=true` to remove the overhead.


















[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000198 seconds.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000191 seconds.
You can set `force_col_wise=true` to remove the overhead.[LightGBM] [Info] Total Bins 45

[LightGBM] [Info] Number of data points in the train set: 9061, number of used features: 1



















[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000177 seconds.



[LightGBM] [Info] Number of data points in the train set: 9061, number of used features: 1












You can set `force_col_wise=true` to remove the overhead.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000160 seconds.




You can set `force_col_wise=true` to remove the overhead.[LightGBM] [Info] Total Bins 10

 47%|████▋     | 14/30 [00:10<00:12,  1.28it/s]










You can set `force_col_wise=true` to remove the overhead.

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000166 seconds.
You can set `force_col_wise=true` to remove the overhead.[LightGBM] [Info] Total Bins 125

[LightGBM] [Info] Number of data points in the train set: 9061, number of used features: 1[LightGBM] [Info] Total Bins 255




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000316 seconds.
You can set `force_col_wise=true` to remove the overhead.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000172 seconds.

[LightGBM] [Info] Total Bins 255

[LightGBM] [Info] Number of data points in the train set: 9061, number of used features: 1

You can set `force_col_wise=true` to remove the overhead.



[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000192 seconds.
You can set `force_col_wise=true` to remove the ov

 50%|█████     | 15/30 [00:11<00:12,  1.18it/s]

























You can set `force_col_wise=true` to remove the overhead.






[LightGBM] [Info] Total Bins 4
































[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000084 seconds.
You can set `force_row_wise=true` to remove the overhead.




[LightGBM] [Info] Total Bins 4


































































[LightGBM] [Info] Total Bins 46

[LightGBM] [Info] Number of data points in the train set: 9061, number of used features: 1[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000187 seconds.









































[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000169 seconds.
You can set `force_col_wise=true` to remove the overhead.












You can set `force_col_wise=true` to remove the overhead.





[LightGBM] [Info] Total Bins 46













[LightGBM] [Info] Number of data poi

 57%|█████▋    | 17/30 [00:12<00:10,  1.28it/s]



You can set `force_col_wise=true` to remove the overhead.

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000162 seconds.
You can set `force_col_wise=true` to remove the overhead.
You can set `force_col_wise=true` to remove the overhead.

[LightGBM] [Info] Total Bins 255

[LightGBM] [Info] Number of data points in the train set: 9061, number of used features: 1




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000151 seconds.

[LightGBM] [Info] Total Bins 46






 60%|██████    | 18/30 [00:13<00:07,  1.58it/s]




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000126 seconds.
You can set `force_row_wise=true` to remove the overhead.



[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000406 seconds.
You can set `force_col_wise=true` to remove the overhead.

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4





[LightGBM] [Info] Number of data points in the train set: 9061, number of used features: 1[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000259 seconds.
[LightGBM] [Info] Total Bins 255





[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000263 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255





[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000268 seconds.
You can set `force_col_wise=true` to re

 63%|██████▎   | 19/30 [00:13<00:06,  1.61it/s]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000176 seconds.
You can set `force_col_wise=true` to remove the overhead.



[LightGBM] [Info] Total Bins 255





[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000163 seconds.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000201 seconds.

[LightGBM] [Info] Number of data points in the train set: 9061, number of used features: 1

You can set `force_col_wise=true` to remove the overhead.



You can set `force_col_wise=true` to remove the overhead.



[LightGBM] [Info] Number of data points in the train set: 9061, number of used features: 1




You can set `force_col_wise=true` to remove the overhead.[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000174 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=

 67%|██████▋   | 20/30 [00:14<00:05,  1.67it/s]












[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000223 seconds.
You can set `force_col_wise=true` to remove the overhead.







You can set `force_col_wise=true` to remove the overhead.













 70%|███████   | 21/30 [00:14<00:04,  2.02it/s]































You can set `force_col_wise=true` to remove the overhead.


[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 9061, number of used features: 1

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000200 seconds.
You can set `force_col_wise=true` to remove the overhead.


[LightGBM] [Info] Number of data points in the train set: 9061, number of used features: 1






[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000172 seconds.

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000150 seconds.



You can set `force_col_wise=true` to remove the overhead.[LightGBM] [Info] Total Bins 255





[LightGBM] [Info] Number of data points in the train set: 9061, number of used features: 1







 73%|███████▎  | 22/30 [00:15<00:03,  2.03it/s]





























[LightGBM] [Info] Total Bins 252










[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000051 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.


[LightGBM] [Info] Total Bins 4







[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000158 seconds.



[LightGBM] [Info] Total Bins 255






You can set `force_col_wise=true` to remove the overhead.






[LightGBM] [Info] Total Bins 96
[LightGBM] [Info] Number of data points in the train set: 9061, number of used features: 1





[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000235 seconds.


[LightGBM] [Info] Total Bins 255[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000368 seconds.








You can set `force_col_wise=true` to remove the overhead.




[LightGBM] [I

 77%|███████▋  | 23/30 [00:15<00:03,  2.07it/s]








You can set `force_col_wise=true` to remove the overhead.


















You can set `force_col_wise=true` to remove the overhead.


[LightGBM] [Info] Number of data points in the train set: 9061, number of used features: 1







[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000156 seconds.




















[LightGBM] [Info] Total Bins 255


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000229 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 34
[LightGBM] [Info] Number of data points in the train set: 9061, number of used features: 1


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000148 seconds.

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000244 seconds.
You can set `force_col_wise=true` to remove the overhead.[LightGBM] [Info] Auto-choosing col-wise multi-

 80%|████████  | 24/30 [00:16<00:03,  1.70it/s]


[LightGBM] [Info] Total Bins 255

You can set `force_col_wise=true` to remove the overhead.

[LightGBM] [Info] Total Bins 3








[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000354 seconds.







You can set `force_col_wise=true` to remove the overhead.





























You can set `force_col_wise=true` to remove the overhead.































[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000173 seconds.
[LightGBM] [Info] Total Bins 125
























[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001488 seconds.
You can set `force_col_wise=true` to remove the overhead.



[LightGBM] [Info] Total Bins 255

[LightGBM] [Info] Number of data points in the train set: 9061, number of used features: 1






















You can set `force_col_wise=true` to remove the overhead.














































 87%|████████▋ | 26/30 [00:17<00:02,  1.97it/s]
















[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000088 seconds.

[LightGBM] [Info] Total Bins 255




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000094 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4
[LightGBM] [Info] Number of data points in the train set: 9061, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000088 seconds.



[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000180 seconds.
You can set `force_col_wise=true` to remove the overhead.


You can set `force_col_wise=true` to remove the overhead.


[LightGBM] [Info] Number of data points in the train set: 9061, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000178 seconds.





 93%|█████████▎| 28/30 [00:17<00:00,  2.38it/s]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000126 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 9061, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000092 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4
[LightGBM] [Info] Number of data points in the train set: 9061, number of used features: 1


100%|██████████| 30/30 [00:18<00:00,  1.66it/s]
  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001097 seconds.
You can set `force_col_wise=true` to remove the overhead.[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001266 seconds.
You can set `force_col_wise=true` to remove the overhead.

[LightGBM] [Info] Total Bins 180[LightGBM] [Info] Total Bins 255

[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002097 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001422 seconds.
You can set `force_col_wise=true` t

  3%|▎         | 1/32 [00:03<01:53,  3.66s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001234 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 58
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001193 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001275 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001197 seconds.
You can set `force_col_wise=true` to

  6%|▋         | 2/32 [00:06<01:33,  3.11s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001383 seconds.

[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002204 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001257 seconds.
You can set `force_col_wise=true` to remove the overhead.[LightGBM] [Info] Total Bins 151
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1
[LightGBM] [Info] Total Bins 255

[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001143 seconds.
You can set `force_col_wise=true` to remove the overhead.

[LightGBM] [Info] Total Bins 112
[

 12%|█▎        | 4/32 [00:07<00:39,  1.41s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001192 seconds.
You can set `force_col_wise=true` to remove the overhead.[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001214 seconds.
You can set `force_col_wise=true` to remove the overhead.

[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001179 seconds.
You can set `force_col_wise=true` to remove the overhead.[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1

[LightGBM] [Info] Total Bins 255[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1

[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001218 seconds.
You can set `force_col_wise=true` t

 16%|█▌        | 5/32 [00:08<00:35,  1.30s/it]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001344 seconds.
You can set `force_col_wise=true` to remove the overhead.[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001082 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255

[LightGBM] [Info] Total Bins 42
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001322 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001675 seconds.
You can set `force_col_wise=true` t

 19%|█▉        | 6/32 [00:09<00:35,  1.38s/it]

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000754 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.

[LightGBM] [Info] Total Bins 255[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1


 22%|██▏       | 7/32 [00:10<00:26,  1.08s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001420 seconds.
You can set `force_col_wise=true` to remove the overhead.

[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001403 seconds.
You can set `force_col_wise=true` to remove the overhead.


 25%|██▌       | 8/32 [00:10<00:19,  1.26it/s]

[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1


 28%|██▊       | 9/32 [00:10<00:14,  1.55it/s]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001637 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001165 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001201 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 56
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1
[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001297 seconds.
You can set `force_col_wise=true` to

 31%|███▏      | 10/32 [00:11<00:18,  1.17it/s]


[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001223 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 56
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001858 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 190
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001115 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255[LightGBM] [Info] Auto-choosing col-wise multi-t

 34%|███▍      | 11/32 [00:13<00:25,  1.19s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001463 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001130 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001319 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002194 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Number of data points in the train set: 72492, numbe

 41%|████      | 13/32 [00:15<00:20,  1.07s/it]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001538 seconds.

[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001378 seconds.
You can set `force_col_wise=true` to remove the overhead.

You can set `force_col_wise=true` to remove the overhead.[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.012942 seconds.
You can set `force_col_wise=true` to remove the overhead.[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1

[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1









 44%|████▍     | 14/32 [00:17<00:20,  1.15s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001407 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255



[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 0


 47%|████▋     | 15/32 [00:17<00:16,  1.06it/s]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005595 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 45
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001536 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004833 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001463 seconds.
You can set `force_col_wise=true` to

 50%|█████     | 16/32 [00:20<00:24,  1.53s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001401 seconds.
You can set `force_col_wise=true` to remove the overhead.[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001303 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 54

[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1
[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001296 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 48
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001314 seconds.
You can set `force_col_wise=true` to 

 53%|█████▎    | 17/32 [00:21<00:18,  1.25s/it]

[LightGBM] [Info] Total Bins 255

[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002622 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001431 seconds.
You can set `force_col_wise=true` to remove the overhead.

 56%|█████▋    | 18/32 [00:22<00:15,  1.13s/it]


[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1[LightGBM] [Info] Total Bins 255

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001384 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 230
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001351 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001236 seconds.
You can set `force_col_wise=true` to remove the overhead.[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001290 seconds.
You can set `force_col_wise=true` t

 59%|█████▉    | 19/32 [00:23<00:14,  1.11s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001408 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002873 seconds.
You can set `force_col_wise=true` to remove the overhead.[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001259 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 50
[LightGBM] [Info] Total Bins 47

[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001441 seconds.
You can set `force_col_wise=true` to r

 62%|██████▎   | 20/32 [00:24<00:12,  1.06s/it]


[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001478 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1


 66%|██████▌   | 21/32 [00:24<00:09,  1.19it/s]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001189 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 225
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1


 69%|██████▉   | 22/32 [00:24<00:07,  1.42it/s]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008979 seconds.
You can set `force_col_wise=true` to remove the overhead.[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001185 seconds.
You can set `force_col_wise=true` to remove the overhead.


[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001380 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 55
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001406 seconds.
You can set `force_col_wise=true` to

 72%|███████▏  | 23/32 [00:25<00:07,  1.23it/s]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003806 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 112
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002158 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001214 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002660 seconds.
You can set `force_col_wise=true` t

 75%|███████▌  | 24/32 [00:29<00:12,  1.59s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001495 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1


 78%|███████▊  | 25/32 [00:29<00:08,  1.19s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001244 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1


 81%|████████▏ | 26/32 [00:29<00:05,  1.08it/s]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001405 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004802 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 52
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001978 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1


 84%|████████▍ | 27/32 [00:30<00:04,  1.17it/s]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006939 seconds.
You can set `force_col_wise=true` to remove the overhead.



[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002072 seconds.



[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001354 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001487 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 49
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the 

 88%|████████▊ | 28/32 [00:31<00:04,  1.03s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001077 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1
You can set `force_col_wise=true` to remove the overhead.

[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001143 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 232
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1



 91%|█████████ | 29/32 [00:32<00:02,  1.22it/s]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001777 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001397 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000647 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1


 94%|█████████▍| 30/32 [00:33<00:01,  1.15it/s]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000775 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 234
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000611 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 58
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1


 97%|█████████▋| 31/32 [00:34<00:00,  1.08it/s]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000663 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 56
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000627 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 255
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 1


100%|██████████| 32/32 [00:35<00:00,  1.12s/it]
100%|██████████| 32/32 [00:17<00:00,  1.87it/s]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.053437 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 38955
[LightGBM] [Info] Number of data points in the train set: 72492, number of used features: 195


In [9]:
print(f"Number of new features = {len(new_features)}")  
print(new_features)

Number of new features = 188
[<openfe.FeatureGenerator.Node object at 0x7f8958e71d30>, <openfe.FeatureGenerator.Node object at 0x7f8958e88040>, <openfe.FeatureGenerator.Node object at 0x7f8958e77580>, <openfe.FeatureGenerator.Node object at 0x7f8958e8a820>, <openfe.FeatureGenerator.Node object at 0x7f8958e75f70>, <openfe.FeatureGenerator.Node object at 0x7f8958e88100>, <openfe.FeatureGenerator.Node object at 0x7f8958e7e6a0>, <openfe.FeatureGenerator.Node object at 0x7f8958e8a190>, <openfe.FeatureGenerator.Node object at 0x7f8951f7fd30>, <openfe.FeatureGenerator.Node object at 0x7f8958e8a610>, <openfe.FeatureGenerator.Node object at 0x7f8951feaf40>, <openfe.FeatureGenerator.Node object at 0x7f8951f8c8b0>, <openfe.FeatureGenerator.Node object at 0x7f8958e7cdf0>, <openfe.FeatureGenerator.Node object at 0x7f8958061160>, <openfe.FeatureGenerator.Node object at 0x7f8958061d90>, <openfe.FeatureGenerator.Node object at 0x7f8958e8a8e0>, <openfe.FeatureGenerator.Node object at 0x7f8958e77730>, <

In [10]:
df_train.head()

Unnamed: 0,Sex,Length,Diameter,Height,Whole weight,Whole weight.1,Whole weight.2,Shell weight,autoFE_f_0,autoFE_f_1,...,autoFE_f_11,autoFE_f_12,autoFE_f_13,autoFE_f_14,autoFE_f_15,autoFE_f_16,autoFE_f_17,autoFE_f_18,autoFE_f_19,Rings
0,F,0.55,0.43,0.15,0.7715,0.3285,0.1465,0.24,2.291667,1.36875,...,-0.0935,0.24,0.575,3.666667,0.557356,0.175106,5.266212,0.7715,0.24918,11
1,F,0.63,0.49,0.145,1.13,0.458,0.2765,0.32,1.96875,1.43125,...,-0.0435,0.32,0.585,4.344828,0.433628,0.175106,4.086799,0.13,0.567989,11
2,I,0.16,0.11,0.025,0.021,0.0055,0.003,0.005,32.0,1.1,...,-0.002,0.025,0.185,6.4,5.238095,0.117992,7.0,0.021,0.003235,6
3,M,0.595,0.475,0.15,0.9145,0.3755,0.2055,0.25,2.38,1.502,...,-0.0445,0.25,0.575,3.966667,0.51941,0.184416,4.450122,0.9145,0.416416,10
4,I,0.555,0.425,0.13,0.782,0.3695,0.16,0.1975,2.810127,1.870886,...,-0.0375,0.1975,0.585,4.269231,0.543478,0.117992,4.8875,0.782,0.937733,9
