In [1]:
import pandas as pd
import numpy as np

import xgboost as xgb
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import roc_auc_score
import optuna
from lifelines.utils import concordance_index

import matplotlib.pyplot as plt

2023-05-15 04:17:25,346 - numexpr.utils - INFO - NumExpr defaulting to 8 threads.


In [2]:
SEED = 69

import os
os.environ['PYTHONHASHSEED']=str(SEED)

import random
random.seed(SEED)

import numpy as np
np.random.seed(SEED)

In [3]:
data = pd.read_parquet("data/data_with_feats_4xgb.parquet.gzip")

In [4]:
target_cols = [col for col in data.columns if "target" in col] + ['term']
target_cols

['target', 'target_month', 'target_xgb', 'term']

In [5]:
features = ["annual_inc", "loan_amnt", "fico_average", "emp_length", "acc_open_past_24mths", "avg_cur_bal" , "sub_grade", "int_rate", "revol_util"]

features += [col for col in data.columns if "home_ownership_ohe_" in col]

data_used_cols = data[features + target_cols].dropna()
# assert data.shape[0] == data_used_cols.shape[0]

features

['annual_inc',
 'loan_amnt',
 'fico_average',
 'emp_length',
 'acc_open_past_24mths',
 'avg_cur_bal',
 'sub_grade',
 'int_rate',
 'revol_util',
 'home_ownership_ohe_ANY',
 'home_ownership_ohe_MORTGAGE',
 'home_ownership_ohe_NONE',
 'home_ownership_ohe_OTHER',
 'home_ownership_ohe_OWN',
 'home_ownership_ohe_RENT']

In [6]:
data["target_xgb"].value_counts()

0    1723625
1     133750
Name: target_xgb, dtype: int64

In [7]:
data_true = data_used_cols.loc[data_used_cols["target_xgb"] == 1]
data_false = data_used_cols.loc[data_used_cols["target_xgb"] == 0]

In [8]:
X, y = data_false[features], data_false[target_cols]

X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size=0.6, random_state=SEED)

In [9]:
X_train_val = pd.concat([X_train_val, data_true[features]], axis=0)
y_train_val = pd.concat([y_train_val, data_true[target_cols]], axis=0)

In [10]:
(y_train_val["target_xgb"] == 1).sum()

130489

# Predict

In [11]:
def get_proba_for_month(proba, n_months):
    # PDn = 1 - (1-pd12)^(n/12)

    return 1 - (1 - proba) ** (n_months / 12)
    

In [12]:
def objective(trial):

    VERBOSE_PLOTS = False
    skf = StratifiedKFold(n_splits=5, shuffle=True)

    params = {
        'booster': 'gbtree',
        'objective': 'binary:logistic',
        'eval_metric': 'auc',
        'n_estimators': 5000,
        'subsample': trial.suggest_float('subsample', 0.25, 0.7, log=True),
        'min_child_weight': 30,
        'early_stopping_rounds': 10,
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.2, log=True),
        'max_depth': trial.suggest_int('max_depth', 3, 10, log=True),
        'seed': SEED,
    }

    # Train and test model on each fold
    metrics = []
    for train_index, val_index in list(skf.split(X_train_val, y_train_val["target_xgb"]))[:3]:
        X_train, X_val = X_train_val.iloc[train_index].copy(), X_train_val.iloc[val_index].copy()
        y_train, y_val = y_train_val.iloc[train_index].copy(), y_train_val.iloc[val_index].copy()
        
        # Train model
        params['scale_pos_weight'] = (y_train["target_xgb"] == 0).sum() / (y_train["target_xgb"] == 1).sum()

        model = xgb.XGBClassifier(**params)
        model.fit(
            X_train, y_train["target_xgb"], 
            eval_set=[(X_train, y_train["target_xgb"]), (X_val, y_val["target_xgb"])],
            verbose=False
        )

        if VERBOSE_PLOTS == True:
            print(params, features)
            results = model.evals_result()
            epochs = len(results['validation_0']['auc'])
            x_axis = range(0, epochs)
            fig, ax = plt.subplots()
            ax.plot(x_axis, results['validation_0']['auc'], label='Train')
            ax.plot(x_axis, results['validation_1']['auc'], label='Test')
            ax.legend()
            plt.ylabel('AUC')
            plt.title('XGBoost AUC')
            plt.show()
    
        # Test model
        
        y_val['y_pred'] = model.predict_proba(X_val)[:,1]
        y_val['prob_for_month'] = y_val.apply(lambda x: get_proba_for_month(x.y_pred, x.term), axis=1)

        c_ind = concordance_index(
            event_times=y_val["target_month"], predicted_scores=-y_val['prob_for_month'], event_observed=y_val['target']
        )

        # print("C_INDEX", c_ind)
        # metric = roc_auc_score(y_val["target_xgb"], y_val['y_pred'])
        metrics.append(
            c_ind
        )
        print(
            "C_INDEX", round(
                c_ind, 5
            )
        )

        # if len(metrics) == 2 and max(metrics) < 0.72:
        #     break
        
    # print(
    #     f"{round(np.mean(metrics), 5)} +- {round(np.std(metrics), 5)}"
    # )

    return np.mean(metrics)

In [13]:
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100)

[32m[I 2023-05-15 04:17:30,587][0m A new study created in memory with name: no-name-977483b6-0a0d-44f4-8e4d-5b3d75574cee[0m


C_INDEX 0.68715
C_INDEX 0.68629


[32m[I 2023-05-15 04:19:46,333][0m Trial 0 finished with value: 0.6871648349953875 and parameters: {'subsample': 0.2627331936320325, 'learning_rate': 0.1524645552177892, 'max_depth': 7}. Best is trial 0 with value: 0.6871648349953875.[0m


C_INDEX 0.68805
C_INDEX 0.68926
C_INDEX 0.68725


[32m[I 2023-05-15 04:27:02,155][0m Trial 1 finished with value: 0.6887082608591736 and parameters: {'subsample': 0.3649299538199618, 'learning_rate': 0.07692002043266608, 'max_depth': 4}. Best is trial 1 with value: 0.6887082608591736.[0m


C_INDEX 0.68962
C_INDEX 0.69115
C_INDEX 0.68921


[32m[I 2023-05-15 04:35:35,653][0m Trial 2 finished with value: 0.6903432490429346 and parameters: {'subsample': 0.5115568993050522, 'learning_rate': 0.04625704018497025, 'max_depth': 8}. Best is trial 2 with value: 0.6903432490429346.[0m


C_INDEX 0.69067
C_INDEX 0.6898
C_INDEX 0.6885


[32m[I 2023-05-15 04:38:51,184][0m Trial 3 finished with value: 0.6891458098271775 and parameters: {'subsample': 0.2845833754063056, 'learning_rate': 0.10278200310039085, 'max_depth': 6}. Best is trial 2 with value: 0.6903432490429346.[0m


C_INDEX 0.68914
C_INDEX 0.69117
C_INDEX 0.69006


[32m[I 2023-05-15 05:08:51,191][0m Trial 4 finished with value: 0.6906842876645779 and parameters: {'subsample': 0.5591171202159539, 'learning_rate': 0.016354938592165642, 'max_depth': 5}. Best is trial 4 with value: 0.6906842876645779.[0m


C_INDEX 0.69082
C_INDEX 0.68875
C_INDEX 0.68662


[32m[I 2023-05-15 05:28:07,462][0m Trial 5 finished with value: 0.6879759578209663 and parameters: {'subsample': 0.39993232332101647, 'learning_rate': 0.030639417316651443, 'max_depth': 3}. Best is trial 4 with value: 0.6906842876645779.[0m


C_INDEX 0.68856
C_INDEX 0.68893
C_INDEX 0.68948


[32m[I 2023-05-15 06:01:30,606][0m Trial 6 finished with value: 0.6889830825282509 and parameters: {'subsample': 0.6021262765516662, 'learning_rate': 0.014356550896528342, 'max_depth': 4}. Best is trial 4 with value: 0.6906842876645779.[0m


C_INDEX 0.68853
C_INDEX 0.68971
C_INDEX 0.68854


[32m[I 2023-05-15 06:40:00,388][0m Trial 7 finished with value: 0.6898611809526342 and parameters: {'subsample': 0.6467714378615524, 'learning_rate': 0.02004270616146569, 'max_depth': 3}. Best is trial 4 with value: 0.6906842876645779.[0m


C_INDEX 0.69134
C_INDEX 0.68981
C_INDEX 0.68941


[32m[I 2023-05-15 06:55:42,137][0m Trial 8 finished with value: 0.6897295198662209 and parameters: {'subsample': 0.25632001068523025, 'learning_rate': 0.018708780632461865, 'max_depth': 7}. Best is trial 4 with value: 0.6906842876645779.[0m


C_INDEX 0.68997
C_INDEX 0.68991
C_INDEX 0.69031


[32m[I 2023-05-15 07:11:40,802][0m Trial 9 finished with value: 0.6899193610647704 and parameters: {'subsample': 0.338043599713452, 'learning_rate': 0.026091768726815925, 'max_depth': 5}. Best is trial 4 with value: 0.6906842876645779.[0m


C_INDEX 0.68954
C_INDEX 0.68959
C_INDEX 0.69304


[32m[I 2023-05-15 07:35:19,742][0m Trial 10 finished with value: 0.6910000866549075 and parameters: {'subsample': 0.4981527623116705, 'learning_rate': 0.010140173080653486, 'max_depth': 10}. Best is trial 10 with value: 0.6910000866549075.[0m


C_INDEX 0.69038
C_INDEX 0.6929
C_INDEX 0.6901


[32m[I 2023-05-15 08:00:06,448][0m Trial 11 finished with value: 0.6912821586556568 and parameters: {'subsample': 0.49713000281165315, 'learning_rate': 0.011137678848817107, 'max_depth': 10}. Best is trial 11 with value: 0.6912821586556568.[0m


C_INDEX 0.69085
C_INDEX 0.69177
C_INDEX 0.69309


[32m[I 2023-05-15 08:24:01,652][0m Trial 12 finished with value: 0.6920630744533852 and parameters: {'subsample': 0.4822178498359528, 'learning_rate': 0.011705757185608078, 'max_depth': 10}. Best is trial 12 with value: 0.6920630744533852.[0m


C_INDEX 0.69133
C_INDEX 0.69014
C_INDEX 0.69157


[32m[I 2023-05-15 08:45:23,042][0m Trial 13 finished with value: 0.6908232895870592 and parameters: {'subsample': 0.4740363706682452, 'learning_rate': 0.010474744506985236, 'max_depth': 10}. Best is trial 12 with value: 0.6920630744533852.[0m


C_INDEX 0.69076
C_INDEX 0.6914
C_INDEX 0.69361


[32m[I 2023-05-15 09:11:36,568][0m Trial 14 finished with value: 0.6920197526948213 and parameters: {'subsample': 0.6937462195706028, 'learning_rate': 0.010211254151134545, 'max_depth': 10}. Best is trial 12 with value: 0.6920630744533852.[0m


C_INDEX 0.69104
C_INDEX 0.69155
C_INDEX 0.69109


[32m[I 2023-05-15 09:38:27,820][0m Trial 15 finished with value: 0.6910653655251305 and parameters: {'subsample': 0.667385124941243, 'learning_rate': 0.013978344135004228, 'max_depth': 8}. Best is trial 12 with value: 0.6920630744533852.[0m


C_INDEX 0.69056
C_INDEX 0.69254
C_INDEX 0.69144


[32m[I 2023-05-15 09:51:58,665][0m Trial 16 finished with value: 0.6920631989302913 and parameters: {'subsample': 0.6647914335903077, 'learning_rate': 0.025066311555882063, 'max_depth': 9}. Best is trial 16 with value: 0.6920631989302913.[0m


C_INDEX 0.6922
C_INDEX 0.69164
C_INDEX 0.69031


[32m[I 2023-05-15 10:04:23,239][0m Trial 17 finished with value: 0.6914905193130402 and parameters: {'subsample': 0.5818993148229622, 'learning_rate': 0.02831532619297098, 'max_depth': 8}. Best is trial 16 with value: 0.6920631989302913.[0m


C_INDEX 0.69253
C_INDEX 0.6892
C_INDEX 0.69296


[32m[I 2023-05-15 10:13:03,953][0m Trial 18 finished with value: 0.6909275133654381 and parameters: {'subsample': 0.42921962485430065, 'learning_rate': 0.03688519413400227, 'max_depth': 9}. Best is trial 16 with value: 0.6920631989302913.[0m


C_INDEX 0.69063
C_INDEX 0.69216
C_INDEX 0.6937


[32m[I 2023-05-15 12:39:21,186][0m Trial 19 finished with value: 0.6916349212817979 and parameters: {'subsample': 0.5567791681252895, 'learning_rate': 0.02349358698817277, 'max_depth': 9}. Best is trial 16 with value: 0.6920631989302913.[0m


C_INDEX 0.68905
C_INDEX 0.69143
C_INDEX 0.69153


[32m[I 2023-05-15 14:21:14,368][0m Trial 20 finished with value: 0.691552244909141 and parameters: {'subsample': 0.6350946457799223, 'learning_rate': 0.02017256150033759, 'max_depth': 6}. Best is trial 16 with value: 0.6920631989302913.[0m


C_INDEX 0.6917
C_INDEX 0.69057
C_INDEX 0.69152


[32m[I 2023-05-15 14:43:58,669][0m Trial 21 finished with value: 0.6912824571504362 and parameters: {'subsample': 0.6923981898123439, 'learning_rate': 0.014152072193762482, 'max_depth': 10}. Best is trial 16 with value: 0.6920631989302913.[0m


C_INDEX 0.69176
C_INDEX 0.69191
C_INDEX 0.69047


[32m[I 2023-05-15 15:18:20,858][0m Trial 22 finished with value: 0.6910756234843444 and parameters: {'subsample': 0.6962856216706544, 'learning_rate': 0.01258169801143125, 'max_depth': 9}. Best is trial 16 with value: 0.6920631989302913.[0m


C_INDEX 0.69085
C_INDEX 0.69225
C_INDEX 0.69243


[32m[I 2023-05-15 15:38:04,780][0m Trial 23 finished with value: 0.6915678954521466 and parameters: {'subsample': 0.6238441796628047, 'learning_rate': 0.01708041626618617, 'max_depth': 9}. Best is trial 16 with value: 0.6920631989302913.[0m


C_INDEX 0.69003
C_INDEX 0.69033
C_INDEX 0.69201


[33m[W 2023-05-15 17:17:45,940][0m Trial 24 failed with parameters: {'subsample': 0.5369865275041035, 'learning_rate': 0.01006959837507321, 'max_depth': 10} because of the following error: KeyboardInterrupt().[0m
Traceback (most recent call last):
  File "/Users/annakorytnik/opt/anaconda3/envs/kol_debug/lib/python3.8/site-packages/optuna/study/_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "/var/folders/n2/gg5n0_w95pv9ntx938ggdsx40000gn/T/ipykernel_36792/3613890265.py", line 29, in objective
    model.fit(
  File "/Users/annakorytnik/opt/anaconda3/envs/kol_debug/lib/python3.8/site-packages/xgboost/core.py", line 620, in inner_f
    return func(**kwargs)
  File "/Users/annakorytnik/opt/anaconda3/envs/kol_debug/lib/python3.8/site-packages/xgboost/sklearn.py", line 1490, in fit
    self._Booster = train(
  File "/Users/annakorytnik/opt/anaconda3/envs/kol_debug/lib/python3.8/site-packages/xgboost/core.py", line 620, in inner_f
    return func(**kwargs)
 

KeyboardInterrupt: 

In [14]:
study.best_trial

FrozenTrial(number=16, state=TrialState.COMPLETE, values=[0.6920631989302913], datetime_start=datetime.datetime(2023, 5, 15, 9, 38, 27, 821838), datetime_complete=datetime.datetime(2023, 5, 15, 9, 51, 58, 665023), params={'subsample': 0.6647914335903077, 'learning_rate': 0.025066311555882063, 'max_depth': 9}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'subsample': FloatDistribution(high=0.7, log=True, low=0.25, step=None), 'learning_rate': FloatDistribution(high=0.2, log=True, low=0.01, step=None), 'max_depth': IntDistribution(high=10, log=True, low=3, step=1)}, trial_id=16, value=None)

[FrozenTrial(number=2, state=TrialState.COMPLETE, values=[0.5706904511659537], datetime_start=datetime.datetime(2023, 5, 13, 18, 25, 38, 19301), datetime_complete=datetime.datetime(2023, 5, 13, 18, 27, 1, 321098), params={'subsample': 0.06435314059498777, 'learning_rate': 0.036202388856127116}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'subsample': FloatDistribution(high=1.0, log=True, low=0.001, step=None), 'learning_rate': FloatDistribution(high=0.3, log=True, low=0.01, step=None)}, trial_id=2, value=None)]

[FrozenTrial(number=2, state=TrialState.COMPLETE, values=[0.5841330582073612], datetime_start=datetime.datetime(2023, 5, 13, 18, 34, 44, 410982), datetime_complete=datetime.datetime(2023, 5, 13, 18, 36, 35, 19502), params={'subsample': 0.6701497743342159, 'learning_rate': 0.18413715344103385}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'subsample': FloatDistribution(high=1.0, log=True, low=0.001, step=None), 'learning_rate': FloatDistribution(high=0.3, log=True, low=0.01, step=None)}, trial_id=2, value=None)]

FrozenTrial(number=18, state=TrialState.COMPLETE, values=[0.6288033226144973], datetime_start=datetime.datetime(2023, 5, 13, 19, 37, 31, 754674), datetime_complete=datetime.datetime(2023, 5, 13, 19, 39, 37, 640029), params={'subsample': 0.36848188910366186, 'learning_rate': 0.0314119110035605, 'features': ('annual_inc', 'loan_amnt', 'fico_average', 'emp_length')}, user_attrs={}, system_attrs={}, intermediate_values={}, 

distributions={'subsample': FloatDistribution(high=1.0, log=True, low=0.001, step=None), 'learning_rate': FloatDistribution(high=0.3, log=True, low=0.01, step=None), 'features': CategoricalDistribution(choices=(('annual_inc', 'loan_amnt'), ('annual_inc', 'loan_amnt', 'fico_average'), ('annual_inc', 'loan_amnt', 'fico_average', 'emp_length')))}, trial_id=18, value=None)

FrozenTrial(number=0, state=TrialState.COMPLETE, values=[0.6414780581888492], datetime_start=datetime.datetime(2023, 5, 13, 20, 4, 14, 72369), datetime_complete=datetime.datetime(2023, 5, 13, 20, 5, 23, 355636), params={'subsample': 0.22582945179691508, 'learning_rate': 0.014041463069293294}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'subsample': FloatDistribution(high=1.0, log=True, low=0.001, step=None), 'learning_rate': FloatDistribution(high=0.3, log=True, low=0.01, step=None)}, trial_id=0, value=None)

FrozenTrial(number=4, state=TrialState.COMPLETE, values=[0.6478027939799046], datetime_start=datetime.datetime(2023, 5, 13, 20, 43, 15, 532172), datetime_complete=datetime.datetime(2023, 5, 13, 20, 48, 4, 931532), params={'n_estimators': 297, 'subsample': 0.5023651754403868, 'learning_rate': 0.013297707588788659}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'n_estimators': IntDistribution(high=600, log=True, low=100, step=1), 'subsample': FloatDistribution(high=1.0, log=True, low=0.01, step=None), 'learning_rate': FloatDistribution(high=0.3, log=True, low=0.01, step=None)}, trial_id=4, value=None)

FrozenTrial(number=2, state=TrialState.COMPLETE, values=[0.7056628191146204], datetime_start=datetime.datetime(2023, 5, 13, 20, 57, 13, 758039), datetime_complete=datetime.datetime(2023, 5, 13, 21, 3, 26, 599971), params={'n_estimators': 386, 'subsample': 0.5802227354327434, 'learning_rate': 0.018175234298120842}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'n_estimators': IntDistribution(high=400, log=True, low=150, step=1), 'subsample': FloatDistribution(high=1.0, log=True, low=0.01, step=None), 'learning_rate': FloatDistribution(high=0.3, log=True, low=0.01, step=None)}, trial_id=2, value=None)

["annual_inc", "loan_amnt", "fico_average", "emp_length", "acc_open_past_24mths", "avg_cur_bal" , "sub_grade", "int_rate"]

FrozenTrial(number=2, state=TrialState.COMPLETE, values=[0.7319637194257594], datetime_start=datetime.datetime(2023, 5, 14, 20, 37, 11, 356350), datetime_complete=datetime.datetime(2023, 5, 14, 20, 40, 40, 309404), params={'subsample': 0.5036186170641379, 'learning_rate': 0.07280828054330878, 'max_depth': 8}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'subsample': FloatDistribution(high=0.6, log=True, low=0.1, step=None), 'learning_rate': FloatDistribution(high=0.2, log=True, low=0.01, step=None), 'max_depth': IntDistribution(high=10, log=True, low=3, step=1)}, trial_id=2, value=None)

features = ["annual_inc", "loan_amnt", "fico_average", "emp_length", "acc_open_past_24mths", "avg_cur_bal" , "sub_grade", "int_rate", "revol_util"]
features += [col for col in data.columns if "home_ownership_ohe_" in col]

FrozenTrial(number=4, state=TrialState.COMPLETE, values=[0.7338525205589255], datetime_start=datetime.datetime(2023, 5, 14, 21, 52, 16, 244266), datetime_complete=datetime.datetime(2023, 5, 14, 22, 1, 47, 415475), params={'subsample': 0.2619685319453447, 'learning_rate': 0.01739293623497205, 'max_depth': 10}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'subsample': FloatDistribution(high=0.7, log=True, low=0.25, step=None), 'learning_rate': FloatDistribution(high=0.2, log=True, low=0.01, step=None), 'max_depth': IntDistribution(high=10, log=True, low=3, step=1)}, trial_id=4, value=None)

features = ["annual_inc", "loan_amnt", "fico_average", "emp_length", "acc_open_past_24mths", "avg_cur_bal" , "sub_grade", "int_rate", "revol_util"]

features += [col for col in data.columns if "home_ownership_ohe_" in col]

FrozenTrial(number=16, state=TrialState.COMPLETE, values=[0.6920631989302913], datetime_start=datetime.datetime(2023, 5, 15, 9, 38, 27, 821838), datetime_complete=datetime.datetime(2023, 5, 15, 9, 51, 58, 665023), params={'subsample': 0.6647914335903077, 'learning_rate': 0.025066311555882063, 'max_depth': 9}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'subsample': FloatDistribution(high=0.7, log=True, low=0.25, step=None), 'learning_rate': FloatDistribution(high=0.2, log=True, low=0.01, step=None), 'max_depth': IntDistribution(high=10, log=True, low=3, step=1)}, trial_id=16, value=None)

# Wrong

["annual_inc", "loan_amnt", "fico_average", "emp_length", "acc_open_past_24mths", "avg_cur_bal" , "sub_grade", "dti"]

FrozenTrial(number=2, state=TrialState.COMPLETE, values=[0.7042695763788417], datetime_start=datetime.datetime(2023, 5, 13, 21, 23, 14, 409457), datetime_complete=datetime.datetime(2023, 5, 13, 21, 26, 2, 806772), params={'n_estimators': 195, 'subsample': 0.050919411040719585, 'learning_rate': 0.02744129835382001}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'n_estimators': IntDistribution(high=500, log=True, low=150, step=1), 'subsample': FloatDistribution(high=1.0, log=True, low=0.01, step=None), 'learning_rate': FloatDistribution(high=0.3, log=True, low=0.01, step=None)}, trial_id=2, value=None)



["annual_inc", "loan_amnt", "fico_average", "emp_length", "acc_open_past_24mths", "avg_cur_bal" , "sub_grade", "initial_list_status"]

FrozenTrial(number=4, state=TrialState.COMPLETE, values=[0.7055814865786371], datetime_start=datetime.datetime(2023, 5, 13, 22, 0, 25, 924283), datetime_complete=datetime.datetime(2023, 5, 13, 22, 6, 5, 514102), params={'n_estimators': 369, 'subsample': 0.5376649220043659, 'learning_rate': 0.049542498705915004}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'n_estimators': IntDistribution(high=500, log=True, low=150, step=1), 'subsample': FloatDistribution(high=0.6, log=True, low=0.05, step=None), 'learning_rate': FloatDistribution(high=0.3, log=True, low=0.01, step=None)}, trial_id=4, value=None)

["annual_inc", "loan_amnt", "fico_average", "emp_length", "acc_open_past_24mths", "avg_cur_bal" , "sub_grade", "initial_list_status"]