In [5]:
import os

import numpy as np
import pandas as pd
from scipy.stats import median_absolute_deviation
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
import xgboost as xgb
import matplotlib
from sklearn.preprocessing import StandardScaler
import random
from sklearn.metrics import roc_auc_score
from xgboost import plot_tree
import matplotlib.pyplot as plt
import optuna

# Seed passed as `random_state` to every train_test_split call in the
# train/val/test generators below, so the validation split is repeatable.
random_state = 10
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides deprecation warnings from the imported libraries.
warnings.filterwarnings('ignore')

In [2]:
# Pre-computed cross-validation splits; train_val_test_generator_default iterates
# this object as (train_val_index, test_index) pairs.
# NOTE(review): allow_pickle=True unpickles arbitrary objects - only load trusted files.
# NOTE(review): absolute, machine-specific path; the notebook will not run elsewhere as-is.
train_test_indices = np.load(r"D:\NIH\Mutational-Spectrum-master\Data\BCAST_CV_indices.npy", allow_pickle = True)
import os
# Append the local Graphviz binaries to PATH for this process
# (presumably needed by xgboost's plot_tree - TODO confirm).
os.environ["PATH"] += os.pathsep + 'D:/Program Files (x86)/Graphviz2.38/bin/'

# Directory whose files are read with pd.read_csv by dataset_generator; also the
# default `data_dir` of compute_mlp_performance.
data_dir = r"D:\NIH\Mutational-Spectrum-master\Data\BCAST_train"



In [3]:
def list_files_in_dir(dirname):
    """Collect the path of every file located under ``dirname``.

    The directory tree is traversed with ``os.walk`` and each file name is
    joined to the directory it was found in.

    Args:
        dirname: Root directory to traverse.

    Returns:
        list[str]: One joined path per file, in ``os.walk`` order. The list is
        empty when the walk yields no files.
    """
    return [
        os.path.join(current_dir, filename)
        for current_dir, _, filenames in os.walk(dirname)
        for filename in filenames
    ]

def build_XGBoost_model(X_train, y_train, X_val, y_val, eta, max_depth, min_child_weight, gamma, subsample, colsample_bytree, reg_alpha, n_estimators, reg_lambda, scale_pos_weight):
    """Fit an ``xgb.XGBClassifier`` on the training split and return it.

    The validation split is supplied as the only ``eval_set`` entry, with
    ``early_stopping_rounds=20`` and ``eval_metric='auc'``.

    Args:
        X_train, y_train: Training features and labels.
        X_val, y_val: Validation features and labels used as ``eval_set``.
        eta: Passed as ``learning_rate``.
        max_depth: Cast to ``int`` before use.
        min_child_weight, gamma, subsample, colsample_bytree, reg_alpha,
        reg_lambda, scale_pos_weight: Forwarded unchanged under the same names.
        n_estimators: Cast to ``int`` before use.

    Returns:
        The fitted ``xgb.XGBClassifier``.
    """
    # ``nthread`` and ``seed`` are fixed here rather than exposed to callers.
    # NOTE(review): newer xgboost releases expect ``early_stopping_rounds`` on
    # the constructor rather than on ``fit`` - confirm against the installed version.
    hyperparams = dict(
        learning_rate=eta,
        n_estimators=int(n_estimators),
        max_depth=int(max_depth),
        min_child_weight=min_child_weight,
        gamma=gamma,
        subsample=subsample,
        colsample_bytree=colsample_bytree,
        reg_alpha=reg_alpha,
        reg_lambda=reg_lambda,
        eval_metric='auc',
        nthread=4,
        scale_pos_weight=scale_pos_weight,
        seed=27,
    )
    classifier = xgb.XGBClassifier(**hyperparams)

    classifier.fit(
        X_train,
        y_train,
        eval_set=[(X_val, y_val)],
        early_stopping_rounds=20,
        verbose=False,
    )

    return classifier

def get_row_indices_with_sum_zero(X):
    """Return the index labels of the rows of ``X`` whose values sum to zero.

    Args:
        X: DataFrame whose rows are summed along ``axis=1``.

    Returns:
        list: Index labels, in their original order, of the zero-sum rows.
    """
    row_totals = X.sum(axis=1)
    is_zero_row = row_totals == 0
    return X.index[is_zero_row].tolist()

def dataset_generator(data_dir, num_files, y_col):
    """Yield ``(X, y)`` for ``num_files`` randomly chosen files under ``data_dir``.

    Each chosen file is read with ``pd.read_csv``. ``X`` is the first 96
    columns with the characters ``[``, ``]`` and ``>`` removed from the column
    names, then row-normalised by ``scale_rows``. ``y`` is column ``y_col``
    cast to ``np.int_``.

    Args:
        data_dir: Directory searched recursively via ``list_files_in_dir``.
        num_files: Number of files to draw without replacement
            (``random.sample`` raises ``ValueError`` if fewer files exist).
        y_col: Name of the label column in each file.

    Yields:
        tuple: ``(X, y)`` for one file at a time.
    """
    # NOTE(review): the `random` module is never seeded in this notebook, so the
    # files drawn here differ from run to run.
    for datafile in random.sample(list_files_in_dir(data_dir), num_files):
        data = pd.read_csv(datafile)

        # Explicit copy: the column rename below must not act on a slice of `data`.
        X = data.iloc[:, :96].copy()
        y = data[y_col].astype(np.int_)

        # Strip the characters literally. `regex=False` keeps the behaviour
        # independent of the pandas version's default for `regex`
        # ('[' on its own is not a valid regular expression).
        for char in ('[', ']', '>'):
            X.columns = X.columns.str.replace(char, '', regex=False)

        # All-zero rows are not filtered out, so scale_rows divides them by a
        # zero row sum - presumably yielding NaN features; verify this is intended.
        X = scale_rows(X)
        yield X, y
        
def scale_rows(X):
    """Divide every row of ``X`` by that row's sum.

    Args:
        X: DataFrame to normalise; it is not modified.

    Returns:
        A new DataFrame with the same index and columns as ``X``.
    """
    row_totals = X.sum(axis=1)
    return X.div(row_totals, axis="index")


def train_val_test_generator(data_dir, y_col, test_frac=0.2, n_splits=10):
    """Yield stratified k-fold train/validation/test splits for one dataset.

    One dataset is drawn through ``dataset_generator(data_dir, 1, y_col)``.
    ``StratifiedKFold(n_splits=n_splits)`` separates each fold's test rows from
    the remaining rows, which ``train_test_split`` then divides (stratified,
    seeded with the module-level ``random_state``) into training and
    validation parts.

    Args:
        data_dir: Directory handed to ``dataset_generator``.
        y_col: Name of the label column.
        test_frac: Used only to derive the validation share,
            ``test_frac / (1 - test_frac)``, of the non-test rows. It does
            not control the test size, which comes from the k-fold split.
        n_splits: Number of stratified folds. Defaults to 10, the value that
            was previously hard-coded.

    Yields:
        list: One ``((X_train, y_train), (X_val, y_val), (X_test, y_test))``
        tuple per fold.
    """
    # NOTE(review): with the defaults the validation share is 0.25 of the
    # non-test rows while the folds come from a 10-way split; confirm that this
    # mismatch between `test_frac` and `n_splits` is intended.
    val_frac = test_frac/(1.0 - test_frac)

    for X, y in dataset_generator(data_dir, 1, y_col):
        fold_data = list()
        skf = StratifiedKFold(n_splits=n_splits)
        for train_val_index, test_index in skf.split(X, y):
            X_test, y_test = X.iloc[test_index, :], y.iloc[test_index]
            X_train, X_val, y_train, y_val = train_test_split(
                X.iloc[train_val_index,:], y.iloc[train_val_index],
                test_size=val_frac,
                random_state=random_state,
                stratify=y.iloc[train_val_index]
            )
            fold_data.append(((X_train, y_train),
                              (X_val, y_val),
                              (X_test, y_test)))
        yield fold_data
        
def train_val_test_generator_default(data_dir, y_col, test_frac=0.1):
    """Yield train/validation/test splits built from the pre-computed fold indices.

    Works like a k-fold generator, except that the (train_val, test) index
    pairs are read from the module-level ``train_test_indices`` instead of
    being computed here. Each train_val portion is divided by a stratified
    ``train_test_split`` seeded with the module-level ``random_state``.

    Args:
        data_dir: Directory handed to ``dataset_generator``.
        y_col: Name of the label column.
        test_frac: Used only to derive the validation share,
            ``test_frac / (1 - test_frac)``, of the train_val rows.

    Yields:
        list: One ``((X_train, y_train), (X_val, y_val), (X_test, y_test))``
        tuple per index pair.
    """
    val_frac = test_frac/(1.0 - test_frac)

    for X, y in dataset_generator(data_dir, 1, y_col):
        folds = []
        for outer_idx, held_out_idx in train_test_indices:
            test_part = (X.iloc[held_out_idx, :], y.iloc[held_out_idx])

            X_outer = X.iloc[outer_idx, :]
            y_outer = y.iloc[outer_idx]
            X_train, X_val, y_train, y_val = train_test_split(
                X_outer,
                y_outer,
                test_size=val_frac,
                random_state=random_state,
                stratify=y_outer,
            )

            train_part = (X_train, y_train)
            val_part = (X_val, y_val)
            folds.append((train_part, val_part, test_part))

        yield folds
        
def compute_mlp_performance(trial, data_dir=data_dir, num_samples = 1, n_folds=10, y_col='is_sig3_10'):
    """Optuna objective: median test AUC of an XGBoost model across folds.

    Despite the name, the model is built by ``build_XGBoost_model``. The
    hyperparameters are suggested by ``trial``. For every dataset yielded by
    ``train_val_test_generator``, one model is fitted per fold and scored with
    ``roc_auc_score`` on that fold's test split.

    Args:
        trial: Optuna trial used to suggest hyperparameters.
        data_dir: Directory handed to ``train_val_test_generator``.
        num_samples: Currently unused.
        n_folds: Currently unused; the fold count is fixed by
            ``train_val_test_generator``.
        y_col: Name of the label column.

    Returns:
        The median, over datasets, of each dataset's median fold AUC.
    """
    # Suggest every hyperparameter once, in the same order as the original
    # in-loop calls. Optuna hands back the same value when a name is suggested
    # again within one trial, so hoisting does not change the values the
    # models receive.
    # NOTE(review): `subsample` and `colsample_bytree` start at 0 and `eta` is
    # sampled linearly from 1e-9; check these ranges against the XGBoost docs.
    params = dict(
        eta=trial.suggest_float("eta", 1e-9, 1e-1, log=False),
        max_depth=trial.suggest_int("max_depth", 6, 40),
        min_child_weight=trial.suggest_float("min_child_weight", 0, 10),
        gamma=trial.suggest_float("gamma", 0, 10),
        subsample=trial.suggest_float("subsample", 0, 1),
        colsample_bytree=trial.suggest_float("colsample_bytree", 0, 1),
        reg_alpha=trial.suggest_float("reg_alpha", 0, 1),
        n_estimators=trial.suggest_int("n_estimators", 300, 1500),
        reg_lambda=trial.suggest_float("reg_lambda", 0, 10),
        scale_pos_weight=trial.suggest_float('scale_pos_weight', 0, 5),
    )

    aucs = list()
    for folds_data in train_val_test_generator(data_dir, y_col=y_col):
        fold_aucs = list()

        for (X_train, y_train), (X_val, y_val), (X_test, y_test) in folds_data:
            model = build_XGBoost_model(X_train, y_train, X_val, y_val, **params)

            # Column 1 of predict_proba is the probability of the positive class.
            positive_proba = model.predict_proba(X_test)[:, 1]
            fold_aucs.append(roc_auc_score(y_test, positive_proba))

        aucs.append(fold_aucs)

    # Rows are datasets, columns are folds.
    aucs = np.array(aucs)

    # The median-model lookup and MAD-of-MAD statistic that used to follow were
    # never returned or stored, so they have been dropped.
    return np.median(np.median(aucs, axis=1))


    

In [6]:
# Seed the sampler so the sequence of suggested hyperparameters is repeatable
# across kernel restarts.
# NOTE(review): the objective itself still draws its data file with the
# unseeded `random` module, so trial scores may vary between runs.
study_is_sig3 = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=random_state),
)
study_is_sig3.optimize(compute_mlp_performance, n_trials=300)

[32m[I 2022-07-15 11:15:03,730][0m A new study created in memory with name: no-name-dba3060d-6dda-4a2d-9c86-e63cde582502[0m
[32m[I 2022-07-15 11:15:04,106][0m Trial 0 finished with value: 0.5 and parameters: {'eta': 0.024431081670030833, 'max_depth': 22, 'min_child_weight': 4.814205131292647, 'gamma': 0.18812991737081952, 'subsample': 0.020350183393876398, 'colsample_bytree': 0.9038292242893445, 'reg_alpha': 0.7451876586598417, 'n_estimators': 736, 'reg_lambda': 2.9748002023711773, 'scale_pos_weight': 1.4143803525073344}. Best is trial 0 with value: 0.5.[0m
[32m[I 2022-07-15 11:15:04,740][0m Trial 1 finished with value: 0.6483516483516483 and parameters: {'eta': 0.09851702291987378, 'max_depth': 22, 'min_child_weight': 7.56902627258593, 'gamma': 2.633187005184255, 'subsample': 0.5547447601880571, 'colsample_bytree': 0.5500157617432326, 'reg_alpha': 0.4382696208153962, 'n_estimators': 1073, 'reg_lambda': 5.120839145763444, 'scale_pos_weight': 3.5672325769700604}. Best is trial 1

[32m[I 2022-07-15 11:15:14,663][0m Trial 18 finished with value: 0.5967216117216116 and parameters: {'eta': 0.040005869864551624, 'max_depth': 27, 'min_child_weight': 8.695987623426289, 'gamma': 8.849592846738453, 'subsample': 0.6929600749255264, 'colsample_bytree': 0.45666658976804964, 'reg_alpha': 0.31052348965771037, 'n_estimators': 451, 'reg_lambda': 4.1318475173568565, 'scale_pos_weight': 2.177156881772683}. Best is trial 14 with value: 0.6858974358974359.[0m
[32m[I 2022-07-15 11:15:15,191][0m Trial 19 finished with value: 0.6682600732600733 and parameters: {'eta': 0.08655107250627775, 'max_depth': 30, 'min_child_weight': 3.627730715027603, 'gamma': 4.027862269336609, 'subsample': 0.3745733430423217, 'colsample_bytree': 0.19479227606764815, 'reg_alpha': 0.6875798482423703, 'n_estimators': 1153, 'reg_lambda': 7.028894440253925, 'scale_pos_weight': 4.445271897691988}. Best is trial 14 with value: 0.6858974358974359.[0m
[32m[I 2022-07-15 11:15:15,943][0m Trial 20 finished wit

[32m[I 2022-07-15 11:15:25,200][0m Trial 36 finished with value: 0.6539468864468865 and parameters: {'eta': 0.08972871920608723, 'max_depth': 21, 'min_child_weight': 7.513937761778037, 'gamma': 4.9604089108844605, 'subsample': 0.6155472817183223, 'colsample_bytree': 0.5870028230385241, 'reg_alpha': 0.4521351698858849, 'n_estimators': 728, 'reg_lambda': 3.486062837269537, 'scale_pos_weight': 3.4154121162172126}. Best is trial 35 with value: 0.6888736263736264.[0m
[32m[I 2022-07-15 11:15:25,715][0m Trial 37 finished with value: 0.663919413919414 and parameters: {'eta': 0.08238835809193859, 'max_depth': 16, 'min_child_weight': 6.442742591306173, 'gamma': 2.200031053858381, 'subsample': 0.622871548538265, 'colsample_bytree': 0.08652790795232826, 'reg_alpha': 0.9275196659723881, 'n_estimators': 851, 'reg_lambda': 5.0580393933514705, 'scale_pos_weight': 3.0429247308511904}. Best is trial 35 with value: 0.6888736263736264.[0m
[32m[I 2022-07-15 11:15:26,360][0m Trial 38 finished with v

[32m[I 2022-07-15 11:15:35,993][0m Trial 54 finished with value: 0.6881868131868132 and parameters: {'eta': 0.09129979626599363, 'max_depth': 6, 'min_child_weight': 5.960938974959167, 'gamma': 2.6593516628865306, 'subsample': 0.8141073602540085, 'colsample_bytree': 0.3632220845001832, 'reg_alpha': 0.1814984319034404, 'n_estimators': 805, 'reg_lambda': 5.442146509391065, 'scale_pos_weight': 2.6234626739369604}. Best is trial 53 with value: 0.6913919413919415.[0m
[32m[I 2022-07-15 11:15:36,679][0m Trial 55 finished with value: 0.6658516483516483 and parameters: {'eta': 0.09107409325911636, 'max_depth': 6, 'min_child_weight': 6.569173588557194, 'gamma': 2.675987310165612, 'subsample': 0.716140950028266, 'colsample_bytree': 0.924484476922927, 'reg_alpha': 0.3265888241945009, 'n_estimators': 1496, 'reg_lambda': 5.533680450081869, 'scale_pos_weight': 2.2541549814127206}. Best is trial 53 with value: 0.6913919413919415.[0m
[32m[I 2022-07-15 11:15:37,268][0m Trial 56 finished with valu

[32m[I 2022-07-15 11:15:46,573][0m Trial 72 finished with value: 0.6966575091575091 and parameters: {'eta': 0.083353335665419, 'max_depth': 12, 'min_child_weight': 5.385877690963279, 'gamma': 3.083543599475763, 'subsample': 0.8495166742824072, 'colsample_bytree': 0.2495112123393171, 'reg_alpha': 0.0641309964320646, 'n_estimators': 927, 'reg_lambda': 0.21009254837375013, 'scale_pos_weight': 3.5655387924421547}. Best is trial 72 with value: 0.6966575091575091.[0m
[32m[I 2022-07-15 11:15:47,131][0m Trial 73 finished with value: 0.626006474146009 and parameters: {'eta': 0.09267815430838162, 'max_depth': 12, 'min_child_weight': 5.423079418114519, 'gamma': 0.45195475812340824, 'subsample': 0.841442180245602, 'colsample_bytree': 0.2393164183034301, 'reg_alpha': 0.057860187517424444, 'n_estimators': 785, 'reg_lambda': 0.2744408673667331, 'scale_pos_weight': 3.587397369738308}. Best is trial 72 with value: 0.6966575091575091.[0m
[32m[I 2022-07-15 11:15:47,648][0m Trial 74 finished with 

[32m[I 2022-07-15 11:15:57,255][0m Trial 90 finished with value: 0.6804029304029304 and parameters: {'eta': 0.08663673427580586, 'max_depth': 11, 'min_child_weight': 5.352170853921335, 'gamma': 0.458070197123681, 'subsample': 0.5127740861987603, 'colsample_bytree': 0.3362905731883817, 'reg_alpha': 0.08560395403739049, 'n_estimators': 685, 'reg_lambda': 5.656535581043093, 'scale_pos_weight': 3.2984593599219556}. Best is trial 84 with value: 0.7023809523809523.[0m
[32m[I 2022-07-15 11:15:57,862][0m Trial 91 finished with value: 0.6437728937728937 and parameters: {'eta': 0.08856909883320034, 'max_depth': 6, 'min_child_weight': 9.965210393898346, 'gamma': 2.6159303207435514, 'subsample': 0.8602202414969231, 'colsample_bytree': 0.36936918730483587, 'reg_alpha': 0.0020103612438144153, 'n_estimators': 825, 'reg_lambda': 1.2428493011116661, 'scale_pos_weight': 2.7788223036168853}. Best is trial 84 with value: 0.7023809523809523.[0m
[32m[I 2022-07-15 11:15:58,527][0m Trial 92 finished w

[32m[I 2022-07-15 11:16:08,497][0m Trial 108 finished with value: 0.6703296703296704 and parameters: {'eta': 0.07942612253256268, 'max_depth': 9, 'min_child_weight': 5.890846738284456, 'gamma': 2.863031606227173, 'subsample': 0.5603196690543667, 'colsample_bytree': 0.4664135377330252, 'reg_alpha': 0.12645162443797647, 'n_estimators': 838, 'reg_lambda': 2.071391204874459, 'scale_pos_weight': 3.353723336783605}. Best is trial 84 with value: 0.7023809523809523.[0m
[32m[I 2022-07-15 11:16:09,070][0m Trial 109 finished with value: 0.6820054945054945 and parameters: {'eta': 0.09147938182261099, 'max_depth': 10, 'min_child_weight': 5.292415494456528, 'gamma': 2.7230023581856573, 'subsample': 0.7018476056074912, 'colsample_bytree': 0.21795873832656912, 'reg_alpha': 0.8552680554497063, 'n_estimators': 625, 'reg_lambda': 0.47100539205125347, 'scale_pos_weight': 3.4669467776388014}. Best is trial 84 with value: 0.7023809523809523.[0m
[32m[I 2022-07-15 11:16:09,653][0m Trial 110 finished w

[32m[I 2022-07-15 11:16:19,512][0m Trial 126 finished with value: 0.6680402930402931 and parameters: {'eta': 0.08884441664388452, 'max_depth': 26, 'min_child_weight': 4.74917843975533, 'gamma': 6.165247490546678, 'subsample': 0.9139938869014058, 'colsample_bytree': 0.384563799665892, 'reg_alpha': 0.10029975164074803, 'n_estimators': 829, 'reg_lambda': 0.3916473926025165, 'scale_pos_weight': 3.5077916823203226}. Best is trial 84 with value: 0.7023809523809523.[0m
[32m[I 2022-07-15 11:16:20,120][0m Trial 127 finished with value: 0.6662087912087913 and parameters: {'eta': 0.09845009920942374, 'max_depth': 13, 'min_child_weight': 5.489247966633446, 'gamma': 2.5161471780918694, 'subsample': 0.7135356170168381, 'colsample_bytree': 0.22678121454338587, 'reg_alpha': 0.12074010253548068, 'n_estimators': 609, 'reg_lambda': 0.19154078704211522, 'scale_pos_weight': 3.908145940940693}. Best is trial 84 with value: 0.7023809523809523.[0m
[32m[I 2022-07-15 11:16:20,715][0m Trial 128 finished 

[32m[I 2022-07-15 11:16:30,079][0m Trial 144 finished with value: 0.6854395604395604 and parameters: {'eta': 0.03189134636113485, 'max_depth': 8, 'min_child_weight': 6.828126024175919, 'gamma': 1.6649801301945186, 'subsample': 0.8596803902440883, 'colsample_bytree': 0.6310227223967066, 'reg_alpha': 0.7608027638868826, 'n_estimators': 915, 'reg_lambda': 1.0864945186290438, 'scale_pos_weight': 2.7221754652234096}. Best is trial 132 with value: 0.7062087912087913.[0m
[32m[I 2022-07-15 11:16:30,892][0m Trial 145 finished with value: 0.6495054945054944 and parameters: {'eta': 0.029333953866390435, 'max_depth': 7, 'min_child_weight': 7.318647499383723, 'gamma': 1.7193217153177502, 'subsample': 0.8576385285744517, 'colsample_bytree': 0.6717423303247756, 'reg_alpha': 0.7881237093934743, 'n_estimators': 1109, 'reg_lambda': 6.17221805266926, 'scale_pos_weight': 2.6261785325257394}. Best is trial 132 with value: 0.7062087912087913.[0m
[32m[I 2022-07-15 11:16:31,502][0m Trial 146 finished 

[32m[I 2022-07-15 11:16:41,298][0m Trial 162 finished with value: 0.6652930402930404 and parameters: {'eta': 0.030510850846389676, 'max_depth': 6, 'min_child_weight': 5.668953273475734, 'gamma': 0.22851182386642863, 'subsample': 0.8810521294080483, 'colsample_bytree': 0.7039951394808781, 'reg_alpha': 0.9379165955863, 'n_estimators': 1190, 'reg_lambda': 2.315877291436945, 'scale_pos_weight': 2.8369813585608363}. Best is trial 132 with value: 0.7062087912087913.[0m
[32m[I 2022-07-15 11:16:41,910][0m Trial 163 finished with value: 0.6813186813186813 and parameters: {'eta': 0.03740761268960895, 'max_depth': 8, 'min_child_weight': 5.416858379375926, 'gamma': 0.6856746185955244, 'subsample': 0.8317113905976135, 'colsample_bytree': 0.6135384903193356, 'reg_alpha': 0.8119759342201244, 'n_estimators': 1058, 'reg_lambda': 2.6934807143658963, 'scale_pos_weight': 2.700991830681282}. Best is trial 132 with value: 0.7062087912087913.[0m
[32m[I 2022-07-15 11:16:42,540][0m Trial 164 finished w

[32m[I 2022-07-15 11:16:52,755][0m Trial 180 finished with value: 0.668956043956044 and parameters: {'eta': 0.04459201800208108, 'max_depth': 10, 'min_child_weight': 5.690060239904533, 'gamma': 1.4912131373213966, 'subsample': 0.7878043968463033, 'colsample_bytree': 0.6005452548459064, 'reg_alpha': 0.8218059890742149, 'n_estimators': 969, 'reg_lambda': 0.5148303944478477, 'scale_pos_weight': 1.732288850230247}. Best is trial 132 with value: 0.7062087912087913.[0m
[32m[I 2022-07-15 11:16:53,358][0m Trial 181 finished with value: 0.6591117216117216 and parameters: {'eta': 0.08482410650722678, 'max_depth': 9, 'min_child_weight': 5.296576221245753, 'gamma': 3.069097849153685, 'subsample': 0.8879032289160544, 'colsample_bytree': 0.2631349878176259, 'reg_alpha': 0.8608418424086764, 'n_estimators': 903, 'reg_lambda': 0.6118018353528077, 'scale_pos_weight': 3.4466453843731304}. Best is trial 132 with value: 0.7062087912087913.[0m
[32m[I 2022-07-15 11:16:54,008][0m Trial 182 finished wi

[32m[I 2022-07-15 11:17:06,055][0m Trial 198 finished with value: 0.674908424908425 and parameters: {'eta': 0.04052185408774885, 'max_depth': 12, 'min_child_weight': 6.925991339028417, 'gamma': 1.8767117751860694, 'subsample': 0.8383017334996097, 'colsample_bytree': 0.8942421718787217, 'reg_alpha': 0.911532699970075, 'n_estimators': 844, 'reg_lambda': 0.2573319641753634, 'scale_pos_weight': 3.6939710838443864}. Best is trial 132 with value: 0.7062087912087913.[0m
[32m[I 2022-07-15 11:17:06,593][0m Trial 199 finished with value: 0.6751373626373627 and parameters: {'eta': 0.042513551613190856, 'max_depth': 36, 'min_child_weight': 7.4250652191537565, 'gamma': 2.458901297377989, 'subsample': 0.9156115823386994, 'colsample_bytree': 0.2332310222872755, 'reg_alpha': 0.4130135057150475, 'n_estimators': 719, 'reg_lambda': 0.7333848949048344, 'scale_pos_weight': 3.3313846786037242}. Best is trial 132 with value: 0.7062087912087913.[0m
[32m[I 2022-07-15 11:17:07,481][0m Trial 200 finished

[32m[I 2022-07-15 11:17:18,119][0m Trial 216 finished with value: 0.6433150183150184 and parameters: {'eta': 0.06924116219123388, 'max_depth': 7, 'min_child_weight': 3.8589302451830223, 'gamma': 8.651050729942597, 'subsample': 0.9200460164948422, 'colsample_bytree': 0.9729908197468786, 'reg_alpha': 0.9993999529962398, 'n_estimators': 994, 'reg_lambda': 1.8696685089693927, 'scale_pos_weight': 2.0001061354902707}. Best is trial 132 with value: 0.7062087912087913.[0m
[32m[I 2022-07-15 11:17:18,841][0m Trial 217 finished with value: 0.6888736263736264 and parameters: {'eta': 0.043437615219602495, 'max_depth': 6, 'min_child_weight': 4.695017219202306, 'gamma': 1.2884498469300756, 'subsample': 0.9338725685472297, 'colsample_bytree': 0.9990826361154913, 'reg_alpha': 0.14876245922002068, 'n_estimators': 1035, 'reg_lambda': 1.4737465185833176, 'scale_pos_weight': 2.241379233115784}. Best is trial 132 with value: 0.7062087912087913.[0m
[32m[I 2022-07-15 11:17:19,642][0m Trial 218 finishe

[32m[I 2022-07-15 11:17:31,964][0m Trial 234 finished with value: 0.6776556776556777 and parameters: {'eta': 0.04895551054536243, 'max_depth': 6, 'min_child_weight': 4.23247580865028, 'gamma': 0.7146216249687531, 'subsample': 0.9989144911835155, 'colsample_bytree': 0.9498457364015506, 'reg_alpha': 0.235944474300817, 'n_estimators': 1008, 'reg_lambda': 3.6580994311954402, 'scale_pos_weight': 1.7801249295331878}. Best is trial 224 with value: 0.7186355311355312.[0m
[32m[I 2022-07-15 11:17:32,792][0m Trial 235 finished with value: 0.6943681318681318 and parameters: {'eta': 0.05356158423112069, 'max_depth': 7, 'min_child_weight': 4.534881482677193, 'gamma': 0.6300779788494855, 'subsample': 0.9998272305355401, 'colsample_bytree': 0.9616667510799402, 'reg_alpha': 0.20981035342867485, 'n_estimators': 963, 'reg_lambda': 4.335797679509524, 'scale_pos_weight': 1.6930879769699885}. Best is trial 224 with value: 0.7186355311355312.[0m
[32m[I 2022-07-15 11:17:33,547][0m Trial 236 finished w

[32m[I 2022-07-15 11:17:46,315][0m Trial 252 finished with value: 0.6831501831501832 and parameters: {'eta': 0.04728215183952944, 'max_depth': 8, 'min_child_weight': 4.030047526556514, 'gamma': 0.33578906505270756, 'subsample': 0.9734153075583933, 'colsample_bytree': 0.9723417411413857, 'reg_alpha': 0.18478426817283825, 'n_estimators': 945, 'reg_lambda': 3.569397519158736, 'scale_pos_weight': 1.9207193006814212}. Best is trial 224 with value: 0.7186355311355312.[0m
[32m[I 2022-07-15 11:17:47,365][0m Trial 253 finished with value: 0.6794871794871795 and parameters: {'eta': 0.05189935585192014, 'max_depth': 7, 'min_child_weight': 3.608533723400838, 'gamma': 0.602816352231824, 'subsample': 0.997516927799455, 'colsample_bytree': 0.9281700031602659, 'reg_alpha': 0.1593210818839113, 'n_estimators': 997, 'reg_lambda': 4.8254133178299075, 'scale_pos_weight': 1.4598488603812654}. Best is trial 224 with value: 0.7186355311355312.[0m
[32m[I 2022-07-15 11:17:48,117][0m Trial 254 finished w

[32m[I 2022-07-15 11:18:02,639][0m Trial 270 finished with value: 0.711996336996337 and parameters: {'eta': 0.05345814652852066, 'max_depth': 9, 'min_child_weight': 3.6129771592373263, 'gamma': 1.180886832685039, 'subsample': 0.9801627235944367, 'colsample_bytree': 0.9627322586432966, 'reg_alpha': 0.13939465578932603, 'n_estimators': 976, 'reg_lambda': 3.471811330727066, 'scale_pos_weight': 1.1889694844375271}. Best is trial 224 with value: 0.7186355311355312.[0m
[32m[I 2022-07-15 11:18:03,698][0m Trial 271 finished with value: 0.6808608058608059 and parameters: {'eta': 0.05324484630416799, 'max_depth': 10, 'min_child_weight': 2.972551539811833, 'gamma': 1.1217678815736232, 'subsample': 0.9726745707649118, 'colsample_bytree': 0.9609176680508219, 'reg_alpha': 0.15714778779603528, 'n_estimators': 973, 'reg_lambda': 3.4900759662132574, 'scale_pos_weight': 1.1519210058598468}. Best is trial 224 with value: 0.7186355311355312.[0m
[32m[I 2022-07-15 11:18:04,488][0m Trial 272 finished

[32m[I 2022-07-15 11:18:19,923][0m Trial 288 finished with value: 0.7058150183150184 and parameters: {'eta': 0.05250469962352797, 'max_depth': 8, 'min_child_weight': 3.443950792903473, 'gamma': 1.355827356725595, 'subsample': 0.9693582545564888, 'colsample_bytree': 0.9487277342396727, 'reg_alpha': 0.12858145307274316, 'n_estimators': 912, 'reg_lambda': 3.461551116733168, 'scale_pos_weight': 1.4932255691150877}. Best is trial 224 with value: 0.7186355311355312.[0m
[32m[I 2022-07-15 11:18:20,962][0m Trial 289 finished with value: 0.6895329670329671 and parameters: {'eta': 0.05308931775900999, 'max_depth': 10, 'min_child_weight': 3.3007859646656272, 'gamma': 1.2412192232731634, 'subsample': 0.9685036134648635, 'colsample_bytree': 0.9454099338377324, 'reg_alpha': 0.14625679030113065, 'n_estimators': 899, 'reg_lambda': 3.900829939284331, 'scale_pos_weight': 1.3493533427484}. Best is trial 224 with value: 0.7186355311355312.[0m
[32m[I 2022-07-15 11:18:21,865][0m Trial 290 finished wi

In [64]:
# Display the best trial recorded by the study.
# NOTE(review): the saved output reports trial number 712 dated 2022-07-14, while
# the optimize cell in this notebook ran 300 trials on 2022-07-15 - the output
# looks stale; regenerate it with Restart Kernel -> Run All.
study_is_sig3.best_trial

FrozenTrial(number=712, values=[0.7471988795518207], datetime_start=datetime.datetime(2022, 7, 14, 12, 35, 10, 606435), datetime_complete=datetime.datetime(2022, 7, 14, 12, 35, 11, 260685), params={'eta': 0.031823595825773356, 'max_depth': 24, 'min_child_weight': 0.1728822866001327, 'gamma': 0.20081507300890442, 'subsample': 0.6641624109216824, 'colsample_bytree': 0.45680259927388467, 'reg_alpha': 0.037297434517833086, 'n_estimators': 770, 'reg_lambda': 3.0324480990813454, 'scale_pos_weight': 1.27915216198194}, distributions={'eta': UniformDistribution(high=0.1, low=1e-09), 'max_depth': IntUniformDistribution(high=40, low=6, step=1), 'min_child_weight': UniformDistribution(high=10.0, low=0.0), 'gamma': UniformDistribution(high=10.0, low=0.0), 'subsample': UniformDistribution(high=1.0, low=0.0), 'colsample_bytree': UniformDistribution(high=1.0, low=0.0), 'reg_alpha': UniformDistribution(high=1.0, low=0.0), 'n_estimators': IntUniformDistribution(high=1500, low=300, step=1), 'reg_lambda':