In [55]:
import math

import numpy as np
import pandas as pd

import lightgbm as lgb
import catboost as cat
from catboost import Pool
import xgboost as xgb

import itertools
from imblearn.under_sampling import RandomUnderSampler
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection._split import _BaseKFold, _RepeatedSplits, BaseShuffleSplit, _validate_shuffle_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import RFE
from sklearn.metrics import log_loss

from sklearn.utils import check_random_state
from sklearn.utils.validation import _num_samples, check_array
from sklearn.utils.multiclass import type_of_target

from scipy import stats

import eli5
from IPython.display import display
from eli5.permutation_importance import get_score_importances
from eli5.sklearn import PermutationImportance

import matplotlib.pyplot as plt
from tqdm.auto import tqdm
import seaborn as sns

import optuna

import warnings
warnings.filterwarnings('ignore')

pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 100)


from colorama import Style, Fore

palette = ['#302c36', '#037d97', '#E4591E', '#C09741',
           '#EC5B6D', '#90A6B1', '#6ca957', '#D8E3E2']

blk = Style.BRIGHT + Fore.BLACK
red = Style.BRIGHT + Fore.RED
blu = Style.BRIGHT + Fore.BLUE
res = Style.RESET_ALL


class CFG:
    undersample = True
    
    feature_sel = False
    n_feature_sel_folds = 5
    
    lgbm_optimize = True
    cb_optimize = True
    xgb_optimize = True
    n_trials = 1000
    n_optimize_folds = 3
    n_optimize_repeats = 5
    
    stacking = False
    n_stacking_folds = 10
    

# Load Data

In [2]:
# COMP_PATH = "/kaggle/input/icr-identify-age-related-conditions"
COMP_PATH = "icr-identify-age-related-conditions"

train_df = pd.read_csv(f'{COMP_PATH}//train.csv')
test_df = pd.read_csv(f'{COMP_PATH}/test.csv')
greeks = pd.read_csv(f"{COMP_PATH}/greeks.csv")
sample_submission = pd.read_csv(f"{COMP_PATH}/sample_submission.csv")

train_df['EJ'] = train_df['EJ'].replace({'A': 0, 'B': 1})
test_df['EJ'] = test_df['EJ'].replace({'A': 0, 'B': 1})

train_df.columns = train_df.columns.str.replace(' ', '')
test_df.columns = test_df.columns.str.replace(' ', '')

# train_df.drop('Id',axis=1, inplace=True)
# train_df.fillna(train_df.median(), inplace=True)

# Standard Scaler

In [3]:
# scaler = StandardScaler()
# new_num_cols = train_df.select_dtypes(include=['float64']).columns

# train_df[new_num_cols] = scaler.fit_transform(train_df[new_num_cols])
# test_df[new_num_cols] = scaler.transform(test_df[new_num_cols])

# Brute Force Feature Generation

Combine features in all possible ways.

In [4]:
# fi = pd.read_csv('feature_importances.csv', index_col = 'Unnamed: 0')
# fi_cols = set(fi['Feature'].head(100).values)

# perm = pd.read_csv('perm_df.csv', index_col = 'Unnamed: 0')
# perm_cols = set(perm['importance'].head(100).index)

# important_col = list(perm_cols.intersection(fi_cols))
# print(important_col)

# Denoising

In [5]:
# features = [fe for fe in train_df.columns if fe not in ['Id','CF', 'CB', 'DV', 'BR', 'DF', 'AR', 'GI', 'AY', 'GB',
#                                                         'AH', 'CW', 'CL', 'Class', 'BP']]

# for f in features:
#     train_df[f] = np.floor(train_df[f]*1000)/1000 # quality decreases no significant result for LGBM

# Log features (preserve sign)

In [6]:
# for f in features:
#     train_df[f] = np.sign(train_df[f]) * np.log1p(np.abs(train_df[f])) # no significant result for LGBM

# Remove outliers

In [7]:
features_with_outliers = [fe for fe in train_df.columns if fe not in ['BN', 'BQ', 'CW', 'EL', 'GH', 
                                                                      'GI', 'GL', 'Id', 'Class', 'EJ']]

for f in features_with_outliers:
    train_df[f] = train_df[f].clip(upper=train_df[f].quantile(0.99))

# Feature generation

In [8]:
features = train_df.drop(['Class', 'Id'], axis=1).columns

# features = [fe for fe in train_df.columns if fe not in ['CF', 'CB', 'DV', 'BR', 'DF', 'GB', 'AH', 
#                                                         'CW', 'CL', 'BP', 'BD', 'FC', 'GE', 'GF',
#                                                         'AR', 'GI', 'Id', 'Class', 'AX']]

# def gen_features(features, df):
#     generated_features = pd.DataFrame()

#     for fe_a, fe_b in tqdm(itertools.combinations(features, 2), total=sum([1 for i in itertools.combinations(features, 2)])):

# #         generated_features[f'{fe_a}_2']        = df[fe_a].pow(2)
# #         generated_features[f'{fe_b}_2']        = df[fe_b].pow(2)
# #         generated_features[f'{fe_a}*{fe_b}_2'] = df[fe_a] * df[fe_b].pow(2)
# #         generated_features[f'{fe_a}_2*{fe_b}'] = df[fe_a].pow(2) * df[fe_b]

# #         generated_features[f'{fe_a}_05'] = df[fe_a].pow(0.5)
# #         generated_features[f'{fe_b}_05'] = df[fe_b].pow(0.5)
# #         generated_features[f'{fe_a}*{fe_b}_05'] = df[fe_a] * df[fe_b].pow(0.5)
# #         generated_features[f'{fe_a}_05*{fe_b}'] = df[fe_a].pow(0.5) * df[fe_b]

# #         generated_features[f'{fe_a}_log'] = np.log(df[fe_a])
# #         generated_features[f'{fe_b}_log'] = np.log(df[fe_b])
# #         generated_features[f'{fe_a}*{fe_b}_log'] = df[fe_a] * np.log(df[fe_b])
# #         generated_features[f'{fe_a}_log*{fe_b}'] = np.log(df[fe_a]) * df[fe_b]
        
#     generated_features = generated_features[selected]
#     generated_features = pd.concat([generated_features, df[features]], axis=1)
    
#     # prevent inf
#     for g in generated_features.columns:
#         generated_features[g] = np.minimum(np.maximum(generated_features[g], -1e9), 1e9)
    
#     return generated_features

# generated_features_train = gen_features(features, train_df)
# generated_features_test = gen_features(features, test_df)

# features = generated_features_train.columns

# Add distance features

In [9]:
from sklearn.neighbors import NearestNeighbors, KNeighborsClassifier

class_imbalance = train_df[train_df['Class'] == 0].shape[0] / train_df[train_df['Class'] == 1].shape[0]

# average label of 20 Nearest Neighbours (colsine distance)
knn = NearestNeighbors(n_neighbors=21, metric='cosine', n_jobs=-1)
knn.fit(train_df[features].fillna(0))

# train
dists, nears = knn.kneighbors(train_df[features].fillna(0), return_distance=True)
dists, nears = dists[:,1:], nears[:,1:]

classes = np.array([train_df.loc[n, 'Class'] for n in nears])
train_df['class_cos'] = np.array(classes[i].mean() * class_imbalance for i in range(len(nears)))
train_df['class_cos'] = train_df['class_cos'].astype(float)

# test
dists, nears = knn.kneighbors(test_df[features].fillna(0), return_distance=True)
dists, nears = dists[:,1:], nears[:,1:]

classes = np.array([train_df.loc[n, 'Class'] for n in nears])
test_df['class_cos'] = np.array(classes[i].mean()  * class_imbalance for i in range(len(nears)))
test_df['class_cos'] = test_df['class_cos'].astype(float)


# features = [fe for fe in train_df.columns if fe not in ['CF', 'CB', 'DV', 'BR', 'DF', 'GB', 'AH', 
#                                                         'CW', 'CL', 'BP', 'BD', 'FC', 'GE', 'GF',
#                                                         'AR', 'GI', 'Id', 'Class', 'AX']]

# Multilabel Stratification

In [10]:
def IterativeStratification(labels, r, random_state):
    """This function implements the Iterative Stratification algorithm described
    in the following paper:
    Sechidis K., Tsoumakas G., Vlahavas I. (2011) On the Stratification of
    Multi-Label Data. In: Gunopulos D., Hofmann T., Malerba D., Vazirgiannis M.
    (eds) Machine Learning and Knowledge Discovery in Databases. ECML PKDD
    2011. Lecture Notes in Computer Science, vol 6913. Springer, Berlin,
    Heidelberg.
    """

    n_samples = labels.shape[0]
    test_folds = np.zeros(n_samples, dtype=int)

    # Calculate the desired number of examples at each subset
    c_folds = r * n_samples

    # Calculate the desired number of examples of each label at each subset
    c_folds_labels = np.outer(r, labels.sum(axis=0))

    labels_not_processed_mask = np.ones(n_samples, dtype=bool)

    while np.any(labels_not_processed_mask):
        # Find the label with the fewest (but at least one) remaining examples,
        # breaking ties randomly
        num_labels = labels[labels_not_processed_mask].sum(axis=0)

        # Handle case where only all-zero labels are left by distributing
        # across all folds as evenly as possible (not in original algorithm but
        # mentioned in the text). (By handling this case separately, some
        # code redundancy is introduced; however, this approach allows for
        # decreased execution time when there are a relatively large number
        # of all-zero labels.)
        if num_labels.sum() == 0:
            sample_idxs = np.where(labels_not_processed_mask)[0]

            for sample_idx in sample_idxs:
                fold_idx = np.where(c_folds == c_folds.max())[0]

                if fold_idx.shape[0] > 1:
                    fold_idx = fold_idx[random_state.choice(fold_idx.shape[0])]

                test_folds[sample_idx] = fold_idx
                c_folds[fold_idx] -= 1

            break

        label_idx = np.where(num_labels == num_labels[np.nonzero(num_labels)].min())[0]
        if label_idx.shape[0] > 1:
            label_idx = label_idx[random_state.choice(label_idx.shape[0])]

        sample_idxs = np.where(np.logical_and(labels[:, label_idx].flatten(), labels_not_processed_mask))[0]

        for sample_idx in sample_idxs:
            # Find the subset(s) with the largest number of desired examples
            # for this label, breaking ties by considering the largest number
            # of desired examples, breaking further ties randomly
            label_folds = c_folds_labels[:, label_idx]
            fold_idx = np.where(label_folds == label_folds.max())[0]

            if fold_idx.shape[0] > 1:
                temp_fold_idx = np.where(c_folds[fold_idx] ==
                                         c_folds[fold_idx].max())[0]
                fold_idx = fold_idx[temp_fold_idx]

                if temp_fold_idx.shape[0] > 1:
                    fold_idx = fold_idx[random_state.choice(temp_fold_idx.shape[0])]

            test_folds[sample_idx] = fold_idx
            labels_not_processed_mask[sample_idx] = False

            # Update desired number of examples
            c_folds_labels[fold_idx, labels[sample_idx]] -= 1
            c_folds[fold_idx] -= 1

    return test_folds


class MultilabelStratifiedKFold(_BaseKFold):
    """Multilabel stratified K-Folds cross-validator
    Provides train/test indices to split multilabel data into train/test sets.
    This cross-validation object is a variation of KFold that returns
    stratified folds for multilabel data. The folds are made by preserving
    the percentage of samples for each label.
    Parameters
    ----------
    n_splits : int, default=3
        Number of folds. Must be at least 2.
    shuffle : boolean, optional
        Whether to shuffle each stratification of the data before splitting
        into batches.
    random_state : int, RandomState instance or None, optional, default=None
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by `np.random`. Unlike StratifiedKFold that only uses random_state
        when ``shuffle`` == True, this multilabel implementation
        always uses the random_state since the iterative stratification
        algorithm breaks ties randomly.
    Examples
    --------
    >>> from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
    >>> import numpy as np
    >>> X = np.array([[1,2], [3,4], [1,2], [3,4], [1,2], [3,4], [1,2], [3,4]])
    >>> y = np.array([[0,0], [0,0], [0,1], [0,1], [1,1], [1,1], [1,0], [1,0]])
    >>> mskf = MultilabelStratifiedKFold(n_splits=2, random_state=0)
    >>> mskf.get_n_splits(X, y)
    2
    >>> print(mskf)  # doctest: +NORMALIZE_WHITESPACE
    MultilabelStratifiedKFold(n_splits=2, random_state=0, shuffle=False)
    >>> for train_index, test_index in mskf.split(X, y):
    ...    print("TRAIN:", train_index, "TEST:", test_index)
    ...    X_train, X_test = X[train_index], X[test_index]
    ...    y_train, y_test = y[train_index], y[test_index]
    TRAIN: [0 3 4 6] TEST: [1 2 5 7]
    TRAIN: [1 2 5 7] TEST: [0 3 4 6]
    Notes
    -----
    Train and test sizes may be slightly different in each fold.
    See also
    --------
    RepeatedMultilabelStratifiedKFold: Repeats Multilabel Stratified K-Fold
    n times.
    """

    def __init__(self, n_splits=3, *, shuffle=False, random_state=None):
        super(MultilabelStratifiedKFold, self).__init__(n_splits=n_splits, shuffle=shuffle, random_state=random_state)

    def _make_test_folds(self, X, y):
        y = np.asarray(y, dtype=bool)
        type_of_target_y = type_of_target(y)

        if type_of_target_y != 'multilabel-indicator':
            raise ValueError(
                'Supported target type is: multilabel-indicator. Got {!r} instead.'.format(type_of_target_y))

        num_samples = y.shape[0]

        rng = check_random_state(self.random_state)
        indices = np.arange(num_samples)

        if self.shuffle:
            rng.shuffle(indices)
            y = y[indices]

        r = np.asarray([1 / self.n_splits] * self.n_splits)

        test_folds = IterativeStratification(labels=y, r=r, random_state=rng)

        return test_folds[np.argsort(indices)]

    def _iter_test_masks(self, X=None, y=None, groups=None):
        test_folds = self._make_test_folds(X, y)
        for i in range(self.n_splits):
            yield test_folds == i

    def split(self, X, y, groups=None):
        """Generate indices to split data into training and test set.
        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data, where n_samples is the number of samples
            and n_features is the number of features.
            Note that providing ``y`` is sufficient to generate the splits and
            hence ``np.zeros(n_samples)`` may be used as a placeholder for
            ``X`` instead of actual training data.
        y : array-like, shape (n_samples, n_labels)
            The target variable for supervised learning problems.
            Multilabel stratification is done based on the y labels.
        groups : object
            Always ignored, exists for compatibility.
        Returns
        -------
        train : ndarray
            The training set indices for that split.
        test : ndarray
            The testing set indices for that split.
        Notes
        -----
        Randomized CV splitters may return different results for each call of
        split. You can make the results identical by setting ``random_state``
        to an integer.
        """
        y = check_array(y, ensure_2d=False, dtype=None)
        return super(MultilabelStratifiedKFold, self).split(X, y, groups)


class RepeatedMultilabelStratifiedKFold(_RepeatedSplits):
    """Repeated Multilabel Stratified K-Fold cross validator.
    Repeats Mulilabel Stratified K-Fold n times with different randomization
    in each repetition.
    Parameters
    ----------
    n_splits : int, default=5
        Number of folds. Must be at least 2.
    n_repeats : int, default=10
        Number of times cross-validator needs to be repeated.
    random_state : None, int or RandomState, default=None
        Random state to be used to generate random state for each
        repetition as well as randomly breaking ties within the iterative
        stratification algorithm.
    Examples
    --------
    >>> from iterstrat.ml_stratifiers import RepeatedMultilabelStratifiedKFold
    >>> import numpy as np
    >>> X = np.array([[1,2], [3,4], [1,2], [3,4], [1,2], [3,4], [1,2], [3,4]])
    >>> y = np.array([[0,0], [0,0], [0,1], [0,1], [1,1], [1,1], [1,0], [1,0]])
    >>> rmskf = RepeatedMultilabelStratifiedKFold(n_splits=2, n_repeats=2,
    ...     random_state=0)
    >>> for train_index, test_index in rmskf.split(X, y):
    ...     print("TRAIN:", train_index, "TEST:", test_index)
    ...     X_train, X_test = X[train_index], X[test_index]
    ...     y_train, y_test = y[train_index], y[test_index]
    ...
    TRAIN: [0 3 4 6] TEST: [1 2 5 7]
    TRAIN: [1 2 5 7] TEST: [0 3 4 6]
    TRAIN: [0 1 4 5] TEST: [2 3 6 7]
    TRAIN: [2 3 6 7] TEST: [0 1 4 5]
    See also
    --------
    RepeatedStratifiedKFold: Repeats (Non-multilabel) Stratified K-Fold
    n times.
    """
    def __init__(self, n_splits=5, *, n_repeats=10, random_state=None):
        super(RepeatedMultilabelStratifiedKFold, self).__init__(
            MultilabelStratifiedKFold, n_repeats=n_repeats, random_state=random_state,
            n_splits=n_splits)


class MultilabelStratifiedShuffleSplit(BaseShuffleSplit):
    """Multilabel Stratified ShuffleSplit cross-validator
    Provides train/test indices to split data into train/test sets.
    This cross-validation object is a merge of MultilabelStratifiedKFold and
    ShuffleSplit, which returns stratified randomized folds for multilabel
    data. The folds are made by preserving the percentage of each label.
    Note: like the ShuffleSplit strategy, multilabel stratified random splits
    do not guarantee that all folds will be different, although this is
    still very likely for sizeable datasets.
    Parameters
    ----------
    n_splits : int, default 10
        Number of re-shuffling & splitting iterations.
    test_size : float, int, None, optional
        If float, should be between 0.0 and 1.0 and represent the proportion
        of the dataset to include in the test split. If int, represents the
        absolute number of test samples. If None, the value is set to the
        complement of the train size. By default, the value is set to 0.1.
        The default will change in version 0.21. It will remain 0.1 only
        if ``train_size`` is unspecified, otherwise it will complement
        the specified ``train_size``.
    train_size : float, int, or None, default is None
        If float, should be between 0.0 and 1.0 and represent the
        proportion of the dataset to include in the train split. If
        int, represents the absolute number of train samples. If None,
        the value is automatically set to the complement of the test size.
    random_state : int, RandomState instance or None, optional (default=None)
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by `np.random`. Unlike StratifiedShuffleSplit that only uses
        random_state when ``shuffle`` == True, this multilabel implementation
        always uses the random_state since the iterative stratification
        algorithm breaks ties randomly.
    Examples
    --------
    >>> from iterstrat.ml_stratifiers import MultilabelStratifiedShuffleSplit
    >>> import numpy as np
    >>> X = np.array([[1,2], [3,4], [1,2], [3,4], [1,2], [3,4], [1,2], [3,4]])
    >>> y = np.array([[0,0], [0,0], [0,1], [0,1], [1,1], [1,1], [1,0], [1,0]])
    >>> msss = MultilabelStratifiedShuffleSplit(n_splits=3, test_size=0.5,
    ...    random_state=0)
    >>> msss.get_n_splits(X, y)
    3
    >>> print(mss)       # doctest: +ELLIPSIS
    MultilabelStratifiedShuffleSplit(n_splits=3, random_state=0, test_size=0.5,
                                     train_size=None)
    >>> for train_index, test_index in msss.split(X, y):
    ...    print("TRAIN:", train_index, "TEST:", test_index)
    ...    X_train, X_test = X[train_index], X[test_index]
    ...    y_train, y_test = y[train_index], y[test_index]
    TRAIN: [1 2 5 7] TEST: [0 3 4 6]
    TRAIN: [2 3 6 7] TEST: [0 1 4 5]
    TRAIN: [1 2 5 6] TEST: [0 3 4 7]
    Notes
    -----
    Train and test sizes may be slightly different from desired due to the
    preference of stratification over perfectly sized folds.
    """

    def __init__(self, n_splits=10, *, test_size="default", train_size=None,
                 random_state=None):
        super(MultilabelStratifiedShuffleSplit, self).__init__(
            n_splits=n_splits, test_size=test_size, train_size=train_size, random_state=random_state)

    def _iter_indices(self, X, y, groups=None):
        n_samples = _num_samples(X)
        y = check_array(y, ensure_2d=False, dtype=None)
        y = np.asarray(y, dtype=bool)
        type_of_target_y = type_of_target(y)

        if type_of_target_y != 'multilabel-indicator':
            raise ValueError(
                'Supported target type is: multilabel-indicator. Got {!r} instead.'.format(
                    type_of_target_y))

        n_train, n_test = _validate_shuffle_split(n_samples, self.test_size,
                                                  self.train_size)

        n_samples = y.shape[0]
        rng = check_random_state(self.random_state)
        y_orig = y.copy()

        r = np.array([n_train, n_test]) / (n_train + n_test)

        for _ in range(self.n_splits):
            indices = np.arange(n_samples)
            rng.shuffle(indices)
            y = y_orig[indices]

            test_folds = IterativeStratification(labels=y, r=r, random_state=rng)

            test_idx = test_folds[np.argsort(indices)] == 1
            test = np.where(test_idx)[0]
            train = np.where(~test_idx)[0]

            yield train, test

    def split(self, X, y, groups=None):
        """Generate indices to split data into training and test set.
        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data, where n_samples is the number of samples
            and n_features is the number of features.
            Note that providing ``y`` is sufficient to generate the splits and
            hence ``np.zeros(n_samples)`` may be used as a placeholder for
            ``X`` instead of actual training data.
        y : array-like, shape (n_samples, n_labels)
            The target variable for supervised learning problems.
            Multilabel stratification is done based on the y labels.
        groups : object
            Always ignored, exists for compatibility.
        Returns
        -------
        train : ndarray
            The training set indices for that split.
        test : ndarray
            The testing set indices for that split.
        Notes
        -----
        Randomized CV splitters may return different results for each call of
        split. You can make the results identical by setting ``random_state``
        to an integer.
        """
        y = check_array(y, ensure_2d=False, dtype=None)
        return super(MultilabelStratifiedShuffleSplit, self).split(X, y, groups)

# LGBM feature selection

In [45]:
from shaphypetune import BoostBoruta

params = {
        'boosting_type':'goss',
        'learning_rate': 0.06733232950390658, 
        'n_estimators': 5000, 
        'early_stopping_round' : 100, 
        'subsample' : 0.7, # bagging_fraction
        'colsample_bytree': 0.6, # feature_fraction
        'num_leaves': 33,
        'class_weight': 'balanced',
        'metric': 'none', 
        'is_unbalance': True, 
        'random_state': 8062023,
        'feature_fraction_seed': 8062023,
        'bagging_seed': 8062023,
        'max_depth': 6,
        'reg_alpha': 2.025436e-04,  
        'reg_lambda': 2.290193e-07,
#         'bagging_freq': 6,
        'max_bin': 198,
        'min_child_samples': 32,
        'importance_type': 'gain'
        }

def balanced_log_loss(y_true, y_pred):

    # Nc is the number of observations
    N_1 = np.sum(y_true == 1, axis=0)
    N_0 = np.sum(y_true == 0, axis=0)

    # In order to avoid the extremes of the log function, each predicted probability 𝑝 is replaced with max(min(𝑝,1−10−15),10−15)
    y_pred = np.clip(y_pred, 1e-15, 1 - 1e-15)

    # balanced logarithmic loss
    loss_numerator = - (1/N_0) * np.sum((1 - y_true) * np.log(1 - y_pred)) - (1/N_1) * np.sum(y_true * np.log(y_pred))

    return loss_numerator / 2

def bll_metric(y_true, y_pred):
    return 'balanced_log_loss', balanced_log_loss(y_true, y_pred), False

def calc_log_loss_weight(y_true): 
    '''w0, w1 assign different weights to individual data points during training.'''
    nc = np.bincount(y_true)
    w0, w1 = 1/(nc[0]/y_true.shape[0]), 1/(nc[1]/y_true.shape[0])
    return w0, w1

def lgbm_tuning(features, permut=False, boruta=False):
    metric = balanced_log_loss
    eval_results_ = {}

    cv_scores = [] # store all cv scores of outer loop inference

    perm_df_ = pd.DataFrame()
    feature_importances_ = pd.DataFrame()
    boruta_df_ = pd.DataFrame()
    
    for i in range(CFG.n_optimize_repeats):
        print(f'Repeat {blu}#{i+1}')
        
        # Make random under-sampling to balance classes
        positive_count_train = train_df.Class.value_counts()[1]
        sampler = RandomUnderSampler(sampling_strategy={0: positive_count_train, 
                                                        1: positive_count_train}, 
                                     random_state=15062023+i, 
                                     replacement=True)

        X_re, y_re = pd.concat([train_df[features], greeks.iloc[:,1:4]], axis=1), train_df['Class']
        
        if CFG.undersample:
            X_re, y_re = sampler.fit_resample(X_re, y_re)
        
        # Create Stratified Multilabel k-Fold scheme
        kf = MultilabelStratifiedKFold(n_splits=CFG.n_feature_sel_folds, shuffle=True, random_state=8062023+i)

        # Create an oof array for inner loop
        oof = np.zeros(X_re.shape[0])
        
        # Stratify based on Class and Alpha (3 types of conditions)
        for fold, (train_idx, val_idx) in enumerate(kf.split(X=X_re[features], y=X_re.iloc[:,-3:]), start = 1): 
            X, y = X_re[features], y_re

            # Split the dataset according to the fold indexes.
            X_train = X.iloc[train_idx]
            X_val = X.iloc[val_idx]
            y_train = y.iloc[train_idx]
            y_val = y.iloc[val_idx]


            X_train = X_train.reset_index(drop=True)
            y_train = y_train.reset_index(drop=True)

            # Store models here
            models_ = [] 

            eval_results_[fold]= {}

            clf = lgb.LGBMClassifier(**params)
            clf.fit(X_train, y_train, eval_set=[(X_val, y_val)], 
                    eval_metric=bll_metric, # eval_sample_weight=w_val, 
                    early_stopping_rounds=100, verbose=1)

            models_.append(clf)

            val_preds = clf.predict_proba(X_val)[:,1]
            oof[val_idx] = val_preds

            val_score = metric(y_val, val_preds)
            best_iter = clf.best_iteration_

            print(f'Fold: {blu}{fold:>3}{res}| {metric.__name__}: {blu}{val_score:.5f}{res}'
                  f' | Best iteration: {blu}{best_iter:>4}{res}')

            # permutation importance
            if permut:
                perm = PermutationImportance(clf, scoring=None, n_iter=1, 
                                             random_state=42, cv=None, refit=False).fit(X_val, y_val)

                perm_importance_df = pd.DataFrame({'importance': perm.feature_importances_}, 
                                                   index=X_val.columns).sort_index()

                if perm_df_.shape[0] == 0:
                    perm_df_ = perm_importance_df.copy()
                else:
                    perm_df_ += perm_importance_df

            # tree feature importance
            f_i = pd.DataFrame(sorted(zip(clf.feature_importances_, X.columns), 
                                              reverse=True, key=lambda x: x[1]), 
                               columns=['Value','Feature'])

            if feature_importances_.shape[0] == 0:
                feature_importances_ = f_i.copy()
            else:

                feature_importances_['Value'] += f_i['Value']

            # Boruta SHAP importance
            if boruta:
                model = BoostBoruta(clf, importance_type='shap_importances', train_importance=False)
                model.fit(X_train, y_train, eval_set=[(X_val, y_val)], 
                          eval_metric=bll_metric, early_stopping_rounds=300, verbose=-1)

                boruta_importance_df = pd.DataFrame({'importance': model.ranking_}, 
                                                     index=X_train.columns).sort_index()
                if boruta_df_.shape[0] == 0:
                    boruta_df_ = boruta_importance_df.copy()
                else:
                    boruta_df_ += boruta_importance_df

        fold_cv_score = metric(y_re, oof)
        print(f'{red} CV score: {res} {metric.__name__}: {red}{fold_cv_score:.5f}{res}')
        print(f'{"*" * 50}\n')
        cv_scores.append(fold_cv_score)


    print(f'{red} Avg score {CFG.n_feature_sel_folds}-fold: {res} {metric.__name__}: {red}{np.mean(cv_scores):.5f}{res}')
    print(f'{"*" * 50}\n')
    
    if permut:
        perm_df_ = perm_df_.sort_values('importance', ascending=False)
        
    if boruta:
        boruta_df_ = boruta_df_.sort_values('importance')
                                    
    feature_importances_ = feature_importances_.sort_values('Value', ascending=False)
    
    return perm_df_, feature_importances_, boruta_df_, np.mean(cv_scores)

if CFG.feature_sel:
    perm_df_, feature_importances_, boruta_df_, cv_scores = lgbm_tuning(features, permut=False, boruta=False)

Repeat [1m[34m#1
[1]	valid_0's balanced_log_loss: 0.675494
[2]	valid_0's balanced_log_loss: 0.663652
[3]	valid_0's balanced_log_loss: 0.652625
[4]	valid_0's balanced_log_loss: 0.623198
[5]	valid_0's balanced_log_loss: 0.607029
[6]	valid_0's balanced_log_loss: 0.581928
[7]	valid_0's balanced_log_loss: 0.564768
[8]	valid_0's balanced_log_loss: 0.546604
[9]	valid_0's balanced_log_loss: 0.537501
[10]	valid_0's balanced_log_loss: 0.518296
[11]	valid_0's balanced_log_loss: 0.505712
[12]	valid_0's balanced_log_loss: 0.503956
[13]	valid_0's balanced_log_loss: 0.492748
[14]	valid_0's balanced_log_loss: 0.483312
[15]	valid_0's balanced_log_loss: 0.483312
[16]	valid_0's balanced_log_loss: 0.483312
[17]	valid_0's balanced_log_loss: 0.483312
[18]	valid_0's balanced_log_loss: 0.483312
[19]	valid_0's balanced_log_loss: 0.483312
[20]	valid_0's balanced_log_loss: 0.483312
[21]	valid_0's balanced_log_loss: 0.483312
[22]	valid_0's balanced_log_loss: 0.483312
[23]	valid_0's balanced_log_loss: 0.483312
[

[41]	valid_0's balanced_log_loss: 0.476896
[42]	valid_0's balanced_log_loss: 0.476896
[43]	valid_0's balanced_log_loss: 0.476896
[44]	valid_0's balanced_log_loss: 0.476896
[45]	valid_0's balanced_log_loss: 0.476896
[46]	valid_0's balanced_log_loss: 0.476896
[47]	valid_0's balanced_log_loss: 0.476896
[48]	valid_0's balanced_log_loss: 0.476896
[49]	valid_0's balanced_log_loss: 0.476896
[50]	valid_0's balanced_log_loss: 0.476896
[51]	valid_0's balanced_log_loss: 0.476896
[52]	valid_0's balanced_log_loss: 0.476896
[53]	valid_0's balanced_log_loss: 0.476896
[54]	valid_0's balanced_log_loss: 0.476896
[55]	valid_0's balanced_log_loss: 0.476896
[56]	valid_0's balanced_log_loss: 0.476896
[57]	valid_0's balanced_log_loss: 0.476896
[58]	valid_0's balanced_log_loss: 0.476896
[59]	valid_0's balanced_log_loss: 0.476896
[60]	valid_0's balanced_log_loss: 0.476896
[61]	valid_0's balanced_log_loss: 0.476896
[62]	valid_0's balanced_log_loss: 0.476896
[63]	valid_0's balanced_log_loss: 0.476896
[64]	valid_

[1]	valid_0's balanced_log_loss: 0.661572
[2]	valid_0's balanced_log_loss: 0.633734
[3]	valid_0's balanced_log_loss: 0.622212
[4]	valid_0's balanced_log_loss: 0.599433
[5]	valid_0's balanced_log_loss: 0.590956
[6]	valid_0's balanced_log_loss: 0.554559
[7]	valid_0's balanced_log_loss: 0.528836
[8]	valid_0's balanced_log_loss: 0.506323
[9]	valid_0's balanced_log_loss: 0.492589
[10]	valid_0's balanced_log_loss: 0.478391
[11]	valid_0's balanced_log_loss: 0.465595
[12]	valid_0's balanced_log_loss: 0.447033
[13]	valid_0's balanced_log_loss: 0.436092
[14]	valid_0's balanced_log_loss: 0.414396
[15]	valid_0's balanced_log_loss: 0.414396
[16]	valid_0's balanced_log_loss: 0.414396
[17]	valid_0's balanced_log_loss: 0.414396
[18]	valid_0's balanced_log_loss: 0.414396
[19]	valid_0's balanced_log_loss: 0.414396
[20]	valid_0's balanced_log_loss: 0.414396
[21]	valid_0's balanced_log_loss: 0.414396
[22]	valid_0's balanced_log_loss: 0.414396
[23]	valid_0's balanced_log_loss: 0.414396
[24]	valid_0's balan

[10]	valid_0's balanced_log_loss: 0.524836
[11]	valid_0's balanced_log_loss: 0.507324
[12]	valid_0's balanced_log_loss: 0.495464
[13]	valid_0's balanced_log_loss: 0.483312
[14]	valid_0's balanced_log_loss: 0.47058
[15]	valid_0's balanced_log_loss: 0.47058
[16]	valid_0's balanced_log_loss: 0.47058
[17]	valid_0's balanced_log_loss: 0.47058
[18]	valid_0's balanced_log_loss: 0.47058
[19]	valid_0's balanced_log_loss: 0.47058
[20]	valid_0's balanced_log_loss: 0.47058
[21]	valid_0's balanced_log_loss: 0.47058
[22]	valid_0's balanced_log_loss: 0.47058
[23]	valid_0's balanced_log_loss: 0.47058
[24]	valid_0's balanced_log_loss: 0.47058
[25]	valid_0's balanced_log_loss: 0.47058
[26]	valid_0's balanced_log_loss: 0.47058
[27]	valid_0's balanced_log_loss: 0.47058
[28]	valid_0's balanced_log_loss: 0.47058
[29]	valid_0's balanced_log_loss: 0.47058
[30]	valid_0's balanced_log_loss: 0.47058
[31]	valid_0's balanced_log_loss: 0.47058
[32]	valid_0's balanced_log_loss: 0.47058
[33]	valid_0's balanced_log_lo

[1]	valid_0's balanced_log_loss: 0.660084
[2]	valid_0's balanced_log_loss: 0.632845
[3]	valid_0's balanced_log_loss: 0.614469
[4]	valid_0's balanced_log_loss: 0.591348
[5]	valid_0's balanced_log_loss: 0.570386
[6]	valid_0's balanced_log_loss: 0.551285
[7]	valid_0's balanced_log_loss: 0.536158
[8]	valid_0's balanced_log_loss: 0.52579
[9]	valid_0's balanced_log_loss: 0.516911
[10]	valid_0's balanced_log_loss: 0.50592
[11]	valid_0's balanced_log_loss: 0.494012
[12]	valid_0's balanced_log_loss: 0.48363
[13]	valid_0's balanced_log_loss: 0.476049
[14]	valid_0's balanced_log_loss: 0.469509
[15]	valid_0's balanced_log_loss: 0.469509
[16]	valid_0's balanced_log_loss: 0.469509
[17]	valid_0's balanced_log_loss: 0.469509
[18]	valid_0's balanced_log_loss: 0.469509
[19]	valid_0's balanced_log_loss: 0.469509
[20]	valid_0's balanced_log_loss: 0.469509
[21]	valid_0's balanced_log_loss: 0.469509
[22]	valid_0's balanced_log_loss: 0.469509
[23]	valid_0's balanced_log_loss: 0.469509
[24]	valid_0's balanced

[13]	valid_0's balanced_log_loss: 0.45206
[14]	valid_0's balanced_log_loss: 0.440944
[15]	valid_0's balanced_log_loss: 0.440944
[16]	valid_0's balanced_log_loss: 0.440944
[17]	valid_0's balanced_log_loss: 0.440944
[18]	valid_0's balanced_log_loss: 0.440944
[19]	valid_0's balanced_log_loss: 0.440944
[20]	valid_0's balanced_log_loss: 0.440944
[21]	valid_0's balanced_log_loss: 0.440944
[22]	valid_0's balanced_log_loss: 0.440944
[23]	valid_0's balanced_log_loss: 0.440944
[24]	valid_0's balanced_log_loss: 0.440944
[25]	valid_0's balanced_log_loss: 0.440944
[26]	valid_0's balanced_log_loss: 0.440944
[27]	valid_0's balanced_log_loss: 0.440944
[28]	valid_0's balanced_log_loss: 0.440944
[29]	valid_0's balanced_log_loss: 0.440944
[30]	valid_0's balanced_log_loss: 0.440944
[31]	valid_0's balanced_log_loss: 0.440944
[32]	valid_0's balanced_log_loss: 0.440944
[33]	valid_0's balanced_log_loss: 0.440944
[34]	valid_0's balanced_log_loss: 0.440944
[35]	valid_0's balanced_log_loss: 0.440944
[36]	valid_0

# Check features correlation

In [12]:
if CFG.feature_sel:
    col = 'BZ'
    x = train_df[train_df[col] <= train_df[col].quantile(0.99)]
    cm = x[[c for c in train_df.columns if c not in ['Id', 'Class']]].corr()
    display(np.abs(cm[col]).sort_values(ascending=False)[1:])

# Analyze permutation feature importance

In [13]:
if CFG.feature_sel:
    perm_df_.to_csv('perm_df.csv')
    perm_df_
    perm_cols = set(perm_df_.index[-35:])
    display(perm_cols)

# Analyze tree gain feature importance

In [14]:
if CFG.feature_sel:
    feature_importances_.to_csv('feature_importances.csv')
    feature_importances_
    fi_cols = set(feature_importances_['Feature'].values[-23:])
    display(fi_cols)

# Analyze BORUTA importance

In [15]:
if CFG.feature_sel:
    boruta_df_.to_csv('boruta_df_.csv')
    boruta_df_
    boruta_cols = set(boruta_df_.index[-35:])
    display(boruta_cols)

# LGBM Optuna optimization

In [54]:
def balanced_log_loss(y_true, y_pred):
    # Nc is the number of observations
    N_1 = np.sum(y_true == 1, axis=0)
    N_0 = np.sum(y_true == 0, axis=0)

    # In order to avoid the extremes of the log function, each predicted probability 𝑝 is replaced with max(min(𝑝,1−10−15),10−15)
    y_pred = np.clip(y_pred, 1e-15, 1 - 1e-15)

    # balanced logarithmic loss
    loss_numerator = - (1/N_0) * np.sum((1 - y_true) * np.log(1 - y_pred)) - (1/N_1) * np.sum(y_true * np.log(y_pred))

    return loss_numerator / 2

def bll_metric(y_pred, y_true):
    y_true = y_true.get_label()
    return 'balanced_log_loss', balanced_log_loss(y_true, y_pred), False

def calc_log_loss_weight(y_true): 
    '''w0, w1 assign different weights to individual data points during training.'''
    nc = np.bincount(y_true)
    w0, w1 = 1/(nc[0]/y_true.shape[0]), 1/(nc[1]/y_true.shape[0])
    return w0, w1

X, y = train_df[features], train_df.Class 
    
def objective(trial):
    param = {
        # Main parameters
#                     'device': 'gpu',
#                     'gpu_platform_id': 0,
#                     'gpu_device_id': 0,
        'objective': 'binary',
        'metric': 'none',
        'is_unbalance': True,
        'boosting_type': trial.suggest_categorical('boosting_type', ['goss', 'gbdt', 'dart']),   
        # Hyperparamters (in order of importance decreasing)
        'n_estimators': 3000, # trial.suggest_int('n_estimators', 500, 1500),  # max number of trees in model
        'learning_rate': trial.suggest_loguniform('learning_rate', 1e-4, 3e-1),
        'lambda_l1': trial.suggest_float('lambda_l1', 1e-8, 10.0, log=True), # L1,  alias: reg_alpha
        'lambda_l2': trial.suggest_float('lambda_l2', 1e-8, 10.0, log=True), # L2, alias: reg_lambda
         # decrease to deal with overfit
        'max_depth': trial.suggest_int('max_depth', 4, 10),   # tree max depth 
         # decrease to deal with overfit
        'num_leaves': trial.suggest_int('num_leaves', 4, 128),  # Max number of leaves in one tree
                                                               # should be ~ 2**(max_depth-1)
        'bagging_fraction': None, # Randomly select part of data without 
                                  # resampling if bagging_fraction < 1.0
                                  # alias: subsample
        'feature_fraction': trial.suggest_float('feature_fraction', 0.3, 0.7), # Randomly select a subset of features 
                                                                   # if feature_fraction < 1.0
                                                                   # alias: colsample_bytree
        # decrease to deal with overfit
        'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 5, 100), # Minimal number of data in one leaf
                                                                           # aliases: min_child_samples, 
#             # decrease to deat with overfit
#             'min_sum_hessian_in_leaf': trial.suggest_float('min_sum_hessian_in_leaf', 1e-4, 1e-1), # Stop trying to split 
#                                                                                                    # leave if sum of it's
#                                                                                                    # hessian less than k
#                                                                                                    # alias: min_child_weight

        # increase for accuracy, decrease to deal with overfit
        'max_bin': trial.suggest_int('max_bin', 32, 255), # Max number of bins that feature values will be bucketed in
        # increase to deal with overfit
        'bagging_freq': trial.suggest_int('bagging_freq', 1, 7), # Perform bagging at every k iteration
        'early_stopping_round': 100, 

#           'subsample_for_bin': 200000, # Number of data that sampled to construct feature discrete bins; setting this 
                                     # to larger value will give better training result but may increase train time 
#           'cat_smooth': trial.suggest_float('cat_smooth', 10.0, 100.0),  # this can reduce the effect of noises in 
                                                                       # categorical features, especially for 
                                                                       # categories with few data
        'verbose': -1
    }

    if param['boosting_type'] != 'goss':
        param['bagging_fraction'] = trial.suggest_float('bagging_fraction', 0.3, 0.7)

    bll_list = list()
    
    for i in range(CFG.n_optimize_repeats):
        print(f'Repeat {blu}#{i+1}')

        # Make random under-sampling to balance classes
        positive_count_train = train_df.Class.value_counts()[1]
        sampler = RandomUnderSampler(sampling_strategy={0: positive_count_train, 
                                                        1: positive_count_train}, 
                                     random_state=15062023+i, 
                                     replacement=True)

        X_re, y_re = pd.concat([train_df[features], greeks.iloc[:,1:4]], axis=1), train_df['Class']
        
        if CFG.undersample:
            X_re, y_re = sampler.fit_resample(X_re, y_re)
        
        # Create Stratified Multilabel k-Fold scheme
        kf = MultilabelStratifiedKFold(n_splits=CFG.n_feature_sel_folds, shuffle=True, random_state=10062023+i)

        # Create an oof array for inner loop
        oof = np.zeros(X_re.shape[0])

        # Stratify based on Class and Alpha (3 types of conditions)
        for fold, (train_idx, val_idx) in enumerate(kf.split(X=X_re[features], y=X_re.iloc[:,-3:]), start=1): 
            X, y = X_re[features], y_re
            
            # Split the dataset according to the fold indexes.
            X_train = X.iloc[train_idx]
            X_val = X.iloc[val_idx]
            y_train = y.iloc[train_idx]
            y_val = y.iloc[val_idx]

            dtrain = lgb.Dataset(X_train, label=y_train)
            dvalid = lgb.Dataset(X_val, label=y_val)

            # Add a callback for pruning
#             pruning_callback = optuna.integration.LightGBMPruningCallback(trial, 'balanced_log_loss')

            gbm = lgb.train(
                param, dtrain, valid_sets=[dvalid], # callbacks=[pruning_callback], 
                feval=bll_metric, verbose_eval=0
            )

            val_preds = gbm.predict(X_val)
            oof[val_idx] = val_preds
        bll_list.append(balanced_log_loss(y_re, oof))

    return np.mean(bll_list)
            

if CFG.lgbm_optimize:
#     study = optuna.create_study(pruner=optuna.pruners.MedianPruner(n_warmup_steps=100), direction="minimize")
    study = optuna.create_study(direction="minimize")
    study.optimize(objective, n_trials=CFG.n_trials)

    print("Number of finished trials: {}".format(len(study.trials)))

    print("Best trial:")
    trial = study.best_trial

    print("  Value: {}".format(trial.value))

    print("  Params: ")
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))

    df = study.trials_dataframe().sort_values('value')
    df.to_csv(f'optuna_lgbm_{boosting_type}.csv')

[I 2023-06-15 15:20:15,455] A new study created in memory with name: no-name-b4491000-b388-4f73-8d8a-7f27ff9f3a2a


Repeat [1m[34m#1
Repeat [1m[34m#2
Repeat [1m[34m#3
Repeat [1m[34m#4
Repeat [1m[34m#5


[I 2023-06-15 15:20:17,853] Trial 0 finished with value: 0.18892005719012475 and parameters: {'boosting_type': 'goss', 'learning_rate': 0.10697234520758851, 'lambda_l1': 0.000396964712338032, 'lambda_l2': 0.5452195131263328, 'max_depth': 10, 'num_leaves': 123, 'feature_fraction': 0.31233003634052664, 'min_data_in_leaf': 18, 'max_bin': 113, 'bagging_freq': 5}. Best is trial 0 with value: 0.18892005719012475.


Repeat [1m[34m#1
Repeat [1m[34m#2
Repeat [1m[34m#3
Repeat [1m[34m#4
Repeat [1m[34m#5


[I 2023-06-15 15:20:59,925] Trial 1 finished with value: 0.32141918764421884 and parameters: {'boosting_type': 'dart', 'learning_rate': 0.01291886257117534, 'lambda_l1': 7.971757418525103e-07, 'lambda_l2': 0.033246695034243774, 'max_depth': 10, 'num_leaves': 26, 'feature_fraction': 0.5919810217300694, 'min_data_in_leaf': 23, 'max_bin': 171, 'bagging_freq': 4, 'bagging_fraction': 0.326440880874238}. Best is trial 0 with value: 0.18892005719012475.


Repeat [1m[34m#1
Repeat [1m[34m#2
Repeat [1m[34m#3
Repeat [1m[34m#4
Repeat [1m[34m#5


[I 2023-06-15 15:21:09,727] Trial 2 finished with value: 0.6973915667428459 and parameters: {'boosting_type': 'dart', 'learning_rate': 0.0038497989967737267, 'lambda_l1': 1.4831537299578065e-08, 'lambda_l2': 0.00013517895378162203, 'max_depth': 6, 'num_leaves': 118, 'feature_fraction': 0.6999791503283905, 'min_data_in_leaf': 62, 'max_bin': 160, 'bagging_freq': 5, 'bagging_fraction': 0.3651051110204697}. Best is trial 0 with value: 0.18892005719012475.


Repeat [1m[34m#1
Repeat [1m[34m#2
Repeat [1m[34m#3
Repeat [1m[34m#4
Repeat [1m[34m#5


[I 2023-06-15 15:21:10,358] Trial 3 finished with value: 0.6973915667428459 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.0003423338408344596, 'lambda_l1': 2.6572133497111632e-05, 'lambda_l2': 0.00037613744626938263, 'max_depth': 8, 'num_leaves': 34, 'feature_fraction': 0.671469755767987, 'min_data_in_leaf': 98, 'max_bin': 135, 'bagging_freq': 5, 'bagging_fraction': 0.5710559192067766}. Best is trial 0 with value: 0.18892005719012475.


Repeat [1m[34m#1
Repeat [1m[34m#2
Repeat [1m[34m#3
Repeat [1m[34m#4


[I 2023-06-15 15:21:11,031] Trial 4 finished with value: 0.6860128459689832 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.03170141758707658, 'lambda_l1': 0.81728906623537, 'lambda_l2': 8.493898941768053e-08, 'max_depth': 6, 'num_leaves': 75, 'feature_fraction': 0.6795553548506662, 'min_data_in_leaf': 67, 'max_bin': 153, 'bagging_freq': 4, 'bagging_fraction': 0.6828530143036393}. Best is trial 0 with value: 0.18892005719012475.


Repeat [1m[34m#5
Repeat [1m[34m#1
Repeat [1m[34m#2
Repeat [1m[34m#3
Repeat [1m[34m#4
Repeat [1m[34m#5


[I 2023-06-15 15:21:11,709] Trial 5 finished with value: 0.6973915667428459 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.00673320783371971, 'lambda_l1': 1.1008453984575852, 'lambda_l2': 9.619429492818879e-05, 'max_depth': 5, 'num_leaves': 42, 'feature_fraction': 0.42632923566071784, 'min_data_in_leaf': 73, 'max_bin': 166, 'bagging_freq': 6, 'bagging_fraction': 0.4485009740200605}. Best is trial 0 with value: 0.18892005719012475.


Repeat [1m[34m#1
Repeat [1m[34m#2
Repeat [1m[34m#3
Repeat [1m[34m#4


[I 2023-06-15 15:21:12,321] Trial 6 finished with value: 0.6973915667428459 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.0003150133813620926, 'lambda_l1': 1.3553280015177709, 'lambda_l2': 0.0001623003685249506, 'max_depth': 10, 'num_leaves': 66, 'feature_fraction': 0.49546877945332324, 'min_data_in_leaf': 67, 'max_bin': 111, 'bagging_freq': 6, 'bagging_fraction': 0.3737408995272305}. Best is trial 0 with value: 0.18892005719012475.


Repeat [1m[34m#5
Repeat [1m[34m#1
Repeat [1m[34m#2
Repeat [1m[34m#3
Repeat [1m[34m#4
Repeat [1m[34m#5


[I 2023-06-15 15:21:12,888] Trial 7 finished with value: 0.6973915667428459 and parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.06292288328578349, 'lambda_l1': 0.26821685232687464, 'lambda_l2': 9.730379881711538e-06, 'max_depth': 5, 'num_leaves': 112, 'feature_fraction': 0.5173379162150782, 'min_data_in_leaf': 78, 'max_bin': 123, 'bagging_freq': 6, 'bagging_fraction': 0.5824817764566992}. Best is trial 0 with value: 0.18892005719012475.


Repeat [1m[34m#1


[W 2023-06-15 15:21:14,316] Trial 8 failed with parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.0006303332783493984, 'lambda_l1': 2.6605054586252357, 'lambda_l2': 1.1945871999575372e-07, 'max_depth': 4, 'num_leaves': 48, 'feature_fraction': 0.32667953679014455, 'min_data_in_leaf': 18, 'max_bin': 238, 'bagging_freq': 6, 'bagging_fraction': 0.35721817408643103} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/home/alex/.local/lib/python3.10/site-packages/optuna/study/_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_5861/4055452463.py", line 117, in objective
    gbm = lgb.train(
  File "/home/alex/.local/lib/python3.10/site-packages/lightgbm/engine.py", line 292, in train
    booster.update(fobj=fobj)
  File "/home/alex/.local/lib/python3.10/site-packages/lightgbm/basic.py", line 3021, in update
    _safe_call(_LIB.LGBM_BoosterUpdateOneIter(
KeyboardInterrupt
[W 2023-06-15 15:21:14,316

KeyboardInterrupt: 

# Load LGBM parameters

In [18]:
import glob

param_list = glob.glob("optuna_lgbm*.csv")
models = list()
best_lgbm_params = list()

lgbm_params = pd.DataFrame()

for f in param_list:
    gb_type = [f.split('_')][0][2][:-4]
    tmp = pd.read_csv(f, index_col='Unnamed: 0')
    tmp['params_boosting_type'] = gb_type
    if lgbm_params.shape[0] == 0:
        lgbm_params = tmp
    else:
        lgbm_params = pd.concat([lgbm_params, tmp])
        
lgbm_params = lgbm_params.sort_values('value').head(20)
param_cols = [c for c in lgbm_params.columns if c.startswith('params_')]
lgbm_params = lgbm_params[param_cols]

for idx, row in lgbm_params.iterrows():
    row_dict = {k[7:]: v for k, v in row.items()}
    row_dict['objective'] = 'binary'
    row_dict['metric'] = 'none'
#     row_dict['subsample_for_bin'] = 300000
    row_dict['force_col_wise'] = False
    row_dict['early_stopping_rounds'] = 50
    row_dict['verbose'] = -1
    row_dict['max_bin'] = 255
    row_dict['bagging_freq'] = int(row_dict['bagging_freq'])
#     if row_dict['bagging_fraction'] != row_dict['bagging_fraction']:
#         row_dict['bagging_fraction'] = None
    row_dict['min_data_in_leaf'] = int(row_dict['min_data_in_leaf'])
    row_dict['n_estimators'] = 3000 # int(row_dict['n_estimators'])
    
    row_dict['learning_rate'] = 0.06433232950390658 # float(row_dict['learning_rate'])
    row_dict['num_leaves'] = int(row_dict['num_leaves'])
    row_dict['max_depth'] = int(row_dict['max_depth'])
    row_dict['is_unbalance'] = True
    row_dict['class_weight'] = 'balanced'
    row_dict['verbose'] = -1
    
    best_lgbm_params.append(row_dict)

# LGBM train

In [19]:
def bll_metric(y_true, y_pred):
    return 'balanced_log_loss', balanced_log_loss(y_true, y_pred), False

def lgbm_training():
    models_ = list()
    bll_list = list()
    weights_ = list()
    
    X, y = train_df[features], train_df.Class
#     X, y = generated_features_train, train_df.Class
     
    kf = MultilabelStratifiedKFold(n_splits=CFG.n_stacking_folds, shuffle=True, random_state=8062023+20)
    metric = balanced_log_loss
    eval_results_ = {}     # used to store evaluation results for each fold

    oof_level2 = np.zeros([y.shape[0], len(best_lgbm_params) + 1])
    oof_level2[:, len(best_lgbm_params)] = y

    print(f"Training with {blu}{X.shape[1]}{res} features")

    for fold, (fit_idx, val_idx) in tqdm(enumerate(kf.split(X=train_df, y=greeks.iloc[:,1:3]), start = 1),
                                         total=CFG.n_stacking_folds):
        
        # Split the dataset according to the fold indexes.
        X_train = X.iloc[fit_idx]
        X_val = X.iloc[val_idx]
        y_train = y.iloc[fit_idx]
        y_val = y.iloc[val_idx]

        for i, params in enumerate(best_lgbm_params):
            
            clf = lgb.LGBMClassifier(**params)
            clf.fit(X_train, y_train, eval_set=[(X_val, y_val)], 
                    eval_metric=bll_metric, verbose=-1)
            models_.append(clf)

            val_preds = clf.predict_proba(X_val)[:,1]
            oof_level2[val_idx, i] = val_preds

            val_score = balanced_log_loss(y_val, val_preds)
            best_iter = clf.best_iteration_

            print(clf.best_iteration_)
            
            print(f'Fold: {blu}{fold:>3}{res}| bll_metric: {blu}{val_score:.5f}{res}'
                  f' | Best iteration: {blu}{best_iter:>4}{res}')
        
    return oof_level2, models_

if CFG.stacking:
    oof_level2_lgbm, models_lgbm = lgbm_training()

Training with [1m[34m39[0m features


  0%|          | 0/10 [00:00<?, ?it/s]

488
Fold: [1m[34m  1[0m| bll_metric: [1m[34m0.01764[0m | Best iteration: [1m[34m 488[0m
609
Fold: [1m[34m  1[0m| bll_metric: [1m[34m0.01908[0m | Best iteration: [1m[34m 609[0m
327
Fold: [1m[34m  1[0m| bll_metric: [1m[34m0.04364[0m | Best iteration: [1m[34m 327[0m
457
Fold: [1m[34m  1[0m| bll_metric: [1m[34m0.03293[0m | Best iteration: [1m[34m 457[0m
652
Fold: [1m[34m  1[0m| bll_metric: [1m[34m0.01907[0m | Best iteration: [1m[34m 652[0m
814
Fold: [1m[34m  1[0m| bll_metric: [1m[34m0.02054[0m | Best iteration: [1m[34m 814[0m
664
Fold: [1m[34m  1[0m| bll_metric: [1m[34m0.01805[0m | Best iteration: [1m[34m 664[0m
322
Fold: [1m[34m  1[0m| bll_metric: [1m[34m0.03712[0m | Best iteration: [1m[34m 322[0m
360
Fold: [1m[34m  1[0m| bll_metric: [1m[34m0.04023[0m | Best iteration: [1m[34m 360[0m
449
Fold: [1m[34m  1[0m| bll_metric: [1m[34m0.02506[0m | Best iteration: [1m[34m 449[0m


599
Fold: [1m[34m  1[0m| bll_metric: [1m[34m0.01536[0m | Best iteration: [1m[34m 599[0m
600
Fold: [1m[34m  1[0m| bll_metric: [1m[34m0.01629[0m | Best iteration: [1m[34m 600[0m
285
Fold: [1m[34m  1[0m| bll_metric: [1m[34m0.04023[0m | Best iteration: [1m[34m 285[0m
496
Fold: [1m[34m  1[0m| bll_metric: [1m[34m0.02648[0m | Best iteration: [1m[34m 496[0m
525
Fold: [1m[34m  1[0m| bll_metric: [1m[34m0.02836[0m | Best iteration: [1m[34m 525[0m
430
Fold: [1m[34m  1[0m| bll_metric: [1m[34m0.02872[0m | Best iteration: [1m[34m 430[0m
501
Fold: [1m[34m  1[0m| bll_metric: [1m[34m0.03545[0m | Best iteration: [1m[34m 501[0m
551
Fold: [1m[34m  1[0m| bll_metric: [1m[34m0.02040[0m | Best iteration: [1m[34m 551[0m
566
Fold: [1m[34m  1[0m| bll_metric: [1m[34m0.02146[0m | Best iteration: [1m[34m 566[0m
527
Fold: [1m[34m  1[0m| bll_metric: [1m[34m0.01772[0m | Best iteration: [1m[34m 527[0m


392
Fold: [1m[34m  2[0m| bll_metric: [1m[34m0.12295[0m | Best iteration: [1m[34m 392[0m
219
Fold: [1m[34m  2[0m| bll_metric: [1m[34m0.12824[0m | Best iteration: [1m[34m 219[0m
313
Fold: [1m[34m  2[0m| bll_metric: [1m[34m0.15311[0m | Best iteration: [1m[34m 313[0m
222
Fold: [1m[34m  2[0m| bll_metric: [1m[34m0.15214[0m | Best iteration: [1m[34m 222[0m
281
Fold: [1m[34m  2[0m| bll_metric: [1m[34m0.11623[0m | Best iteration: [1m[34m 281[0m
297
Fold: [1m[34m  2[0m| bll_metric: [1m[34m0.14079[0m | Best iteration: [1m[34m 297[0m
357
Fold: [1m[34m  2[0m| bll_metric: [1m[34m0.13217[0m | Best iteration: [1m[34m 357[0m
242
Fold: [1m[34m  2[0m| bll_metric: [1m[34m0.13266[0m | Best iteration: [1m[34m 242[0m
258
Fold: [1m[34m  2[0m| bll_metric: [1m[34m0.14634[0m | Best iteration: [1m[34m 258[0m
280
Fold: [1m[34m  2[0m| bll_metric: [1m[34m0.15303[0m | Best iteration: [1m[34m 280[0m


298
Fold: [1m[34m  2[0m| bll_metric: [1m[34m0.13858[0m | Best iteration: [1m[34m 298[0m
295
Fold: [1m[34m  2[0m| bll_metric: [1m[34m0.13821[0m | Best iteration: [1m[34m 295[0m
348
Fold: [1m[34m  2[0m| bll_metric: [1m[34m0.13657[0m | Best iteration: [1m[34m 348[0m
344
Fold: [1m[34m  2[0m| bll_metric: [1m[34m0.10914[0m | Best iteration: [1m[34m 344[0m
550
Fold: [1m[34m  2[0m| bll_metric: [1m[34m0.15125[0m | Best iteration: [1m[34m 550[0m
266
Fold: [1m[34m  2[0m| bll_metric: [1m[34m0.14536[0m | Best iteration: [1m[34m 266[0m
310
Fold: [1m[34m  2[0m| bll_metric: [1m[34m0.12580[0m | Best iteration: [1m[34m 310[0m
342
Fold: [1m[34m  2[0m| bll_metric: [1m[34m0.13585[0m | Best iteration: [1m[34m 342[0m
235
Fold: [1m[34m  2[0m| bll_metric: [1m[34m0.15159[0m | Best iteration: [1m[34m 235[0m
258
Fold: [1m[34m  2[0m| bll_metric: [1m[34m0.11874[0m | Best iteration: [1m[34m 258[0m
194
Fold: [1m[34m  3[0m| bl

149
Fold: [1m[34m  3[0m| bll_metric: [1m[34m0.18118[0m | Best iteration: [1m[34m 149[0m
94
Fold: [1m[34m  3[0m| bll_metric: [1m[34m0.20593[0m | Best iteration: [1m[34m  94[0m
109
Fold: [1m[34m  3[0m| bll_metric: [1m[34m0.24782[0m | Best iteration: [1m[34m 109[0m
78
Fold: [1m[34m  3[0m| bll_metric: [1m[34m0.25317[0m | Best iteration: [1m[34m  78[0m
128
Fold: [1m[34m  3[0m| bll_metric: [1m[34m0.17053[0m | Best iteration: [1m[34m 128[0m
149
Fold: [1m[34m  3[0m| bll_metric: [1m[34m0.25523[0m | Best iteration: [1m[34m 149[0m
126
Fold: [1m[34m  3[0m| bll_metric: [1m[34m0.18260[0m | Best iteration: [1m[34m 126[0m
150
Fold: [1m[34m  3[0m| bll_metric: [1m[34m0.19808[0m | Best iteration: [1m[34m 150[0m
131
Fold: [1m[34m  3[0m| bll_metric: [1m[34m0.18611[0m | Best iteration: [1m[34m 131[0m
139
Fold: [1m[34m  3[0m| bll_metric: [1m[34m0.21219[0m | Best iteration: [1m[34m 139[0m
143
Fold: [1m[34m  3[0m| bll_

164
Fold: [1m[34m  3[0m| bll_metric: [1m[34m0.22907[0m | Best iteration: [1m[34m 164[0m
134
Fold: [1m[34m  3[0m| bll_metric: [1m[34m0.20971[0m | Best iteration: [1m[34m 134[0m
130
Fold: [1m[34m  3[0m| bll_metric: [1m[34m0.22986[0m | Best iteration: [1m[34m 130[0m
97
Fold: [1m[34m  3[0m| bll_metric: [1m[34m0.25375[0m | Best iteration: [1m[34m  97[0m
138
Fold: [1m[34m  3[0m| bll_metric: [1m[34m0.22087[0m | Best iteration: [1m[34m 138[0m
141
Fold: [1m[34m  3[0m| bll_metric: [1m[34m0.22055[0m | Best iteration: [1m[34m 141[0m
167
Fold: [1m[34m  3[0m| bll_metric: [1m[34m0.17915[0m | Best iteration: [1m[34m 167[0m
81
Fold: [1m[34m  4[0m| bll_metric: [1m[34m0.19398[0m | Best iteration: [1m[34m  81[0m
90
Fold: [1m[34m  4[0m| bll_metric: [1m[34m0.17947[0m | Best iteration: [1m[34m  90[0m
78
Fold: [1m[34m  4[0m| bll_metric: [1m[34m0.21063[0m | Best iteration: [1m[34m  78[0m
103
Fold: [1m[34m  4[0m| bll_me

70
Fold: [1m[34m  4[0m| bll_metric: [1m[34m0.21770[0m | Best iteration: [1m[34m  70[0m
92
Fold: [1m[34m  4[0m| bll_metric: [1m[34m0.16921[0m | Best iteration: [1m[34m  92[0m
92
Fold: [1m[34m  4[0m| bll_metric: [1m[34m0.19149[0m | Best iteration: [1m[34m  92[0m
85
Fold: [1m[34m  4[0m| bll_metric: [1m[34m0.20883[0m | Best iteration: [1m[34m  85[0m
69
Fold: [1m[34m  4[0m| bll_metric: [1m[34m0.22951[0m | Best iteration: [1m[34m  69[0m
92
Fold: [1m[34m  4[0m| bll_metric: [1m[34m0.19189[0m | Best iteration: [1m[34m  92[0m
94
Fold: [1m[34m  4[0m| bll_metric: [1m[34m0.17683[0m | Best iteration: [1m[34m  94[0m
92
Fold: [1m[34m  4[0m| bll_metric: [1m[34m0.23209[0m | Best iteration: [1m[34m  92[0m
83
Fold: [1m[34m  4[0m| bll_metric: [1m[34m0.21783[0m | Best iteration: [1m[34m  83[0m
83
Fold: [1m[34m  4[0m| bll_metric: [1m[34m0.18783[0m | Best iteration: [1m[34m  83[0m
92
Fold: [1m[34m  4[0m| bll_metric: 

86
Fold: [1m[34m  4[0m| bll_metric: [1m[34m0.20224[0m | Best iteration: [1m[34m  86[0m
77
Fold: [1m[34m  4[0m| bll_metric: [1m[34m0.21231[0m | Best iteration: [1m[34m  77[0m
107
Fold: [1m[34m  4[0m| bll_metric: [1m[34m0.17723[0m | Best iteration: [1m[34m 107[0m
88
Fold: [1m[34m  4[0m| bll_metric: [1m[34m0.18344[0m | Best iteration: [1m[34m  88[0m
145
Fold: [1m[34m  5[0m| bll_metric: [1m[34m0.13738[0m | Best iteration: [1m[34m 145[0m
141
Fold: [1m[34m  5[0m| bll_metric: [1m[34m0.15327[0m | Best iteration: [1m[34m 141[0m
271
Fold: [1m[34m  5[0m| bll_metric: [1m[34m0.11889[0m | Best iteration: [1m[34m 271[0m
274
Fold: [1m[34m  5[0m| bll_metric: [1m[34m0.09733[0m | Best iteration: [1m[34m 274[0m
326
Fold: [1m[34m  5[0m| bll_metric: [1m[34m0.08525[0m | Best iteration: [1m[34m 326[0m
277
Fold: [1m[34m  5[0m| bll_metric: [1m[34m0.09809[0m | Best iteration: [1m[34m 277[0m
231
Fold: [1m[34m  5[0m| bll_m

208
Fold: [1m[34m  5[0m| bll_metric: [1m[34m0.12210[0m | Best iteration: [1m[34m 208[0m
262
Fold: [1m[34m  5[0m| bll_metric: [1m[34m0.10688[0m | Best iteration: [1m[34m 262[0m
124
Fold: [1m[34m  5[0m| bll_metric: [1m[34m0.14146[0m | Best iteration: [1m[34m 124[0m
245
Fold: [1m[34m  5[0m| bll_metric: [1m[34m0.09537[0m | Best iteration: [1m[34m 245[0m
220
Fold: [1m[34m  5[0m| bll_metric: [1m[34m0.11131[0m | Best iteration: [1m[34m 220[0m
145
Fold: [1m[34m  5[0m| bll_metric: [1m[34m0.13350[0m | Best iteration: [1m[34m 145[0m
291
Fold: [1m[34m  5[0m| bll_metric: [1m[34m0.09642[0m | Best iteration: [1m[34m 291[0m
220
Fold: [1m[34m  5[0m| bll_metric: [1m[34m0.11984[0m | Best iteration: [1m[34m 220[0m
194
Fold: [1m[34m  5[0m| bll_metric: [1m[34m0.14164[0m | Best iteration: [1m[34m 194[0m
142
Fold: [1m[34m  5[0m| bll_metric: [1m[34m0.12001[0m | Best iteration: [1m[34m 142[0m
273
Fold: [1m[34m  5[0m| bl

170
Fold: [1m[34m  5[0m| bll_metric: [1m[34m0.15547[0m | Best iteration: [1m[34m 170[0m
167
Fold: [1m[34m  5[0m| bll_metric: [1m[34m0.19790[0m | Best iteration: [1m[34m 167[0m
127
Fold: [1m[34m  6[0m| bll_metric: [1m[34m0.12618[0m | Best iteration: [1m[34m 127[0m
102
Fold: [1m[34m  6[0m| bll_metric: [1m[34m0.14161[0m | Best iteration: [1m[34m 102[0m
109
Fold: [1m[34m  6[0m| bll_metric: [1m[34m0.13945[0m | Best iteration: [1m[34m 109[0m
151
Fold: [1m[34m  6[0m| bll_metric: [1m[34m0.14656[0m | Best iteration: [1m[34m 151[0m
134
Fold: [1m[34m  6[0m| bll_metric: [1m[34m0.13093[0m | Best iteration: [1m[34m 134[0m
108
Fold: [1m[34m  6[0m| bll_metric: [1m[34m0.13862[0m | Best iteration: [1m[34m 108[0m
152
Fold: [1m[34m  6[0m| bll_metric: [1m[34m0.13709[0m | Best iteration: [1m[34m 152[0m
124
Fold: [1m[34m  6[0m| bll_metric: [1m[34m0.12736[0m | Best iteration: [1m[34m 124[0m
182
Fold: [1m[34m  6[0m| bl

108
Fold: [1m[34m  6[0m| bll_metric: [1m[34m0.16797[0m | Best iteration: [1m[34m 108[0m
121
Fold: [1m[34m  6[0m| bll_metric: [1m[34m0.13312[0m | Best iteration: [1m[34m 121[0m
119
Fold: [1m[34m  6[0m| bll_metric: [1m[34m0.14253[0m | Best iteration: [1m[34m 119[0m
137
Fold: [1m[34m  6[0m| bll_metric: [1m[34m0.15117[0m | Best iteration: [1m[34m 137[0m
126
Fold: [1m[34m  6[0m| bll_metric: [1m[34m0.15289[0m | Best iteration: [1m[34m 126[0m
132
Fold: [1m[34m  6[0m| bll_metric: [1m[34m0.12517[0m | Best iteration: [1m[34m 132[0m
139
Fold: [1m[34m  6[0m| bll_metric: [1m[34m0.11909[0m | Best iteration: [1m[34m 139[0m
111
Fold: [1m[34m  6[0m| bll_metric: [1m[34m0.15431[0m | Best iteration: [1m[34m 111[0m
130
Fold: [1m[34m  6[0m| bll_metric: [1m[34m0.13887[0m | Best iteration: [1m[34m 130[0m
124
Fold: [1m[34m  6[0m| bll_metric: [1m[34m0.12514[0m | Best iteration: [1m[34m 124[0m


141
Fold: [1m[34m  6[0m| bll_metric: [1m[34m0.12617[0m | Best iteration: [1m[34m 141[0m
37
Fold: [1m[34m  7[0m| bll_metric: [1m[34m0.46087[0m | Best iteration: [1m[34m  37[0m
181
Fold: [1m[34m  7[0m| bll_metric: [1m[34m0.28692[0m | Best iteration: [1m[34m 181[0m
144
Fold: [1m[34m  7[0m| bll_metric: [1m[34m0.35949[0m | Best iteration: [1m[34m 144[0m
158
Fold: [1m[34m  7[0m| bll_metric: [1m[34m0.34454[0m | Best iteration: [1m[34m 158[0m
116
Fold: [1m[34m  7[0m| bll_metric: [1m[34m0.36877[0m | Best iteration: [1m[34m 116[0m
176
Fold: [1m[34m  7[0m| bll_metric: [1m[34m0.34144[0m | Best iteration: [1m[34m 176[0m
216
Fold: [1m[34m  7[0m| bll_metric: [1m[34m0.31105[0m | Best iteration: [1m[34m 216[0m
200
Fold: [1m[34m  7[0m| bll_metric: [1m[34m0.29670[0m | Best iteration: [1m[34m 200[0m
160
Fold: [1m[34m  7[0m| bll_metric: [1m[34m0.33171[0m | Best iteration: [1m[34m 160[0m


161
Fold: [1m[34m  7[0m| bll_metric: [1m[34m0.33988[0m | Best iteration: [1m[34m 161[0m
168
Fold: [1m[34m  7[0m| bll_metric: [1m[34m0.32436[0m | Best iteration: [1m[34m 168[0m
160
Fold: [1m[34m  7[0m| bll_metric: [1m[34m0.31898[0m | Best iteration: [1m[34m 160[0m
154
Fold: [1m[34m  7[0m| bll_metric: [1m[34m0.33223[0m | Best iteration: [1m[34m 154[0m
197
Fold: [1m[34m  7[0m| bll_metric: [1m[34m0.29595[0m | Best iteration: [1m[34m 197[0m
264
Fold: [1m[34m  7[0m| bll_metric: [1m[34m0.34188[0m | Best iteration: [1m[34m 264[0m
162
Fold: [1m[34m  7[0m| bll_metric: [1m[34m0.35123[0m | Best iteration: [1m[34m 162[0m
127
Fold: [1m[34m  7[0m| bll_metric: [1m[34m0.33423[0m | Best iteration: [1m[34m 127[0m
163
Fold: [1m[34m  7[0m| bll_metric: [1m[34m0.36987[0m | Best iteration: [1m[34m 163[0m
173
Fold: [1m[34m  7[0m| bll_metric: [1m[34m0.34625[0m | Best iteration: [1m[34m 173[0m


144
Fold: [1m[34m  7[0m| bll_metric: [1m[34m0.41473[0m | Best iteration: [1m[34m 144[0m
529
Fold: [1m[34m  8[0m| bll_metric: [1m[34m0.03332[0m | Best iteration: [1m[34m 529[0m
631
Fold: [1m[34m  8[0m| bll_metric: [1m[34m0.03165[0m | Best iteration: [1m[34m 631[0m
564
Fold: [1m[34m  8[0m| bll_metric: [1m[34m0.04658[0m | Best iteration: [1m[34m 564[0m
737
Fold: [1m[34m  8[0m| bll_metric: [1m[34m0.03772[0m | Best iteration: [1m[34m 737[0m
458
Fold: [1m[34m  8[0m| bll_metric: [1m[34m0.04548[0m | Best iteration: [1m[34m 458[0m
588
Fold: [1m[34m  8[0m| bll_metric: [1m[34m0.03021[0m | Best iteration: [1m[34m 588[0m
595
Fold: [1m[34m  8[0m| bll_metric: [1m[34m0.02716[0m | Best iteration: [1m[34m 595[0m
355
Fold: [1m[34m  8[0m| bll_metric: [1m[34m0.07898[0m | Best iteration: [1m[34m 355[0m
531
Fold: [1m[34m  8[0m| bll_metric: [1m[34m0.04010[0m | Best iteration: [1m[34m 531[0m
531
Fold: [1m[34m  8[0m| bl

379
Fold: [1m[34m  8[0m| bll_metric: [1m[34m0.04460[0m | Best iteration: [1m[34m 379[0m
733
Fold: [1m[34m  8[0m| bll_metric: [1m[34m0.01666[0m | Best iteration: [1m[34m 733[0m
409
Fold: [1m[34m  8[0m| bll_metric: [1m[34m0.05763[0m | Best iteration: [1m[34m 409[0m
758
Fold: [1m[34m  8[0m| bll_metric: [1m[34m0.01702[0m | Best iteration: [1m[34m 758[0m
494
Fold: [1m[34m  8[0m| bll_metric: [1m[34m0.05638[0m | Best iteration: [1m[34m 494[0m
691
Fold: [1m[34m  8[0m| bll_metric: [1m[34m0.01703[0m | Best iteration: [1m[34m 691[0m
383
Fold: [1m[34m  8[0m| bll_metric: [1m[34m0.05774[0m | Best iteration: [1m[34m 383[0m
755
Fold: [1m[34m  8[0m| bll_metric: [1m[34m0.02361[0m | Best iteration: [1m[34m 755[0m
457
Fold: [1m[34m  8[0m| bll_metric: [1m[34m0.03850[0m | Best iteration: [1m[34m 457[0m
436
Fold: [1m[34m  8[0m| bll_metric: [1m[34m0.06514[0m | Best iteration: [1m[34m 436[0m
223
Fold: [1m[34m  9[0m| bl

186
Fold: [1m[34m  9[0m| bll_metric: [1m[34m0.13942[0m | Best iteration: [1m[34m 186[0m
187
Fold: [1m[34m  9[0m| bll_metric: [1m[34m0.07333[0m | Best iteration: [1m[34m 187[0m
203
Fold: [1m[34m  9[0m| bll_metric: [1m[34m0.13494[0m | Best iteration: [1m[34m 203[0m
223
Fold: [1m[34m  9[0m| bll_metric: [1m[34m0.14812[0m | Best iteration: [1m[34m 223[0m
146
Fold: [1m[34m  9[0m| bll_metric: [1m[34m0.15503[0m | Best iteration: [1m[34m 146[0m
125
Fold: [1m[34m  9[0m| bll_metric: [1m[34m0.16248[0m | Best iteration: [1m[34m 125[0m
209
Fold: [1m[34m  9[0m| bll_metric: [1m[34m0.14215[0m | Best iteration: [1m[34m 209[0m
153
Fold: [1m[34m  9[0m| bll_metric: [1m[34m0.12045[0m | Best iteration: [1m[34m 153[0m
129
Fold: [1m[34m  9[0m| bll_metric: [1m[34m0.12823[0m | Best iteration: [1m[34m 129[0m
164
Fold: [1m[34m  9[0m| bll_metric: [1m[34m0.11593[0m | Best iteration: [1m[34m 164[0m


208
Fold: [1m[34m  9[0m| bll_metric: [1m[34m0.15718[0m | Best iteration: [1m[34m 208[0m
145
Fold: [1m[34m  9[0m| bll_metric: [1m[34m0.16605[0m | Best iteration: [1m[34m 145[0m
180
Fold: [1m[34m  9[0m| bll_metric: [1m[34m0.14485[0m | Best iteration: [1m[34m 180[0m
199
Fold: [1m[34m  9[0m| bll_metric: [1m[34m0.10702[0m | Best iteration: [1m[34m 199[0m
194
Fold: [1m[34m  9[0m| bll_metric: [1m[34m0.11683[0m | Best iteration: [1m[34m 194[0m
166
Fold: [1m[34m  9[0m| bll_metric: [1m[34m0.14777[0m | Best iteration: [1m[34m 166[0m
187
Fold: [1m[34m  9[0m| bll_metric: [1m[34m0.11566[0m | Best iteration: [1m[34m 187[0m
170
Fold: [1m[34m  9[0m| bll_metric: [1m[34m0.12231[0m | Best iteration: [1m[34m 170[0m
182
Fold: [1m[34m  9[0m| bll_metric: [1m[34m0.11744[0m | Best iteration: [1m[34m 182[0m
101
Fold: [1m[34m 10[0m| bll_metric: [1m[34m0.42460[0m | Best iteration: [1m[34m 101[0m
70
Fold: [1m[34m 10[0m| bll

115
Fold: [1m[34m 10[0m| bll_metric: [1m[34m0.38013[0m | Best iteration: [1m[34m 115[0m
105
Fold: [1m[34m 10[0m| bll_metric: [1m[34m0.36513[0m | Best iteration: [1m[34m 105[0m
47
Fold: [1m[34m 10[0m| bll_metric: [1m[34m0.42220[0m | Best iteration: [1m[34m  47[0m
69
Fold: [1m[34m 10[0m| bll_metric: [1m[34m0.37949[0m | Best iteration: [1m[34m  69[0m
93
Fold: [1m[34m 10[0m| bll_metric: [1m[34m0.39910[0m | Best iteration: [1m[34m  93[0m
63
Fold: [1m[34m 10[0m| bll_metric: [1m[34m0.34726[0m | Best iteration: [1m[34m  63[0m
52
Fold: [1m[34m 10[0m| bll_metric: [1m[34m0.36401[0m | Best iteration: [1m[34m  52[0m
47
Fold: [1m[34m 10[0m| bll_metric: [1m[34m0.42219[0m | Best iteration: [1m[34m  47[0m
47
Fold: [1m[34m 10[0m| bll_metric: [1m[34m0.42866[0m | Best iteration: [1m[34m  47[0m
35
Fold: [1m[34m 10[0m| bll_metric: [1m[34m0.39690[0m | Best iteration: [1m[34m  35[0m


95
Fold: [1m[34m 10[0m| bll_metric: [1m[34m0.39734[0m | Best iteration: [1m[34m  95[0m
53
Fold: [1m[34m 10[0m| bll_metric: [1m[34m0.37825[0m | Best iteration: [1m[34m  53[0m
53
Fold: [1m[34m 10[0m| bll_metric: [1m[34m0.44600[0m | Best iteration: [1m[34m  53[0m
95
Fold: [1m[34m 10[0m| bll_metric: [1m[34m0.39733[0m | Best iteration: [1m[34m  95[0m
46
Fold: [1m[34m 10[0m| bll_metric: [1m[34m0.41446[0m | Best iteration: [1m[34m  46[0m


# XGBoost Optuna optimization

In [57]:
X, y = train_df[features], train_df.Class

def objective(trial):
    bll_list = list()

    params = {
        "n_estimators": 3000, # trial.suggest_int('n_estimators', 100, 1000, step=100),
        "random_state": 14062023,
        "early_stopping_rounds": 100,
        "verbosity": 0,
        "scale_pos_weight": 4.71,
        "objective": "binary:logistic",
        "eval_metric": "logloss",
        # use exact for small dataset.
        "tree_method": "exact",
        # defines booster, gblinear for linear functions.
        "booster": trial.suggest_categorical("booster", ["gbtree", "gblinear"]),# "dart"]), 
        # L1 regularization weight.
        "alpha": trial.suggest_float("alpha", 1e-8, 1.0, log=True),
        # L2 regularization weight.
        "lambda": trial.suggest_float("lambda", 1e-8, 1.0, log=True),
        # sampling ratio for training data.
        "subsample": trial.suggest_float("subsample", 0.4, 1.0),
        # sampling according to each tree.
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.4, 1.0),
    }

    if params["booster"] in ["gbtree", "dart"]:
        params["learning_rate"] = trial.suggest_float("learning_rate", 1e-3, 0.1, log=True) # alias eta
        # maximum depth of the tree, signifies complexity of the tree.
        params["max_depth"] = trial.suggest_int("max_depth", 3, 10)
        # minimum child weight, larger the term more conservative the tree.
        params["min_child_weight"] = trial.suggest_int("min_child_weight", 2, 10)
        # defines how selective algorithm is.
        params["gamma"] = trial.suggest_float("gamma", 1e-8, 1.0, log=True)
        params["grow_policy"] = trial.suggest_categorical("grow_policy", ["depthwise", "lossguide"])

    if params["booster"] == "dart":
        params["sample_type"] = trial.suggest_categorical("sample_type", ["uniform", "weighted"])
        params["normalize_type"] = trial.suggest_categorical("normalize_type", ["tree", "forest"])
        params["rate_drop"] = trial.suggest_float("rate_drop", 1e-8, 1.0, log=True)
        params["skip_drop"] = trial.suggest_float("skip_drop", 1e-8, 1.0, log=True)

    for i in range(CFG.n_optimize_repeats):
        print(f'Repeat {blu}#{i+1}')

        # Make random under-sampling to balance classes
        positive_count_train = train_df.Class.value_counts()[1]
        sampler = RandomUnderSampler(sampling_strategy={0: positive_count_train, 
                                                        1: positive_count_train}, 
                                     random_state=15062023+i, 
                                     replacement=True)

        X_re, y_re = pd.concat([train_df[features], greeks.iloc[:,1:4]], axis=1), train_df['Class']
        
        if CFG.undersample:
            X_re, y_re = sampler.fit_resample(X_re, y_re)
        
        # Create Stratified Multilabel k-Fold scheme
        kf = MultilabelStratifiedKFold(n_splits=CFG.n_feature_sel_folds, shuffle=True, random_state=10062023+i)

        # Create an oof array for inner loop
        oof = np.zeros(X_re.shape[0])

        # Stratify based on Class and Alpha (3 types of conditions)
        for fold, (train_idx, val_idx) in enumerate(kf.split(X=X_re[features], y=X_re.iloc[:,-3:]), start=1): 
            X, y = X_re[features], y_re
            
            # Split the dataset according to the fold indexes.
            X_train = X.iloc[train_idx]
            X_val = X.iloc[val_idx]
            y_train = y.iloc[train_idx]
            y_val = y.iloc[val_idx]

            # Learning
            model = xgb.XGBClassifier(**params)
            model.fit(X_train, y_train, eval_set=[(X_val, y_val)], verbose=1000)
            # Predict
            val_preds = model.predict_proba(X_val)[:,1]
            oof[val_idx] = val_preds
        
        bll_list.append(balanced_log_loss(y_re, oof))    
    
    return np.mean(bll_list)

if CFG.xgb_optimize:
    study = optuna.create_study(direction="minimize")
    study.optimize(objective, n_trials=CFG.n_trials, )

    print("Number of finished trials: {}".format(len(study.trials)))

    print("Best trial:")
    trial = study.best_trial

    print("  Value: {}".format(trial.value))

    print("  Params: ")
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))

    df = study.trials_dataframe()
    df.sort_values('value').iloc[:, [1] + list(range(5, 14))]
    df.to_csv(f'optuna_xgb.csv')

[I 2023-06-15 15:27:44,387] A new study created in memory with name: no-name-5b706e8c-7411-4867-babf-b8c6cb53825e


Repeat [1m[34m#1
[0]	validation_0-logloss:0.68057
[409]	validation_0-logloss:0.28587
[0]	validation_0-logloss:0.68524
[335]	validation_0-logloss:0.22760
[0]	validation_0-logloss:0.66400
[548]	validation_0-logloss:0.40614
[0]	validation_0-logloss:0.66888
[232]	validation_0-logloss:0.43345
[0]	validation_0-logloss:0.68240
[230]	validation_0-logloss:0.42186
Repeat [1m[34m#2
[0]	validation_0-logloss:0.64133
[434]	validation_0-logloss:0.10903
[0]	validation_0-logloss:0.65529
[231]	validation_0-logloss:0.30404
[0]	validation_0-logloss:0.65559
[639]	validation_0-logloss:0.27892
[0]	validation_0-logloss:0.65559
[128]	validation_0-logloss:0.43786
[0]	validation_0-logloss:0.65433
[562]	validation_0-logloss:0.24229
Repeat [1m[34m#3
[0]	validation_0-logloss:0.67084
[311]	validation_0-logloss:0.29251
[0]	validation_0-logloss:0.68051
[374]	validation_0-logloss:0.24343
[0]	validation_0-logloss:0.66691
[539]	validation_0-logloss:0.14530
[0]	validation_0-logloss:0.68407
[704]	validation_0-logloss

[I 2023-06-15 15:27:49,624] Trial 0 finished with value: 0.286093718239291 and parameters: {'booster': 'gbtree', 'alpha': 0.25623728735464407, 'lambda': 0.00019336903261921336, 'subsample': 0.7112340405466451, 'colsample_bytree': 0.6375939340861976, 'learning_rate': 0.0807215616583345, 'max_depth': 9, 'min_child_weight': 9, 'gamma': 0.204081655787243, 'grow_policy': 'depthwise'}. Best is trial 0 with value: 0.286093718239291.


Repeat [1m[34m#1
[0]	validation_0-logloss:0.74057
[107]	validation_0-logloss:0.69313
[0]	validation_0-logloss:0.72642
[112]	validation_0-logloss:0.42529
[0]	validation_0-logloss:1.31727
[100]	validation_0-logloss:0.84068
[0]	validation_0-logloss:0.74713
[114]	validation_0-logloss:0.54952
[0]	validation_0-logloss:0.78410
[1000]	validation_0-logloss:0.46838
[1068]	validation_0-logloss:0.46838
Repeat [1m[34m#2
[0]	validation_0-logloss:0.74303
[1000]	validation_0-logloss:0.26996
[2000]	validation_0-logloss:0.26966
[2999]	validation_0-logloss:0.26965
[0]	validation_0-logloss:2.21567
[100]	validation_0-logloss:15.42197
[0]	validation_0-logloss:1.13108
[111]	validation_0-logloss:0.61508
[0]	validation_0-logloss:0.79442
[103]	validation_0-logloss:1.66532
[0]	validation_0-logloss:0.73432
[199]	validation_0-logloss:0.37096
Repeat [1m[34m#3
[0]	validation_0-logloss:0.70926
[111]	validation_0-logloss:0.75246
[0]	validation_0-logloss:1.99752
[100]	validation_0-logloss:20.56262
[0]	validation_

[I 2023-06-15 15:27:53,088] Trial 1 finished with value: 4.608972343530562 and parameters: {'booster': 'gblinear', 'alpha': 0.047803898808096594, 'lambda': 0.000669631281406815, 'subsample': 0.6855484871090611, 'colsample_bytree': 0.490262726529753}. Best is trial 0 with value: 0.286093718239291.


Repeat [1m[34m#1
[0]	validation_0-logloss:0.69230
[1000]	validation_0-logloss:0.35016
[2000]	validation_0-logloss:0.29255
[2577]	validation_0-logloss:0.28466
[0]	validation_0-logloss:0.69216
[1000]	validation_0-logloss:0.26329
[2000]	validation_0-logloss:0.22335
[2999]	validation_0-logloss:0.20838
[0]	validation_0-logloss:0.69027
[480]	validation_0-logloss:0.49304
[0]	validation_0-logloss:0.69102
[1000]	validation_0-logloss:0.38234
[1027]	validation_0-logloss:0.38142
[0]	validation_0-logloss:0.69060
[715]	validation_0-logloss:0.43608
Repeat [1m[34m#2
[0]	validation_0-logloss:0.68850
[1000]	validation_0-logloss:0.14692
[2000]	validation_0-logloss:0.11828
[2801]	validation_0-logloss:0.11251
[0]	validation_0-logloss:0.69055
[1000]	validation_0-logloss:0.26884
[1410]	validation_0-logloss:0.26425
[0]	validation_0-logloss:0.68947
[1000]	validation_0-logloss:0.29634
[1942]	validation_0-logloss:0.27116
[0]	validation_0-logloss:0.68962
[543]	validation_0-logloss:0.43305
[0]	validation_0-log

[I 2023-06-15 15:28:13,438] Trial 2 finished with value: 0.28995669627009757 and parameters: {'booster': 'gbtree', 'alpha': 0.00014317194084270988, 'lambda': 0.001337070033804407, 'subsample': 0.7870181850144897, 'colsample_bytree': 0.5275266305332861, 'learning_rate': 0.006894378212073432, 'max_depth': 9, 'min_child_weight': 9, 'gamma': 0.06282789438775545, 'grow_policy': 'lossguide'}. Best is trial 0 with value: 0.286093718239291.


Repeat [1m[34m#1
[0]	validation_0-logloss:0.69139
[1000]	validation_0-logloss:0.27859
[1521]	validation_0-logloss:0.26315
[0]	validation_0-logloss:0.68960
[1000]	validation_0-logloss:0.21464
[1573]	validation_0-logloss:0.20546
[0]	validation_0-logloss:0.68764
[1000]	validation_0-logloss:0.40070
[1176]	validation_0-logloss:0.39484
[0]	validation_0-logloss:0.68815
[804]	validation_0-logloss:0.36956
[0]	validation_0-logloss:0.68926
[496]	validation_0-logloss:0.40708
Repeat [1m[34m#2
[0]	validation_0-logloss:0.68353
[1000]	validation_0-logloss:0.10973
[2000]	validation_0-logloss:0.09911
[2016]	validation_0-logloss:0.09920
[0]	validation_0-logloss:0.68644
[642]	validation_0-logloss:0.24882
[0]	validation_0-logloss:0.68548
[836]	validation_0-logloss:0.25632
[0]	validation_0-logloss:0.68666
[317]	validation_0-logloss:0.41780
[0]	validation_0-logloss:0.68657
[1000]	validation_0-logloss:0.23232
[1653]	validation_0-logloss:0.22193
Repeat [1m[34m#3
[0]	validation_0-logloss:0.68964
[1000]	va

[I 2023-06-15 15:28:26,579] Trial 3 finished with value: 0.26574023507112743 and parameters: {'booster': 'gbtree', 'alpha': 1.673446089952982e-06, 'lambda': 5.701035590782378e-08, 'subsample': 0.6791338161995366, 'colsample_bytree': 0.5152795884113237, 'learning_rate': 0.013258564465428795, 'max_depth': 7, 'min_child_weight': 7, 'gamma': 0.1533721918116734, 'grow_policy': 'lossguide'}. Best is trial 3 with value: 0.26574023507112743.


Repeat [1m[34m#1
[0]	validation_0-logloss:0.79388
[167]	validation_0-logloss:17.13552
[0]	validation_0-logloss:0.87575
[101]	validation_0-logloss:20.56262
[0]	validation_0-logloss:1.35010
[105]	validation_0-logloss:20.09529
[0]	validation_0-logloss:1.15553
[122]	validation_0-logloss:17.13552
[0]	validation_0-logloss:1.36460
[102]	validation_0-logloss:17.13552
Repeat [1m[34m#2
[0]	validation_0-logloss:1.18007
[111]	validation_0-logloss:17.58338
[0]	validation_0-logloss:0.98454
[139]	validation_0-logloss:21.41940
[0]	validation_0-logloss:2.31550
[101]	validation_0-logloss:19.70584
[0]	validation_0-logloss:0.85237
[101]	validation_0-logloss:17.13552
[0]	validation_0-logloss:1.73321
[103]	validation_0-logloss:16.27874
Repeat [1m[34m#3
[0]	validation_0-logloss:1.01825
[100]	validation_0-logloss:17.99229
[0]	validation_0-logloss:0.74956
[100]	validation_0-logloss:20.56262
[0]	validation_0-logloss:1.35989
[101]	validation_0-logloss:14.56519
[0]	validation_0-logloss:2.56655
[99]	validati

[I 2023-06-15 15:28:27,923] Trial 4 finished with value: 16.709446123167126 and parameters: {'booster': 'gblinear', 'alpha': 1.1963094133785981e-06, 'lambda': 6.853392598216787e-08, 'subsample': 0.41754568076154747, 'colsample_bytree': 0.4168219763283739}. Best is trial 3 with value: 0.26574023507112743.


Repeat [1m[34m#1
[0]	validation_0-logloss:1.63890
[100]	validation_0-logloss:17.13552
[0]	validation_0-logloss:1.39336
[121]	validation_0-logloss:20.56262
[0]	validation_0-logloss:1.14376
[106]	validation_0-logloss:20.09529
[0]	validation_0-logloss:1.16535
[100]	validation_0-logloss:17.13552
[0]	validation_0-logloss:0.88401
[106]	validation_0-logloss:17.13552
Repeat [1m[34m#2
[0]	validation_0-logloss:0.73125
[114]	validation_0-logloss:17.58338
[0]	validation_0-logloss:1.15840
[100]	validation_0-logloss:15.42197
[0]	validation_0-logloss:2.24978
[99]	validation_0-logloss:17.13552
[0]	validation_0-logloss:0.86189
[103]	validation_0-logloss:17.13552
[0]	validation_0-logloss:1.11490
[100]	validation_0-logloss:16.27874
Repeat [1m[34m#3
[0]	validation_0-logloss:3.95136
[100]	validation_0-logloss:17.99229
[0]	validation_0-logloss:3.12749
[100]	validation_0-logloss:20.56262
[0]	validation_0-logloss:0.74316
[101]	validation_0-logloss:15.42197
[0]	validation_0-logloss:0.84191
[104]	validati

[I 2023-06-15 15:28:29,239] Trial 5 finished with value: 16.16842183069694 and parameters: {'booster': 'gblinear', 'alpha': 6.693911815733384e-07, 'lambda': 4.104082288043595e-05, 'subsample': 0.842077620145698, 'colsample_bytree': 0.9323566099311857}. Best is trial 3 with value: 0.26574023507112743.


Repeat [1m[34m#1
[0]	validation_0-logloss:2.87299
[99]	validation_0-logloss:17.13552
[0]	validation_0-logloss:1.27876
[113]	validation_0-logloss:16.27874
[0]	validation_0-logloss:0.77604
[103]	validation_0-logloss:20.09529
[0]	validation_0-logloss:1.07695
[100]	validation_0-logloss:16.27874
[0]	validation_0-logloss:1.69203
[100]	validation_0-logloss:17.13552
Repeat [1m[34m#2
[0]	validation_0-logloss:0.77464
[102]	validation_0-logloss:17.58338
[0]	validation_0-logloss:1.53980
[103]	validation_0-logloss:21.41940
[0]	validation_0-logloss:1.20451
[101]	validation_0-logloss:19.70584
[0]	validation_0-logloss:0.82821
[102]	validation_0-logloss:17.13552
[0]	validation_0-logloss:0.73039
[99]	validation_0-logloss:16.27874
Repeat [1m[34m#3
[0]	validation_0-logloss:0.79529
[101]	validation_0-logloss:0.97604
[0]	validation_0-logloss:0.83819
[102]	validation_0-logloss:20.56262
[0]	validation_0-logloss:1.06618
[102]	validation_0-logloss:21.41940
[0]	validation_0-logloss:0.70904
[122]	validation

[I 2023-06-15 15:28:30,515] Trial 6 finished with value: 15.578352514354089 and parameters: {'booster': 'gblinear', 'alpha': 7.518918729168488e-05, 'lambda': 5.071698666779992e-08, 'subsample': 0.9940302588186833, 'colsample_bytree': 0.6394134083573966}. Best is trial 3 with value: 0.26574023507112743.


Repeat [1m[34m#1
[0]	validation_0-logloss:0.69166
[1000]	validation_0-logloss:0.24598
[2000]	validation_0-logloss:0.19846
[2999]	validation_0-logloss:0.18277
[0]	validation_0-logloss:0.69079
[1000]	validation_0-logloss:0.23935
[2000]	validation_0-logloss:0.20376
[2999]	validation_0-logloss:0.19170
[0]	validation_0-logloss:0.69166
[1000]	validation_0-logloss:0.35239
[2000]	validation_0-logloss:0.33291
[2369]	validation_0-logloss:0.33017
[0]	validation_0-logloss:0.69080
[1000]	validation_0-logloss:0.30705
[1151]	validation_0-logloss:0.30633
[0]	validation_0-logloss:0.69188
[1000]	validation_0-logloss:0.30683
[1317]	validation_0-logloss:0.30300
Repeat [1m[34m#2
[0]	validation_0-logloss:0.68971
[1000]	validation_0-logloss:0.11210
[2000]	validation_0-logloss:0.08173
[2905]	validation_0-logloss:0.07491
[0]	validation_0-logloss:0.69030
[1000]	validation_0-logloss:0.16449
[1834]	validation_0-logloss:0.14433
[0]	validation_0-logloss:0.69020
[1000]	validation_0-logloss:0.23912
[2000]	validat

[I 2023-06-15 15:28:57,992] Trial 7 finished with value: 0.2161163239742509 and parameters: {'booster': 'gbtree', 'alpha': 0.34168240158341634, 'lambda': 2.633136279415161e-07, 'subsample': 0.6467556845791176, 'colsample_bytree': 0.6305522882983134, 'learning_rate': 0.00460225068312338, 'max_depth': 10, 'min_child_weight': 3, 'gamma': 3.5004421525015634e-06, 'grow_policy': 'depthwise'}. Best is trial 7 with value: 0.2161163239742509.


Repeat [1m[34m#1
[0]	validation_0-logloss:0.67909
[564]	validation_0-logloss:0.18813
[0]	validation_0-logloss:0.68621
[738]	validation_0-logloss:0.18885
[0]	validation_0-logloss:0.67963
[547]	validation_0-logloss:0.31977
[0]	validation_0-logloss:0.67578
[322]	validation_0-logloss:0.29945
[0]	validation_0-logloss:0.68176
[312]	validation_0-logloss:0.33179
Repeat [1m[34m#2
[0]	validation_0-logloss:0.67211
[854]	validation_0-logloss:0.05832
[0]	validation_0-logloss:0.68707
[364]	validation_0-logloss:0.11062
[0]	validation_0-logloss:0.67445
[316]	validation_0-logloss:0.19413
[0]	validation_0-logloss:0.68732
[235]	validation_0-logloss:0.34346
[0]	validation_0-logloss:0.67869
[406]	validation_0-logloss:0.18938
Repeat [1m[34m#3
[0]	validation_0-logloss:0.68349
[285]	validation_0-logloss:0.25390
[0]	validation_0-logloss:0.67788
[974]	validation_0-logloss:0.12447
[0]	validation_0-logloss:0.67825
[1000]	validation_0-logloss:0.11532
[1270]	validation_0-logloss:0.11283
[0]	validation_0-loglo

[I 2023-06-15 15:29:04,867] Trial 8 finished with value: 0.19453448622302355 and parameters: {'booster': 'gbtree', 'alpha': 4.383408941082582e-08, 'lambda': 7.224002743743152e-06, 'subsample': 0.7214192492925153, 'colsample_bytree': 0.6487475085154554, 'learning_rate': 0.03325288085414608, 'max_depth': 3, 'min_child_weight': 2, 'gamma': 1.87597859410113e-06, 'grow_policy': 'lossguide'}. Best is trial 8 with value: 0.19453448622302355.


Repeat [1m[34m#1
[0]	validation_0-logloss:0.69195
[1000]	validation_0-logloss:0.31229
[2000]	validation_0-logloss:0.26761
[2656]	validation_0-logloss:0.26208
[0]	validation_0-logloss:0.69162
[1000]	validation_0-logloss:0.25470
[1915]	validation_0-logloss:0.21886
[0]	validation_0-logloss:0.68969
[472]	validation_0-logloss:0.48283
[0]	validation_0-logloss:0.69084
[1000]	validation_0-logloss:0.37498
[1052]	validation_0-logloss:0.37337
[0]	validation_0-logloss:0.69009
[1000]	validation_0-logloss:0.40883
[1626]	validation_0-logloss:0.40144
Repeat [1m[34m#2
[0]	validation_0-logloss:0.68722
[1000]	validation_0-logloss:0.12938
[2000]	validation_0-logloss:0.10660
[2868]	validation_0-logloss:0.10200
[0]	validation_0-logloss:0.68920
[943]	validation_0-logloss:0.25796
[0]	validation_0-logloss:0.68886
[1000]	validation_0-logloss:0.27779
[2000]	validation_0-logloss:0.25984
[2731]	validation_0-logloss:0.25337
[0]	validation_0-logloss:0.68905
[537]	validation_0-logloss:0.42491
[0]	validation_0-log

[I 2023-06-15 15:29:23,751] Trial 9 finished with value: 0.2807813295226981 and parameters: {'booster': 'gbtree', 'alpha': 2.21761125917675e-06, 'lambda': 4.839015380711274e-07, 'subsample': 0.7331140614158747, 'colsample_bytree': 0.6296714747215986, 'learning_rate': 0.008262126104638789, 'max_depth': 8, 'min_child_weight': 8, 'gamma': 0.00039833279485992603, 'grow_policy': 'lossguide'}. Best is trial 8 with value: 0.19453448622302355.


Repeat [1m[34m#1
[0]	validation_0-logloss:0.69280
[1000]	validation_0-logloss:0.48232
[2000]	validation_0-logloss:0.37538
[2999]	validation_0-logloss:0.30243
[0]	validation_0-logloss:0.69268
[1000]	validation_0-logloss:0.43700
[2000]	validation_0-logloss:0.32790
[2999]	validation_0-logloss:0.27324
[0]	validation_0-logloss:0.69263
[1000]	validation_0-logloss:0.45601
[2000]	validation_0-logloss:0.37881
[2999]	validation_0-logloss:0.34839
[0]	validation_0-logloss:0.69274
[1000]	validation_0-logloss:0.46698
[2000]	validation_0-logloss:0.38270
[2999]	validation_0-logloss:0.34442
[0]	validation_0-logloss:0.69277
[1000]	validation_0-logloss:0.44565
[2000]	validation_0-logloss:0.35538
[2999]	validation_0-logloss:0.31672
Repeat [1m[34m#2
[0]	validation_0-logloss:0.69275
[1000]	validation_0-logloss:0.34750
[2000]	validation_0-logloss:0.21172
[2999]	validation_0-logloss:0.14818
[0]	validation_0-logloss:0.69271
[1000]	validation_0-logloss:0.42524
[2000]	validation_0-logloss:0.27887
[2999]	vali

[I 2023-06-15 15:30:01,892] Trial 10 finished with value: 0.2789750803750953 and parameters: {'booster': 'gbtree', 'alpha': 5.2074748137124503e-08, 'lambda': 0.24977844976744568, 'subsample': 0.5459264243204285, 'colsample_bytree': 0.7900878585863832, 'learning_rate': 0.0011164871539282425, 'max_depth': 3, 'min_child_weight': 2, 'gamma': 1.686644629592159e-08, 'grow_policy': 'lossguide'}. Best is trial 8 with value: 0.19453448622302355.


Repeat [1m[34m#1
[0]	validation_0-logloss:0.67438
[337]	validation_0-logloss:0.20444
[0]	validation_0-logloss:0.67090
[689]	validation_0-logloss:0.17668
[0]	validation_0-logloss:0.67964
[237]	validation_0-logloss:0.32245
[0]	validation_0-logloss:0.67554
[213]	validation_0-logloss:0.32650
[0]	validation_0-logloss:0.67247
[266]	validation_0-logloss:0.30518
Repeat [1m[34m#2
[0]	validation_0-logloss:0.67818
[553]	validation_0-logloss:0.06217
[0]	validation_0-logloss:0.67198
[272]	validation_0-logloss:0.13285
[0]	validation_0-logloss:0.66794
[1000]	validation_0-logloss:0.18055
[1012]	validation_0-logloss:0.17961
[0]	validation_0-logloss:0.68103
[204]	validation_0-logloss:0.34653
[0]	validation_0-logloss:0.66897
[455]	validation_0-logloss:0.18587
Repeat [1m[34m#3
[0]	validation_0-logloss:0.66838
[280]	validation_0-logloss:0.25700
[0]	validation_0-logloss:0.67835
[707]	validation_0-logloss:0.13249
[0]	validation_0-logloss:0.67327
[742]	validation_0-logloss:0.08574
[0]	validation_0-loglo

[I 2023-06-15 15:30:07,971] Trial 11 finished with value: 0.1930995163714154 and parameters: {'booster': 'gbtree', 'alpha': 0.008337045735939824, 'lambda': 1.2773032475076913e-06, 'subsample': 0.5881656713002873, 'colsample_bytree': 0.7463972542037534, 'learning_rate': 0.041082829795820654, 'max_depth': 4, 'min_child_weight': 2, 'gamma': 7.861350185475737e-07, 'grow_policy': 'depthwise'}. Best is trial 11 with value: 0.1930995163714154.


Repeat [1m[34m#1
[0]	validation_0-logloss:0.67527
[371]	validation_0-logloss:0.26118
[0]	validation_0-logloss:0.67525
[464]	validation_0-logloss:0.20159
[0]	validation_0-logloss:0.66892
[547]	validation_0-logloss:0.34287
[0]	validation_0-logloss:0.67557
[224]	validation_0-logloss:0.37245
[0]	validation_0-logloss:0.67872
[402]	validation_0-logloss:0.34761
Repeat [1m[34m#2
[0]	validation_0-logloss:0.66731
[550]	validation_0-logloss:0.08070
[0]	validation_0-logloss:0.67761
[373]	validation_0-logloss:0.20090
[0]	validation_0-logloss:0.67852
[426]	validation_0-logloss:0.20967
[0]	validation_0-logloss:0.67467
[168]	validation_0-logloss:0.38562
[0]	validation_0-logloss:0.67579
[411]	validation_0-logloss:0.20702
Repeat [1m[34m#3
[0]	validation_0-logloss:0.66461
[424]	validation_0-logloss:0.29920
[0]	validation_0-logloss:0.68094
[706]	validation_0-logloss:0.14279
[0]	validation_0-logloss:0.67339
[634]	validation_0-logloss:0.10729
[0]	validation_0-logloss:0.67901
[430]	validation_0-logloss

[I 2023-06-15 15:30:13,254] Trial 12 finished with value: 0.22774611920433596 and parameters: {'booster': 'gbtree', 'alpha': 0.0038773497909091736, 'lambda': 1.0596491399462018e-05, 'subsample': 0.5713432404091563, 'colsample_bytree': 0.7733492716535749, 'learning_rate': 0.050398767666114355, 'max_depth': 3, 'min_child_weight': 4, 'gamma': 1.1088363890597523e-06, 'grow_policy': 'depthwise'}. Best is trial 11 with value: 0.1930995163714154.


Repeat [1m[34m#1
[0]	validation_0-logloss:0.68265
[574]	validation_0-logloss:0.26779
[0]	validation_0-logloss:0.68239
[910]	validation_0-logloss:0.19025
[0]	validation_0-logloss:0.67835
[549]	validation_0-logloss:0.34818
[0]	validation_0-logloss:0.68377
[409]	validation_0-logloss:0.35613
[0]	validation_0-logloss:0.68295
[420]	validation_0-logloss:0.33334
Repeat [1m[34m#2
[0]	validation_0-logloss:0.67285
[854]	validation_0-logloss:0.08571
[0]	validation_0-logloss:0.68229
[365]	validation_0-logloss:0.22362
[0]	validation_0-logloss:0.67518
[1000]	validation_0-logloss:0.22330
[1120]	validation_0-logloss:0.22106
[0]	validation_0-logloss:0.67809
[227]	validation_0-logloss:0.41606
[0]	validation_0-logloss:0.67594
[872]	validation_0-logloss:0.20362
Repeat [1m[34m#3
[0]	validation_0-logloss:0.67704
[635]	validation_0-logloss:0.27499
[0]	validation_0-logloss:0.68247
[935]	validation_0-logloss:0.18215
[0]	validation_0-logloss:0.67608
[746]	validation_0-logloss:0.11535
[0]	validation_0-loglo

[I 2023-06-15 15:30:20,567] Trial 13 finished with value: 0.24072421654345516 and parameters: {'booster': 'gbtree', 'alpha': 1.305902982967035e-08, 'lambda': 2.5172351460072036e-06, 'subsample': 0.5753938269675715, 'colsample_bytree': 0.7623992790871502, 'learning_rate': 0.031632044063630856, 'max_depth': 5, 'min_child_weight': 5, 'gamma': 2.163742885523729e-06, 'grow_policy': 'depthwise'}. Best is trial 11 with value: 0.1930995163714154.


Repeat [1m[34m#1
[0]	validation_0-logloss:0.68310
[456]	validation_0-logloss:0.18281
[0]	validation_0-logloss:0.67702
[948]	validation_0-logloss:0.17564
[0]	validation_0-logloss:0.67811
[206]	validation_0-logloss:0.32507
[0]	validation_0-logloss:0.67605
[296]	validation_0-logloss:0.28860
[0]	validation_0-logloss:0.67681
[226]	validation_0-logloss:0.30176
Repeat [1m[34m#2
[0]	validation_0-logloss:0.66796
[779]	validation_0-logloss:0.05935
[0]	validation_0-logloss:0.67258
[426]	validation_0-logloss:0.08057
[0]	validation_0-logloss:0.67284
[291]	validation_0-logloss:0.20741
[0]	validation_0-logloss:0.68081
[215]	validation_0-logloss:0.33796
[0]	validation_0-logloss:0.67919
[667]	validation_0-logloss:0.19042
Repeat [1m[34m#3
[0]	validation_0-logloss:0.67715
[227]	validation_0-logloss:0.23578
[0]	validation_0-logloss:0.67367
[958]	validation_0-logloss:0.12054
[0]	validation_0-logloss:0.68044
[1000]	validation_0-logloss:0.09029
[1274]	validation_0-logloss:0.08938
[0]	validation_0-loglo

[I 2023-06-15 15:30:27,623] Trial 14 finished with value: 0.19042787401206218 and parameters: {'booster': 'gbtree', 'alpha': 0.0027937466766300503, 'lambda': 2.7138521177750515e-06, 'subsample': 0.8141487090356295, 'colsample_bytree': 0.8810635829162934, 'learning_rate': 0.02919081520857816, 'max_depth': 5, 'min_child_weight': 2, 'gamma': 1.9982059654628238e-08, 'grow_policy': 'depthwise'}. Best is trial 14 with value: 0.19042787401206218.


Repeat [1m[34m#1
[0]	validation_0-logloss:0.64428
[287]	validation_0-logloss:0.19797
[0]	validation_0-logloss:0.64830
[321]	validation_0-logloss:0.20343
[0]	validation_0-logloss:0.65742
[506]	validation_0-logloss:0.30169
[0]	validation_0-logloss:0.65392
[168]	validation_0-logloss:0.32080
[0]	validation_0-logloss:0.65758
[227]	validation_0-logloss:0.33696
Repeat [1m[34m#2
[0]	validation_0-logloss:0.63341
[412]	validation_0-logloss:0.08022
[0]	validation_0-logloss:0.66129
[164]	validation_0-logloss:0.19181
[0]	validation_0-logloss:0.66236
[271]	validation_0-logloss:0.18934
[0]	validation_0-logloss:0.66407
[133]	validation_0-logloss:0.39863
[0]	validation_0-logloss:0.65168
[421]	validation_0-logloss:0.20402
Repeat [1m[34m#3
[0]	validation_0-logloss:0.66310
[200]	validation_0-logloss:0.28374
[0]	validation_0-logloss:0.66695
[613]	validation_0-logloss:0.15821
[0]	validation_0-logloss:0.66852
[395]	validation_0-logloss:0.10681
[0]	validation_0-logloss:0.66030
[391]	validation_0-logloss

[I 2023-06-15 15:30:32,372] Trial 15 finished with value: 0.21633831475070214 and parameters: {'booster': 'gbtree', 'alpha': 0.007856446303269218, 'lambda': 1.0251258377880626e-08, 'subsample': 0.8449816432887808, 'colsample_bytree': 0.8948878104762891, 'learning_rate': 0.08357681560179434, 'max_depth': 5, 'min_child_weight': 5, 'gamma': 1.0891531063019111e-08, 'grow_policy': 'depthwise'}. Best is trial 14 with value: 0.19042787401206218.


Repeat [1m[34m#1
[0]	validation_0-logloss:0.68788
[854]	validation_0-logloss:0.22366
[0]	validation_0-logloss:0.68357
[871]	validation_0-logloss:0.18031
[0]	validation_0-logloss:0.68564
[795]	validation_0-logloss:0.33571
[0]	validation_0-logloss:0.68465
[283]	validation_0-logloss:0.33059
[0]	validation_0-logloss:0.68727
[403]	validation_0-logloss:0.32905
Repeat [1m[34m#2
[0]	validation_0-logloss:0.68311
[858]	validation_0-logloss:0.08189
[0]	validation_0-logloss:0.68446
[412]	validation_0-logloss:0.17314
[0]	validation_0-logloss:0.68716
[827]	validation_0-logloss:0.21017
[0]	validation_0-logloss:0.68321
[258]	validation_0-logloss:0.36261
[0]	validation_0-logloss:0.68643
[717]	validation_0-logloss:0.18939
Repeat [1m[34m#3
[0]	validation_0-logloss:0.68827
[339]	validation_0-logloss:0.28302
[0]	validation_0-logloss:0.68692
[756]	validation_0-logloss:0.17217
[0]	validation_0-logloss:0.68397
[742]	validation_0-logloss:0.12015
[0]	validation_0-logloss:0.68884
[731]	validation_0-logloss

[I 2023-06-15 15:30:40,860] Trial 16 finished with value: 0.2231321558737589 and parameters: {'booster': 'gbtree', 'alpha': 0.00134217561088062, 'lambda': 7.594781053671044e-07, 'subsample': 0.46682263229961274, 'colsample_bytree': 0.9998060445070174, 'learning_rate': 0.02101985988722709, 'max_depth': 5, 'min_child_weight': 3, 'gamma': 6.382277604477745e-08, 'grow_policy': 'depthwise'}. Best is trial 14 with value: 0.19042787401206218.


Repeat [1m[34m#1
[0]	validation_0-logloss:0.68401
[908]	validation_0-logloss:0.16983
[0]	validation_0-logloss:0.68314
[1000]	validation_0-logloss:0.17580
[1144]	validation_0-logloss:0.17813
[0]	validation_0-logloss:0.68552
[322]	validation_0-logloss:0.31791
[0]	validation_0-logloss:0.68593
[423]	validation_0-logloss:0.30274
[0]	validation_0-logloss:0.68416
[393]	validation_0-logloss:0.29132
Repeat [1m[34m#2
[0]	validation_0-logloss:0.68101
[1000]	validation_0-logloss:0.06612
[1015]	validation_0-logloss:0.06600
[0]	validation_0-logloss:0.68680
[567]	validation_0-logloss:0.12690
[0]	validation_0-logloss:0.68531
[588]	validation_0-logloss:0.20309
[0]	validation_0-logloss:0.68593
[308]	validation_0-logloss:0.31558
[0]	validation_0-logloss:0.68298
[868]	validation_0-logloss:0.18350
Repeat [1m[34m#3
[0]	validation_0-logloss:0.68165
[497]	validation_0-logloss:0.24873
[0]	validation_0-logloss:0.68414
[1000]	validation_0-logloss:0.12198
[1196]	validation_0-logloss:0.11968
[0]	validation_0

[I 2023-06-15 15:30:50,453] Trial 17 finished with value: 0.19398809258048424 and parameters: {'booster': 'gbtree', 'alpha': 0.03106931723138533, 'lambda': 1.8805299510504855e-05, 'subsample': 0.6185650458858846, 'colsample_bytree': 0.8278290389972701, 'learning_rate': 0.018595626965791767, 'max_depth': 6, 'min_child_weight': 2, 'gamma': 1.6129693080046337e-07, 'grow_policy': 'depthwise'}. Best is trial 14 with value: 0.19042787401206218.


Repeat [1m[34m#1
[0]	validation_0-logloss:1.36002
[101]	validation_0-logloss:17.13552
[0]	validation_0-logloss:0.76786
[120]	validation_0-logloss:20.56262
[0]	validation_0-logloss:1.04871
[109]	validation_0-logloss:0.84322
[0]	validation_0-logloss:0.72684
[250]	validation_0-logloss:0.43913
[0]	validation_0-logloss:1.17248
[104]	validation_0-logloss:17.13552
Repeat [1m[34m#2
[0]	validation_0-logloss:1.15798
[141]	validation_0-logloss:17.58338
[0]	validation_0-logloss:2.85480
[100]	validation_0-logloss:21.41940
[0]	validation_0-logloss:2.16160
[101]	validation_0-logloss:19.70584
[0]	validation_0-logloss:0.77967
[100]	validation_0-logloss:17.13552
[0]	validation_0-logloss:0.85593
[101]	validation_0-logloss:16.27874
Repeat [1m[34m#3
[0]	validation_0-logloss:0.91164
[104]	validation_0-logloss:17.99229
[0]	validation_0-logloss:0.63189
[115]	validation_0-logloss:20.56262
[0]	validation_0-logloss:0.90536
[101]	validation_0-logloss:15.42197
[0]	validation_0-logloss:1.02583
[109]	validatio

[I 2023-06-15 15:30:51,862] Trial 18 finished with value: 13.230997508388972 and parameters: {'booster': 'gblinear', 'alpha': 0.0004917576090950916, 'lambda': 2.0604867195720236e-06, 'subsample': 0.47268447930073065, 'colsample_bytree': 0.7193344273533461}. Best is trial 14 with value: 0.19042787401206218.


Repeat [1m[34m#1
[0]	validation_0-logloss:0.66288
[1000]	validation_0-logloss:0.21399
[1054]	validation_0-logloss:0.21448
[0]	validation_0-logloss:0.67667
[1000]	validation_0-logloss:0.21456
[1524]	validation_0-logloss:0.21092
[0]	validation_0-logloss:0.67068
[429]	validation_0-logloss:0.37235
[0]	validation_0-logloss:0.66989
[404]	validation_0-logloss:0.31835
[0]	validation_0-logloss:0.67279
[263]	validation_0-logloss:0.35235
Repeat [1m[34m#2
[0]	validation_0-logloss:0.66221
[1000]	validation_0-logloss:0.08273
[1355]	validation_0-logloss:0.08143
[0]	validation_0-logloss:0.68102
[353]	validation_0-logloss:0.16782
[0]	validation_0-logloss:0.68043
[1000]	validation_0-logloss:0.19972
[1065]	validation_0-logloss:0.19994
[0]	validation_0-logloss:0.67902
[166]	validation_0-logloss:0.40022
[0]	validation_0-logloss:0.68016
[1000]	validation_0-logloss:0.18707
[1107]	validation_0-logloss:0.18721
Repeat [1m[34m#3
[0]	validation_0-logloss:0.67903
[942]	validation_0-logloss:0.28831
[0]	valida

[W 2023-06-15 15:31:00,963] Trial 19 failed with parameters: {'booster': 'gbtree', 'alpha': 0.9565953476127818, 'lambda': 0.00012312636091171112, 'subsample': 0.9312375839707614, 'colsample_bytree': 0.8585146363436257, 'learning_rate': 0.0429556507861634, 'max_depth': 4, 'min_child_weight': 6, 'gamma': 1.5425985432141796e-07, 'grow_policy': 'depthwise'} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/home/alex/.local/lib/python3.10/site-packages/optuna/study/_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_5861/4120141353.py", line 75, in objective
    model.fit(X_train, y_train, eval_set=[(X_val, y_val)], verbose=1000)
  File "/home/alex/.local/lib/python3.10/site-packages/xgboost/core.py", line 620, in inner_f
    return func(**kwargs)
  File "/home/alex/.local/lib/python3.10/site-packages/xgboost/sklearn.py", line 1490, in fit
    self._Booster = train(
  File "/home/alex/.local/lib/pytho

KeyboardInterrupt: 

# Load XGBoost parameters

In [21]:
import glob

param_list = glob.glob("optuna_xgb.csv")
models = list()
best_xb_params = list()

xb_params = pd.DataFrame()

for f in param_list:
    tmp = pd.read_csv(f, index_col='Unnamed: 0')
    if xb_params.shape[0] == 0:
        xb_params = tmp
    else:
        xb_params = pd.concat([cb_params, tmp])
        
xb_params = xb_params.sort_values('value').head(10)
param_cols = [c for c in xb_params.columns if c.startswith('params_')]
xb_params = xb_params[param_cols]

for idx, row in xb_params.iterrows():
    row_dict = {k[7:]: v for k, v in row.items()}
    row_dict['n_estimators'] = 3000
    row_dict['random_state'] = 14062023
    row_dict['early_stopping_rounds'] = 100
    row_dict['verbosity'] = 0
    row_dict['scale_pos_weight'] = 4.71
    row_dict['objective'] = "binary:logistic"
    row_dict['eval_metric'] = "logloss"
    row_dict['tree_method'] = "exact"
    row_dict['booster'] = "gbtree"

    if row_dict["booster"] in ["gbtree", "dart"]:
        row_dict["learning_rate"] = float(row_dict['learning_rate'])
        row_dict["max_depth"] = int(row_dict['max_depth'])
        row_dict["min_child_weight"] = float(row_dict['min_child_weight'])
        row_dict["gamma"] = float(row_dict['gamma'])
    else:
        row_dict["learning_rate"] = None
        row_dict["max_depth"] = None
        row_dict["min_child_weight"] = None
        row_dict["gamma"] = None
        row_dict["grow_policy"] = None     

    if row_dict["booster"] == "dart":
        row_dict["rate_drop"] = float(row_dict['rate_drop'])
        row_dict["skip_drop"] = float(row_dict['skip_drop'])
    else:
        row_dict["sample_type"] = None
        row_dict["normalize_type"] = None
        row_dict["rate_drop"] = None
        row_dict["skip_drop"] = None

    best_xb_params.append(row_dict)

# XGBoost train

In [22]:
def bll_metric(y_true, y_pred):
    return 'balanced_log_loss', balanced_log_loss(y_true, y_pred), False

def xgboost_training():
    models_ = list()
    bll_list = list()
    weights_ = list()
    
    X, y = train_df[features], train_df.Class
#     X, y = generated_features_train, train_df.Class
     
    kf = MultilabelStratifiedKFold(n_splits=CFG.n_stacking_folds, shuffle=True, random_state=8062023+20)
    metric = balanced_log_loss
    eval_results_ = {}     # used to store evaluation results for each fold

    oof_level2 = np.zeros([y.shape[0], len(best_xb_params) + 1])
    oof_level2[:, len(best_xb_params)] = y

    print(f"Training with {blu}{X.shape[1]}{res} features")

    for fold, (fit_idx, val_idx) in tqdm(enumerate(kf.split(X=train_df, y=greeks.iloc[:,1:3]), start = 1),
                                         total=CFG.n_stacking_folds):
        
        # Split the dataset according to the fold indexes.
        X_train = X.iloc[fit_idx]
        X_val = X.iloc[val_idx]
        y_train = y.iloc[fit_idx]
        y_val = y.iloc[val_idx]

        for i, params in enumerate(best_xb_params):
            
            clf = xgb.XGBClassifier(**params)
            
            clf.fit(X_train, y_train, eval_set=[(X_val, y_val)], verbose=500)
            models_.append(clf)

            val_preds = clf.predict_proba(X_val)[:,1]
            oof_level2[val_idx, i] = val_preds

            val_score = balanced_log_loss(y_val, val_preds)
            best_iter = 0

#             print(clf.best_iteration_)
            
            print(f'Fold: {blu}{fold:>3}{res}| bll_metric: {blu}{val_score:.5f}{res}'
                  f' | Best iteration: {blu}{best_iter:>4}{res}')
        
    return oof_level2, models_

if CFG.stacking:
    oof_level2_xgb, models_xgb = xgboost_training()

Training with [1m[34m39[0m features


  0%|          | 0/10 [00:00<?, ?it/s]

[0]	validation_0-logloss:0.65601
[500]	validation_0-logloss:0.05221
[531]	validation_0-logloss:0.05197
Fold: [1m[34m  1[0m| bll_metric: [1m[34m0.07028[0m | Best iteration: [1m[34m   0[0m
[0]	validation_0-logloss:0.66491
[500]	validation_0-logloss:0.07772
[735]	validation_0-logloss:0.07377
Fold: [1m[34m  1[0m| bll_metric: [1m[34m0.09962[0m | Best iteration: [1m[34m   0[0m
[0]	validation_0-logloss:0.64909
[500]	validation_0-logloss:0.08023
[510]	validation_0-logloss:0.08030
Fold: [1m[34m  1[0m| bll_metric: [1m[34m0.10457[0m | Best iteration: [1m[34m   0[0m
[0]	validation_0-logloss:0.66341
[500]	validation_0-logloss:0.07050
[649]	validation_0-logloss:0.07042
Fold: [1m[34m  1[0m| bll_metric: [1m[34m0.09266[0m | Best iteration: [1m[34m   0[0m
[0]	validation_0-logloss:0.65339
[500]	validation_0-logloss:0.07362
[508]	validation_0-logloss:0.07433
Fold: [1m[34m  1[0m| bll_metric: [1m[34m0.08669[0m | Best iteration: [1m[34m   0[0m
[0]	validation_0-log

[646]	validation_0-logloss:0.08729
Fold: [1m[34m  5[0m| bll_metric: [1m[34m0.10013[0m | Best iteration: [1m[34m   0[0m
[0]	validation_0-logloss:0.66566
[500]	validation_0-logloss:0.10372
[720]	validation_0-logloss:0.10107
Fold: [1m[34m  5[0m| bll_metric: [1m[34m0.10159[0m | Best iteration: [1m[34m   0[0m
[0]	validation_0-logloss:0.66404
[500]	validation_0-logloss:0.08916
[656]	validation_0-logloss:0.08924
Fold: [1m[34m  5[0m| bll_metric: [1m[34m0.09322[0m | Best iteration: [1m[34m   0[0m
[0]	validation_0-logloss:0.66765
[500]	validation_0-logloss:0.08458
[655]	validation_0-logloss:0.08460
Fold: [1m[34m  5[0m| bll_metric: [1m[34m0.09224[0m | Best iteration: [1m[34m   0[0m
[0]	validation_0-logloss:0.66462
[500]	validation_0-logloss:0.09470
[937]	validation_0-logloss:0.08926
Fold: [1m[34m  5[0m| bll_metric: [1m[34m0.10344[0m | Best iteration: [1m[34m   0[0m
[0]	validation_0-logloss:0.65429
[385]	validation_0-logloss:0.13522
Fold: [1m[34m  6[

[232]	validation_0-logloss:0.19921
Fold: [1m[34m 10[0m| bll_metric: [1m[34m0.57993[0m | Best iteration: [1m[34m   0[0m
[0]	validation_0-logloss:0.65842
[262]	validation_0-logloss:0.18848
Fold: [1m[34m 10[0m| bll_metric: [1m[34m0.59030[0m | Best iteration: [1m[34m   0[0m
[0]	validation_0-logloss:0.66117
[368]	validation_0-logloss:0.17645
Fold: [1m[34m 10[0m| bll_metric: [1m[34m0.58525[0m | Best iteration: [1m[34m   0[0m
[0]	validation_0-logloss:0.66031
[261]	validation_0-logloss:0.19464
Fold: [1m[34m 10[0m| bll_metric: [1m[34m0.61616[0m | Best iteration: [1m[34m   0[0m
[0]	validation_0-logloss:0.66443
[375]	validation_0-logloss:0.20335
Fold: [1m[34m 10[0m| bll_metric: [1m[34m0.67076[0m | Best iteration: [1m[34m   0[0m
[0]	validation_0-logloss:0.66100
[260]	validation_0-logloss:0.19775
Fold: [1m[34m 10[0m| bll_metric: [1m[34m0.61978[0m | Best iteration: [1m[34m   0[0m


# CatBoost Optuna optimization

In [67]:
from optuna.integration import CatBoostPruningCallback

X, y = train_df[features], train_df.Class

def objective(trial):
    
    bll_list = list()

    # Parameters
    params = {
        'task_type': 'CPU', # GPU
        'auto_class_weights': 'Balanced',
        'boosting_type': trial.suggest_categorical('boosting_type', ['Ordered', 'Plain']),
        'bootstrap_type': trial.suggest_categorical('bootstrap_type', ['Bayesian', 'Bernoulli']),
        'eval_metric': 'Logloss',
        'loss_function': 'Logloss', 
        'random_seed': 10062023,
        'od_type': 'Iter', # Type of overfitting detector - stop after k iteraions
        'od_wait': 100, # Overfitting detector - stop training after k iterations without metric improvement
#             'metric_period': 100, # Show metric each k iterations
        'grow_policy': trial.suggest_categorical('grow_policy', ['SymmetricTree', 'Depthwise', 'Lossguide']),
         # Hyperparamters (in order of importance decreasing)
        'iterations' : 5000, # trial.suggest_int('iterations', 300, 1200),        
        'learning_rate' : trial.suggest_loguniform('learning_rate', 1e-3, 3e-1),    
        'l2_leaf_reg': trial.suggest_loguniform("l2_leaf_reg", 1e-8, 100),
        'depth' : trial.suggest_int('depth', 4, 10),  # Max tree depth         
         # decrease to deal with overfit
        'subsample': trial.suggest_float('subsample', 0.3, 1), # randomly select part of data without return
        'colsample_bylevel': trial.suggest_float('colsample_bylevel', 0.3, 1), # the percentage of features to use at each 
                                                                               # split selection
                                                                               # alias: rsm
                                                                               # not supported in GPU mode
         # decrease to deal with overfit
         'max_leaves': trial.suggest_int('max_leaves', 4, 128), # Max number of leaves in one tree                                                 
         # increase to deal with overfit
        'random_strength': trial.suggest_float('random_strength', 0, 100), # The amount of randomness to use 
                                                                           # for scoring splits when the tree structure
                                                                           # is selected. Helps to avoid overfitting
        'bagging_temperature' : trial.suggest_float('bagging_temperature', 0, 100),     # Assigns random 
                                                                                        # weights to objects
                                                                                        # works only with Bayesian bootstrap
        # this feature value can be increased to 1024 for important features:
        # per_float_feature_quantization='0:border_count=1024'
        'border_count': 254, # trial.suggest_categorical('border_count', [128, 254]), # The number of splits for numerical features
                                                                                      # bigger is better but slowly
                                                                                      # alias: max_bin
        'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 5, 100), # Minimal number of data in one leaf
                                                                           # aliases: min_child_samples, 

    }

    if params['grow_policy'] != 'SymmetricTree': 
        params['boosting_type'] = 'Plain'

    if params['grow_policy'] != 'Lossguide': 
        params['max_leaves'] = None
        
    if params['bootstrap_type'] != 'Bayesian':
        params['bagging_temperature'] = None
    else:
        params['subsample'] = None

#         if params['bootstrap_type'] == 'Bayesian':
#             params['subsample'] = None
#         else:
#             params['subsample'] = trial.suggest_float('subsample', 0.3, 1)

    for i in range(CFG.n_optimize_repeats):
        print(f'Repeat {blu}#{i+1}')

        # Make random under-sampling to balance classes
        positive_count_train = train_df.Class.value_counts()[1]
        sampler = RandomUnderSampler(sampling_strategy={0: positive_count_train, 
                                                        1: positive_count_train}, 
                                     random_state=15062023+i, 
                                     replacement=True)

        X_re, y_re = pd.concat([train_df[features], greeks.iloc[:,1:4]], axis=1), train_df['Class']
        
        if CFG.undersample:
            X_re, y_re = sampler.fit_resample(X_re, y_re)
        
        # Create Stratified Multilabel k-Fold scheme
        kf = MultilabelStratifiedKFold(n_splits=CFG.n_feature_sel_folds, shuffle=True, random_state=10062023+i)

        # Create an oof array for inner loop
        oof = np.zeros(X_re.shape[0])

        # Stratify based on Class and Alpha (3 types of conditions)
        for fold, (train_idx, val_idx) in enumerate(kf.split(X=X_re[features], y=X_re.iloc[:,-3:]), start=1): 
            X, y = X_re[features], y_re
            
            # Split the dataset according to the fold indexes.
            X_train = X.iloc[train_idx]
            X_val = X.iloc[val_idx]
            y_train = y.iloc[train_idx]
            y_val = y.iloc[val_idx]

            train_pool = Pool(X_train, y_train, cat_features=['EJ'])
            val_pool = Pool(X_val, y_val, cat_features=['EJ'])

            # Learning
            model = cat.CatBoostClassifier(**params)     
            # Add a callback for pruning
#             pruning_callback = optuna.integration.CatBoostPruningCallback(trial, "Logloss")
            model.fit(train_pool, eval_set=val_pool, verbose=0)#, callbacks=[pruning_callback])
            # Evoke pruning manually
#                 pruning_callback.check_pruned()
            # Predict
            val_preds = model.predict_proba(val_pool)[:,1]
            oof[val_idx] = val_preds
        
        bll_list.append(balanced_log_loss(y_re, oof))    
    
    return np.mean(bll_list)

if CFG.cb_optimize:
#     study = optuna.create_study(pruner=optuna.pruners.MedianPruner(n_warmup_steps=100), direction="minimize")
    study = optuna.create_study(direction="minimize")
    study.optimize(objective, n_trials=CFG.n_trials)

    print("Number of finished trials: {}".format(len(study.trials)))

    print("Best trial:")
    trial = study.best_trial

    print("  Value: {}".format(trial.value))

    print("  Params: ")
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))

    df = study.trials_dataframe()
    df.sort_values('value').iloc[:, [1] + list(range(5, 14))]
    df.to_csv(f'optuna_catboost_fold_.csv')

[I 2023-06-15 15:47:25,150] A new study created in memory with name: no-name-f3c5fd83-32bb-46e0-92c1-970d9dcf1e13


Repeat [1m[34m#1
Repeat [1m[34m#2
Repeat [1m[34m#3
Repeat [1m[34m#4
Repeat [1m[34m#5


[I 2023-06-15 15:48:08,476] Trial 0 finished with value: 0.17049209249813652 and parameters: {'boosting_type': 'Plain', 'bootstrap_type': 'Bernoulli', 'grow_policy': 'Depthwise', 'learning_rate': 0.006043094362183259, 'l2_leaf_reg': 5.2665170889586595e-08, 'depth': 4, 'subsample': 0.4264901925342959, 'colsample_bylevel': 0.759570287866387, 'max_leaves': 87, 'random_strength': 34.15720934838517, 'bagging_temperature': 66.23907022233848, 'min_data_in_leaf': 34}. Best is trial 0 with value: 0.17049209249813652.


Repeat [1m[34m#1
Repeat [1m[34m#2
Repeat [1m[34m#3


[W 2023-06-15 15:48:26,409] Trial 1 failed with parameters: {'boosting_type': 'Plain', 'bootstrap_type': 'Bernoulli', 'grow_policy': 'Depthwise', 'learning_rate': 0.008459153699234497, 'l2_leaf_reg': 3.275667642123539e-05, 'depth': 6, 'subsample': 0.8027356041258045, 'colsample_bylevel': 0.4943883271450967, 'max_leaves': 97, 'random_strength': 84.78125060785999, 'bagging_temperature': 13.006780327558431, 'min_data_in_leaf': 46} because of the following error: KeyboardInterrupt('').
Traceback (most recent call last):
  File "/home/alex/.local/lib/python3.10/site-packages/optuna/study/_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_5861/708397832.py", line 104, in objective
    model.fit(train_pool, eval_set=val_pool, verbose=0)#, callbacks=[pruning_callback])
  File "/home/alex/.local/lib/python3.10/site-packages/catboost/core.py", line 5131, in fit
    self._fit(X, y, cat_features, text_features, embedding_features, None, sample_weight, N

KeyboardInterrupt: 

# Load CatBoost parameters

In [24]:
import glob

param_list = glob.glob("optuna_catboost*.csv")
models = list()
best_cb_params = list()

cb_params = pd.DataFrame()

for f in param_list:
    tmp = pd.read_csv(f, index_col='Unnamed: 0')
    if cb_params.shape[0] == 0:
        cb_params = tmp
    else:
        cb_params = pd.concat([cb_params, tmp])
        
cb_params = cb_params.sort_values('value').head(10)
param_cols = [c for c in cb_params.columns if c.startswith('params_')]
cb_params = cb_params[param_cols]


for idx, row in cb_params.iterrows():
    row_dict = {k[7:]: v for k, v in row.items()}
    row_dict['task_type'] = 'CPU'
    row_dict['auto_class_weights'] = 'Balanced'
    row_dict['eval_metric'] = 'Logloss'
    row_dict['loss_function'] = 'Logloss'
    row_dict['random_seed'] = 13062023
    row_dict['verbose'] = 0
    row_dict['od_type'] = 'Iter'
    row_dict['od_wait'] = 100
    row_dict['border_count'] = 254
    row_dict['iterations'] = 10000
    row_dict['bagging_temperature'] = float(row_dict['bagging_temperature'])
    row_dict['subsample'] = 0.7 # float(row_dict['subsample'])
    row_dict['learning_rate'] = float(row_dict['learning_rate'])
    row_dict['l2_leaf_reg'] = float(row_dict['l2_leaf_reg'])
    row_dict['depth'] = int(row_dict['depth'])
    row_dict['random_strength'] = float(row_dict['random_strength'])
    row_dict['min_data_in_leaf'] = int(row_dict['min_data_in_leaf'])
    
    if row_dict['grow_policy'] == 'Lossguide':
        row_dict['max_leaves'] = int(row_dict['max_leaves'])
    else:
        del row_dict['max_leaves']
    
    if row_dict['grow_policy'] == 'SymmetricTree':
        row_dict['boosting_type'] = 'Plain'
    else:
        del row_dict['boosting_type']
        
    if row_dict['task_type'] == 'CPU':
        row_dict['colsample_bylevel'] = 0.6 # float(row_dict['colsample_bylevel'])
    else:
        del row_dict['colsample_bylevel']
    
    best_cb_params.append(row_dict)

# CatBoost train

In [25]:
def bll_metric(y_true, y_pred):
    return 'balanced_log_loss', balanced_log_loss(y_true, y_pred), False

def cb_training():
    models_ = list()
    bll_list = list()
    weights_ = list()
    
    X, y = train_df[features], train_df.Class
#     X, y = generated_features_train, train_df.Class
     
    kf = MultilabelStratifiedKFold(n_splits=CFG.n_stacking_folds, shuffle=True, random_state=8062023+20)
    metric = balanced_log_loss
    eval_results_ = {}     # used to store evaluation results for each fold

    oof_level2 = np.zeros([y.shape[0], len(best_cb_params) + 1])
    oof_level2[:, len(best_cb_params)] = y

    print(f"Training with {blu}{X.shape[1]}{res} features")

    for fold, (fit_idx, val_idx) in tqdm(enumerate(kf.split(X=train_df, y=greeks.iloc[:,1:3]), start = 1),
                                         total=CFG.n_stacking_folds):
        
        # Split the dataset according to the fold indexes.
        X_train = X.iloc[fit_idx]
        X_val = X.iloc[val_idx]
        y_train = y.iloc[fit_idx]
        y_val = y.iloc[val_idx]

        train_pool = Pool(X_train, y_train, cat_features=['EJ'])
        val_pool = Pool(X_val, y_val, cat_features=['EJ'])
        
        for i, params in enumerate(best_cb_params):
            
            model = cat.CatBoostClassifier(**params)
            model.fit(train_pool, eval_set=val_pool, verbose=0)
            models_.append(model)

            val_preds = model.predict_proba(val_pool)[:,1]
            oof_level2[val_idx, i] = val_preds

            val_score = balanced_log_loss(y_val, val_preds)
            best_iter = model.best_iteration_

            print(model.best_iteration_)
            
            print(f'Fold: {blu}{fold:>3}{res}| bll_metric: {blu}{val_score:.5f}{res}'
                  f' | Best iteration: {blu}{best_iter:>4}{res}')
        
    return oof_level2, models_

if CFG.stacking:
    oof_level2_cb, models_cb = cb_training()

Training with [1m[34m39[0m features


  0%|          | 0/10 [00:00<?, ?it/s]

6551
Fold: [1m[34m  1[0m| bll_metric: [1m[34m0.07625[0m | Best iteration: [1m[34m6551[0m
9992
Fold: [1m[34m  1[0m| bll_metric: [1m[34m0.07660[0m | Best iteration: [1m[34m9992[0m
6238
Fold: [1m[34m  1[0m| bll_metric: [1m[34m0.10452[0m | Best iteration: [1m[34m6238[0m
3594
Fold: [1m[34m  1[0m| bll_metric: [1m[34m0.09652[0m | Best iteration: [1m[34m3594[0m
9994
Fold: [1m[34m  1[0m| bll_metric: [1m[34m0.07382[0m | Best iteration: [1m[34m9994[0m
6527
Fold: [1m[34m  1[0m| bll_metric: [1m[34m0.09270[0m | Best iteration: [1m[34m6527[0m
2277
Fold: [1m[34m  1[0m| bll_metric: [1m[34m0.09004[0m | Best iteration: [1m[34m2277[0m
1763
Fold: [1m[34m  1[0m| bll_metric: [1m[34m0.09397[0m | Best iteration: [1m[34m1763[0m
5158
Fold: [1m[34m  1[0m| bll_metric: [1m[34m0.09341[0m | Best iteration: [1m[34m5158[0m
5944
Fold: [1m[34m  1[0m| bll_metric: [1m[34m0.08718[0m | Best iteration: [1m[34m5944[0m
4727
Fold: [1m[34m

1151
Fold: [1m[34m  9[0m| bll_metric: [1m[34m0.23804[0m | Best iteration: [1m[34m1151[0m
1645
Fold: [1m[34m  9[0m| bll_metric: [1m[34m0.24161[0m | Best iteration: [1m[34m1645[0m
787
Fold: [1m[34m  9[0m| bll_metric: [1m[34m0.22050[0m | Best iteration: [1m[34m 787[0m
827
Fold: [1m[34m  9[0m| bll_metric: [1m[34m0.19958[0m | Best iteration: [1m[34m 827[0m
1544
Fold: [1m[34m  9[0m| bll_metric: [1m[34m0.23594[0m | Best iteration: [1m[34m1544[0m
745
Fold: [1m[34m  9[0m| bll_metric: [1m[34m0.23193[0m | Best iteration: [1m[34m 745[0m
762
Fold: [1m[34m 10[0m| bll_metric: [1m[34m0.47249[0m | Best iteration: [1m[34m 762[0m
905
Fold: [1m[34m 10[0m| bll_metric: [1m[34m0.45249[0m | Best iteration: [1m[34m 905[0m
919
Fold: [1m[34m 10[0m| bll_metric: [1m[34m0.46607[0m | Best iteration: [1m[34m 919[0m
782
Fold: [1m[34m 10[0m| bll_metric: [1m[34m0.45930[0m | Best iteration: [1m[34m 782[0m
870
Fold: [1m[34m 10[0m|

# Stacking with Logistic Regression

In [36]:
from sklearn.linear_model import LogisticRegression

oof_level2 = np.concatenate([oof_level2_lgbm[:,:-1] , oof_level2_cb[:,:-1], oof_level2_xgb[:,:-1]], axis=1)
X = oof_level2
y = oof_level2_lgbm[:,-1]

# mean bll
print(balanced_log_loss(y, np.mean(X, axis=1)))

lr = LogisticRegression(class_weight='balanced')
lr.fit(X, y)

pred = lr.predict_proba(X)[:,1]

# lr bll
print(balanced_log_loss(y, pred))

weights = lr.coef_[0]

0.18570262853231068
0.15867122888336638


In [38]:
len(models_cb)

100

In [27]:
# 0.16616213477809366
# 0.162813150781854

# Models evaluation

In [28]:
# ## Model Evaluation
# metric_score_folds = pd.DataFrame.from_dict(all_eval_results_)
# fit_logloss = []
# val_logloss = []

# for seed in CFG.seeds:
#     for fold in range(1,CFG.n_folds+1):
#         fit_logloss.append(metric_score_folds[seed][fold]['training']['balanced_log_loss'])
#         val_logloss.append(metric_score_folds[seed][fold]['valid_1']['balanced_log_loss'])

# fig, axes = plt.subplots(math.ceil(CFG.n_folds*len(CFG.seeds)/CFG.n_folds), CFG.n_folds, figsize=(20, 20), dpi=150)
# ax = axes.flatten()
# for i, (f, v, m) in enumerate(zip(fit_logloss, val_logloss, models_), start = 1): 
#     sns.lineplot(f, color='#B90000', ax=ax[i-1], label='fit')
#     sns.lineplot(v, color='#048BA8', ax=ax[i-1], label='val')
#     ax[i-1].legend()
#     ax[i-1].spines['top'].set_visible(False);
#     ax[i-1].spines['right'].set_visible(False)
#     ax[i-1].set_title(f'Seed {CFG.seeds[(i-1)//CFG.n_folds]} Fold {CFG.n_folds if i%CFG.n_folds==0 else i%CFG.n_folds}', fontdict={'fontweight': 'bold'})

#     color =  ['#048BA8', palette[-3]]
#     best_iter = m.best_iteration_
#     span_range = [[0, best_iter], [best_iter + 10, best_iter + CFG.num_boost_round]]

#     for idx, sub_title in enumerate([f'Best\nIteration: {best_iter}', f'Early\n Stopping: 2000']):
#         ax[i-1].annotate(sub_title,
#                     xy=(sum(span_range[idx])/2 , 0.5),
#                     xytext=(0,0), textcoords='offset points',
#                     va="center", ha="center",
#                     color="w", fontsize=16, fontweight='bold',
#                     bbox=dict(boxstyle='round4', pad=0.4, color=color[idx], alpha=0.6))
#         ax[i-1].axvspan(span_range[idx][0]-0.4,span_range[idx][1]+0.4,  color=color[idx], alpha=0.07)

#     ax[i-1].set_xlim(0, best_iter + 20 + 2000)
#     ax[i-1].legend(bbox_to_anchor=(0.95, 1), loc='upper right', title='logloss')

# plt.tight_layout();

# Predict test

In [29]:
models = models_lgbm # + models_cb

def predict(X):
    y = np.zeros(len(X))
    for i, model in enumerate(models):
#         y += weights[i] * model.predict_proba(X)[:,1]
        y += model.predict_proba(X)[:,1]
#     return y / sum(weights)
    return y / len(models)

predictions = predict(test_df[features])
# predictions = predict(generated_features_test)

test_df['class_1'] = predictions
test_df['class_0'] = 1 - predictions

sample_submission[['class_0', 'class_1']] = test_df[['class_0', 'class_1']]
sample_submission.to_csv(r"submission.csv", index=False)
sample_submission

Unnamed: 0,Id,class_0,class_1
0,00eed32682bb,0.696636,0.303364
1,010ebe33f668,0.696636,0.303364
2,02fa521e1838,0.696636,0.303364
3,040e15f562a2,0.696636,0.303364
4,046e85c7cc7f,0.696636,0.303364


You have a lot of resulting features. I have already identified a few important once. 