### This notebook describes how to structure a project.

During the competition there were many tasks to do, plan, research, test, tune, accept disappointment of failed tests, and be happy with a little improvement. 

Therefore, the workspace and the files of the project should be structured in a flexible way with less repeated code. In other words, to split code from data/configuration to save time and reduce errors/bugs.

Therefore, let us first define the main entities in the project.
There are four main entities and usually these are the same in all projects: **experiment**, **model**, **level**, and **stack**. These will be modeled by classes as described in this notebook.


*This is a draft work, and will be improved regularly.*


In [1]:
# Familiar imports
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from IPython.display import display

import os
import gc
import glob
import random
from datetime import datetime
from pathlib import Path



# helpers
from sklearn.preprocessing import OrdinalEncoder, LabelEncoder, OneHotEncoder, PowerTransformer, StandardScaler, \
                                  MinMaxScaler, RobustScaler, PolynomialFeatures
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split, KFold, cross_val_score, StratifiedKFold
from sklearn.pipeline import make_pipeline, Pipeline

# memory stuff
from fail_safe_parallel_memory_reduction import Reducer


# Models
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble     import HistGradientBoostingClassifier

from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier

# base
from sklearn.base import BaseEstimator, RegressorMixin, TransformerMixin, clone

# scoring
from sklearn import metrics

In [2]:
# notebook options
pd.set_option("display.max_columns", 100)
path = "../input/tabular-playground-series-sep-2021/"
train_file = "train.csv"
test_file = "test.csv"

In [3]:
# Load the training data
train = pd.read_csv(f'{path}{os.sep}{train_file}', index_col=0)
test = pd.read_csv(f'{path}{os.sep}{test_file}', index_col=0)

In [4]:
def reduce_mem_usage(dataframe):
    m_start = dataframe.memory_usage().sum() / 1024 ** 2
    for col in dataframe.columns:
        col_type = dataframe[col].dtype
        if col_type != object:
            c_min = dataframe[col].min()
            c_max = dataframe[col].max()
            if str(col_type)[:3] == 'int':
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                    dataframe[col] = dataframe[col].astype(np.int8)
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                    dataframe[col] = dataframe[col].astype(np.int16)
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                    dataframe[col] = dataframe[col].astype(np.int32)
                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                    dataframe[col] = dataframe[col].astype(np.int64)
            elif str(col_type)[:5] == 'float':
                if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                    dataframe[col] = dataframe[col].astype(np.float16)
                elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                    dataframe[col] = dataframe[col].astype(np.float32)
                else:
                    dataframe[col] = dataframe[col].astype(np.float64)

    m_end = dataframe.memory_usage().sum() / 1024 ** 2
    return dataframe

In [5]:
print(f'Train set size {train.memory_usage(index=False).sum()/(2**30)}')
print(f'Test set size {test.memory_usage(index=False).sum()/(2**30)}')

Train set size 0.8493092730641365
Test set size 0.4338468015193939


In [6]:
train = reduce_mem_usage(train)
test = reduce_mem_usage(test)
gc.collect()
print(f'Train set size {train.memory_usage(index=False).sum()/(2**30)}')
print(f'Test set size {test.memory_usage(index=False).sum()/(2**30)}')

Train set size 0.25247323978692293
Test set size 0.12960254028439522


In [7]:

# did not work for me !
# # n_jobs=4
# # memory reducer
# reducer = Reducer(use_categoricals=False)
# train = reducer.reduce(train,  verbose=True)
# test = reducer.reduce(test)
# gc.collect()


In [8]:
# Separate target from features
y = train['claim']
X = train.drop(['claim'], axis=1)


### Feature Engineering

In [9]:

# identify columns
numerical_cols = list(X.select_dtypes(include=np.number).columns)
non_numeric_cols = list(X.select_dtypes(include=['object', 'bool']).columns)

print(f'We have {len(numerical_cols)} numeric and {len(non_numeric_cols)} non-numeric features')


# work on a copy
X_train = X.copy()
X_test = test.copy()


# all features
features = non_numeric_cols + numerical_cols

# new features
# https://www.kaggle.com/hiro5299834/tps-sep-2021-single-lgbm
X_train['n_missing'] = X_train[features].isna().sum(axis=1)
X_test['n_missing'] = X_test[features].isna().sum(axis=1)

X_train['std'] = X_train[features].std(axis=1)
X_test['std'] = X_test[features].std(axis=1)

#X_train['min'] = X_train[features].min(axis=1)
#X_test['min'] = X_test[features].min(axis=1)

features += ['n_missing', 'std']
#n_missing = X_train['n_missing'].copy()

# imputation
X_train[features] = X_train[features].fillna(X_train[features].mean())
X_test[features] = X_test[features].fillna(X_test[features].mean())

# scaler = RobustScaler()
# X_train[features] = scaler.fit_transform(X_train[features])
# X_test[features] = scaler.transform(X_test[features])


# useful for column transformers 
numerical_ix = X_train.columns.get_indexer(features)
#non_numeric_ix = X_train.columns.get_indexer(non_numeric_cols)

We have 118 numeric and 0 non-numeric features


  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)


### Model-dev helper functions

These are functions to save and load predictions, they can be wrapped within a class for a better modeling or kept as they are since they are independent of the project setting.


In [10]:

## helper fucntions
def seed_everything(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)

def to_file(data, output_folder, idxs=None, suffix='.csv'):
    print(data)
    df = pd.DataFrame(data)
    df.to_csv(f'{output_folder}{os.sep}{suffix}', index=True)
        
    
def calc_folds_indexes(X, y, n_folds=5, shuffle=True, sampler=KFold, seeds=[42]):
    """
    Create folds from a dataset X and a target y
    sampler: can be KFold,  StratifiedKFold, or any sampling class  
    
    return a list of dictionaries of {'seed':, 'idxs':[train_idxs, test_idxs]}
    """
    folds_idxs_list = []
    for seed in seeds:
        folds = sampler(n_splits=n_folds, 
                        random_state=seed,
                        shuffle=shuffle)

        folds_idxs_list.append({'seed': seed, 'idxs':list(folds.split(X, y))})
        
    return folds_idxs_list   

def score(y, target, average=False):
    # if y is a list then it will return a list of scores
    # if average is True then it will return the mean of the scores
    
    if type(y) in [list, np.ndarray]:
        scores = []
        for y_i in y:
            scores.append(score_func(y_i, target, **score_func_params))
        if average:
            return np.mean(scores)
        else:
            return scores
        
    return score_func(y, target, **score_func_param)


In [11]:
# initialize things

# seed
seed = 42
seed_everything(seed)

### Forward selection

In [12]:
def HillClimber():
    """ Ensembel multiple OOFs meta-features.
        See https://www.kaggle.com/cdeotte/forward-selection-oof-ensemble-0-942-private
    """
    def __init__(self,
                 resolution,
                 tol,
                 patience, 
                 score=score, 
                 duplicat=False):
        self.resolution = resolution
        self.tol = tol
        self.patience = patience
        self.score = score
        
    def hill_climb(self, X, y, verbose=True):

        best_x_idx = np.argmax(X)
        best_score = X[best_x]
        selected =  [best_x_idx]
        weights = [1] 
        print(f'Initial score from the best model {best_x_idx} is {best_score} ')
        for x in X:
            if not duplicate and x in selected:
                continue       
            # find best weights
            best_alpha = 0
            some_improvement = False
            for i in range(int(resolution * patience)):
                alpha_i = i / resolution
                score_i = alpha_i * x + (1 - alpha_i) * best_score
                
                # significant improvement?
                if score_i > best_score and ((score_i - best_score) >= self.tol): 
                    best_alpha = alpha_i
                    best_score = score_i
                    some_improvement = True
            
            if some_improvement:
                weights.append(best_alpha)
            
        return best_score, selected, weights

### ModelWrapper 
This main role of this class is to avoid coding multiple classes for each model (or model types). We can see that models can actually be categorized into different categories, where some models accept more parameters than the others. For instance xgboost can use an evaluation set to determine the stopping round number, while Lasso does not have such extra parameters.

Thanks to the flexibility of Python and the design of the base models, we can wrap the model and develope a `wrapper` to do what the model should do. In fact, we can easily stretch this class to support sklearn pipelines or any framework we are using. The idea is again, seperate code from data and try to generalize.


In [13]:
class ModelWrapper():
    def __init__(self, 
                 model,
                 name,
                 uses_eval_set=False,
                 fit_params={}):
        
        self.model = model
        self.name = name
        
        self.uses_eval_set = uses_eval_set
        self.fit_params = fit_params # any extra params for the 'fit' function
                
    def fit(self, X, y, eval_set=None):
        if self.uses_eval_set:
            self.model.fit(X, y, eval_set=eval_set, **(self.fit_params))
        else:
            self.model.fit(X, y, **(self.fit_params)) 
        return self
    

    def predict(self, X):
        return self.model.predict(X)

    def predict_proba(self, X):
        return self.model.predict_proba(X)
    
    def clone_me(self, random_state=None):
        wrapper = ModelWrapper(model=clone(self.model), # clone from sklean.base
                               name=self.name, 
                               uses_eval_set=self.uses_eval_set,
                               fit_params=self.fit_params)
        wrapper.name = self.name
        if random_state is not None:
            wrapper.set_random_state(random_state)
        
        return wrapper
    
    def set_random_state(self, random_state):
        if hasattr(self.model, 'random_state'):
            self.model.random_state = random_state
        elif hasattr(self.model, 'random_seed'):
            self.model.random_seed = random_state
            
    def get_random_state(self):
        if hasattr(self.model, 'random_state'):
            return self.model.random_state 
        elif hasattr(self.model, 'random_seed'):
            return self.model.random_seed
    
        

### ModelTrainer
This role of this class is to train a model and calculate the oofs and the test predictions (meta-features). That is, to cross validate.


In [14]:
class ModelTrainer():
    def __init__(self,
                  model: ModelWrapper):
        
        self.model = model
        
    def cross_validate(self,
                  X, y,
                  X_test,
                  folds_idxs,
                  transformer=None,
                  fit_transform_on_test_set=False,
                  verbose=False,
                  use_different_random_states=True, 
                  score_function=metrics.roc_auc_score,
                  score_function_params={}):
        """
        Return the oofs predictions and the meta features (test predictions)
        """
        
        test_predictions = 0
        oof_predictions = np.zeros_like(np.array(y), dtype=np.float64)
        valid_mean_score = [] 
        for fold, (train_ix, valid_ix) in enumerate(folds_idxs): # we are not using spilit here for a better generalization
            X_train, X_valid = X[train_ix], X[valid_ix]
            y_train, y_valid = y[train_ix], y[valid_ix]
                             
            # transform input
            if transformer is not None:
                X_train = transformer.fit_transform(X_train)
                if fit_transform_on_test_set:
                    X_valid = transformer.fit_transform(X_valid)
                    X_test_ = transformer.fit_transform(X_test)
                else:
                    X_test_ = transformer.transform(X_test)
                    X_valid = transformer.transform(X_valid)
            else:
                X_test_ = X_test
                
            # check if we train each fold on differently initialized clone
            if use_different_random_states:
                model = self.model.clone_me(random_state=fold)
            else:
                model = self.model.clone_me()
            
            # fit the model
            if model.uses_eval_set:
                model.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_valid, y_valid)])
            else:
                model.fit(X_train, y_train)
                
            ## predictions
            # on the validation set
            valid_predications = model.predict_proba(X_valid)[:, -1]
#             print(valid_predications[1:10, -1])
            score = score_function(y_valid, valid_predications)
            valid_mean_score.append(score)
            #for i, x in enumerate(valid_ix):                
            oof_predictions[valid_ix] = valid_predications
#             print(valid_predications[valid_ix[1:10]])
            
            # on the test set        
            test_predictions += model.predict_proba(X_test_)[:, -1] / len(folds_idxs)
            
            if verbose:
                print('Fold:{} score:{:.4f}'.format(fold + 1, score))
        
        if verbose:
            print('Average score:{:.4f} ({:.4f})'.format(np.mean(valid_mean_score), np.std(valid_mean_score) ))
    
        return oof_predictions, test_predictions

### Level
The level class glues all components in a given layer

In [15]:
class Level():
    def __init__(self,
                level_id,
                models,
                folder,
                transformer,
                n_folds=5,
                seeds=[42],
                frozen=False,
                fit_transform_on_test_set = False,
                use_different_random_states=True):
        
        self.level_id = level_id
        self.models = models
        self.folder = folder
        self.transformer = transformer
        self.n_folds = n_folds
        self.seeds = seeds
        self.frozen = frozen=False
        self.fit_transform_on_test_set = fit_transform_on_test_set
        self.use_different_random_states = use_different_random_states
    
    def create(self, model_zoo):
        """
         Create a level.
         model_zoo: a dictionay of all avialable models.
        """
        self.model_wrappers = []
        
        # get models 
        # if models is set to 'all' use all models
        if self.models[0].lower() == 'all':
            level_models_names = model_zoo.keys()
        else: 
            level_models_names = self.models

        for model_name in level_models_names:
            # get paramaters  
            model = model_zoo[model_name]
            fit_kwargs = model_zoo[model_name]['fit_kwargs']
            app_params = model_zoo[model_name]['app_params']

            model_wrapper = ModelWrapper(model=model['model'], name=model_name)
            if fit_kwargs is not None:
                model_wrapper.fit_params = fit_kwargs
            if app_params is not None:
                model_wrapper.uses_eval_set = app_params['uses_eval_set']
            self.model_wrappers.append(model_wrapper)

#### Level Trainer
Trains all models in a given level.

In [16]:
class LevelTrainer():
    def __init__(self,
                level,
                seeds_folds_idxs_list):
        self.level = level
        self.seeds_folds_idxs_list = seeds_folds_idxs_list
        
    def train(self, X_train, y, X_test, verbose=True, agg_func=None):
        """
        train the level and return the oofs and meta-features for each model in the level.
        If the level has many seeds it will either use the agg_func to combine predictions
        or will just return eveything, it depends on agg_func
        
        agg_func: can be None, np.mean, or any other numpy reduction function
        """

        level_oof_preds, level_test_preds = {}, {}
        for model_wrapper in self.level.model_wrappers:
            if verbose:
                print('-'*30)
                print(f'Model:{model_wrapper.name}')
                print('-'*30)

            # train each model with as many times as the length of folds_idxs_list 
            model_oof_preds, model_test_preds = [], []
            
            for seeds_folds_idxs in self.seeds_folds_idxs_list:
                seed, folds_idxs = seeds_folds_idxs['seed'], seeds_folds_idxs['idxs']
                print('-'*30)
                print(f'Seed:{seed}')
                print('-'*30)
                
                trainer = ModelTrainer(model_wrapper)
                oof_preds, test_preds = trainer.cross_validate(X_train, 
                                                          y,
                                                          X_test,
                                                          transformer=self.level.transformer,
                                                          folds_idxs=folds_idxs,
                                                          verbose=verbose,
                                                          fit_transform_on_test_set=self.level.fit_transform_on_test_set)
                if agg_func is None:
                    level_oof_preds[f'{model_wrapper.name}_seed_{seed}'] =  oof_preds
                    level_test_preds[f'{model_wrapper.name}_seed_{seed}'] =  test_preds
                else: # collect them in order to aggregate them with the agg_func function
                    model_oof_preds.append(oof_preds)
                    model_test_preds.append(test_preds)

          # aggregate the results
        if agg_func is not None:
            level_oof_preds[f'{model_wrapper.name}'] = agg_func(np.column_stack(model_oof_preds))
            level_test_preds[f'{model_wrapper.name}'] = agg_func(np.column_stack(model_test_preds))

        if verbose:
            print('-'*30)

        return pd.DataFrame(level_oof_preds), pd.DataFrame(level_test_preds)

### Experiment 
Since in many cases everything boils down to stacking, the experiment class will handle the organization of the resulted files from the test: test and oofs predictions. Therefore, assuming the project has the following structure with a folder called **experiments** we can save our tests in this folder. This is what this class will do. This class is the entry point for any run (experiment) in the project. It reads the input and the settings and produces the output.

```
    ML30_project
    │   README.md
    │
    └───notebooks
    │   ...
    │
    └───experiments
    │   │   
    │   │
    │   └───experiment_1   
    │   │   level_1_oofs.csv
    │   │   level_1_test.csv
    │   │   level_2_oofs.csv
    │   │   level_2_test.csv
    │   │   ...
    │   │   meta_level_oofs.csv
    │   │   meta_level_test.csv
    │   └───experiment_...
```


>The code that generated the results is important to save too, but that can be done easily by creating a new version of the notebook or copying notebook with the CV_LB results. If we are running it in a local machine without notebooks, we can create a small function to copy the code files to the experiment levels. On other words, to save the code and the results for each experiement for a better look up.

>This class is so important when running notebooks in our computers. Since Kaggle has a nice notebook management system it saves outputs as well.



In [17]:
class Experiment():
    def __init__(self,
                 title,
                 description,
                 stack,
                 model_zoo,
                 main_folder=os.getcwd()):
        
        self.title = title
        self.main_folder = main_folder
        self.stack = stack
        self.model_zoo = model_zoo
        self.description = description
        # create the main folder if it does not exist
        if not os.path.exists(f'{self.main_folder}'):
            os.makedirs(f'{self.main_folder}', exist_ok=True)
        
    def join_folder(self, folder=None):
         """
         Join a folder and output where results will be saved.
         If 'folder' is None, it will create a folder
         with a time stamp.
         """

         # create time stamp and subfolder with the current time stamp
         if folder is not None: # if folder is specified
            self.output_folder = folder 
            # create a folder if does not exit.
            folder_path = f'{self.main_folder}{os.sep}{self.output_folder}'
            if not os.path.exists(folder_path):
                os.makedirs(folder_path)                
         else: # create a folder with the time stamp
            time_stamp = datetime.now().isoformat(' ', 'seconds')
            self.output_folder = self.title + ' ' + time_stamp.replace(':', '-')
            # create and replace if it exits.
            Path(f'{self.main_folder}{os.sep}{self.output_folder}').mkdir(parents=True, exist_ok=True)
    
    
    def run(self, X_train, y, X_test, 
            train_idxs,
            test_idxs,
            verbose=True, store=True):
        
        # run the stack
        for  level_params in self.stack:
            # create all models in the level
            level = Level(**level_params)
            level.create(self.model_zoo)

            print('-'*50)
            print(f'Current Level: {level.level_id}')
            print('-'*50)

            # join the level's output folder
            #self.join_folder(folder=level.folder)

            # create folds indexes for the level
            seeds_folds_idxs_list = calc_folds_indexes(X=X_train,
                                                       y=y,
                                                       n_folds=level.n_folds,
                                                       sampler=StratifiedKFold,
                                                       seeds=level.seeds)

            # train the level
            if not level.frozen:  # escape any trained level   
                level_trainer = LevelTrainer(level=level, 
                                             seeds_folds_idxs_list=seeds_folds_idxs_list)

                level_oof_preds, level_test_preds =  level_trainer.train(X_train=X_train,
                                                                         y=y,
                                                                         X_test=X_test)
                # store predictions?
                if store:
                    # oofs 
                    level_oof_preds.to_csv(f'{self.main_folder}{os.sep}{self.output_folder}{os.sep}{level.level_id}_oofs.csv')
                    # test predictions
                    level_test_preds.to_csv(f'{self.main_folder}{os.sep}{self.output_folder}{os.sep}{level.level_id}_test.csv')
                    
                
                # update train and test 
                X_train, X_test = level_oof_preds.values, level_test_preds.values
            else:
                print('This level is already trained')
                # load saved of this level and raise error
                fold_id = level.n_folds
                folder =f'{self.main_folder}{os.sep}{self.output_folder}'
                
                # new features 
                level_oof_preds = pd.read_csv(f"{folder}{os.sep}*{fold_id}_oofs.csv")
                level_test_preds = pdf.read_csv(f"{folder}{os.sep}*{fold_id}_test.csv")
                
                X_train = level_oof_preds.values
                X_test = level_test_preds.values
                
            if verbose:
                display(level_oof_preds.head(10))
                display(level_test_preds.head(10))
                
        # return the last output from the last level
        return level_test_preds

### Hyperparameters

Here goes the paramaters of each model. These can actually be stored in an external JSON file.


In [18]:
# rf
rf_params = {
    'n_jobs': -1,
    'n_estimators': 100,
    'max_features': 0.2,
    'max_depth': 8,
    'min_samples_leaf': 2
}

hgb_params = {
    'max_bins': 255,
    'max_depth': 31,
    'max_leaf_nodes': 185
}

hgb_params = {'l2_regularization': 0.2734400901671028,
              'learning_rate': 0.4284455920281809, 
              'max_depth': 30}

# lgbm
# https://www.kaggle.com/hiro5299834/tps-sep-2021-single-lgbm
lgb_params_1 = {
    'device' : 'gpu',
    'objective': 'binary',
    'metric': 'auc',
    'n_estimators': 20000,
    'random_state': 42,
    'learning_rate': 5e-3,
    'subsample': 0.6,
    'subsample_freq': 1,
    'colsample_bytree': 0.4,
    'reg_alpha': 10.0,
    'reg_lambda': 1e-1,
    'min_child_weight': 256,
    'min_child_samples': 20,
}

lgb_params_2 = {
    'metric' : 'auc',
    'objective' : 'binary',
    'device_type': 'gpu', 
    'n_estimators': 10000, 
    'learning_rate': 0.12230165751633416, 
    'num_leaves': 1400, 
    'max_depth': 8, 
    'min_child_samples': 300, 
    'reg_alpha': 10, 
    'reg_lambda': 65, 
    'min_split_gain': 5.157818977461183, 
    'subsample': 0.5, 
    'subsample_freq': 1, 
    'colsample_bytree': 0.2,
    'random_state': 42
}


catb_params = {
    'eval_metric' : 'AUC',
    'iterations': 15585, 
    'objective': 'CrossEntropy',
    'bootstrap_type': 'Bernoulli', 
    'od_wait': 1144, 
    'learning_rate': 0.023575206684596582, 
    'reg_lambda': 36.30433203563295, 
    'random_strength': 43.75597655616195, 
    'depth': 7, 
    'min_data_in_leaf': 11, 
    'leaf_estimation_iterations': 1, 
    'subsample': 0.8227911142845009,
    'task_type' : 'GPU',
    'devices' : '0',
    'verbose' : 0
}

xgb_params = {
    'eval_metric': 'auc', 
    'objective': 'binary:logistic', 
    'tree_method': 'gpu_hist', 
    'gpu_id': 0, 
    'predictor': 'gpu_predictor', 
    'n_estimators': 20000, 
    'learning_rate': 0.01063045229441343, 
    'gamma': 0.24652519525750877, 
    'max_depth': 4, 
    'min_child_weight': 366, 
    'subsample': 0.6423040816299684, 
    'colsample_bytree': 0.7751264493218339, 
    'colsample_bylevel': 0.8675692743597421, 
    'lambda': 0, 
    'alpha': 10
}

In [19]:
gc.collect()

42

### These are model/task dependent parameters

In [20]:
# external hyperparamaters

### fit function hyperparamaters
# some models require special paramaters like early stoping in xgboost and lgbm
fit_params = {'early_stopping_rounds': 300,
                  'verbose': 1000}

### application/implementation paramaters
# These paramaters are implementation dependent 
app_params = {'uses_eval_set':True}



### Models

In [21]:
lr = LogisticRegression()
rf = RandomForestClassifier(**rf_params)
hgb = HistGradientBoostingClassifier()
lgb_1 = LGBMClassifier(**lgb_params_1)
lgb_2 = LGBMClassifier(**lgb_params_2)
xgb = XGBClassifier(**xgb_params)
catb = CatBoostClassifier(**catb_params)

In [22]:
# compile all settings in one dictionary, 
# we can store/load it then to a JSON file
model_zoo = {
          'RandomForestClassifier': {"model": rf, "fit_kwargs":None, "app_params": None},
          'LogisticRegression': {"model": lr, "fit_kwargs":None, "app_params": None},
          'HistGradientBoostingClassifier': {"model": hgb, "fit_kwargs":None, "app_params": None},
          'LGBMClassifier-1': {"model": lgb_1, "fit_kwargs":fit_params, "app_params": app_params},
          'LGBMClassifier-2': {"model": lgb_2, "fit_kwargs":fit_params, "app_params": app_params},
          'XGBClassifier': {"model": xgb, "fit_kwargs":fit_params, "app_params": app_params},
          'CatBoostClassifier': {"model": catb, "fit_kwargs":fit_params, "app_params": app_params},
          # we can add any number of models here 
        }

In [23]:
model_zoo.keys()

dict_keys(['RandomForestClassifier', 'LogisticRegression', 'HistGradientBoostingClassifier', 'LGBMClassifier-1', 'LGBMClassifier-2', 'XGBClassifier', 'CatBoostClassifier'])

### Stacking

Here goes the actual stacking procedure. 
   - We first define the architecture, and setup the a session.
   - Define the stack. That is, the models and transformers in the levels

In [24]:
# settings: experiment and stacking architecutre

# initialize the stack to the input
X_train_, X_test_ = X_train, X_test

# any special transformers for any level
#
pipe_line = Pipeline([
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler',  RobustScaler())
])

level_1_transformers = [('num', RobustScaler(), numerical_ix)]

level_1_transform = ColumnTransformer(transformers=level_1_transformers)


# define the actual stack
stack = [ {"level_id": "level-1", 
           "models": [
                     'XGBClassifier'
                    ],
            "n_folds": 5,
            "seeds" : [42, 43, 44, 45, 46, 47, 48, 49, 50],
            "folder": "level_1", 
            "transformer": level_1_transform,
            "fit_transform_on_test_set": False,
            "frozen": False # to freeze the level if already trained
            },
            
#          {"level_id": "level-2",
#            "models": [
#                      'CatBoostRegressor',
#                      'XGBRegressor-1',
#                      #'XGBRegressor-2',
#                      'RandomForestRegressor'
#                      #'LinearRegression',
#                      # we can add any model here
#                     ],
#             "n_folds": 5,
#             "seeds" : [46, 47],
#             "folder": "level_2", 
#             "transformer": None, 
#             "frozen": False # to freeze the level if already trained
#             },
         
         # ...
         # we can add any number of levels here
         # ...
         
          {"level_id": "meta_level",
            "models": [#'LinearRegression',
                       'RandomForestClassifier'
                      ],
            "n_folds": 5,
            "seeds" : [42],
            "folder": "meta_level",
            "transformer": None,
            "fit_transform_on_test_set": False,
            "frozen": False
          }
         
         
        ]
         

- Loop through each level in the stack

In [25]:
# create experiment
experiments_folder = "Experiments"
experiment_folder = 'experiement_1' # if None a folder with time stamp will be created
experiment_description = "Simple model, multiple seeds"


ml30_experiment = Experiment(title='ML 30 days',
                             description=experiment_description,
                             stack=stack,
                             model_zoo=model_zoo,
                             main_folder=f'{os.getcwd()}{os.sep}{experiments_folder}')

ml30_experiment.join_folder(experiment_folder)

results = ml30_experiment.run(X_train=X_train_.values,
                     y=y.values, 
                     X_test=X_test_.values,
                     train_idxs = X_train_.index,
                     test_idxs = X_test_.index)



--------------------------------------------------
Current Level: level-1
--------------------------------------------------
------------------------------
Model:XGBClassifier
------------------------------
------------------------------
Seed:42
------------------------------




[0]	validation_0-auc:0.79922	validation_1-auc:0.79867
[1000]	validation_0-auc:0.81754	validation_1-auc:0.81093
[2000]	validation_0-auc:0.82187	validation_1-auc:0.81319
[3000]	validation_0-auc:0.82476	validation_1-auc:0.81430
[4000]	validation_0-auc:0.82720	validation_1-auc:0.81489
[5000]	validation_0-auc:0.82953	validation_1-auc:0.81528
[6000]	validation_0-auc:0.83184	validation_1-auc:0.81558
[7000]	validation_0-auc:0.83416	validation_1-auc:0.81576
[8000]	validation_0-auc:0.83647	validation_1-auc:0.81590
[9000]	validation_0-auc:0.83869	validation_1-auc:0.81602
[10000]	validation_0-auc:0.84090	validation_1-auc:0.81612
[10933]	validation_0-auc:0.84293	validation_1-auc:0.81616
Fold:1 score:0.8162




[0]	validation_0-auc:0.79869	validation_1-auc:0.79876
[1000]	validation_0-auc:0.81749	validation_1-auc:0.81311
[2000]	validation_0-auc:0.82183	validation_1-auc:0.81495
[3000]	validation_0-auc:0.82470	validation_1-auc:0.81577
[4000]	validation_0-auc:0.82725	validation_1-auc:0.81616
[5000]	validation_0-auc:0.82968	validation_1-auc:0.81647
[6000]	validation_0-auc:0.83204	validation_1-auc:0.81672
[7000]	validation_0-auc:0.83437	validation_1-auc:0.81687
[8000]	validation_0-auc:0.83664	validation_1-auc:0.81699
[9000]	validation_0-auc:0.83887	validation_1-auc:0.81711
[9590]	validation_0-auc:0.84018	validation_1-auc:0.81713
Fold:2 score:0.8171




[0]	validation_0-auc:0.79897	validation_1-auc:0.79814
[1000]	validation_0-auc:0.81778	validation_1-auc:0.81141
[2000]	validation_0-auc:0.82206	validation_1-auc:0.81340
[3000]	validation_0-auc:0.82489	validation_1-auc:0.81431
[4000]	validation_0-auc:0.82733	validation_1-auc:0.81492
[5000]	validation_0-auc:0.82975	validation_1-auc:0.81533
[6000]	validation_0-auc:0.83215	validation_1-auc:0.81559
[7000]	validation_0-auc:0.83449	validation_1-auc:0.81576
[8000]	validation_0-auc:0.83676	validation_1-auc:0.81590
[9000]	validation_0-auc:0.83900	validation_1-auc:0.81605
[10000]	validation_0-auc:0.84116	validation_1-auc:0.81614
[11000]	validation_0-auc:0.84333	validation_1-auc:0.81621
[11462]	validation_0-auc:0.84431	validation_1-auc:0.81622
Fold:3 score:0.8162




[0]	validation_0-auc:0.79892	validation_1-auc:0.79842
[1000]	validation_0-auc:0.81774	validation_1-auc:0.81142
[2000]	validation_0-auc:0.82202	validation_1-auc:0.81336
[3000]	validation_0-auc:0.82498	validation_1-auc:0.81430
[4000]	validation_0-auc:0.82749	validation_1-auc:0.81476
[5000]	validation_0-auc:0.82988	validation_1-auc:0.81511
[6000]	validation_0-auc:0.83221	validation_1-auc:0.81538
[7000]	validation_0-auc:0.83453	validation_1-auc:0.81551
[8000]	validation_0-auc:0.83678	validation_1-auc:0.81564
[9000]	validation_0-auc:0.83905	validation_1-auc:0.81571
[10000]	validation_0-auc:0.84123	validation_1-auc:0.81578
[11000]	validation_0-auc:0.84340	validation_1-auc:0.81584
[11599]	validation_0-auc:0.84468	validation_1-auc:0.81586
Fold:4 score:0.8159




[0]	validation_0-auc:0.79751	validation_1-auc:0.79691
[1000]	validation_0-auc:0.81781	validation_1-auc:0.81075
[2000]	validation_0-auc:0.82224	validation_1-auc:0.81268
[3000]	validation_0-auc:0.82510	validation_1-auc:0.81362
[4000]	validation_0-auc:0.82752	validation_1-auc:0.81417
[5000]	validation_0-auc:0.82995	validation_1-auc:0.81452
[6000]	validation_0-auc:0.83225	validation_1-auc:0.81480
[7000]	validation_0-auc:0.83456	validation_1-auc:0.81502
[8000]	validation_0-auc:0.83679	validation_1-auc:0.81518
[9000]	validation_0-auc:0.83903	validation_1-auc:0.81532
[10000]	validation_0-auc:0.84120	validation_1-auc:0.81540
[10906]	validation_0-auc:0.84314	validation_1-auc:0.81544
Fold:5 score:0.8154
Average score:0.8162 (0.0006)
------------------------------
Seed:43
------------------------------




[0]	validation_0-auc:0.79831	validation_1-auc:0.79689
[1000]	validation_0-auc:0.81789	validation_1-auc:0.80987
[2000]	validation_0-auc:0.82224	validation_1-auc:0.81198
[3000]	validation_0-auc:0.82499	validation_1-auc:0.81291
[4000]	validation_0-auc:0.82751	validation_1-auc:0.81354
[5000]	validation_0-auc:0.82988	validation_1-auc:0.81393
[6000]	validation_0-auc:0.83220	validation_1-auc:0.81420
[7000]	validation_0-auc:0.83452	validation_1-auc:0.81439
[8000]	validation_0-auc:0.83680	validation_1-auc:0.81458
[9000]	validation_0-auc:0.83901	validation_1-auc:0.81469
[10000]	validation_0-auc:0.84122	validation_1-auc:0.81475
[11000]	validation_0-auc:0.84336	validation_1-auc:0.81483
[11459]	validation_0-auc:0.84433	validation_1-auc:0.81484
Fold:1 score:0.8148




[0]	validation_0-auc:0.79910	validation_1-auc:0.80015
[1000]	validation_0-auc:0.81739	validation_1-auc:0.81316
[2000]	validation_0-auc:0.82155	validation_1-auc:0.81518
[3000]	validation_0-auc:0.82442	validation_1-auc:0.81617
[4000]	validation_0-auc:0.82694	validation_1-auc:0.81675
[5000]	validation_0-auc:0.82933	validation_1-auc:0.81715
[6000]	validation_0-auc:0.83169	validation_1-auc:0.81747
[7000]	validation_0-auc:0.83398	validation_1-auc:0.81762
[8000]	validation_0-auc:0.83631	validation_1-auc:0.81778
[9000]	validation_0-auc:0.83856	validation_1-auc:0.81787
[10000]	validation_0-auc:0.84076	validation_1-auc:0.81799
[11000]	validation_0-auc:0.84292	validation_1-auc:0.81805
[11256]	validation_0-auc:0.84347	validation_1-auc:0.81805
Fold:2 score:0.8181




[0]	validation_0-auc:0.79857	validation_1-auc:0.79872
[1000]	validation_0-auc:0.81763	validation_1-auc:0.81176
[2000]	validation_0-auc:0.82189	validation_1-auc:0.81366
[3000]	validation_0-auc:0.82479	validation_1-auc:0.81462
[4000]	validation_0-auc:0.82730	validation_1-auc:0.81523
[5000]	validation_0-auc:0.82969	validation_1-auc:0.81559
[6000]	validation_0-auc:0.83205	validation_1-auc:0.81583
[7000]	validation_0-auc:0.83440	validation_1-auc:0.81600
[8000]	validation_0-auc:0.83668	validation_1-auc:0.81615
[9000]	validation_0-auc:0.83889	validation_1-auc:0.81624
[9579]	validation_0-auc:0.84021	validation_1-auc:0.81624
Fold:3 score:0.8163




[0]	validation_0-auc:0.79888	validation_1-auc:0.79688
[1000]	validation_0-auc:0.81799	validation_1-auc:0.80964
[2000]	validation_0-auc:0.82237	validation_1-auc:0.81176
[3000]	validation_0-auc:0.82524	validation_1-auc:0.81268
[4000]	validation_0-auc:0.82782	validation_1-auc:0.81333
[5000]	validation_0-auc:0.83015	validation_1-auc:0.81379
[6000]	validation_0-auc:0.83249	validation_1-auc:0.81409
[7000]	validation_0-auc:0.83474	validation_1-auc:0.81425
[8000]	validation_0-auc:0.83698	validation_1-auc:0.81442
[9000]	validation_0-auc:0.83915	validation_1-auc:0.81454
[10000]	validation_0-auc:0.84136	validation_1-auc:0.81465
[11000]	validation_0-auc:0.84352	validation_1-auc:0.81468
[12000]	validation_0-auc:0.84561	validation_1-auc:0.81474
[13000]	validation_0-auc:0.84767	validation_1-auc:0.81479
[13082]	validation_0-auc:0.84782	validation_1-auc:0.81478
Fold:4 score:0.8148




[0]	validation_0-auc:0.79775	validation_1-auc:0.79926
[1000]	validation_0-auc:0.81758	validation_1-auc:0.81208
[2000]	validation_0-auc:0.82198	validation_1-auc:0.81404
[3000]	validation_0-auc:0.82488	validation_1-auc:0.81492
[4000]	validation_0-auc:0.82732	validation_1-auc:0.81547
[5000]	validation_0-auc:0.82967	validation_1-auc:0.81578
[6000]	validation_0-auc:0.83199	validation_1-auc:0.81606
[7000]	validation_0-auc:0.83429	validation_1-auc:0.81618
[8000]	validation_0-auc:0.83658	validation_1-auc:0.81630
[8559]	validation_0-auc:0.83781	validation_1-auc:0.81632
Fold:5 score:0.8163
Average score:0.8161 (0.0012)
------------------------------
Seed:44
------------------------------




[0]	validation_0-auc:0.79924	validation_1-auc:0.79777
[1000]	validation_0-auc:0.81749	validation_1-auc:0.81072
[2000]	validation_0-auc:0.82196	validation_1-auc:0.81310
[3000]	validation_0-auc:0.82478	validation_1-auc:0.81408
[4000]	validation_0-auc:0.82726	validation_1-auc:0.81468
[5000]	validation_0-auc:0.82961	validation_1-auc:0.81503
[6000]	validation_0-auc:0.83200	validation_1-auc:0.81533
[7000]	validation_0-auc:0.83434	validation_1-auc:0.81554
[8000]	validation_0-auc:0.83663	validation_1-auc:0.81569
[9000]	validation_0-auc:0.83882	validation_1-auc:0.81581
[10000]	validation_0-auc:0.84099	validation_1-auc:0.81590
[11000]	validation_0-auc:0.84318	validation_1-auc:0.81596
[11637]	validation_0-auc:0.84455	validation_1-auc:0.81597
Fold:1 score:0.8160




[0]	validation_0-auc:0.79813	validation_1-auc:0.79924
[1000]	validation_0-auc:0.81740	validation_1-auc:0.81315
[2000]	validation_0-auc:0.82167	validation_1-auc:0.81504
[3000]	validation_0-auc:0.82452	validation_1-auc:0.81600
[4000]	validation_0-auc:0.82709	validation_1-auc:0.81654
[5000]	validation_0-auc:0.82945	validation_1-auc:0.81690
[6000]	validation_0-auc:0.83177	validation_1-auc:0.81713
[7000]	validation_0-auc:0.83412	validation_1-auc:0.81733
[8000]	validation_0-auc:0.83640	validation_1-auc:0.81746
[9000]	validation_0-auc:0.83863	validation_1-auc:0.81753
[10000]	validation_0-auc:0.84083	validation_1-auc:0.81760
[10981]	validation_0-auc:0.84295	validation_1-auc:0.81760
Fold:2 score:0.8176




[0]	validation_0-auc:0.79931	validation_1-auc:0.79845
[1000]	validation_0-auc:0.81794	validation_1-auc:0.81061
[2000]	validation_0-auc:0.82225	validation_1-auc:0.81265
[3000]	validation_0-auc:0.82511	validation_1-auc:0.81354
[4000]	validation_0-auc:0.82758	validation_1-auc:0.81407
[5000]	validation_0-auc:0.83000	validation_1-auc:0.81451
[6000]	validation_0-auc:0.83231	validation_1-auc:0.81478
[7000]	validation_0-auc:0.83459	validation_1-auc:0.81501
[8000]	validation_0-auc:0.83687	validation_1-auc:0.81517
[9000]	validation_0-auc:0.83914	validation_1-auc:0.81530
[10000]	validation_0-auc:0.84131	validation_1-auc:0.81541
[11000]	validation_0-auc:0.84345	validation_1-auc:0.81549
[11324]	validation_0-auc:0.84414	validation_1-auc:0.81549
Fold:3 score:0.8155




[0]	validation_0-auc:0.79871	validation_1-auc:0.79837
[1000]	validation_0-auc:0.81777	validation_1-auc:0.81149
[2000]	validation_0-auc:0.82211	validation_1-auc:0.81347
[3000]	validation_0-auc:0.82493	validation_1-auc:0.81440
[4000]	validation_0-auc:0.82744	validation_1-auc:0.81502
[5000]	validation_0-auc:0.82983	validation_1-auc:0.81539
[6000]	validation_0-auc:0.83215	validation_1-auc:0.81567
[7000]	validation_0-auc:0.83444	validation_1-auc:0.81586
[8000]	validation_0-auc:0.83669	validation_1-auc:0.81599
[8876]	validation_0-auc:0.83861	validation_1-auc:0.81606
Fold:4 score:0.8161




[0]	validation_0-auc:0.79791	validation_1-auc:0.79770
[1000]	validation_0-auc:0.81765	validation_1-auc:0.81134
[2000]	validation_0-auc:0.82193	validation_1-auc:0.81337
[3000]	validation_0-auc:0.82478	validation_1-auc:0.81429
[4000]	validation_0-auc:0.82730	validation_1-auc:0.81487
[5000]	validation_0-auc:0.82969	validation_1-auc:0.81522
[6000]	validation_0-auc:0.83203	validation_1-auc:0.81549
[7000]	validation_0-auc:0.83435	validation_1-auc:0.81566
[8000]	validation_0-auc:0.83666	validation_1-auc:0.81584
[8397]	validation_0-auc:0.83753	validation_1-auc:0.81584
Fold:5 score:0.8158
Average score:0.8162 (0.0007)
------------------------------
Seed:45
------------------------------




[0]	validation_0-auc:0.79822	validation_1-auc:0.79869
[1000]	validation_0-auc:0.81743	validation_1-auc:0.81225
[2000]	validation_0-auc:0.82186	validation_1-auc:0.81418
[3000]	validation_0-auc:0.82476	validation_1-auc:0.81497
[4000]	validation_0-auc:0.82728	validation_1-auc:0.81545
[5000]	validation_0-auc:0.82961	validation_1-auc:0.81574
[6000]	validation_0-auc:0.83198	validation_1-auc:0.81597
[7000]	validation_0-auc:0.83429	validation_1-auc:0.81616
[8000]	validation_0-auc:0.83653	validation_1-auc:0.81628
[9000]	validation_0-auc:0.83874	validation_1-auc:0.81639
[9629]	validation_0-auc:0.84014	validation_1-auc:0.81641
Fold:1 score:0.8164




[0]	validation_0-auc:0.79882	validation_1-auc:0.79889
[1000]	validation_0-auc:0.81786	validation_1-auc:0.81168
[2000]	validation_0-auc:0.82208	validation_1-auc:0.81368
[3000]	validation_0-auc:0.82494	validation_1-auc:0.81459
[4000]	validation_0-auc:0.82737	validation_1-auc:0.81511
[5000]	validation_0-auc:0.82975	validation_1-auc:0.81547
[6000]	validation_0-auc:0.83211	validation_1-auc:0.81571
[7000]	validation_0-auc:0.83447	validation_1-auc:0.81586
[8000]	validation_0-auc:0.83672	validation_1-auc:0.81604
[9000]	validation_0-auc:0.83891	validation_1-auc:0.81612
[10000]	validation_0-auc:0.84109	validation_1-auc:0.81615
[10524]	validation_0-auc:0.84223	validation_1-auc:0.81617
Fold:2 score:0.8162




[0]	validation_0-auc:0.79890	validation_1-auc:0.79872
[1000]	validation_0-auc:0.81746	validation_1-auc:0.81180
[2000]	validation_0-auc:0.82179	validation_1-auc:0.81399
[3000]	validation_0-auc:0.82465	validation_1-auc:0.81505
[4000]	validation_0-auc:0.82716	validation_1-auc:0.81563
[5000]	validation_0-auc:0.82950	validation_1-auc:0.81601
[6000]	validation_0-auc:0.83184	validation_1-auc:0.81629
[7000]	validation_0-auc:0.83415	validation_1-auc:0.81646
[8000]	validation_0-auc:0.83640	validation_1-auc:0.81659
[9000]	validation_0-auc:0.83865	validation_1-auc:0.81669
[9451]	validation_0-auc:0.83965	validation_1-auc:0.81671
Fold:3 score:0.8167




[0]	validation_0-auc:0.79902	validation_1-auc:0.79807
[1000]	validation_0-auc:0.81797	validation_1-auc:0.81058
[2000]	validation_0-auc:0.82217	validation_1-auc:0.81272
[3000]	validation_0-auc:0.82503	validation_1-auc:0.81373
[4000]	validation_0-auc:0.82756	validation_1-auc:0.81433
[5000]	validation_0-auc:0.82992	validation_1-auc:0.81474
[6000]	validation_0-auc:0.83224	validation_1-auc:0.81502
[7000]	validation_0-auc:0.83460	validation_1-auc:0.81521
[8000]	validation_0-auc:0.83689	validation_1-auc:0.81532
[9000]	validation_0-auc:0.83913	validation_1-auc:0.81543
[10000]	validation_0-auc:0.84133	validation_1-auc:0.81550
[10226]	validation_0-auc:0.84180	validation_1-auc:0.81550
Fold:4 score:0.8155




[0]	validation_0-auc:0.79744	validation_1-auc:0.79718
[1000]	validation_0-auc:0.81762	validation_1-auc:0.81116
[2000]	validation_0-auc:0.82207	validation_1-auc:0.81305
[3000]	validation_0-auc:0.82491	validation_1-auc:0.81397
[4000]	validation_0-auc:0.82744	validation_1-auc:0.81454
[5000]	validation_0-auc:0.82979	validation_1-auc:0.81490
[6000]	validation_0-auc:0.83218	validation_1-auc:0.81514
[7000]	validation_0-auc:0.83451	validation_1-auc:0.81529
[8000]	validation_0-auc:0.83679	validation_1-auc:0.81539
[9000]	validation_0-auc:0.83902	validation_1-auc:0.81550
[10000]	validation_0-auc:0.84119	validation_1-auc:0.81557
[10950]	validation_0-auc:0.84324	validation_1-auc:0.81559
Fold:5 score:0.8156
Average score:0.8161 (0.0005)
------------------------------
Seed:46
------------------------------




[0]	validation_0-auc:0.79893	validation_1-auc:0.80034
[1000]	validation_0-auc:0.81716	validation_1-auc:0.81227
[2000]	validation_0-auc:0.82153	validation_1-auc:0.81455
[3000]	validation_0-auc:0.82444	validation_1-auc:0.81558
[4000]	validation_0-auc:0.82701	validation_1-auc:0.81622
[5000]	validation_0-auc:0.82936	validation_1-auc:0.81662
[6000]	validation_0-auc:0.83170	validation_1-auc:0.81684
[7000]	validation_0-auc:0.83399	validation_1-auc:0.81701
[8000]	validation_0-auc:0.83626	validation_1-auc:0.81713
[9000]	validation_0-auc:0.83846	validation_1-auc:0.81725
[10000]	validation_0-auc:0.84068	validation_1-auc:0.81735
[11000]	validation_0-auc:0.84278	validation_1-auc:0.81742
[12000]	validation_0-auc:0.84487	validation_1-auc:0.81745
[12009]	validation_0-auc:0.84489	validation_1-auc:0.81745
Fold:1 score:0.8175




[0]	validation_0-auc:0.79966	validation_1-auc:0.79743
[1000]	validation_0-auc:0.81804	validation_1-auc:0.81016
[2000]	validation_0-auc:0.82228	validation_1-auc:0.81218
[3000]	validation_0-auc:0.82514	validation_1-auc:0.81313
[4000]	validation_0-auc:0.82759	validation_1-auc:0.81367
[5000]	validation_0-auc:0.82989	validation_1-auc:0.81405
[6000]	validation_0-auc:0.83223	validation_1-auc:0.81429
[7000]	validation_0-auc:0.83459	validation_1-auc:0.81448
[8000]	validation_0-auc:0.83686	validation_1-auc:0.81457
[9000]	validation_0-auc:0.83904	validation_1-auc:0.81465
[10000]	validation_0-auc:0.84123	validation_1-auc:0.81473
[11000]	validation_0-auc:0.84340	validation_1-auc:0.81474
[11957]	validation_0-auc:0.84536	validation_1-auc:0.81478
Fold:2 score:0.8148




[0]	validation_0-auc:0.79895	validation_1-auc:0.79823
[1000]	validation_0-auc:0.81771	validation_1-auc:0.81180
[2000]	validation_0-auc:0.82203	validation_1-auc:0.81392
[3000]	validation_0-auc:0.82479	validation_1-auc:0.81486
[4000]	validation_0-auc:0.82726	validation_1-auc:0.81540
[5000]	validation_0-auc:0.82968	validation_1-auc:0.81581
[6000]	validation_0-auc:0.83205	validation_1-auc:0.81604
[7000]	validation_0-auc:0.83435	validation_1-auc:0.81624
[8000]	validation_0-auc:0.83668	validation_1-auc:0.81642
[9000]	validation_0-auc:0.83893	validation_1-auc:0.81652
[10000]	validation_0-auc:0.84115	validation_1-auc:0.81657
[10141]	validation_0-auc:0.84145	validation_1-auc:0.81657
Fold:3 score:0.8166




[0]	validation_0-auc:0.79815	validation_1-auc:0.79767
[1000]	validation_0-auc:0.81781	validation_1-auc:0.81142
[2000]	validation_0-auc:0.82214	validation_1-auc:0.81334
[3000]	validation_0-auc:0.82492	validation_1-auc:0.81419
[4000]	validation_0-auc:0.82740	validation_1-auc:0.81472
[5000]	validation_0-auc:0.82978	validation_1-auc:0.81507
[6000]	validation_0-auc:0.83210	validation_1-auc:0.81533
[7000]	validation_0-auc:0.83441	validation_1-auc:0.81549
[8000]	validation_0-auc:0.83664	validation_1-auc:0.81562
[9000]	validation_0-auc:0.83891	validation_1-auc:0.81576
[10000]	validation_0-auc:0.84108	validation_1-auc:0.81584
[11000]	validation_0-auc:0.84322	validation_1-auc:0.81590
[11919]	validation_0-auc:0.84514	validation_1-auc:0.81595
Fold:4 score:0.8160




[0]	validation_0-auc:0.79761	validation_1-auc:0.79737
[1000]	validation_0-auc:0.81764	validation_1-auc:0.81098
[2000]	validation_0-auc:0.82199	validation_1-auc:0.81309
[3000]	validation_0-auc:0.82488	validation_1-auc:0.81408
[4000]	validation_0-auc:0.82736	validation_1-auc:0.81458
[5000]	validation_0-auc:0.82971	validation_1-auc:0.81500
[6000]	validation_0-auc:0.83202	validation_1-auc:0.81525
[7000]	validation_0-auc:0.83437	validation_1-auc:0.81546
[8000]	validation_0-auc:0.83663	validation_1-auc:0.81564
[9000]	validation_0-auc:0.83887	validation_1-auc:0.81574
[10000]	validation_0-auc:0.84101	validation_1-auc:0.81582
[11000]	validation_0-auc:0.84315	validation_1-auc:0.81586
[11050]	validation_0-auc:0.84325	validation_1-auc:0.81586
Fold:5 score:0.8159
Average score:0.8161 (0.0009)
------------------------------
Seed:47
------------------------------




[0]	validation_0-auc:0.79931	validation_1-auc:0.79797
[1000]	validation_0-auc:0.81774	validation_1-auc:0.81098
[2000]	validation_0-auc:0.82212	validation_1-auc:0.81320
[3000]	validation_0-auc:0.82498	validation_1-auc:0.81412
[4000]	validation_0-auc:0.82744	validation_1-auc:0.81468
[5000]	validation_0-auc:0.82982	validation_1-auc:0.81505
[6000]	validation_0-auc:0.83210	validation_1-auc:0.81531
[7000]	validation_0-auc:0.83433	validation_1-auc:0.81551
[8000]	validation_0-auc:0.83660	validation_1-auc:0.81563
[9000]	validation_0-auc:0.83881	validation_1-auc:0.81577
[10000]	validation_0-auc:0.84104	validation_1-auc:0.81590
[11000]	validation_0-auc:0.84315	validation_1-auc:0.81599
[11844]	validation_0-auc:0.84493	validation_1-auc:0.81603
Fold:1 score:0.8160




[0]	validation_0-auc:0.79950	validation_1-auc:0.79853
[1000]	validation_0-auc:0.81770	validation_1-auc:0.81097
[2000]	validation_0-auc:0.82202	validation_1-auc:0.81294
[3000]	validation_0-auc:0.82490	validation_1-auc:0.81388
[4000]	validation_0-auc:0.82740	validation_1-auc:0.81438
[5000]	validation_0-auc:0.82975	validation_1-auc:0.81472
[6000]	validation_0-auc:0.83210	validation_1-auc:0.81496
[7000]	validation_0-auc:0.83442	validation_1-auc:0.81516
[8000]	validation_0-auc:0.83667	validation_1-auc:0.81527
[9000]	validation_0-auc:0.83886	validation_1-auc:0.81534
[10000]	validation_0-auc:0.84107	validation_1-auc:0.81544
[11000]	validation_0-auc:0.84323	validation_1-auc:0.81551
[12000]	validation_0-auc:0.84535	validation_1-auc:0.81556
[12303]	validation_0-auc:0.84597	validation_1-auc:0.81556
Fold:2 score:0.8156




[0]	validation_0-auc:0.79920	validation_1-auc:0.79886
[1000]	validation_0-auc:0.81787	validation_1-auc:0.81043
[2000]	validation_0-auc:0.82216	validation_1-auc:0.81249
[3000]	validation_0-auc:0.82504	validation_1-auc:0.81347
[4000]	validation_0-auc:0.82753	validation_1-auc:0.81411
[5000]	validation_0-auc:0.82992	validation_1-auc:0.81447
[6000]	validation_0-auc:0.83229	validation_1-auc:0.81481
[7000]	validation_0-auc:0.83463	validation_1-auc:0.81500
[8000]	validation_0-auc:0.83692	validation_1-auc:0.81515
[8798]	validation_0-auc:0.83872	validation_1-auc:0.81519
Fold:3 score:0.8152




[0]	validation_0-auc:0.79827	validation_1-auc:0.79906
[1000]	validation_0-auc:0.81734	validation_1-auc:0.81294
[2000]	validation_0-auc:0.82162	validation_1-auc:0.81487
[3000]	validation_0-auc:0.82442	validation_1-auc:0.81578
[4000]	validation_0-auc:0.82697	validation_1-auc:0.81644
[5000]	validation_0-auc:0.82935	validation_1-auc:0.81688
[6000]	validation_0-auc:0.83172	validation_1-auc:0.81722
[7000]	validation_0-auc:0.83409	validation_1-auc:0.81745
[8000]	validation_0-auc:0.83630	validation_1-auc:0.81755
[9000]	validation_0-auc:0.83854	validation_1-auc:0.81765
[10000]	validation_0-auc:0.84077	validation_1-auc:0.81771
[10390]	validation_0-auc:0.84162	validation_1-auc:0.81770
Fold:4 score:0.8177




[0]	validation_0-auc:0.79746	validation_1-auc:0.79734
[1000]	validation_0-auc:0.81755	validation_1-auc:0.81156
[2000]	validation_0-auc:0.82190	validation_1-auc:0.81354
[3000]	validation_0-auc:0.82475	validation_1-auc:0.81447
[4000]	validation_0-auc:0.82720	validation_1-auc:0.81504
[5000]	validation_0-auc:0.82960	validation_1-auc:0.81537
[6000]	validation_0-auc:0.83195	validation_1-auc:0.81561
[7000]	validation_0-auc:0.83429	validation_1-auc:0.81579
[8000]	validation_0-auc:0.83657	validation_1-auc:0.81590
[9000]	validation_0-auc:0.83882	validation_1-auc:0.81600
[10000]	validation_0-auc:0.84104	validation_1-auc:0.81602
[10533]	validation_0-auc:0.84223	validation_1-auc:0.81603
Fold:5 score:0.8160
Average score:0.8161 (0.0009)
------------------------------
Seed:48
------------------------------




[0]	validation_0-auc:0.79891	validation_1-auc:0.79947
[1000]	validation_0-auc:0.81738	validation_1-auc:0.81210
[2000]	validation_0-auc:0.82175	validation_1-auc:0.81421
[3000]	validation_0-auc:0.82468	validation_1-auc:0.81526
[4000]	validation_0-auc:0.82709	validation_1-auc:0.81586
[5000]	validation_0-auc:0.82949	validation_1-auc:0.81630
[6000]	validation_0-auc:0.83184	validation_1-auc:0.81663
[7000]	validation_0-auc:0.83413	validation_1-auc:0.81683
[8000]	validation_0-auc:0.83644	validation_1-auc:0.81699
[9000]	validation_0-auc:0.83868	validation_1-auc:0.81708
[10000]	validation_0-auc:0.84091	validation_1-auc:0.81718
[11000]	validation_0-auc:0.84306	validation_1-auc:0.81726
[12000]	validation_0-auc:0.84513	validation_1-auc:0.81730
[12448]	validation_0-auc:0.84607	validation_1-auc:0.81731
Fold:1 score:0.8173




[0]	validation_0-auc:0.79925	validation_1-auc:0.80019
[1000]	validation_0-auc:0.81757	validation_1-auc:0.81198
[2000]	validation_0-auc:0.82177	validation_1-auc:0.81395
[3000]	validation_0-auc:0.82463	validation_1-auc:0.81490
[4000]	validation_0-auc:0.82706	validation_1-auc:0.81540
[5000]	validation_0-auc:0.82947	validation_1-auc:0.81585
[6000]	validation_0-auc:0.83181	validation_1-auc:0.81612
[7000]	validation_0-auc:0.83408	validation_1-auc:0.81632
[8000]	validation_0-auc:0.83634	validation_1-auc:0.81643
[9000]	validation_0-auc:0.83856	validation_1-auc:0.81653
[10000]	validation_0-auc:0.84074	validation_1-auc:0.81658
[11000]	validation_0-auc:0.84286	validation_1-auc:0.81665
[12000]	validation_0-auc:0.84498	validation_1-auc:0.81667
[12098]	validation_0-auc:0.84518	validation_1-auc:0.81667
Fold:2 score:0.8167




[0]	validation_0-auc:0.79928	validation_1-auc:0.79821
[1000]	validation_0-auc:0.81777	validation_1-auc:0.81100
[2000]	validation_0-auc:0.82204	validation_1-auc:0.81290
[3000]	validation_0-auc:0.82486	validation_1-auc:0.81386
[4000]	validation_0-auc:0.82735	validation_1-auc:0.81447
[5000]	validation_0-auc:0.82975	validation_1-auc:0.81483
[6000]	validation_0-auc:0.83205	validation_1-auc:0.81510
[7000]	validation_0-auc:0.83437	validation_1-auc:0.81535
[8000]	validation_0-auc:0.83660	validation_1-auc:0.81546
[9000]	validation_0-auc:0.83887	validation_1-auc:0.81556
[10000]	validation_0-auc:0.84109	validation_1-auc:0.81567
[11000]	validation_0-auc:0.84323	validation_1-auc:0.81573
[12000]	validation_0-auc:0.84535	validation_1-auc:0.81578
[12852]	validation_0-auc:0.84712	validation_1-auc:0.81582
Fold:3 score:0.8158




[0]	validation_0-auc:0.79847	validation_1-auc:0.79825
[1000]	validation_0-auc:0.81778	validation_1-auc:0.81139
[2000]	validation_0-auc:0.82210	validation_1-auc:0.81332
[3000]	validation_0-auc:0.82498	validation_1-auc:0.81420
[4000]	validation_0-auc:0.82748	validation_1-auc:0.81478
[5000]	validation_0-auc:0.82982	validation_1-auc:0.81512
[6000]	validation_0-auc:0.83212	validation_1-auc:0.81536
[7000]	validation_0-auc:0.83445	validation_1-auc:0.81546
[8000]	validation_0-auc:0.83672	validation_1-auc:0.81556
[9000]	validation_0-auc:0.83901	validation_1-auc:0.81565
[10000]	validation_0-auc:0.84120	validation_1-auc:0.81570
[10123]	validation_0-auc:0.84147	validation_1-auc:0.81571
Fold:4 score:0.8157




[0]	validation_0-auc:0.79764	validation_1-auc:0.79663
[1000]	validation_0-auc:0.81792	validation_1-auc:0.81043
[2000]	validation_0-auc:0.82229	validation_1-auc:0.81263
[3000]	validation_0-auc:0.82515	validation_1-auc:0.81364
[4000]	validation_0-auc:0.82763	validation_1-auc:0.81423
[5000]	validation_0-auc:0.83001	validation_1-auc:0.81460
[6000]	validation_0-auc:0.83232	validation_1-auc:0.81484
[7000]	validation_0-auc:0.83465	validation_1-auc:0.81505
[8000]	validation_0-auc:0.83690	validation_1-auc:0.81516
[9000]	validation_0-auc:0.83915	validation_1-auc:0.81524
[9149]	validation_0-auc:0.83949	validation_1-auc:0.81525
Fold:5 score:0.8153
Average score:0.8162 (0.0007)
------------------------------
Seed:49
------------------------------




[0]	validation_0-auc:0.79863	validation_1-auc:0.79781
[1000]	validation_0-auc:0.81750	validation_1-auc:0.81104
[2000]	validation_0-auc:0.82190	validation_1-auc:0.81316
[3000]	validation_0-auc:0.82479	validation_1-auc:0.81412
[4000]	validation_0-auc:0.82725	validation_1-auc:0.81471
[5000]	validation_0-auc:0.82962	validation_1-auc:0.81512
[6000]	validation_0-auc:0.83193	validation_1-auc:0.81544
[7000]	validation_0-auc:0.83424	validation_1-auc:0.81564
[8000]	validation_0-auc:0.83648	validation_1-auc:0.81582
[9000]	validation_0-auc:0.83871	validation_1-auc:0.81595
[10000]	validation_0-auc:0.84092	validation_1-auc:0.81605
[11000]	validation_0-auc:0.84308	validation_1-auc:0.81611
[11926]	validation_0-auc:0.84504	validation_1-auc:0.81612
Fold:1 score:0.8161




[0]	validation_0-auc:0.79904	validation_1-auc:0.79923
[1000]	validation_0-auc:0.81752	validation_1-auc:0.81210
[2000]	validation_0-auc:0.82187	validation_1-auc:0.81413
[3000]	validation_0-auc:0.82473	validation_1-auc:0.81510
[4000]	validation_0-auc:0.82724	validation_1-auc:0.81562
[5000]	validation_0-auc:0.82963	validation_1-auc:0.81599
[6000]	validation_0-auc:0.83198	validation_1-auc:0.81625
[7000]	validation_0-auc:0.83425	validation_1-auc:0.81644
[8000]	validation_0-auc:0.83652	validation_1-auc:0.81657
[9000]	validation_0-auc:0.83869	validation_1-auc:0.81666
[9739]	validation_0-auc:0.84031	validation_1-auc:0.81671
Fold:2 score:0.8167




[0]	validation_0-auc:0.79852	validation_1-auc:0.79699
[1000]	validation_0-auc:0.81784	validation_1-auc:0.81010
[2000]	validation_0-auc:0.82221	validation_1-auc:0.81234
[3000]	validation_0-auc:0.82511	validation_1-auc:0.81333
[4000]	validation_0-auc:0.82761	validation_1-auc:0.81398
[5000]	validation_0-auc:0.83002	validation_1-auc:0.81442
[6000]	validation_0-auc:0.83232	validation_1-auc:0.81472
[7000]	validation_0-auc:0.83466	validation_1-auc:0.81491
[8000]	validation_0-auc:0.83695	validation_1-auc:0.81504
[8739]	validation_0-auc:0.83860	validation_1-auc:0.81508
Fold:3 score:0.8151




[0]	validation_0-auc:0.79816	validation_1-auc:0.79843
[1000]	validation_0-auc:0.81768	validation_1-auc:0.81205
[2000]	validation_0-auc:0.82193	validation_1-auc:0.81387
[3000]	validation_0-auc:0.82474	validation_1-auc:0.81476
[4000]	validation_0-auc:0.82713	validation_1-auc:0.81524
[5000]	validation_0-auc:0.82953	validation_1-auc:0.81568
[6000]	validation_0-auc:0.83193	validation_1-auc:0.81598
[7000]	validation_0-auc:0.83423	validation_1-auc:0.81616
[8000]	validation_0-auc:0.83650	validation_1-auc:0.81632
[9000]	validation_0-auc:0.83878	validation_1-auc:0.81639
[10000]	validation_0-auc:0.84098	validation_1-auc:0.81644
[10762]	validation_0-auc:0.84263	validation_1-auc:0.81646
Fold:4 score:0.8165




[0]	validation_0-auc:0.79812	validation_1-auc:0.79844
[1000]	validation_0-auc:0.81761	validation_1-auc:0.81130
[2000]	validation_0-auc:0.82189	validation_1-auc:0.81340
[3000]	validation_0-auc:0.82477	validation_1-auc:0.81430
[4000]	validation_0-auc:0.82730	validation_1-auc:0.81491
[5000]	validation_0-auc:0.82970	validation_1-auc:0.81527
[6000]	validation_0-auc:0.83204	validation_1-auc:0.81552
[7000]	validation_0-auc:0.83435	validation_1-auc:0.81569
[8000]	validation_0-auc:0.83662	validation_1-auc:0.81578
[9000]	validation_0-auc:0.83887	validation_1-auc:0.81588
[9548]	validation_0-auc:0.84008	validation_1-auc:0.81590
Fold:5 score:0.8159
Average score:0.8161 (0.0006)
------------------------------
Seed:50
------------------------------




[0]	validation_0-auc:0.79883	validation_1-auc:0.79791
[1000]	validation_0-auc:0.81755	validation_1-auc:0.81191
[2000]	validation_0-auc:0.82189	validation_1-auc:0.81383
[3000]	validation_0-auc:0.82472	validation_1-auc:0.81478
[4000]	validation_0-auc:0.82724	validation_1-auc:0.81539
[5000]	validation_0-auc:0.82965	validation_1-auc:0.81581
[6000]	validation_0-auc:0.83204	validation_1-auc:0.81607
[7000]	validation_0-auc:0.83437	validation_1-auc:0.81627
[8000]	validation_0-auc:0.83668	validation_1-auc:0.81638
[8626]	validation_0-auc:0.83806	validation_1-auc:0.81644
Fold:1 score:0.8164




[0]	validation_0-auc:0.79831	validation_1-auc:0.80138
[1000]	validation_0-auc:0.81710	validation_1-auc:0.81330
[2000]	validation_0-auc:0.82148	validation_1-auc:0.81529
[3000]	validation_0-auc:0.82435	validation_1-auc:0.81631
[4000]	validation_0-auc:0.82683	validation_1-auc:0.81692
[5000]	validation_0-auc:0.82922	validation_1-auc:0.81729
[6000]	validation_0-auc:0.83151	validation_1-auc:0.81754
[7000]	validation_0-auc:0.83382	validation_1-auc:0.81772
[8000]	validation_0-auc:0.83608	validation_1-auc:0.81783
[9000]	validation_0-auc:0.83829	validation_1-auc:0.81792
[10000]	validation_0-auc:0.84049	validation_1-auc:0.81798
[11000]	validation_0-auc:0.84263	validation_1-auc:0.81805
[12000]	validation_0-auc:0.84467	validation_1-auc:0.81808
[13000]	validation_0-auc:0.84673	validation_1-auc:0.81812
[13388]	validation_0-auc:0.84752	validation_1-auc:0.81811
Fold:2 score:0.8181




[0]	validation_0-auc:0.79919	validation_1-auc:0.79784
[1000]	validation_0-auc:0.81795	validation_1-auc:0.81021
[2000]	validation_0-auc:0.82221	validation_1-auc:0.81246
[3000]	validation_0-auc:0.82509	validation_1-auc:0.81343
[4000]	validation_0-auc:0.82761	validation_1-auc:0.81401
[5000]	validation_0-auc:0.83000	validation_1-auc:0.81439
[6000]	validation_0-auc:0.83236	validation_1-auc:0.81461
[7000]	validation_0-auc:0.83467	validation_1-auc:0.81476
[8000]	validation_0-auc:0.83696	validation_1-auc:0.81493
[9000]	validation_0-auc:0.83920	validation_1-auc:0.81503
[10000]	validation_0-auc:0.84139	validation_1-auc:0.81507
[10249]	validation_0-auc:0.84190	validation_1-auc:0.81508
Fold:3 score:0.8151




[0]	validation_0-auc:0.79866	validation_1-auc:0.79742
[1000]	validation_0-auc:0.81784	validation_1-auc:0.81049
[2000]	validation_0-auc:0.82218	validation_1-auc:0.81256
[3000]	validation_0-auc:0.82500	validation_1-auc:0.81341
[4000]	validation_0-auc:0.82747	validation_1-auc:0.81400
[5000]	validation_0-auc:0.82985	validation_1-auc:0.81436
[6000]	validation_0-auc:0.83214	validation_1-auc:0.81466
[7000]	validation_0-auc:0.83445	validation_1-auc:0.81488
[8000]	validation_0-auc:0.83669	validation_1-auc:0.81504
[9000]	validation_0-auc:0.83895	validation_1-auc:0.81515
[10000]	validation_0-auc:0.84115	validation_1-auc:0.81522
[11000]	validation_0-auc:0.84330	validation_1-auc:0.81532
[12000]	validation_0-auc:0.84541	validation_1-auc:0.81536
[12483]	validation_0-auc:0.84643	validation_1-auc:0.81537
Fold:4 score:0.8154




[0]	validation_0-auc:0.79838	validation_1-auc:0.79733
[1000]	validation_0-auc:0.81778	validation_1-auc:0.81108
[2000]	validation_0-auc:0.82208	validation_1-auc:0.81308
[3000]	validation_0-auc:0.82493	validation_1-auc:0.81392
[4000]	validation_0-auc:0.82739	validation_1-auc:0.81445
[5000]	validation_0-auc:0.82976	validation_1-auc:0.81480
[6000]	validation_0-auc:0.83209	validation_1-auc:0.81504
[7000]	validation_0-auc:0.83439	validation_1-auc:0.81524
[8000]	validation_0-auc:0.83669	validation_1-auc:0.81533
[9000]	validation_0-auc:0.83892	validation_1-auc:0.81545
[9853]	validation_0-auc:0.84081	validation_1-auc:0.81550
Fold:5 score:0.8155
Average score:0.8161 (0.0011)
------------------------------


Unnamed: 0,XGBClassifier_seed_42,XGBClassifier_seed_43,XGBClassifier_seed_44,XGBClassifier_seed_45,XGBClassifier_seed_46,XGBClassifier_seed_47,XGBClassifier_seed_48,XGBClassifier_seed_49,XGBClassifier_seed_50
0,0.574032,0.558254,0.577429,0.583098,0.567952,0.565287,0.557048,0.585667,0.58641
1,0.172318,0.172514,0.173474,0.16469,0.166969,0.177321,0.16957,0.165053,0.158789
2,0.806406,0.807042,0.824119,0.815907,0.840936,0.827772,0.838242,0.808805,0.811777
3,0.785276,0.795352,0.804891,0.802182,0.771534,0.795239,0.776732,0.80054,0.789282
4,0.777838,0.782893,0.792105,0.785199,0.798111,0.772324,0.78591,0.784927,0.781707
5,0.447601,0.42103,0.43086,0.438568,0.452559,0.418826,0.39217,0.449788,0.454433
6,0.777173,0.77137,0.788934,0.778708,0.781524,0.789102,0.786852,0.777813,0.771758
7,0.663565,0.656446,0.673704,0.629198,0.648916,0.650476,0.679579,0.684076,0.688596
8,0.17946,0.170501,0.192721,0.189006,0.191672,0.185844,0.187959,0.189348,0.178556
9,0.178784,0.207111,0.192861,0.179767,0.178435,0.18504,0.186918,0.172962,0.176062


Unnamed: 0,XGBClassifier_seed_42,XGBClassifier_seed_43,XGBClassifier_seed_44,XGBClassifier_seed_45,XGBClassifier_seed_46,XGBClassifier_seed_47,XGBClassifier_seed_48,XGBClassifier_seed_49,XGBClassifier_seed_50
0,0.570304,0.571341,0.571977,0.565733,0.572773,0.570533,0.561267,0.569153,0.56999
1,0.123918,0.123646,0.121137,0.11937,0.125131,0.120313,0.123094,0.121039,0.124606
2,0.637998,0.634826,0.633701,0.634534,0.636692,0.630868,0.637449,0.637669,0.633959
3,0.128328,0.125954,0.128035,0.128491,0.125542,0.126903,0.127965,0.129404,0.124441
4,0.152237,0.156951,0.156745,0.156737,0.154215,0.157108,0.151989,0.15093,0.155373
5,0.180815,0.18213,0.177795,0.179429,0.180967,0.177052,0.176549,0.179292,0.180191
6,0.838238,0.838668,0.839094,0.839015,0.84013,0.843701,0.841456,0.842583,0.842793
7,0.139359,0.140771,0.136263,0.13542,0.139428,0.138992,0.136153,0.137996,0.138625
8,0.569015,0.57449,0.569588,0.567145,0.573581,0.571177,0.573364,0.570392,0.564318
9,0.744184,0.738952,0.742208,0.746253,0.747845,0.739708,0.742195,0.749448,0.744227


--------------------------------------------------
Current Level: meta_level
--------------------------------------------------
------------------------------
Model:RandomForestClassifier
------------------------------
------------------------------
Seed:42
------------------------------
Fold:1 score:0.8167
Fold:2 score:0.8175
Fold:3 score:0.8167
Fold:4 score:0.8162
Fold:5 score:0.8156
Average score:0.8166 (0.0006)
------------------------------


Unnamed: 0,RandomForestClassifier_seed_42
0,0.568038
1,0.167683
2,0.820099
3,0.791873
4,0.786931
5,0.428447
6,0.78482
7,0.653929
8,0.179775
9,0.181568


Unnamed: 0,RandomForestClassifier_seed_42
0,0.570463
1,0.123853
2,0.632315
3,0.130626
4,0.156176
5,0.18059
6,0.842449
7,0.135266
8,0.571715
9,0.744925


In [26]:
# final results
results.head(10)

Unnamed: 0,RandomForestClassifier_seed_42
0,0.570463
1,0.123853
2,0.632315
3,0.130626
4,0.156176
5,0.18059
6,0.842449
7,0.135266
8,0.571715
9,0.744925


### Submit the results

In [27]:
predictions = results.iloc[:, -1].values

In [28]:
# Save the predictions to a CSV file
output = pd.DataFrame({'id': X_test.index,
                       'target': predictions})
output.to_csv('submission.csv', index=False)

In [29]:
# results 
output.head(20)

Unnamed: 0,id,target
0,957919,0.570463
1,957920,0.123853
2,957921,0.632315
3,957922,0.130626
4,957923,0.156176
5,957924,0.18059
6,957925,0.842449
7,957926,0.135266
8,957927,0.571715
9,957928,0.744925
