# IMPORTS

In [1]:
import sys
sys.path.insert(0, "../..")
import config as cfg
import gc
import os

In [2]:
import pandas as pd
import numpy as np
import re
from tqdm.notebook import tqdm
from metrics import compute_single_col_score, get_tresholds
from helper import make_prediction, check_path
from sklearn.model_selection import StratifiedKFold, train_test_split
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

In [3]:
import lightgbm as lgb

# MODEL TRAINING

In [4]:
train = pd.read_pickle(cfg.PREPARED_TRAIN_DATA_PATH)
test = pd.read_pickle(cfg.PREPARED_TEST_DATA_PATH)

In [5]:
X_train, Y_train = train.drop(cfg.TARGETS, axis=1), train[cfg.TARGETS]

In [6]:
pred_proba_oof = pd.DataFrame(data=np.zeros(shape=(len(train), len(cfg.TARGETS))), index=train.index, columns=cfg.TARGETS)
pred_proba_test = pd.DataFrame(data=np.zeros(shape=(len(test), len(cfg.TARGETS))), index=test.index, columns=cfg.TARGETS)
metrics = {}

In [7]:
EXPERIMENT_FAMILY_NAME = 'lgb'
EXPERIMENT_NAME = 'baseline'
RANDOM_STATE = 77
N_SPLITS = 5
N_RANDOM_SEEDS = 7

In [13]:
cv = StratifiedKFold(n_splits=N_SPLITS, random_state=RANDOM_STATE, shuffle=True)

CAT_COLS = cfg.CAT_UNORDERED_COLS

test_pool = lgb.Dataset(
        data=test,
        categorical_feature=CAT_COLS)

for y_idx, y_col in tqdm(enumerate(Y_train.columns), total=Y_train.shape[1]):
    
    fold = 0
    for train_idx, val_idx in tqdm(cv.split(X_train, Y_train[y_col]), total=N_SPLITS):
        train_pool = lgb.Dataset(
            data=X_train.iloc[train_idx], 
            label=Y_train.iloc[train_idx, y_idx],
            categorical_feature=CAT_COLS,
            free_raw_data=False)

        val_pool = lgb.Dataset(
            data=X_train.iloc[val_idx], 
            label=Y_train.iloc[val_idx, y_idx],
            categorical_feature=CAT_COLS,
            reference=train_pool,
            free_raw_data=False)
            
        for random_seed in tqdm(range(N_RANDOM_SEEDS), total=N_RANDOM_SEEDS):

            num_round = 2000
            params = {
                'num_iterations': 1000,
                'random_seed': random_seed,
                'early_stopping_round': 100,
                'num_leaves': 64,
                'verbosity': 0
                }
            clf = lgb.train(
                params, train_pool, num_round, valid_sets=[val_pool],
                callbacks=[lgb.early_stopping(stopping_rounds=30)])
            
            model_name = f'{EXPERIMENT_NAME}_fold_{fold}_rs_{random_seed}_tar_{y_col}.txt'
            model_path = os.path.join(cfg.MODELS_PATH, EXPERIMENT_FAMILY_NAME, EXPERIMENT_NAME)
            check_path(model_path)
            clf.save_model(os.path.join(model_path, model_name), num_iteration=clf.best_iteration)
            
            pred_proba_oof.iloc[val_idx, y_idx] += clf.predict(X_train.iloc[val_idx], num_iteration=clf.best_iteration)
            pred_proba_test.iloc[:, y_idx] += clf.predict(test, num_iteration=clf.best_iteration)
            del clf; gc.collect()
        del train_pool,val_pool; gc.collect() 
        
        fold += 1
pred_proba_oof /= N_RANDOM_SEEDS
pred_proba_test /= (N_SPLITS * N_RANDOM_SEEDS)

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]



You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[13]	valid_0's l2: 0.0862609




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[13]	valid_0's l2: 0.0862609




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[13]	valid_0's l2: 0.0862609
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[13]	valid_0's l2: 0.0862609




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[13]	valid_0's l2: 0.0862609
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[13]	valid_0's l2: 0.0862609
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[13]	valid_0's l2: 0.0862609




  0%|          | 0/7 [00:00<?, ?it/s]



You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[1]	valid_0's l2: 0.0893842
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds




Early stopping, best iteration is:
[1]	valid_0's l2: 0.0893842
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds




Early stopping, best iteration is:
[1]	valid_0's l2: 0.0893842
You can set `force_col_wise=true` to remove the overhead.
Training until validation scores don't improve for 30 rounds




Early stopping, best iteration is:
[1]	valid_0's l2: 0.0893842
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[1]	valid_0's l2: 0.0893842
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[1]	valid_0's l2: 0.0893842




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[1]	valid_0's l2: 0.0893842


  0%|          | 0/7 [00:00<?, ?it/s]



You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[7]	valid_0's l2: 0.0847725




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[7]	valid_0's l2: 0.0847725
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[7]	valid_0's l2: 0.0847725




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[7]	valid_0's l2: 0.0847725
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[7]	valid_0's l2: 0.0847725




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[7]	valid_0's l2: 0.0847725
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[7]	valid_0's l2: 0.0847725


  0%|          | 0/7 [00:00<?, ?it/s]



You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[3]	valid_0's l2: 0.0878602
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds




Early stopping, best iteration is:
[3]	valid_0's l2: 0.0878602
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[3]	valid_0's l2: 0.0878602




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[3]	valid_0's l2: 0.0878602
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[3]	valid_0's l2: 0.0878602




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[3]	valid_0's l2: 0.0878602
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[3]	valid_0's l2: 0.0878602




  0%|          | 0/7 [00:00<?, ?it/s]



You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[8]	valid_0's l2: 0.088655




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[8]	valid_0's l2: 0.088655
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[8]	valid_0's l2: 0.088655




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[8]	valid_0's l2: 0.088655
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[8]	valid_0's l2: 0.088655
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[8]	valid_0's l2: 0.088655




You can set `force_col_wise=true` to remove the overhead.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[8]	valid_0's l2: 0.088655


  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]



You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[12]	valid_0's l2: 0.10049




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[12]	valid_0's l2: 0.10049




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[12]	valid_0's l2: 0.10049
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[12]	valid_0's l2: 0.10049




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[12]	valid_0's l2: 0.10049
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[12]	valid_0's l2: 0.10049




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[12]	valid_0's l2: 0.10049


  0%|          | 0/7 [00:00<?, ?it/s]



You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[7]	valid_0's l2: 0.100546




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[7]	valid_0's l2: 0.100546
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[7]	valid_0's l2: 0.100546




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[7]	valid_0's l2: 0.100546
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[7]	valid_0's l2: 0.100546




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[7]	valid_0's l2: 0.100546
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[7]	valid_0's l2: 0.100546




  0%|          | 0/7 [00:00<?, ?it/s]



You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[14]	valid_0's l2: 0.101942




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[14]	valid_0's l2: 0.101942
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[14]	valid_0's l2: 0.101942
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds




Early stopping, best iteration is:
[14]	valid_0's l2: 0.101942
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[14]	valid_0's l2: 0.101942
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[14]	valid_0's l2: 0.101942




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[14]	valid_0's l2: 0.101942


  0%|          | 0/7 [00:00<?, ?it/s]



You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[4]	valid_0's l2: 0.104719




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[4]	valid_0's l2: 0.104719
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[4]	valid_0's l2: 0.104719




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[4]	valid_0's l2: 0.104719
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds




Early stopping, best iteration is:
[4]	valid_0's l2: 0.104719
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds




Early stopping, best iteration is:
[4]	valid_0's l2: 0.104719
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[4]	valid_0's l2: 0.104719




  0%|          | 0/7 [00:00<?, ?it/s]



You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[9]	valid_0's l2: 0.101923
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds




Early stopping, best iteration is:
[9]	valid_0's l2: 0.101923




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[9]	valid_0's l2: 0.101923
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds




Early stopping, best iteration is:
[9]	valid_0's l2: 0.101923
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[9]	valid_0's l2: 0.101923
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[9]	valid_0's l2: 0.101923




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[9]	valid_0's l2: 0.101923


  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]



You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[9]	valid_0's l2: 0.0808206




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[9]	valid_0's l2: 0.0808206
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[9]	valid_0's l2: 0.0808206
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[9]	valid_0's l2: 0.0808206




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[9]	valid_0's l2: 0.0808206




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[9]	valid_0's l2: 0.0808206
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[9]	valid_0's l2: 0.0808206


  0%|          | 0/7 [00:00<?, ?it/s]



You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[1]	valid_0's l2: 0.0813851
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds




Early stopping, best iteration is:
[1]	valid_0's l2: 0.0813851
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[1]	valid_0's l2: 0.0813851
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[1]	valid_0's l2: 0.0813851




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[1]	valid_0's l2: 0.0813851
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[1]	valid_0's l2: 0.0813851




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[1]	valid_0's l2: 0.0813851


  0%|          | 0/7 [00:00<?, ?it/s]



You can set `force_col_wise=true` to remove the overhead.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[4]	valid_0's l2: 0.0807198
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[4]	valid_0's l2: 0.0807198




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[4]	valid_0's l2: 0.0807198
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[4]	valid_0's l2: 0.0807198




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[4]	valid_0's l2: 0.0807198
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[4]	valid_0's l2: 0.0807198




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[4]	valid_0's l2: 0.0807198


  0%|          | 0/7 [00:00<?, ?it/s]



You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[1]	valid_0's l2: 0.080812
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[1]	valid_0's l2: 0.080812




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[1]	valid_0's l2: 0.080812
You can set `force_col_wise=true` to remove the overhead.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[1]	valid_0's l2: 0.080812




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[1]	valid_0's l2: 0.080812
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[1]	valid_0's l2: 0.080812
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[1]	valid_0's l2: 0.080812




  0%|          | 0/7 [00:00<?, ?it/s]



You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[9]	valid_0's l2: 0.0847637




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[9]	valid_0's l2: 0.0847637
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[9]	valid_0's l2: 0.0847637




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[9]	valid_0's l2: 0.0847637
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[9]	valid_0's l2: 0.0847637




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[9]	valid_0's l2: 0.0847637




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[9]	valid_0's l2: 0.0847637


  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]



You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[10]	valid_0's l2: 0.0388263




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[10]	valid_0's l2: 0.0388263
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds




Early stopping, best iteration is:
[10]	valid_0's l2: 0.0388263
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds




Early stopping, best iteration is:
[10]	valid_0's l2: 0.0388263




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[10]	valid_0's l2: 0.0388263
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds




Early stopping, best iteration is:
[10]	valid_0's l2: 0.0388263
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[10]	valid_0's l2: 0.0388263


  0%|          | 0/7 [00:00<?, ?it/s]



You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[1]	valid_0's l2: 0.0402484




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[1]	valid_0's l2: 0.0402484
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[1]	valid_0's l2: 0.0402484
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds




Early stopping, best iteration is:
[1]	valid_0's l2: 0.0402484
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[1]	valid_0's l2: 0.0402484
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[1]	valid_0's l2: 0.0402484




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[1]	valid_0's l2: 0.0402484


  0%|          | 0/7 [00:00<?, ?it/s]



You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[1]	valid_0's l2: 0.040283
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds




Early stopping, best iteration is:
[1]	valid_0's l2: 0.040283
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds




Early stopping, best iteration is:
[1]	valid_0's l2: 0.040283
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[1]	valid_0's l2: 0.040283
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[1]	valid_0's l2: 0.040283




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[1]	valid_0's l2: 0.040283




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[1]	valid_0's l2: 0.040283


  0%|          | 0/7 [00:00<?, ?it/s]



You can set `force_col_wise=true` to remove the overhead.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[1]	valid_0's l2: 0.0403348




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[1]	valid_0's l2: 0.0403348
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.




Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[1]	valid_0's l2: 0.0403348
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.




Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[1]	valid_0's l2: 0.0403348




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[1]	valid_0's l2: 0.0403348




You can set `force_col_wise=true` to remove the overhead.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[1]	valid_0's l2: 0.0403348
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[1]	valid_0's l2: 0.0403348




  0%|          | 0/7 [00:00<?, ?it/s]



You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[8]	valid_0's l2: 0.0442162




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[8]	valid_0's l2: 0.0442162
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[8]	valid_0's l2: 0.0442162
You can set `force_col_wise=true` to remove the overhead.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[8]	valid_0's l2: 0.0442162




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[8]	valid_0's l2: 0.0442162
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[8]	valid_0's l2: 0.0442162




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[8]	valid_0's l2: 0.0442162


  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]



You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[15]	valid_0's l2: 0.208501




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[15]	valid_0's l2: 0.208501




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[15]	valid_0's l2: 0.208501
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds




Early stopping, best iteration is:
[15]	valid_0's l2: 0.208501




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[15]	valid_0's l2: 0.208501




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[15]	valid_0's l2: 0.208501
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[15]	valid_0's l2: 0.208501


  0%|          | 0/7 [00:00<?, ?it/s]



You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[14]	valid_0's l2: 0.193183




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[14]	valid_0's l2: 0.193183
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[14]	valid_0's l2: 0.193183
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[14]	valid_0's l2: 0.193183




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[14]	valid_0's l2: 0.193183
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[14]	valid_0's l2: 0.193183




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[14]	valid_0's l2: 0.193183


  0%|          | 0/7 [00:00<?, ?it/s]



You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[21]	valid_0's l2: 0.189613
You can set `force_col_wise=true` to remove the overhead.
Training until validation scores don't improve for 30 rounds




Early stopping, best iteration is:
[21]	valid_0's l2: 0.189613
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds




Early stopping, best iteration is:
[21]	valid_0's l2: 0.189613




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[21]	valid_0's l2: 0.189613
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds




Early stopping, best iteration is:
[21]	valid_0's l2: 0.189613
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[21]	valid_0's l2: 0.189613
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds




Early stopping, best iteration is:
[21]	valid_0's l2: 0.189613


  0%|          | 0/7 [00:00<?, ?it/s]



You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[19]	valid_0's l2: 0.171143
You can set `force_col_wise=true` to remove the overhead.
Training until validation scores don't improve for 30 rounds




Early stopping, best iteration is:
[19]	valid_0's l2: 0.171143




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[19]	valid_0's l2: 0.171143
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[19]	valid_0's l2: 0.171143




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[19]	valid_0's l2: 0.171143




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[19]	valid_0's l2: 0.171143
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[19]	valid_0's l2: 0.171143


  0%|          | 0/7 [00:00<?, ?it/s]



You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[15]	valid_0's l2: 0.203685




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[15]	valid_0's l2: 0.203685
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[15]	valid_0's l2: 0.203685
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[15]	valid_0's l2: 0.203685




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[15]	valid_0's l2: 0.203685
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds




Early stopping, best iteration is:
[15]	valid_0's l2: 0.203685
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[15]	valid_0's l2: 0.203685


# PREDICT AND SAVE PREDICTIONS

In [14]:
tresholds = get_tresholds(train[cfg.TARGETS], pred_proba_oof)
sample_submission = pd.read_csv(cfg.SAMPLE_SUBMISSION_PATH).set_index('ID')
submission = make_prediction(pred_proba_test, tresholds, sample_submission)

0.6449599113514662


In [15]:
## BEST PARAMS
# 0.644987662943

# RANDOM_STATE = 77
# N_SPLITS = 7

# params = {
#                 'num_iterations': 1000,
#                 'random_seed': random_seed,
#                 'early_stopping_round': 100,
#                 'verbosity': 0
#                 }

In [16]:
submission_path = os.path.join(cfg.SUBMISSION_PATH, EXPERIMENT_FAMILY_NAME)
check_path(submission_path)
submission.to_csv(os.path.join(submission_path, f'{EXPERIMENT_NAME}.csv'))

pred_proba_oof_path = os.path.join(cfg.OOF_PRED_PATH, EXPERIMENT_FAMILY_NAME)
check_path(pred_proba_oof_path)
pred_proba_oof.to_pickle(os.path.join(pred_proba_oof_path, f'{EXPERIMENT_NAME}.pkl'))

pred_proba_test_path = os.path.join(cfg.TEST_PRED_PATH, EXPERIMENT_FAMILY_NAME)
check_path(pred_proba_test_path)
pred_proba_test.to_pickle(os.path.join(pred_proba_test_path, f'{EXPERIMENT_NAME}.pkl'))