## Import libraries

In [1]:
import gc
import pickle
import numpy as np
import pandas as pd
import lightgbm as lgb
from sklearn.metrics import log_loss
from sklearn.model_selection import StratifiedKFold

## Prepare data for model training

In [2]:
# Load the pickled dataset bundle and pull out the train/test frames.
# NOTE(security): unpickling can execute arbitrary code; only load this file
# when it comes from a trusted source.
with open("../input/tps-may-data-preprocess-v5/TPS_May_Dataset.txt", 'rb') as handle:
    # Deserialize straight from the file object so the raw byte buffer is not
    # kept alive in the kernel alongside the decoded objects.
    processed_data = pickle.load(handle)

train_df = processed_data['train_df']
test_df = processed_data['test_df']

# Drop the container dict; the two frames remain referenced by their own names.
del processed_data
gc.collect()

0

In [3]:
# Columns handed to LightGBM as categorical features: 'feature_0'..'feature_49'
# followed by 'clusters__0'..'clusters__14' (same names and order as before).
cat_cols = (['feature_{}'.format(i) for i in range(50)] +
            ['clusters__{}'.format(i) for i in range(15)])

# Cast the categorical columns to integer codes in both frames.
train_df[cat_cols] = train_df[cat_cols].astype(int)
test_df[cat_cols] = test_df[cat_cols].astype(int)

# Resolve positions against the feature columns only (everything except
# 'target'). The training matrix is built from exactly those columns, so
# looking positions up in the full frame would shift every index that sits
# after 'target' by one whenever 'target' is not the last column.
feature_columns = train_df.columns[train_df.columns != 'target']
cat_cols_indices = [feature_columns.get_loc(col) for col in cat_cols]
print(cat_cols_indices)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]


In [4]:
# Feature columns are every training column except the label.
feature_cols = train_df.columns[train_df.columns != 'target']

# Select the same columns, in the same order, from both frames so the test
# matrix is guaranteed to line up with the training matrix column-for-column.
# A missing column raises a KeyError here instead of silently misaligning.
Xtrain = train_df.loc[:, feature_cols].values
Xtest = test_df.loc[:, feature_cols].values

# Integer labels (used for stratification and as the LightGBM label) and their
# one-hot encoding (used when scoring with log_loss).
Ytrain = train_df['target'].values
Ytrain_oh = pd.get_dummies(train_df['target']).values

print("Xtrain: {} \nYtrain: {} \nYtrain_oh: {} \nXtest: {}".format(Xtrain.shape, Ytrain.shape, 
                                                                   Ytrain_oh.shape, Xtest.shape))

# The frames are no longer needed once the NumPy arrays exist.
del train_df, test_df
gc.collect()

Xtrain: (99918, 100) 
Ytrain: (99918,) 
Ytrain_oh: (99918, 4) 
Xtest: (50000, 100)


20

## Build and validate the model

In [5]:
# LightGBM training configuration for the 4-class model.
# Key order matches the original incremental assignments.
params = {
    "objective": 'multiclass',
    'metric': 'multi_logloss',
    'boosting': 'gbdt',
    # "device_type": 'gpu',
    'num_class': 4,
    # NOTE(review): LightGBM documents `is_unbalance` for the binary and
    # multiclassova objectives -- confirm it has any effect with 'multiclass'.
    'is_unbalance': True,
    "learning_rate": 0.012,
    "lambda_l2": 0.0256,
    "num_leaves": 52,
    # "max_depth": 10,
    "feature_fraction": 0.503,
    "bagging_fraction": 0.6741,
    "bagging_freq": 8,
    "bagging_seed": 10,
    "min_data_in_leaf": 10,
    "verbosity": -1,
}

# Upper bound on boosting iterations passed to lgb.train.
num_rounds = 5000

In [6]:
# Repeated stratified K-fold training: NUM_SEED differently-shuffled splits of
# FOLD folds each, one LightGBM model per fold.
FOLD = 10
NUM_SEED = 3

# Prediction Clipping Thresholds
p_min = 0.025
p_max = 1 - p_min

# Draw the fold-shuffling seeds from a fixed master seed.
np.random.seed(2021)
seeds = np.random.randint(0, 100, size=NUM_SEED)

oof_score = 0
counter = 0  # total number of models trained (seeds x folds)
y_pred_meta_lgb = np.zeros((Ytrain.shape[0], 4))   # summed out-of-fold predictions
y_pred_final_lgb = np.zeros((Xtest.shape[0], 4))   # summed test predictions


for seed in seeds:
    seed_score = 0

    # Only the fold assignment depends on `seed`; `params` is passed to
    # lgb.train unchanged for every model.
    kfold = StratifiedKFold(n_splits=FOLD, shuffle=True, random_state=seed)

    for idx, (train, val) in enumerate(kfold.split(Xtrain, Ytrain)):
        counter += 1

        val_x = Xtrain[val]
        lgtrain = lgb.Dataset(Xtrain[train], label=Ytrain[train].ravel())
        lgvalidation = lgb.Dataset(val_x, label=Ytrain[val].ravel())

        model = lgb.train(
            params, lgtrain, num_rounds,
            valid_sets=[lgtrain, lgvalidation],
            categorical_feature=cat_cols_indices,
            early_stopping_rounds=200, verbose_eval=100,
        )
        best_iter = model.best_iteration

        # Validation predictions are clipped to [p_min, p_max] before being
        # scored and stored.
        # NOTE(review): clipped rows are not renormalised to sum to 1 -- confirm
        # that downstream consumers of y_pred_meta_lgb expect that.
        y_pred = np.clip(model.predict(val_x, num_iteration=best_iter), p_min, p_max)
        y_pred_meta_lgb[val] += y_pred

        # NOTE(review): test predictions are accumulated unclipped, unlike the
        # validation predictions above -- confirm this asymmetry is intended.
        y_pred_final_lgb += model.predict(Xtest, num_iteration=best_iter)

        score = log_loss(Ytrain_oh[val], y_pred)
        oof_score += score
        seed_score += score
        print("Seed-{} | Fold-{} | OOF Score: {}".format(seed, idx, score))

    print("\nSeed: {} | Aggregate OOF Score: {}\n\n".format(seed, (seed_score / FOLD)))


# Each training row is validated once per seed, so the OOF sum is averaged over
# seeds; test predictions and the score are averaged over every trained model.
y_pred_meta_lgb /= float(NUM_SEED)
y_pred_final_lgb /= float(counter)
oof_score /= float(counter)
print("Aggregate OOF Score: {}".format(oof_score))

New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07624	valid_1's multi_logloss: 1.10298
[200]	training's multi_logloss: 1.04579	valid_1's multi_logloss: 1.09702
[300]	training's multi_logloss: 1.02031	valid_1's multi_logloss: 1.09448
[400]	training's multi_logloss: 0.997913	valid_1's multi_logloss: 1.09379
[500]	training's multi_logloss: 0.977758	valid_1's multi_logloss: 1.0936
[600]	training's multi_logloss: 0.958772	valid_1's multi_logloss: 1.09398
[700]	training's multi_logloss: 0.940914	valid_1's multi_logloss: 1.09466
Early stopping, best iteration is:
[508]	training's multi_logloss: 0.976205	valid_1's multi_logloss: 1.09351
Seed-85 | Fold-0 | OOF Score: 1.0935144558843868


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07661	valid_1's multi_logloss: 1.10196
[200]	training's multi_logloss: 1.04632	valid_1's multi_logloss: 1.09517
[300]	training's multi_logloss: 1.02105	valid_1's multi_logloss: 1.0921
[400]	training's multi_logloss: 0.998635	valid_1's multi_logloss: 1.09087
[500]	training's multi_logloss: 0.978247	valid_1's multi_logloss: 1.09057
[600]	training's multi_logloss: 0.959476	valid_1's multi_logloss: 1.0907
[700]	training's multi_logloss: 0.941556	valid_1's multi_logloss: 1.09083
Early stopping, best iteration is:
[515]	training's multi_logloss: 0.975413	valid_1's multi_logloss: 1.09044
Seed-85 | Fold-1 | OOF Score: 1.0904386618211377


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07643	valid_1's multi_logloss: 1.10264
[200]	training's multi_logloss: 1.04603	valid_1's multi_logloss: 1.09646
[300]	training's multi_logloss: 1.02062	valid_1's multi_logloss: 1.09386
[400]	training's multi_logloss: 0.998232	valid_1's multi_logloss: 1.09325
[500]	training's multi_logloss: 0.977843	valid_1's multi_logloss: 1.09342
[600]	training's multi_logloss: 0.959001	valid_1's multi_logloss: 1.09381
Early stopping, best iteration is:
[404]	training's multi_logloss: 0.997374	valid_1's multi_logloss: 1.09322
Seed-85 | Fold-2 | OOF Score: 1.0932180332483243


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07647	valid_1's multi_logloss: 1.1023
[200]	training's multi_logloss: 1.04627	valid_1's multi_logloss: 1.09634
[300]	training's multi_logloss: 1.02093	valid_1's multi_logloss: 1.09376
[400]	training's multi_logloss: 0.9986	valid_1's multi_logloss: 1.09287
[500]	training's multi_logloss: 0.97807	valid_1's multi_logloss: 1.09305
[600]	training's multi_logloss: 0.958972	valid_1's multi_logloss: 1.09342
Early stopping, best iteration is:
[419]	training's multi_logloss: 0.994612	valid_1's multi_logloss: 1.09267
Seed-85 | Fold-3 | OOF Score: 1.0926702662184122


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07646	valid_1's multi_logloss: 1.10297
[200]	training's multi_logloss: 1.04589	valid_1's multi_logloss: 1.09676
[300]	training's multi_logloss: 1.02034	valid_1's multi_logloss: 1.09432
[400]	training's multi_logloss: 0.998127	valid_1's multi_logloss: 1.0933
[500]	training's multi_logloss: 0.977514	valid_1's multi_logloss: 1.09294
[600]	training's multi_logloss: 0.958609	valid_1's multi_logloss: 1.09318
[700]	training's multi_logloss: 0.940887	valid_1's multi_logloss: 1.09357
Early stopping, best iteration is:
[508]	training's multi_logloss: 0.975946	valid_1's multi_logloss: 1.09286
Seed-85 | Fold-4 | OOF Score: 1.0928621990652652


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07618	valid_1's multi_logloss: 1.10292
[200]	training's multi_logloss: 1.04584	valid_1's multi_logloss: 1.09744
[300]	training's multi_logloss: 1.02044	valid_1's multi_logloss: 1.09562
[400]	training's multi_logloss: 0.99805	valid_1's multi_logloss: 1.09506
[500]	training's multi_logloss: 0.977665	valid_1's multi_logloss: 1.09554
Early stopping, best iteration is:
[398]	training's multi_logloss: 0.998467	valid_1's multi_logloss: 1.09503
Seed-85 | Fold-5 | OOF Score: 1.0950292143723788


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07652	valid_1's multi_logloss: 1.10257
[200]	training's multi_logloss: 1.0461	valid_1's multi_logloss: 1.09579
[300]	training's multi_logloss: 1.02078	valid_1's multi_logloss: 1.09294
[400]	training's multi_logloss: 0.998424	valid_1's multi_logloss: 1.09166
[500]	training's multi_logloss: 0.978206	valid_1's multi_logloss: 1.09136
[600]	training's multi_logloss: 0.959352	valid_1's multi_logloss: 1.09138
[700]	training's multi_logloss: 0.941432	valid_1's multi_logloss: 1.09177
Early stopping, best iteration is:
[561]	training's multi_logloss: 0.966585	valid_1's multi_logloss: 1.09117
Seed-85 | Fold-6 | OOF Score: 1.0911695013043232


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07631	valid_1's multi_logloss: 1.10427
[200]	training's multi_logloss: 1.04597	valid_1's multi_logloss: 1.0987
[300]	training's multi_logloss: 1.02065	valid_1's multi_logloss: 1.096
[400]	training's multi_logloss: 0.998468	valid_1's multi_logloss: 1.09505
[500]	training's multi_logloss: 0.978133	valid_1's multi_logloss: 1.09501
[600]	training's multi_logloss: 0.959249	valid_1's multi_logloss: 1.09537
Early stopping, best iteration is:
[427]	training's multi_logloss: 0.992763	valid_1's multi_logloss: 1.09486
Seed-85 | Fold-7 | OOF Score: 1.0948591630577384


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07643	valid_1's multi_logloss: 1.10348
[200]	training's multi_logloss: 1.04604	valid_1's multi_logloss: 1.09703
[300]	training's multi_logloss: 1.02077	valid_1's multi_logloss: 1.0941
[400]	training's multi_logloss: 0.998288	valid_1's multi_logloss: 1.09298
[500]	training's multi_logloss: 0.97797	valid_1's multi_logloss: 1.09306
[600]	training's multi_logloss: 0.959042	valid_1's multi_logloss: 1.09319
Early stopping, best iteration is:
[460]	training's multi_logloss: 0.985918	valid_1's multi_logloss: 1.09294
Seed-85 | Fold-8 | OOF Score: 1.0929434805772738


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07651	valid_1's multi_logloss: 1.10279
[200]	training's multi_logloss: 1.04613	valid_1's multi_logloss: 1.09662
[300]	training's multi_logloss: 1.02076	valid_1's multi_logloss: 1.09391
[400]	training's multi_logloss: 0.998501	valid_1's multi_logloss: 1.09288
[500]	training's multi_logloss: 0.977994	valid_1's multi_logloss: 1.09261
[600]	training's multi_logloss: 0.959057	valid_1's multi_logloss: 1.09297
Early stopping, best iteration is:
[481]	training's multi_logloss: 0.981746	valid_1's multi_logloss: 1.09255
Seed-85 | Fold-9 | OOF Score: 1.0925513971592886

Seed: 85 | Aggregate OOF Score: 1.092925637270853




New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07649	valid_1's multi_logloss: 1.10246
[200]	training's multi_logloss: 1.04603	valid_1's multi_logloss: 1.09565
[300]	training's multi_logloss: 1.02058	valid_1's multi_logloss: 1.09269
[400]	training's multi_logloss: 0.998401	valid_1's multi_logloss: 1.0913
[500]	training's multi_logloss: 0.978057	valid_1's multi_logloss: 1.09092
[600]	training's multi_logloss: 0.959214	valid_1's multi_logloss: 1.09116
[700]	training's multi_logloss: 0.941341	valid_1's multi_logloss: 1.09166
Early stopping, best iteration is:
[505]	training's multi_logloss: 0.977111	valid_1's multi_logloss: 1.0909
Seed-57 | Fold-0 | OOF Score: 1.090904142004507


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07663	valid_1's multi_logloss: 1.10161
[200]	training's multi_logloss: 1.04633	valid_1's multi_logloss: 1.09502
[300]	training's multi_logloss: 1.02101	valid_1's multi_logloss: 1.09259
[400]	training's multi_logloss: 0.998722	valid_1's multi_logloss: 1.09138
[500]	training's multi_logloss: 0.978446	valid_1's multi_logloss: 1.09115
[600]	training's multi_logloss: 0.959351	valid_1's multi_logloss: 1.09123
Early stopping, best iteration is:
[491]	training's multi_logloss: 0.980209	valid_1's multi_logloss: 1.09105
Seed-57 | Fold-1 | OOF Score: 1.0910510397628024


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07637	valid_1's multi_logloss: 1.10245
[200]	training's multi_logloss: 1.046	valid_1's multi_logloss: 1.096
[300]	training's multi_logloss: 1.02056	valid_1's multi_logloss: 1.09346
[400]	training's multi_logloss: 0.998105	valid_1's multi_logloss: 1.09303
[500]	training's multi_logloss: 0.977664	valid_1's multi_logloss: 1.09308
[600]	training's multi_logloss: 0.958824	valid_1's multi_logloss: 1.09359
Early stopping, best iteration is:
[465]	training's multi_logloss: 0.984677	valid_1's multi_logloss: 1.09281
Seed-57 | Fold-2 | OOF Score: 1.0928145315676747


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07644	valid_1's multi_logloss: 1.10266
[200]	training's multi_logloss: 1.04611	valid_1's multi_logloss: 1.09657
[300]	training's multi_logloss: 1.0208	valid_1's multi_logloss: 1.09405
[400]	training's multi_logloss: 0.99839	valid_1's multi_logloss: 1.09274
[500]	training's multi_logloss: 0.977916	valid_1's multi_logloss: 1.09276
[600]	training's multi_logloss: 0.959094	valid_1's multi_logloss: 1.09268
[700]	training's multi_logloss: 0.941174	valid_1's multi_logloss: 1.09287
Early stopping, best iteration is:
[557]	training's multi_logloss: 0.967116	valid_1's multi_logloss: 1.09256
Seed-57 | Fold-3 | OOF Score: 1.0925576078983157


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07635	valid_1's multi_logloss: 1.1044
[200]	training's multi_logloss: 1.04577	valid_1's multi_logloss: 1.09904
[300]	training's multi_logloss: 1.02025	valid_1's multi_logloss: 1.09688
[400]	training's multi_logloss: 0.997797	valid_1's multi_logloss: 1.09573
[500]	training's multi_logloss: 0.97742	valid_1's multi_logloss: 1.09563
[600]	training's multi_logloss: 0.958549	valid_1's multi_logloss: 1.09622
Early stopping, best iteration is:
[468]	training's multi_logloss: 0.983694	valid_1's multi_logloss: 1.09539
Seed-57 | Fold-4 | OOF Score: 1.0953852391127579


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.0763	valid_1's multi_logloss: 1.10344
[200]	training's multi_logloss: 1.04589	valid_1's multi_logloss: 1.09762
[300]	training's multi_logloss: 1.02047	valid_1's multi_logloss: 1.09534
[400]	training's multi_logloss: 0.998036	valid_1's multi_logloss: 1.09461
[500]	training's multi_logloss: 0.977632	valid_1's multi_logloss: 1.09433
[600]	training's multi_logloss: 0.958737	valid_1's multi_logloss: 1.09468
[700]	training's multi_logloss: 0.94085	valid_1's multi_logloss: 1.0951
Early stopping, best iteration is:
[513]	training's multi_logloss: 0.975065	valid_1's multi_logloss: 1.09422
Seed-57 | Fold-5 | OOF Score: 1.0942175910440808


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07671	valid_1's multi_logloss: 1.10115
[200]	training's multi_logloss: 1.04644	valid_1's multi_logloss: 1.09405
[300]	training's multi_logloss: 1.0211	valid_1's multi_logloss: 1.09102
[400]	training's multi_logloss: 0.998774	valid_1's multi_logloss: 1.08964
[500]	training's multi_logloss: 0.978316	valid_1's multi_logloss: 1.08931
[600]	training's multi_logloss: 0.959414	valid_1's multi_logloss: 1.08956
[700]	training's multi_logloss: 0.94131	valid_1's multi_logloss: 1.08968
Early stopping, best iteration is:
[568]	training's multi_logloss: 0.965286	valid_1's multi_logloss: 1.0892
Seed-57 | Fold-6 | OOF Score: 1.0892036047530236


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07628	valid_1's multi_logloss: 1.10382
[200]	training's multi_logloss: 1.04594	valid_1's multi_logloss: 1.0981
[300]	training's multi_logloss: 1.02057	valid_1's multi_logloss: 1.09598
[400]	training's multi_logloss: 0.998335	valid_1's multi_logloss: 1.09494
[500]	training's multi_logloss: 0.978023	valid_1's multi_logloss: 1.09484
[600]	training's multi_logloss: 0.959075	valid_1's multi_logloss: 1.0952
Early stopping, best iteration is:
[424]	training's multi_logloss: 0.993299	valid_1's multi_logloss: 1.09465
Seed-57 | Fold-7 | OOF Score: 1.09465252109865


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07633	valid_1's multi_logloss: 1.10397
[200]	training's multi_logloss: 1.04594	valid_1's multi_logloss: 1.0987
[300]	training's multi_logloss: 1.02041	valid_1's multi_logloss: 1.09628
[400]	training's multi_logloss: 0.997988	valid_1's multi_logloss: 1.0959
[500]	training's multi_logloss: 0.977738	valid_1's multi_logloss: 1.0956
[600]	training's multi_logloss: 0.958842	valid_1's multi_logloss: 1.09587
Early stopping, best iteration is:
[495]	training's multi_logloss: 0.97871	valid_1's multi_logloss: 1.09556
Seed-57 | Fold-8 | OOF Score: 1.0955599712950506


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07627	valid_1's multi_logloss: 1.10333
[200]	training's multi_logloss: 1.04572	valid_1's multi_logloss: 1.09734
[300]	training's multi_logloss: 1.02032	valid_1's multi_logloss: 1.09496
[400]	training's multi_logloss: 0.997892	valid_1's multi_logloss: 1.09394
[500]	training's multi_logloss: 0.97752	valid_1's multi_logloss: 1.09415
[600]	training's multi_logloss: 0.958563	valid_1's multi_logloss: 1.09444
Early stopping, best iteration is:
[427]	training's multi_logloss: 0.992198	valid_1's multi_logloss: 1.09375
Seed-57 | Fold-9 | OOF Score: 1.093750597702308

Seed: 57 | Aggregate OOF Score: 1.093009684623917




New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07624	valid_1's multi_logloss: 1.10383
[200]	training's multi_logloss: 1.04576	valid_1's multi_logloss: 1.09841
[300]	training's multi_logloss: 1.02032	valid_1's multi_logloss: 1.09589
[400]	training's multi_logloss: 0.998052	valid_1's multi_logloss: 1.0951
[500]	training's multi_logloss: 0.977712	valid_1's multi_logloss: 1.09508
[600]	training's multi_logloss: 0.958726	valid_1's multi_logloss: 1.09524
Early stopping, best iteration is:
[434]	training's multi_logloss: 0.990932	valid_1's multi_logloss: 1.09492
Seed-0 | Fold-0 | OOF Score: 1.0949183032589795


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07647	valid_1's multi_logloss: 1.10163
[200]	training's multi_logloss: 1.04599	valid_1's multi_logloss: 1.09537
[300]	training's multi_logloss: 1.02049	valid_1's multi_logloss: 1.09246
[400]	training's multi_logloss: 0.998219	valid_1's multi_logloss: 1.09182
[500]	training's multi_logloss: 0.977824	valid_1's multi_logloss: 1.09139
[600]	training's multi_logloss: 0.9589	valid_1's multi_logloss: 1.09176
Early stopping, best iteration is:
[493]	training's multi_logloss: 0.979201	valid_1's multi_logloss: 1.09136
Seed-0 | Fold-1 | OOF Score: 1.0913568635420157


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07634	valid_1's multi_logloss: 1.1033
[200]	training's multi_logloss: 1.04587	valid_1's multi_logloss: 1.09736
[300]	training's multi_logloss: 1.02051	valid_1's multi_logloss: 1.09513
[400]	training's multi_logloss: 0.99809	valid_1's multi_logloss: 1.09422
[500]	training's multi_logloss: 0.977816	valid_1's multi_logloss: 1.09378
[600]	training's multi_logloss: 0.958856	valid_1's multi_logloss: 1.09401
Early stopping, best iteration is:
[499]	training's multi_logloss: 0.978009	valid_1's multi_logloss: 1.09377
Seed-0 | Fold-2 | OOF Score: 1.0937735738645578


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07638	valid_1's multi_logloss: 1.10265
[200]	training's multi_logloss: 1.04603	valid_1's multi_logloss: 1.09654
[300]	training's multi_logloss: 1.02065	valid_1's multi_logloss: 1.09362
[400]	training's multi_logloss: 0.99831	valid_1's multi_logloss: 1.09247
[500]	training's multi_logloss: 0.978004	valid_1's multi_logloss: 1.09279
Early stopping, best iteration is:
[399]	training's multi_logloss: 0.998525	valid_1's multi_logloss: 1.09247
Seed-0 | Fold-3 | OOF Score: 1.0924703216285856


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07693	valid_1's multi_logloss: 1.10054
[200]	training's multi_logloss: 1.04657	valid_1's multi_logloss: 1.09337
[300]	training's multi_logloss: 1.02133	valid_1's multi_logloss: 1.09007
[400]	training's multi_logloss: 0.999012	valid_1's multi_logloss: 1.08875
[500]	training's multi_logloss: 0.978707	valid_1's multi_logloss: 1.08861
[600]	training's multi_logloss: 0.959842	valid_1's multi_logloss: 1.08885
Early stopping, best iteration is:
[446]	training's multi_logloss: 0.989522	valid_1's multi_logloss: 1.08851
Seed-0 | Fold-4 | OOF Score: 1.0885144732640986


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07658	valid_1's multi_logloss: 1.10228
[200]	training's multi_logloss: 1.04606	valid_1's multi_logloss: 1.09572
[300]	training's multi_logloss: 1.02059	valid_1's multi_logloss: 1.09299
[400]	training's multi_logloss: 0.998282	valid_1's multi_logloss: 1.09182
[500]	training's multi_logloss: 0.977924	valid_1's multi_logloss: 1.09149
[600]	training's multi_logloss: 0.959104	valid_1's multi_logloss: 1.09176
Early stopping, best iteration is:
[493]	training's multi_logloss: 0.979306	valid_1's multi_logloss: 1.09147
Seed-0 | Fold-5 | OOF Score: 1.0914669902229193


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07645	valid_1's multi_logloss: 1.1038
[200]	training's multi_logloss: 1.04608	valid_1's multi_logloss: 1.09754
[300]	training's multi_logloss: 1.0208	valid_1's multi_logloss: 1.09441
[400]	training's multi_logloss: 0.998779	valid_1's multi_logloss: 1.0935
[500]	training's multi_logloss: 0.978434	valid_1's multi_logloss: 1.09295
[600]	training's multi_logloss: 0.959376	valid_1's multi_logloss: 1.09273
[700]	training's multi_logloss: 0.94145	valid_1's multi_logloss: 1.09294
[800]	training's multi_logloss: 0.92445	valid_1's multi_logloss: 1.0935
Early stopping, best iteration is:
[603]	training's multi_logloss: 0.958829	valid_1's multi_logloss: 1.09269
Seed-0 | Fold-6 | OOF Score: 1.0926859646249618


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07633	valid_1's multi_logloss: 1.10307
[200]	training's multi_logloss: 1.04602	valid_1's multi_logloss: 1.09715
[300]	training's multi_logloss: 1.02064	valid_1's multi_logloss: 1.09494
[400]	training's multi_logloss: 0.998154	valid_1's multi_logloss: 1.09445
[500]	training's multi_logloss: 0.977703	valid_1's multi_logloss: 1.09422
[600]	training's multi_logloss: 0.958745	valid_1's multi_logloss: 1.09487
Early stopping, best iteration is:
[449]	training's multi_logloss: 0.987878	valid_1's multi_logloss: 1.09412
Seed-0 | Fold-7 | OOF Score: 1.0941202431824437


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07644	valid_1's multi_logloss: 1.10257
[200]	training's multi_logloss: 1.04585	valid_1's multi_logloss: 1.09675
[300]	training's multi_logloss: 1.02053	valid_1's multi_logloss: 1.09448
[400]	training's multi_logloss: 0.998134	valid_1's multi_logloss: 1.0937
[500]	training's multi_logloss: 0.977789	valid_1's multi_logloss: 1.09365
[600]	training's multi_logloss: 0.958796	valid_1's multi_logloss: 1.09367
Early stopping, best iteration is:
[457]	training's multi_logloss: 0.986335	valid_1's multi_logloss: 1.0934
Seed-0 | Fold-8 | OOF Score: 1.0933972251592365


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07627	valid_1's multi_logloss: 1.10441
[200]	training's multi_logloss: 1.04586	valid_1's multi_logloss: 1.0988
[300]	training's multi_logloss: 1.02055	valid_1's multi_logloss: 1.09656
[400]	training's multi_logloss: 0.998222	valid_1's multi_logloss: 1.0957
[500]	training's multi_logloss: 0.977735	valid_1's multi_logloss: 1.09564
[600]	training's multi_logloss: 0.959009	valid_1's multi_logloss: 1.09598
Early stopping, best iteration is:
[488]	training's multi_logloss: 0.980111	valid_1's multi_logloss: 1.09557
Seed-0 | Fold-9 | OOF Score: 1.0955747155449183

Seed: 0 | Aggregate OOF Score: 1.0928278674292717


Aggregate OOF Score: 1.092921063108014


In [7]:
# Persist the LightGBM outputs to a single compressed .npz archive so they can
# be reloaded later by key. Note: `y_pred_final_lgb` is stored as it stands at
# this point, i.e. before the clipping applied in the submission cell below.
np.savez_compressed(
    './LGB_Meta_Features.npz',
    **{
        'y_pred_meta_lgb': y_pred_meta_lgb,
        'oof_score': oof_score,
        'y_pred_final_lgb': y_pred_final_lgb,
    },
)

## Create submission file

In [8]:
# Build the submission frame: the test `id` column followed by one probability
# column per class.

# Clamp the predicted probabilities into [p_min, p_max]; both bounds are defined
# outside this cell. np.clip is idempotent, so re-running this cell is safe.
y_pred_final_lgb = np.clip(y_pred_final_lgb, p_min, p_max)

# Only the `id` column of the raw test file is needed here. It is read into its
# own variable so the preprocessed `test_df` used for modelling is not clobbered
# by the raw CSV (which would break re-running the earlier preparation cells).
test_ids = pd.read_csv(
    "../input/tabular-playground-series-may-2021/test.csv",
    usecols=['id'],
)['id']

# Output column names, in the same order as the columns of the prediction matrix.
class_cols = ['Class_1', 'Class_2', 'Class_3', 'Class_4']

# Fail early with a readable message if predictions and ids do not line up.
assert y_pred_final_lgb.shape == (len(test_ids), len(class_cols)), (
    "Prediction matrix shape {} does not match ({}, {})".format(
        y_pred_final_lgb.shape, len(test_ids), len(class_cols)
    )
)

submit_df = pd.DataFrame({'id': test_ids})
for col_idx, col_name in enumerate(class_cols):
    submit_df[col_name] = y_pred_final_lgb[:, col_idx]

submit_df.head()

Unnamed: 0,id,Class_1,Class_2,Class_3,Class_4
0,100000,0.094618,0.58206,0.189509,0.133813
1,100001,0.067342,0.648476,0.178464,0.105718
2,100002,0.0628,0.682327,0.169452,0.085421
3,100003,0.081586,0.518184,0.311256,0.088974
4,100004,0.068587,0.674762,0.165719,0.090931


In [9]:
# Write the submission frame to disk; index=False keeps the DataFrame's row
# index out of the CSV so the file holds only the frame's own columns.
submit_df.to_csv(path_or_buf="./LGB_submission.csv", index=False)