## Import libraries

In [1]:
import gc
import pickle
import numpy as np
import pandas as pd
import lightgbm as lgb
from sklearn.metrics import log_loss
from sklearn.model_selection import StratifiedKFold

## Prepare data for model training

In [2]:
# Load the pickled dict that holds the train and test frames.
# NOTE(review): unpickling can execute arbitrary code, so this file must come
# from a trusted source.
with open("../input/tps-may-data-preprocess-v4/TPS_May_Dataset.txt", 'rb') as handle:
    # Deserialize straight from the handle so the raw file bytes are never
    # bound to a module-level name that would stay alive for the whole session.
    processed_data = pickle.load(handle)

train_df = processed_data['train_df']
test_df = processed_data['test_df']

# Drop the container dict; the two frames remain reachable by their own names.
del processed_data
gc.collect()

0

In [3]:
# The first 50 columns are the ones handed to LightGBM as categorical features.
cat_cols = train_df.columns[:50]

# Cast those columns to int in both frames.
for frame in (train_df, test_df):
    frame[cat_cols] = frame[cat_cols].astype(int)
# Do not keep an extra reference to test_df alive through the loop variable.
del frame

# Positional indices of the categorical columns within train_df.
cat_cols_indices = list(map(train_df.columns.get_loc, cat_cols))
print(cat_cols_indices)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]


In [4]:
# Split the training frame into a feature matrix and labels (integer-style and
# one-hot), and turn the test frame into a plain array.
feature_mask = train_df.columns != 'target'
target = train_df['target']

Xtrain = train_df.loc[:, feature_mask].values
Ytrain = target.values
Ytrain_oh = pd.get_dummies(target).values
Xtest = test_df.values

shapes = (Xtrain.shape, Ytrain.shape, Ytrain_oh.shape, Xtest.shape)
print("Xtrain: {} \nYtrain: {} \nYtrain_oh: {} \nXtest: {}".format(*shapes))

# Only the arrays are needed from here on; release the frames and the
# temporaries that still reference them.
del feature_mask, target, shapes
del train_df
del test_df
gc.collect()

Xtrain: (99918, 372) 
Ytrain: (99918,) 
Ytrain_oh: (99918, 4) 
Xtest: (50000, 372)


20

## Build and validate the model

In [5]:
# LightGBM training configuration, passed as-is to lgb.train().
params = {
    "objective": 'multiclass',
    'metric': 'multi_logloss',
    'boosting': 'gbdt',
    # "device_type": 'gpu',
    'num_class': 4,
    # NOTE(review): the LightGBM parameter docs describe is_unbalance as used
    # only in binary and multiclassova applications — confirm it has any effect
    # with the 'multiclass' objective.
    'is_unbalance': True,
    "learning_rate": 0.02,
    "lambda_l2": 0.0256,
    "num_leaves": 52,
    # "max_depth": 10,
    "feature_fraction": 0.503,
    "bagging_fraction": 0.6741,
    "bagging_freq": 8,
    "bagging_seed": 10,
    "min_data_in_leaf": 10,
    "verbosity": -1,
}

# Upper bound on boosting rounds passed to lgb.train().
num_rounds = 5000

In [6]:
# Repeated stratified K-fold training: for each seed, split the training data
# into FOLD folds, fit one LightGBM model per fold, and accumulate
# out-of-fold (OOF) predictions, test predictions and log-loss scores.
FOLD = 10
NUM_SEED = 3
# Width of the prediction arrays; taken from the model config so the two
# cannot drift apart.
NUM_CLASS = params['num_class']

# Prediction Clipping Thresholds
p_min = 0.025
p_max = 1 - p_min

# Fixed global seed so the list of split seeds is reproducible.
np.random.seed(2021)
seeds = np.random.randint(0, 100, size=NUM_SEED)

oof_score = 0
y_pred_meta_lgb = np.zeros((Ytrain.shape[0], NUM_CLASS))   # summed OOF predictions
y_pred_final_lgb = np.zeros((Xtest.shape[0], NUM_CLASS))   # summed test predictions
counter = 0                                                # number of models trained


# NOTE(review): `seed` only drives the fold split below; `params` is passed to
# lgb.train() unchanged, so the LightGBM-side random settings are identical
# for every seed — confirm that is intended.
for seed in seeds:
    seed_score = 0

    kfold = StratifiedKFold(n_splits=FOLD, shuffle=True, random_state=seed)

    for idx, (train, val) in enumerate(kfold.split(Xtrain, Ytrain)):
        counter += 1

        train_x, train_y = Xtrain[train], Ytrain[train]
        val_x, val_y, val_y_oh = Xtrain[val], Ytrain[val], Ytrain_oh[val]

        lgtrain = lgb.Dataset(train_x, label=train_y.ravel())
        lgvalidation = lgb.Dataset(val_x, label=val_y.ravel())

        model = lgb.train(params, lgtrain, num_rounds, 
                          valid_sets=[lgtrain, lgvalidation], 
                          categorical_feature=cat_cols_indices,
                          early_stopping_rounds=200, verbose_eval=100)

        # Validation predictions are clipped before being stored and scored.
        y_pred = model.predict(val_x, num_iteration=model.best_iteration)
        y_pred = np.clip(y_pred, p_min, p_max)
        y_pred_meta_lgb[val] += y_pred
        # Test predictions are accumulated unclipped here.
        y_pred_final_lgb += model.predict(Xtest, num_iteration=model.best_iteration)
        
        score = log_loss(val_y_oh, y_pred)
        oof_score += score
        seed_score += score
        print("Seed-{} | Fold-{} | OOF Score: {}".format(seed, idx, score))
    
    print("\nSeed: {} | Aggregate OOF Score: {}\n\n".format(seed, (seed_score / FOLD)))


# Each training row lands in exactly one validation fold per seed, so the OOF
# sum is averaged over seeds; test predictions and scores are averaged over
# every trained model.
y_pred_meta_lgb = y_pred_meta_lgb / float(NUM_SEED)
y_pred_final_lgb = y_pred_final_lgb / float(counter)
oof_score /= float(counter)
print("Aggregate OOF Score: {}".format(oof_score))

New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.0524	valid_1's multi_logloss: 1.0981
[200]	training's multi_logloss: 1.00862	valid_1's multi_logloss: 1.09456
[300]	training's multi_logloss: 0.97269	valid_1's multi_logloss: 1.09465
[400]	training's multi_logloss: 0.940728	valid_1's multi_logloss: 1.09616
Early stopping, best iteration is:
[263]	training's multi_logloss: 0.985365	valid_1's multi_logloss: 1.09403
Seed-85 | Fold-0 | OOF Score: 1.094029800025264


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.0528	valid_1's multi_logloss: 1.09616
[200]	training's multi_logloss: 1.00928	valid_1's multi_logloss: 1.09178
[300]	training's multi_logloss: 0.973459	valid_1's multi_logloss: 1.09097
[400]	training's multi_logloss: 0.941566	valid_1's multi_logloss: 1.09133
Early stopping, best iteration is:
[254]	training's multi_logloss: 0.98929	valid_1's multi_logloss: 1.09071
Seed-85 | Fold-1 | OOF Score: 1.090708115206345


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.05274	valid_1's multi_logloss: 1.09653
[200]	training's multi_logloss: 1.00878	valid_1's multi_logloss: 1.09284
[300]	training's multi_logloss: 0.972673	valid_1's multi_logloss: 1.09256
[400]	training's multi_logloss: 0.94072	valid_1's multi_logloss: 1.0937
Early stopping, best iteration is:
[247]	training's multi_logloss: 0.991131	valid_1's multi_logloss: 1.09236
Seed-85 | Fold-2 | OOF Score: 1.0923594896473314


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.05232	valid_1's multi_logloss: 1.09713
[200]	training's multi_logloss: 1.00843	valid_1's multi_logloss: 1.09305
[300]	training's multi_logloss: 0.972441	valid_1's multi_logloss: 1.09285
[400]	training's multi_logloss: 0.940642	valid_1's multi_logloss: 1.09348
Early stopping, best iteration is:
[275]	training's multi_logloss: 0.981003	valid_1's multi_logloss: 1.09236
Seed-85 | Fold-3 | OOF Score: 1.0923605643856613


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.05256	valid_1's multi_logloss: 1.09795
[200]	training's multi_logloss: 1.00872	valid_1's multi_logloss: 1.09394
[300]	training's multi_logloss: 0.972807	valid_1's multi_logloss: 1.09357
[400]	training's multi_logloss: 0.941047	valid_1's multi_logloss: 1.09435
[500]	training's multi_logloss: 0.911559	valid_1's multi_logloss: 1.09481
Early stopping, best iteration is:
[313]	training's multi_logloss: 0.968509	valid_1's multi_logloss: 1.09338
Seed-85 | Fold-4 | OOF Score: 1.093383274247526


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.05236	valid_1's multi_logloss: 1.09825
[200]	training's multi_logloss: 1.00858	valid_1's multi_logloss: 1.0954
[300]	training's multi_logloss: 0.972766	valid_1's multi_logloss: 1.09591
[400]	training's multi_logloss: 0.940728	valid_1's multi_logloss: 1.09682
Early stopping, best iteration is:
[213]	training's multi_logloss: 1.00364	valid_1's multi_logloss: 1.09534
Seed-85 | Fold-5 | OOF Score: 1.0953418528385301


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.05246	valid_1's multi_logloss: 1.09725
[200]	training's multi_logloss: 1.00879	valid_1's multi_logloss: 1.0921
[300]	training's multi_logloss: 0.972797	valid_1's multi_logloss: 1.09095
[400]	training's multi_logloss: 0.940754	valid_1's multi_logloss: 1.09126
Early stopping, best iteration is:
[281]	training's multi_logloss: 0.979249	valid_1's multi_logloss: 1.09062
Seed-85 | Fold-6 | OOF Score: 1.0906179952281418


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.05265	valid_1's multi_logloss: 1.09855
[200]	training's multi_logloss: 1.00921	valid_1's multi_logloss: 1.09461
[300]	training's multi_logloss: 0.97311	valid_1's multi_logloss: 1.09439
[400]	training's multi_logloss: 0.94152	valid_1's multi_logloss: 1.09507
Early stopping, best iteration is:
[242]	training's multi_logloss: 0.993401	valid_1's multi_logloss: 1.09413
Seed-85 | Fold-7 | OOF Score: 1.0941289557975358


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.05252	valid_1's multi_logloss: 1.09723
[200]	training's multi_logloss: 1.00878	valid_1's multi_logloss: 1.09287
[300]	training's multi_logloss: 0.972873	valid_1's multi_logloss: 1.09185
[400]	training's multi_logloss: 0.940882	valid_1's multi_logloss: 1.0918
[500]	training's multi_logloss: 0.911645	valid_1's multi_logloss: 1.09286
Early stopping, best iteration is:
[352]	training's multi_logloss: 0.955929	valid_1's multi_logloss: 1.09159
Seed-85 | Fold-8 | OOF Score: 1.0915880632072237


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.05266	valid_1's multi_logloss: 1.09667
[200]	training's multi_logloss: 1.00904	valid_1's multi_logloss: 1.09248
[300]	training's multi_logloss: 0.973107	valid_1's multi_logloss: 1.09182
[400]	training's multi_logloss: 0.941114	valid_1's multi_logloss: 1.09286
[500]	training's multi_logloss: 0.911769	valid_1's multi_logloss: 1.09387
Early stopping, best iteration is:
[302]	training's multi_logloss: 0.972416	valid_1's multi_logloss: 1.09178
Seed-85 | Fold-9 | OOF Score: 1.0917831100682487

Seed: 85 | Aggregate OOF Score: 1.0926301220651808




New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.05286	valid_1's multi_logloss: 1.09666
[200]	training's multi_logloss: 1.00903	valid_1's multi_logloss: 1.09246
[300]	training's multi_logloss: 0.972863	valid_1's multi_logloss: 1.09161
[400]	training's multi_logloss: 0.941262	valid_1's multi_logloss: 1.09151
[500]	training's multi_logloss: 0.911861	valid_1's multi_logloss: 1.09244
Early stopping, best iteration is:
[380]	training's multi_logloss: 0.947349	valid_1's multi_logloss: 1.09143
Seed-57 | Fold-0 | OOF Score: 1.0914267480371018


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.05263	valid_1's multi_logloss: 1.09564
[200]	training's multi_logloss: 1.00915	valid_1's multi_logloss: 1.09082
[300]	training's multi_logloss: 0.973026	valid_1's multi_logloss: 1.0914
[400]	training's multi_logloss: 0.941141	valid_1's multi_logloss: 1.09167
Early stopping, best iteration is:
[200]	training's multi_logloss: 1.00915	valid_1's multi_logloss: 1.09082
Seed-57 | Fold-1 | OOF Score: 1.0908183507168343


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.05256	valid_1's multi_logloss: 1.0979
[200]	training's multi_logloss: 1.00881	valid_1's multi_logloss: 1.09387
[300]	training's multi_logloss: 0.972752	valid_1's multi_logloss: 1.09349
[400]	training's multi_logloss: 0.940631	valid_1's multi_logloss: 1.09448
Early stopping, best iteration is:
[277]	training's multi_logloss: 0.98066	valid_1's multi_logloss: 1.09343
Seed-57 | Fold-2 | OOF Score: 1.0934277885892356


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.0531	valid_1's multi_logloss: 1.09713
[200]	training's multi_logloss: 1.00954	valid_1's multi_logloss: 1.09256
[300]	training's multi_logloss: 0.973248	valid_1's multi_logloss: 1.09232
[400]	training's multi_logloss: 0.94137	valid_1's multi_logloss: 1.09292
Early stopping, best iteration is:
[277]	training's multi_logloss: 0.981266	valid_1's multi_logloss: 1.09219
Seed-57 | Fold-3 | OOF Score: 1.092193416725403


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.05274	valid_1's multi_logloss: 1.10017
[200]	training's multi_logloss: 1.00893	valid_1's multi_logloss: 1.09638
[300]	training's multi_logloss: 0.972578	valid_1's multi_logloss: 1.09616
[400]	training's multi_logloss: 0.940351	valid_1's multi_logloss: 1.09703
Early stopping, best iteration is:
[284]	training's multi_logloss: 0.978074	valid_1's multi_logloss: 1.09602
Seed-57 | Fold-4 | OOF Score: 1.0960233739702143


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.05232	valid_1's multi_logloss: 1.09832
[200]	training's multi_logloss: 1.00856	valid_1's multi_logloss: 1.09461
[300]	training's multi_logloss: 0.972719	valid_1's multi_logloss: 1.09461
[400]	training's multi_logloss: 0.940715	valid_1's multi_logloss: 1.09509
Early stopping, best iteration is:
[252]	training's multi_logloss: 0.989386	valid_1's multi_logloss: 1.09424
Seed-57 | Fold-5 | OOF Score: 1.0942434460071895


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.05279	valid_1's multi_logloss: 1.09555
[200]	training's multi_logloss: 1.0091	valid_1's multi_logloss: 1.09044
[300]	training's multi_logloss: 0.973034	valid_1's multi_logloss: 1.08993
[400]	training's multi_logloss: 0.941125	valid_1's multi_logloss: 1.09048
[500]	training's multi_logloss: 0.911676	valid_1's multi_logloss: 1.09168
Early stopping, best iteration is:
[314]	training's multi_logloss: 0.968413	valid_1's multi_logloss: 1.0898
Seed-57 | Fold-6 | OOF Score: 1.0898031108412773


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.05258	valid_1's multi_logloss: 1.09776
[200]	training's multi_logloss: 1.00883	valid_1's multi_logloss: 1.09354
[300]	training's multi_logloss: 0.972818	valid_1's multi_logloss: 1.0931
[400]	training's multi_logloss: 0.94113	valid_1's multi_logloss: 1.09371
Early stopping, best iteration is:
[287]	training's multi_logloss: 0.977262	valid_1's multi_logloss: 1.09296
Seed-57 | Fold-7 | OOF Score: 1.092956534786265


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.05211	valid_1's multi_logloss: 1.09938
[200]	training's multi_logloss: 1.0083	valid_1's multi_logloss: 1.09603
[300]	training's multi_logloss: 0.972113	valid_1's multi_logloss: 1.0958
[400]	training's multi_logloss: 0.940442	valid_1's multi_logloss: 1.09699
Early stopping, best iteration is:
[299]	training's multi_logloss: 0.972476	valid_1's multi_logloss: 1.09576
Seed-57 | Fold-8 | OOF Score: 1.0957598868249763


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.05223	valid_1's multi_logloss: 1.09847
[200]	training's multi_logloss: 1.00847	valid_1's multi_logloss: 1.09512
[300]	training's multi_logloss: 0.972261	valid_1's multi_logloss: 1.09567
[400]	training's multi_logloss: 0.940094	valid_1's multi_logloss: 1.09621
Early stopping, best iteration is:
[240]	training's multi_logloss: 0.993389	valid_1's multi_logloss: 1.09491
Seed-57 | Fold-9 | OOF Score: 1.0949145778013785

Seed: 57 | Aggregate OOF Score: 1.0931567234299877




New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.05239	valid_1's multi_logloss: 1.09875
[200]	training's multi_logloss: 1.00866	valid_1's multi_logloss: 1.09552
[300]	training's multi_logloss: 0.972736	valid_1's multi_logloss: 1.09597
[400]	training's multi_logloss: 0.940867	valid_1's multi_logloss: 1.09643
Early stopping, best iteration is:
[247]	training's multi_logloss: 0.991184	valid_1's multi_logloss: 1.09514
Seed-0 | Fold-0 | OOF Score: 1.0951388697496454


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.05282	valid_1's multi_logloss: 1.09665
[200]	training's multi_logloss: 1.00902	valid_1's multi_logloss: 1.09258
[300]	training's multi_logloss: 0.972806	valid_1's multi_logloss: 1.0918
[400]	training's multi_logloss: 0.94076	valid_1's multi_logloss: 1.09215
[500]	training's multi_logloss: 0.911258	valid_1's multi_logloss: 1.09288
Early stopping, best iteration is:
[311]	training's multi_logloss: 0.969081	valid_1's multi_logloss: 1.09168
Seed-0 | Fold-1 | OOF Score: 1.091675028202224


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.05231	valid_1's multi_logloss: 1.09818
[200]	training's multi_logloss: 1.00832	valid_1's multi_logloss: 1.09384
[300]	training's multi_logloss: 0.972439	valid_1's multi_logloss: 1.09327
[400]	training's multi_logloss: 0.940714	valid_1's multi_logloss: 1.09349
Early stopping, best iteration is:
[272]	training's multi_logloss: 0.981921	valid_1's multi_logloss: 1.09308
Seed-0 | Fold-2 | OOF Score: 1.0930783238525374


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.05263	valid_1's multi_logloss: 1.09628
[200]	training's multi_logloss: 1.00921	valid_1's multi_logloss: 1.09211
[300]	training's multi_logloss: 0.973123	valid_1's multi_logloss: 1.09148
[400]	training's multi_logloss: 0.941223	valid_1's multi_logloss: 1.09203
[500]	training's multi_logloss: 0.911965	valid_1's multi_logloss: 1.09279
Early stopping, best iteration is:
[309]	training's multi_logloss: 0.970138	valid_1's multi_logloss: 1.09135
Seed-0 | Fold-3 | OOF Score: 1.0913460279594038


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.05276	valid_1's multi_logloss: 1.09466
[200]	training's multi_logloss: 1.00903	valid_1's multi_logloss: 1.08941
[300]	training's multi_logloss: 0.97322	valid_1's multi_logloss: 1.08893
[400]	training's multi_logloss: 0.941302	valid_1's multi_logloss: 1.08942
Early stopping, best iteration is:
[268]	training's multi_logloss: 0.984094	valid_1's multi_logloss: 1.0888
Seed-0 | Fold-4 | OOF Score: 1.0888022237292716


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.05266	valid_1's multi_logloss: 1.09711
[200]	training's multi_logloss: 1.00903	valid_1's multi_logloss: 1.09248
[300]	training's multi_logloss: 0.972892	valid_1's multi_logloss: 1.09197
[400]	training's multi_logloss: 0.940967	valid_1's multi_logloss: 1.09269
Early stopping, best iteration is:
[252]	training's multi_logloss: 0.989719	valid_1's multi_logloss: 1.09177
Seed-0 | Fold-5 | OOF Score: 1.0917736681682542


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.05265	valid_1's multi_logloss: 1.09807
[200]	training's multi_logloss: 1.00895	valid_1's multi_logloss: 1.09407
[300]	training's multi_logloss: 0.973185	valid_1's multi_logloss: 1.09354
[400]	training's multi_logloss: 0.941545	valid_1's multi_logloss: 1.09383
Early stopping, best iteration is:
[277]	training's multi_logloss: 0.980911	valid_1's multi_logloss: 1.09336
Seed-0 | Fold-6 | OOF Score: 1.0933608289881234


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.05253	valid_1's multi_logloss: 1.09744
[200]	training's multi_logloss: 1.00868	valid_1's multi_logloss: 1.09344
[300]	training's multi_logloss: 0.972779	valid_1's multi_logloss: 1.09353
[400]	training's multi_logloss: 0.940598	valid_1's multi_logloss: 1.09494
Early stopping, best iteration is:
[262]	training's multi_logloss: 0.985871	valid_1's multi_logloss: 1.09305
Seed-0 | Fold-7 | OOF Score: 1.0930540922486116


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.05247	valid_1's multi_logloss: 1.0972
[200]	training's multi_logloss: 1.00852	valid_1's multi_logloss: 1.09416
[300]	training's multi_logloss: 0.972463	valid_1's multi_logloss: 1.09398
[400]	training's multi_logloss: 0.941014	valid_1's multi_logloss: 1.0946
Early stopping, best iteration is:
[278]	training's multi_logloss: 0.979954	valid_1's multi_logloss: 1.09376
Seed-0 | Fold-8 | OOF Score: 1.09375771488981


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.05238	valid_1's multi_logloss: 1.09903
[200]	training's multi_logloss: 1.00869	valid_1's multi_logloss: 1.09491
[300]	training's multi_logloss: 0.972821	valid_1's multi_logloss: 1.09503
[400]	training's multi_logloss: 0.940775	valid_1's multi_logloss: 1.09591
Early stopping, best iteration is:
[253]	training's multi_logloss: 0.989076	valid_1's multi_logloss: 1.09464
Seed-0 | Fold-9 | OOF Score: 1.094638207466955

Seed: 0 | Aggregate OOF Score: 1.0926624985254836


Aggregate OOF Score: 1.0928164480068843


In [7]:
# Persist the averaged OOF predictions, the aggregate OOF score and the
# averaged test predictions in one compressed archive.
meta_artifacts = {
    "y_pred_meta_lgb": y_pred_meta_lgb,
    "oof_score": oof_score,
    "y_pred_final_lgb": y_pred_final_lgb,
}
np.savez_compressed('./LGB_Meta_Features.npz', **meta_artifacts)

## Create submission file

In [8]:
# Clip the averaged test predictions to [p_min, p_max] before writing them out.
y_pred_final_lgb = np.clip(y_pred_final_lgb, p_min, p_max)

# The raw test file is read again to recover the `id` column for the submission.
test_df = pd.read_csv("../input/tabular-playground-series-may-2021/test.csv")
submit_df = pd.DataFrame({'id': test_df['id']})

# One output column per predicted class, in prediction-column order.
for class_idx, class_col in enumerate(['Class_1', 'Class_2', 'Class_3', 'Class_4']):
    submit_df[class_col] = y_pred_final_lgb[:, class_idx]

submit_df.head()

Unnamed: 0,id,Class_1,Class_2,Class_3,Class_4
0,100000,0.093238,0.608216,0.181194,0.117353
1,100001,0.065417,0.641462,0.182596,0.110524
2,100002,0.06334,0.68632,0.158017,0.092323
3,100003,0.082202,0.55377,0.274679,0.089348
4,100004,0.068181,0.658764,0.178083,0.094972


In [9]:
# Write the submission file without the DataFrame index column.
submit_df.to_csv(path_or_buf="./LGB_submission.csv", index=False)