## Import libraries

In [1]:
import gc
import pickle
import numpy as np
import pandas as pd
import lightgbm as lgb
from sklearn.metrics import log_loss
from sklearn.model_selection import StratifiedKFold

## Prepare data for model training

In [2]:
with open("../input/tps-may-data-preprocess-v3/TPS_May_Dataset_w_Org.txt", 'rb') as handle: 
    data = handle.read()

processed_data = pickle.loads(data)
train_df = processed_data['train_df']
test_df = processed_data['test_df']

del processed_data
gc.collect()

0

In [3]:
cat_cols = train_df.iloc[:,0:50].columns
train_df[cat_cols] = train_df[cat_cols].astype(int)
test_df[cat_cols] = test_df[cat_cols].astype(int)
cat_cols_indices = [train_df.columns.get_loc(col) for col in cat_cols]
print(cat_cols_indices)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]


In [4]:
Xtrain = train_df.loc[:, train_df.columns != 'target'].copy()
Ytrain = train_df['target'].copy()
Ytrain_oh = pd.get_dummies(train_df['target']).copy()
Xtest = test_df.copy()

print("Xtrain: {} \nYtrain: {} \nYtrain_oh: {} \nXtest: {}".format(Xtrain.shape, Ytrain.shape, 
                                                                   Ytrain_oh.shape, Xtest.shape))

del train_df
del test_df
gc.collect()

Xtrain: (99918, 1074) 
Ytrain: (99918,) 
Ytrain_oh: (99918, 4) 
Xtest: (50000, 1074)


20

## Build and validate the model

In [5]:
params = {}
params["objective"] = 'multiclass'
params['metric'] = 'multi_logloss'
params['boosting'] = 'gbdt'
#params["device_type"] = 'gpu'
params['num_class'] = 4
params['is_unbalance'] = True
params["learning_rate"] = 0.0193
params["lambda_l2"] = 0.042
params["num_leaves"] = 56
#params["max_depth"] = 6
params["feature_fraction"] = 0.6
params["bagging_fraction"] = 0.8
params["bagging_freq"] = 3
params["bagging_seed"] = 10
params["min_data_in_leaf"] = 10
params["verbosity"] = -1
num_rounds = 5000

In [6]:
FOLD = 10
NUM_SEED = 3

# Prediction Clipping Thresholds
p_min = 0.025
p_max = 1 - p_min

np.random.seed(2021)
seeds = np.random.randint(0, 100, size=NUM_SEED)

oof_score = 0
y_pred_meta_lgb = np.zeros((Ytrain.shape[0], 4))
y_pred_final_lgb = np.zeros((Xtest.shape[0], 4))
counter = 0


for sidx, seed in enumerate(seeds):
    seed_score = 0
    
    kfold = StratifiedKFold(n_splits=FOLD, shuffle=True, random_state=seed)

    for idx, (train, val) in enumerate(kfold.split(Xtrain.values, Ytrain.values)):
        counter += 1

        train_x, train_y, train_y_oh = Xtrain.iloc[train], Ytrain.iloc[train], Ytrain_oh.iloc[train]
        val_x, val_y, val_y_oh = Xtrain.iloc[val], Ytrain.iloc[val], Ytrain_oh.iloc[val]

        lgtrain = lgb.Dataset(train_x, label=train_y)
        lgvalidation = lgb.Dataset(val_x, label=val_y)

        model = lgb.train(params, lgtrain, num_rounds, 
                          valid_sets=[lgtrain, lgvalidation], 
                          categorical_feature=cat_cols_indices,
                          early_stopping_rounds=200, verbose_eval=100)

        y_pred = model.predict(val_x, num_iteration=model.best_iteration)
        y_pred = np.clip(y_pred, p_min, p_max)
        y_pred_meta_lgb[val] += y_pred
        y_pred_final_lgb += model.predict(Xtest, num_iteration=model.best_iteration)
        
        score = log_loss(val_y_oh, y_pred)
        oof_score += score
        seed_score += score
        print("Seed-{} | Fold-{} | OOF Score: {}".format(seed, idx, score))
    
    print("\nSeed: {} | Aggregate OOF Score: {}\n\n".format(seed, (seed_score / FOLD)))


y_pred_meta_lgb = y_pred_meta_lgb / float(NUM_SEED)
y_pred_final_lgb = y_pred_final_lgb / float(counter)
oof_score /= float(counter)
print("Aggregate OOF Score: {}".format(oof_score))

New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.04618	valid_1's multi_logloss: 1.09825
[200]	training's multi_logloss: 0.996997	valid_1's multi_logloss: 1.09481
[300]	training's multi_logloss: 0.956169	valid_1's multi_logloss: 1.09468
[400]	training's multi_logloss: 0.920254	valid_1's multi_logloss: 1.09558
Early stopping, best iteration is:
[257]	training's multi_logloss: 0.972973	valid_1's multi_logloss: 1.09445
Seed-85 | Fold-0 | OOF Score: 1.0944513072425188


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.04666	valid_1's multi_logloss: 1.09563
[200]	training's multi_logloss: 0.997421	valid_1's multi_logloss: 1.0912
[300]	training's multi_logloss: 0.956704	valid_1's multi_logloss: 1.0908
[400]	training's multi_logloss: 0.920978	valid_1's multi_logloss: 1.09093
Early stopping, best iteration is:
[255]	training's multi_logloss: 0.974265	valid_1's multi_logloss: 1.09055
Seed-85 | Fold-1 | OOF Score: 1.09054889357444


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.04664	valid_1's multi_logloss: 1.09669
[200]	training's multi_logloss: 0.997604	valid_1's multi_logloss: 1.0931
[300]	training's multi_logloss: 0.957135	valid_1's multi_logloss: 1.09264
[400]	training's multi_logloss: 0.921328	valid_1's multi_logloss: 1.09359
Early stopping, best iteration is:
[296]	training's multi_logloss: 0.958592	valid_1's multi_logloss: 1.09256
Seed-85 | Fold-2 | OOF Score: 1.0925596089832446


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.0465	valid_1's multi_logloss: 1.09733
[200]	training's multi_logloss: 0.997384	valid_1's multi_logloss: 1.09347
[300]	training's multi_logloss: 0.956649	valid_1's multi_logloss: 1.09315
[400]	training's multi_logloss: 0.921107	valid_1's multi_logloss: 1.09328
Early stopping, best iteration is:
[275]	training's multi_logloss: 0.966179	valid_1's multi_logloss: 1.09266
Seed-85 | Fold-3 | OOF Score: 1.0926623101859543


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.0466	valid_1's multi_logloss: 1.09793
[200]	training's multi_logloss: 0.997388	valid_1's multi_logloss: 1.09418
[300]	training's multi_logloss: 0.956566	valid_1's multi_logloss: 1.09383
[400]	training's multi_logloss: 0.920821	valid_1's multi_logloss: 1.09446
Early stopping, best iteration is:
[285]	training's multi_logloss: 0.962415	valid_1's multi_logloss: 1.09377
Seed-85 | Fold-4 | OOF Score: 1.0937666642314794


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.04626	valid_1's multi_logloss: 1.09953
[200]	training's multi_logloss: 0.996882	valid_1's multi_logloss: 1.09622
[300]	training's multi_logloss: 0.956393	valid_1's multi_logloss: 1.09644
[400]	training's multi_logloss: 0.920731	valid_1's multi_logloss: 1.09667
Early stopping, best iteration is:
[248]	training's multi_logloss: 0.976607	valid_1's multi_logloss: 1.09598
Seed-85 | Fold-5 | OOF Score: 1.0959771397663083


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.04649	valid_1's multi_logloss: 1.09693
[200]	training's multi_logloss: 0.997374	valid_1's multi_logloss: 1.09282
[300]	training's multi_logloss: 0.956854	valid_1's multi_logloss: 1.09202
[400]	training's multi_logloss: 0.921092	valid_1's multi_logloss: 1.09219
[500]	training's multi_logloss: 0.888595	valid_1's multi_logloss: 1.09271
Early stopping, best iteration is:
[311]	training's multi_logloss: 0.952727	valid_1's multi_logloss: 1.09191
Seed-85 | Fold-6 | OOF Score: 1.0919092770719288


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.04654	valid_1's multi_logloss: 1.09941
[200]	training's multi_logloss: 0.997621	valid_1's multi_logloss: 1.09598
[300]	training's multi_logloss: 0.957195	valid_1's multi_logloss: 1.09589
[400]	training's multi_logloss: 0.921263	valid_1's multi_logloss: 1.09661
Early stopping, best iteration is:
[270]	training's multi_logloss: 0.968664	valid_1's multi_logloss: 1.09566
Seed-85 | Fold-7 | OOF Score: 1.09565574199863


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.04655	valid_1's multi_logloss: 1.09855
[200]	training's multi_logloss: 0.997419	valid_1's multi_logloss: 1.09407
[300]	training's multi_logloss: 0.95679	valid_1's multi_logloss: 1.09346
[400]	training's multi_logloss: 0.920981	valid_1's multi_logloss: 1.09398
[500]	training's multi_logloss: 0.888377	valid_1's multi_logloss: 1.09454
Early stopping, best iteration is:
[327]	training's multi_logloss: 0.946656	valid_1's multi_logloss: 1.09332
Seed-85 | Fold-8 | OOF Score: 1.0933173123112785


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.04653	valid_1's multi_logloss: 1.09693
[200]	training's multi_logloss: 0.997418	valid_1's multi_logloss: 1.09362
[300]	training's multi_logloss: 0.956859	valid_1's multi_logloss: 1.0936
[400]	training's multi_logloss: 0.921213	valid_1's multi_logloss: 1.09422
Early stopping, best iteration is:
[242]	training's multi_logloss: 0.979562	valid_1's multi_logloss: 1.09316
Seed-85 | Fold-9 | OOF Score: 1.093156928416168

Seed: 85 | Aggregate OOF Score: 1.093400518378195




New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.04673	valid_1's multi_logloss: 1.09726
[200]	training's multi_logloss: 0.997496	valid_1's multi_logloss: 1.09285
[300]	training's multi_logloss: 0.956907	valid_1's multi_logloss: 1.092
[400]	training's multi_logloss: 0.921451	valid_1's multi_logloss: 1.09272
Early stopping, best iteration is:
[299]	training's multi_logloss: 0.957285	valid_1's multi_logloss: 1.092
Seed-57 | Fold-0 | OOF Score: 1.091999867678061


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.04686	valid_1's multi_logloss: 1.09525
[200]	training's multi_logloss: 0.997917	valid_1's multi_logloss: 1.08982
[300]	training's multi_logloss: 0.957403	valid_1's multi_logloss: 1.08917
[400]	training's multi_logloss: 0.921739	valid_1's multi_logloss: 1.08965
[500]	training's multi_logloss: 0.889019	valid_1's multi_logloss: 1.09033
Early stopping, best iteration is:
[331]	training's multi_logloss: 0.945999	valid_1's multi_logloss: 1.08903
Seed-57 | Fold-1 | OOF Score: 1.0890310768729328


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.04654	valid_1's multi_logloss: 1.09746
[200]	training's multi_logloss: 0.997502	valid_1's multi_logloss: 1.094
[300]	training's multi_logloss: 0.957042	valid_1's multi_logloss: 1.09439
[400]	training's multi_logloss: 0.921264	valid_1's multi_logloss: 1.09503
Early stopping, best iteration is:
[222]	training's multi_logloss: 0.988026	valid_1's multi_logloss: 1.09388
Seed-57 | Fold-2 | OOF Score: 1.093883293655829


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.04683	valid_1's multi_logloss: 1.09642
[200]	training's multi_logloss: 0.997716	valid_1's multi_logloss: 1.09209
[300]	training's multi_logloss: 0.956955	valid_1's multi_logloss: 1.09147
[400]	training's multi_logloss: 0.92111	valid_1's multi_logloss: 1.09207
Early stopping, best iteration is:
[281]	training's multi_logloss: 0.964345	valid_1's multi_logloss: 1.09129
Seed-57 | Fold-3 | OOF Score: 1.0912926820151678


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.04635	valid_1's multi_logloss: 1.09923
[200]	training's multi_logloss: 0.997131	valid_1's multi_logloss: 1.09576
[300]	training's multi_logloss: 0.95635	valid_1's multi_logloss: 1.09558
[400]	training's multi_logloss: 0.920366	valid_1's multi_logloss: 1.09637
Early stopping, best iteration is:
[228]	training's multi_logloss: 0.985177	valid_1's multi_logloss: 1.09533
Seed-57 | Fold-4 | OOF Score: 1.0953259143685725


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.04629	valid_1's multi_logloss: 1.09864
[200]	training's multi_logloss: 0.997134	valid_1's multi_logloss: 1.09483
[300]	training's multi_logloss: 0.956617	valid_1's multi_logloss: 1.09414
[400]	training's multi_logloss: 0.920896	valid_1's multi_logloss: 1.09492
Early stopping, best iteration is:
[271]	training's multi_logloss: 0.967737	valid_1's multi_logloss: 1.09409
Seed-57 | Fold-5 | OOF Score: 1.094090391858606


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.04678	valid_1's multi_logloss: 1.09566
[200]	training's multi_logloss: 0.997775	valid_1's multi_logloss: 1.09112
[300]	training's multi_logloss: 0.957417	valid_1's multi_logloss: 1.0899
[400]	training's multi_logloss: 0.921963	valid_1's multi_logloss: 1.0899
[500]	training's multi_logloss: 0.88913	valid_1's multi_logloss: 1.09026
Early stopping, best iteration is:
[334]	training's multi_logloss: 0.945012	valid_1's multi_logloss: 1.08959
Seed-57 | Fold-6 | OOF Score: 1.0895889930800537


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.04653	valid_1's multi_logloss: 1.09894
[200]	training's multi_logloss: 0.997281	valid_1's multi_logloss: 1.09443
[300]	training's multi_logloss: 0.956938	valid_1's multi_logloss: 1.09391
[400]	training's multi_logloss: 0.921128	valid_1's multi_logloss: 1.09407
Early stopping, best iteration is:
[265]	training's multi_logloss: 0.970407	valid_1's multi_logloss: 1.09367
Seed-57 | Fold-7 | OOF Score: 1.0936659115616723


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.04612	valid_1's multi_logloss: 1.0999
[200]	training's multi_logloss: 0.996883	valid_1's multi_logloss: 1.09698
[300]	training's multi_logloss: 0.956434	valid_1's multi_logloss: 1.09694
[400]	training's multi_logloss: 0.920542	valid_1's multi_logloss: 1.09761
Early stopping, best iteration is:
[257]	training's multi_logloss: 0.973075	valid_1's multi_logloss: 1.09668
Seed-57 | Fold-8 | OOF Score: 1.0966826470700368


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.04642	valid_1's multi_logloss: 1.09784
[200]	training's multi_logloss: 0.997371	valid_1's multi_logloss: 1.09421
[300]	training's multi_logloss: 0.956778	valid_1's multi_logloss: 1.09426
[400]	training's multi_logloss: 0.921009	valid_1's multi_logloss: 1.09476
Early stopping, best iteration is:
[268]	training's multi_logloss: 0.969045	valid_1's multi_logloss: 1.09393
Seed-57 | Fold-9 | OOF Score: 1.093932326549297

Seed: 57 | Aggregate OOF Score: 1.0929493104710228




New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.04616	valid_1's multi_logloss: 1.09855
[200]	training's multi_logloss: 0.997122	valid_1's multi_logloss: 1.09516
[300]	training's multi_logloss: 0.956523	valid_1's multi_logloss: 1.09537
[400]	training's multi_logloss: 0.920968	valid_1's multi_logloss: 1.09583
Early stopping, best iteration is:
[230]	training's multi_logloss: 0.984201	valid_1's multi_logloss: 1.09489
Seed-0 | Fold-0 | OOF Score: 1.0948864380331043


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.0466	valid_1's multi_logloss: 1.09666
[200]	training's multi_logloss: 0.997252	valid_1's multi_logloss: 1.09215
[300]	training's multi_logloss: 0.95649	valid_1's multi_logloss: 1.09173
[400]	training's multi_logloss: 0.920886	valid_1's multi_logloss: 1.09214
Early stopping, best iteration is:
[240]	training's multi_logloss: 0.980127	valid_1's multi_logloss: 1.0914
Seed-0 | Fold-1 | OOF Score: 1.091398887532671


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.04624	valid_1's multi_logloss: 1.09919
[200]	training's multi_logloss: 0.997016	valid_1's multi_logloss: 1.09554
[300]	training's multi_logloss: 0.95651	valid_1's multi_logloss: 1.09487
[400]	training's multi_logloss: 0.920813	valid_1's multi_logloss: 1.09532
Early stopping, best iteration is:
[299]	training's multi_logloss: 0.956855	valid_1's multi_logloss: 1.09484
Seed-0 | Fold-2 | OOF Score: 1.0948434428795089


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.04675	valid_1's multi_logloss: 1.09701
[200]	training's multi_logloss: 0.997393	valid_1's multi_logloss: 1.09325
[300]	training's multi_logloss: 0.957022	valid_1's multi_logloss: 1.09295
[400]	training's multi_logloss: 0.921272	valid_1's multi_logloss: 1.0935
Early stopping, best iteration is:
[232]	training's multi_logloss: 0.983678	valid_1's multi_logloss: 1.09281
Seed-0 | Fold-3 | OOF Score: 1.0928111572252335


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.04687	valid_1's multi_logloss: 1.09448
[200]	training's multi_logloss: 0.997725	valid_1's multi_logloss: 1.08975
[300]	training's multi_logloss: 0.957252	valid_1's multi_logloss: 1.08927
[400]	training's multi_logloss: 0.921779	valid_1's multi_logloss: 1.08988
Early stopping, best iteration is:
[258]	training's multi_logloss: 0.973394	valid_1's multi_logloss: 1.08911
Seed-0 | Fold-4 | OOF Score: 1.0891141820437666


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.04675	valid_1's multi_logloss: 1.09779
[200]	training's multi_logloss: 0.997659	valid_1's multi_logloss: 1.09322
[300]	training's multi_logloss: 0.956852	valid_1's multi_logloss: 1.09252
[400]	training's multi_logloss: 0.921345	valid_1's multi_logloss: 1.0929
Early stopping, best iteration is:
[282]	training's multi_logloss: 0.963753	valid_1's multi_logloss: 1.09237
Seed-0 | Fold-5 | OOF Score: 1.0923673247958297


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.04655	valid_1's multi_logloss: 1.0982
[200]	training's multi_logloss: 0.997579	valid_1's multi_logloss: 1.0941
[300]	training's multi_logloss: 0.957091	valid_1's multi_logloss: 1.0934
[400]	training's multi_logloss: 0.921246	valid_1's multi_logloss: 1.09403
Early stopping, best iteration is:
[283]	training's multi_logloss: 0.963631	valid_1's multi_logloss: 1.09336
Seed-0 | Fold-6 | OOF Score: 1.0933597050260027


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.04654	valid_1's multi_logloss: 1.09824
[200]	training's multi_logloss: 0.997277	valid_1's multi_logloss: 1.09434
[300]	training's multi_logloss: 0.956741	valid_1's multi_logloss: 1.09418
[400]	training's multi_logloss: 0.921051	valid_1's multi_logloss: 1.09472
Early stopping, best iteration is:
[275]	training's multi_logloss: 0.96632	valid_1's multi_logloss: 1.09391
Seed-0 | Fold-7 | OOF Score: 1.0939132531487494


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.04624	valid_1's multi_logloss: 1.09695
[200]	training's multi_logloss: 0.996972	valid_1's multi_logloss: 1.09333
[300]	training's multi_logloss: 0.956514	valid_1's multi_logloss: 1.09331
[400]	training's multi_logloss: 0.920919	valid_1's multi_logloss: 1.09327
Early stopping, best iteration is:
[250]	training's multi_logloss: 0.9759	valid_1's multi_logloss: 1.09296
Seed-0 | Fold-8 | OOF Score: 1.0929561151445348


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.04624	valid_1's multi_logloss: 1.09946
[200]	training's multi_logloss: 0.996996	valid_1's multi_logloss: 1.09605
[300]	training's multi_logloss: 0.956425	valid_1's multi_logloss: 1.09632
[400]	training's multi_logloss: 0.920959	valid_1's multi_logloss: 1.09709
Early stopping, best iteration is:
[221]	training's multi_logloss: 0.98788	valid_1's multi_logloss: 1.09589
Seed-0 | Fold-9 | OOF Score: 1.095892070366559

Seed: 0 | Aggregate OOF Score: 1.0931542576195958


Aggregate OOF Score: 1.0931680288229377


In [7]:
np.savez_compressed('./LGB_Meta_Features.npz',
                    y_pred_meta_lgb=y_pred_meta_lgb, 
                    oof_score=oof_score,
                    y_pred_final_lgb=y_pred_final_lgb)

## Create submission file

In [8]:
y_pred_final_lgb = np.clip(y_pred_final_lgb, p_min, p_max)

test_df = pd.read_csv("../input/tabular-playground-series-may-2021/test.csv")
submit_df = pd.DataFrame()
submit_df['id'] = test_df['id']
submit_df['Class_1'] = y_pred_final_lgb[:,0]
submit_df['Class_2'] = y_pred_final_lgb[:,1]
submit_df['Class_3'] = y_pred_final_lgb[:,2]
submit_df['Class_4'] = y_pred_final_lgb[:,3]
submit_df.head()

Unnamed: 0,id,Class_1,Class_2,Class_3,Class_4
0,100000,0.089598,0.592365,0.188934,0.129103
1,100001,0.065664,0.660605,0.171028,0.102703
2,100002,0.064476,0.704215,0.146463,0.084847
3,100003,0.080247,0.485205,0.346802,0.087746
4,100004,0.069971,0.654408,0.181412,0.094209


In [9]:
submit_df.to_csv("./LGB_submission.csv", index=False)