## Import libraries

In [1]:
import gc
import pickle
import numpy as np
import pandas as pd
import lightgbm as lgb
from sklearn.metrics import log_loss
from sklearn.model_selection import StratifiedKFold

## Prepare data for model training

In [2]:
# Load the preprocessed train/test frames from the pickled dataset file
# (presumably written by an upstream preprocessing step, per the input path).
# NOTE(review): unpickling can execute arbitrary code embedded in the file;
# only load this artifact from a trusted source.
with open("../input/tps-may-data-preprocess-v1-1/TPS_May_Dataset.txt", 'rb') as handle:
    # Deserialize straight from the file handle so the raw pickle bytes are
    # never bound to a notebook-level variable that would outlive this cell.
    processed_data = pickle.load(handle)

train_df = processed_data['train_df']
test_df = processed_data['test_df']

# Drop the container dict; train_df / test_df keep their own references.
del processed_data
gc.collect()

0

In [3]:
# Number of leading feature columns that are treated as categorical.
N_CAT_COLS = 50

# Resolve the categorical columns against the *feature* columns only
# ('target' excluded). The positional indices are consumed later against the
# feature matrix built without 'target', so computing them on the full frame
# would silently shift them if 'target' ever preceded a categorical column.
feature_cols = train_df.columns.drop('target')
cat_cols = feature_cols[:N_CAT_COLS]

# Cast the categorical columns to integers in both frames.
train_df[cat_cols] = train_df[cat_cols].astype(int)
test_df[cat_cols] = test_df[cat_cols].astype(int)

# Positional indices of the categorical columns within the feature matrix.
cat_cols_indices = [feature_cols.get_loc(col) for col in cat_cols]
print(cat_cols_indices)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]


In [4]:
# Split the training frame into a feature matrix and labels, and convert
# everything to NumPy arrays for model training.
Xtrain = train_df.drop(columns='target').values
Ytrain = train_df['target'].values
Ytrain_oh = pd.get_dummies(train_df['target']).values  # one-hot encoded labels
Xtest = test_df.values

# Report the array shapes as a quick sanity check.
print("Xtrain: {} \nYtrain: {} \nYtrain_oh: {} \nXtest: {}".format(
    *(arr.shape for arr in (Xtrain, Ytrain, Ytrain_oh, Xtest))
))

# The DataFrames are not needed once the arrays exist; release them.
del train_df, test_df
gc.collect()

Xtrain: (99918, 951) 
Ytrain: (99918,) 
Ytrain_oh: (99918, 4) 
Xtest: (50000, 951)


20

## Build and validate the model

In [5]:
# LightGBM training parameters for the 4-class model, declared in one literal.
params = {
    "objective": 'multiclass',
    'metric': 'multi_logloss',
    'boosting': 'gbdt',
    # "device_type": 'gpu',      # disabled
    'num_class': 4,
    # NOTE(review): LightGBM's parameter docs describe is_unbalance as used
    # only in binary and multiclassova applications -- confirm it has any
    # effect with objective='multiclass'.
    'is_unbalance': True,
    "learning_rate": 0.012,
    "lambda_l2": 0.0256,
    "num_leaves": 52,
    # "max_depth": 10,           # disabled
    "feature_fraction": 0.503,
    "bagging_fraction": 0.6741,
    "bagging_freq": 8,
    "bagging_seed": 10,
    "min_data_in_leaf": 10,
    "verbosity": -1,
}

# Upper bound on boosting rounds passed to the trainer.
num_rounds = 5000

In [6]:
# Repeated stratified K-fold training of the LightGBM model.
#
# For every seed a fresh StratifiedKFold split is drawn; each fold trains a
# model with early stopping on the held-out part, then contributes
#   * out-of-fold (OOF) probabilities to `y_pred_meta_lgb`, and
#   * test-set probabilities to `y_pred_final_lgb`.
# Both arrays accumulate *raw* model probabilities so the train-side and
# test-side meta-features saved later are processed identically; clipping is
# applied only to the copy that is scored.
FOLD = 10
NUM_SEED = 3

# Prediction clipping thresholds (used for scoring here).
p_min = 0.025
p_max = 1 - p_min

# Draw the per-repeat seeds reproducibly.
np.random.seed(2021)
seeds = np.random.randint(0, 100, size=NUM_SEED)

oof_score = 0
y_pred_meta_lgb = np.zeros((Ytrain.shape[0], 4))
y_pred_final_lgb = np.zeros((Xtest.shape[0], 4))
counter = 0  # total number of models trained (seeds x folds)


# NOTE(review): `seed` only controls the fold assignment; `params` (including
# its bagging_seed) is passed to LightGBM unchanged for every repeat.
for seed in seeds:
    seed_score = 0

    kfold = StratifiedKFold(n_splits=FOLD, shuffle=True, random_state=seed)

    for idx, (train, val) in enumerate(kfold.split(Xtrain, Ytrain)):
        counter += 1

        train_x, train_y = Xtrain[train], Ytrain[train]
        val_x, val_y, val_y_oh = Xtrain[val], Ytrain[val], Ytrain_oh[val]

        lgtrain = lgb.Dataset(train_x, label=train_y.ravel())
        lgvalidation = lgb.Dataset(val_x, label=val_y.ravel())

        model = lgb.train(params, lgtrain, num_rounds,
                          valid_sets=[lgtrain, lgvalidation],
                          categorical_feature=cat_cols_indices,
                          early_stopping_rounds=200, verbose_eval=100)

        # Accumulate raw probabilities for both OOF rows and the test set.
        val_pred = model.predict(val_x, num_iteration=model.best_iteration)
        y_pred_meta_lgb[val] += val_pred
        y_pred_final_lgb += model.predict(Xtest, num_iteration=model.best_iteration)

        # Score the fold on clipped probabilities.
        y_pred = np.clip(val_pred, p_min, p_max)
        score = log_loss(val_y_oh, y_pred)
        oof_score += score
        seed_score += score
        print("Seed-{} | Fold-{} | OOF Score: {}".format(seed, idx, score))

    print("\nSeed: {} | Aggregate OOF Score: {}\n\n".format(seed, (seed_score / FOLD)))


# Every training row is held out exactly once per seed, so OOF sums are
# averaged over seeds; test predictions are averaged over all trained models.
y_pred_meta_lgb = y_pred_meta_lgb / float(NUM_SEED)
y_pred_final_lgb = y_pred_final_lgb / float(counter)
oof_score /= float(counter)
print("Aggregate OOF Score: {}".format(oof_score))

New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.08323	valid_1's multi_logloss: 1.10307
[200]	training's multi_logloss: 1.05934	valid_1's multi_logloss: 1.09738
[300]	training's multi_logloss: 1.04051	valid_1's multi_logloss: 1.09546
[400]	training's multi_logloss: 1.02458	valid_1's multi_logloss: 1.09515
[500]	training's multi_logloss: 1.0107	valid_1's multi_logloss: 1.09577
Early stopping, best iteration is:
[355]	training's multi_logloss: 1.03146	valid_1's multi_logloss: 1.09496
Seed-85 | Fold-0 | OOF Score: 1.094957392468202


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.08367	valid_1's multi_logloss: 1.10066
[200]	training's multi_logloss: 1.05995	valid_1's multi_logloss: 1.09359
[300]	training's multi_logloss: 1.04133	valid_1's multi_logloss: 1.09077
[400]	training's multi_logloss: 1.02541	valid_1's multi_logloss: 1.08987
[500]	training's multi_logloss: 1.01153	valid_1's multi_logloss: 1.0903
Early stopping, best iteration is:
[394]	training's multi_logloss: 1.02632	valid_1's multi_logloss: 1.08981
Seed-85 | Fold-1 | OOF Score: 1.0898094352718828


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.08349	valid_1's multi_logloss: 1.10151
[200]	training's multi_logloss: 1.0596	valid_1's multi_logloss: 1.09547
[300]	training's multi_logloss: 1.04078	valid_1's multi_logloss: 1.09298
[400]	training's multi_logloss: 1.02495	valid_1's multi_logloss: 1.09279
[500]	training's multi_logloss: 1.01105	valid_1's multi_logloss: 1.09319
Early stopping, best iteration is:
[358]	training's multi_logloss: 1.03124	valid_1's multi_logloss: 1.09269
Seed-85 | Fold-2 | OOF Score: 1.092692155358279


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.0834	valid_1's multi_logloss: 1.10161
[200]	training's multi_logloss: 1.05958	valid_1's multi_logloss: 1.09545
[300]	training's multi_logloss: 1.0407	valid_1's multi_logloss: 1.09335
[400]	training's multi_logloss: 1.02477	valid_1's multi_logloss: 1.09301
[500]	training's multi_logloss: 1.01083	valid_1's multi_logloss: 1.09377
Early stopping, best iteration is:
[358]	training's multi_logloss: 1.03118	valid_1's multi_logloss: 1.09296
Seed-85 | Fold-3 | OOF Score: 1.0929551891128544


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.08342	valid_1's multi_logloss: 1.10216
[200]	training's multi_logloss: 1.0595	valid_1's multi_logloss: 1.09627
[300]	training's multi_logloss: 1.04058	valid_1's multi_logloss: 1.09407
[400]	training's multi_logloss: 1.0247	valid_1's multi_logloss: 1.09376
[500]	training's multi_logloss: 1.0108	valid_1's multi_logloss: 1.0941
Early stopping, best iteration is:
[370]	training's multi_logloss: 1.02916	valid_1's multi_logloss: 1.09362
Seed-85 | Fold-4 | OOF Score: 1.0936173864972019


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.08316	valid_1's multi_logloss: 1.10311
[200]	training's multi_logloss: 1.05924	valid_1's multi_logloss: 1.09766
[300]	training's multi_logloss: 1.04046	valid_1's multi_logloss: 1.09584
[400]	training's multi_logloss: 1.02453	valid_1's multi_logloss: 1.09581
[500]	training's multi_logloss: 1.01062	valid_1's multi_logloss: 1.09647
Early stopping, best iteration is:
[350]	training's multi_logloss: 1.0322	valid_1's multi_logloss: 1.09557
Seed-85 | Fold-5 | OOF Score: 1.0955747032648302


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.08341	valid_1's multi_logloss: 1.10227
[200]	training's multi_logloss: 1.05958	valid_1's multi_logloss: 1.09553
[300]	training's multi_logloss: 1.04072	valid_1's multi_logloss: 1.09263
[400]	training's multi_logloss: 1.02481	valid_1's multi_logloss: 1.09209
[500]	training's multi_logloss: 1.01098	valid_1's multi_logloss: 1.09246
[600]	training's multi_logloss: 0.99829	valid_1's multi_logloss: 1.09297
Early stopping, best iteration is:
[400]	training's multi_logloss: 1.02481	valid_1's multi_logloss: 1.09209
Seed-85 | Fold-6 | OOF Score: 1.0920890145666367


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.08327	valid_1's multi_logloss: 1.10284
[200]	training's multi_logloss: 1.05946	valid_1's multi_logloss: 1.097
[300]	training's multi_logloss: 1.04071	valid_1's multi_logloss: 1.09459
[400]	training's multi_logloss: 1.02486	valid_1's multi_logloss: 1.09373
[500]	training's multi_logloss: 1.01097	valid_1's multi_logloss: 1.09405
[600]	training's multi_logloss: 0.998084	valid_1's multi_logloss: 1.09458
Early stopping, best iteration is:
[416]	training's multi_logloss: 1.0225	valid_1's multi_logloss: 1.09361
Seed-85 | Fold-7 | OOF Score: 1.093613661349027


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.08353	valid_1's multi_logloss: 1.10271
[200]	training's multi_logloss: 1.05977	valid_1's multi_logloss: 1.09611
[300]	training's multi_logloss: 1.041	valid_1's multi_logloss: 1.0933
[400]	training's multi_logloss: 1.02512	valid_1's multi_logloss: 1.0926
[500]	training's multi_logloss: 1.01118	valid_1's multi_logloss: 1.09282
Early stopping, best iteration is:
[394]	training's multi_logloss: 1.02599	valid_1's multi_logloss: 1.09254
Seed-85 | Fold-8 | OOF Score: 1.0925373360124555


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.08344	valid_1's multi_logloss: 1.10163
[200]	training's multi_logloss: 1.05971	valid_1's multi_logloss: 1.09538
[300]	training's multi_logloss: 1.04103	valid_1's multi_logloss: 1.09299
[400]	training's multi_logloss: 1.02515	valid_1's multi_logloss: 1.09266
[500]	training's multi_logloss: 1.01107	valid_1's multi_logloss: 1.09298
Early stopping, best iteration is:
[361]	training's multi_logloss: 1.03111	valid_1's multi_logloss: 1.09254
Seed-85 | Fold-9 | OOF Score: 1.0925367920368207

Seed: 85 | Aggregate OOF Score: 1.0930383065938192




New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.08336	valid_1's multi_logloss: 1.10159
[200]	training's multi_logloss: 1.05969	valid_1's multi_logloss: 1.09512
[300]	training's multi_logloss: 1.04081	valid_1's multi_logloss: 1.09231
[400]	training's multi_logloss: 1.02497	valid_1's multi_logloss: 1.09127
[500]	training's multi_logloss: 1.0111	valid_1's multi_logloss: 1.09137
[600]	training's multi_logloss: 0.998129	valid_1's multi_logloss: 1.09202
Early stopping, best iteration is:
[437]	training's multi_logloss: 1.01965	valid_1's multi_logloss: 1.09105
Seed-57 | Fold-0 | OOF Score: 1.0910548182316049


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.08353	valid_1's multi_logloss: 1.10161
[200]	training's multi_logloss: 1.05988	valid_1's multi_logloss: 1.09484
[300]	training's multi_logloss: 1.04117	valid_1's multi_logloss: 1.09265
[400]	training's multi_logloss: 1.02529	valid_1's multi_logloss: 1.09218
[500]	training's multi_logloss: 1.01133	valid_1's multi_logloss: 1.0926
Early stopping, best iteration is:
[391]	training's multi_logloss: 1.02661	valid_1's multi_logloss: 1.0921
Seed-57 | Fold-1 | OOF Score: 1.0921018585559064


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.08326	valid_1's multi_logloss: 1.10224
[200]	training's multi_logloss: 1.05939	valid_1's multi_logloss: 1.09618
[300]	training's multi_logloss: 1.04062	valid_1's multi_logloss: 1.094
[400]	training's multi_logloss: 1.02469	valid_1's multi_logloss: 1.09385
[500]	training's multi_logloss: 1.01067	valid_1's multi_logloss: 1.09432
Early stopping, best iteration is:
[386]	training's multi_logloss: 1.02678	valid_1's multi_logloss: 1.09375
Seed-57 | Fold-2 | OOF Score: 1.0937481495432937


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.0836	valid_1's multi_logloss: 1.10123
[200]	training's multi_logloss: 1.05971	valid_1's multi_logloss: 1.09469
[300]	training's multi_logloss: 1.041	valid_1's multi_logloss: 1.09252
[400]	training's multi_logloss: 1.02513	valid_1's multi_logloss: 1.09199
[500]	training's multi_logloss: 1.0111	valid_1's multi_logloss: 1.09243
[600]	training's multi_logloss: 0.998328	valid_1's multi_logloss: 1.09323
Early stopping, best iteration is:
[424]	training's multi_logloss: 1.02161	valid_1's multi_logloss: 1.09183
Seed-57 | Fold-3 | OOF Score: 1.0918324819784635


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.08327	valid_1's multi_logloss: 1.10418
[200]	training's multi_logloss: 1.05943	valid_1's multi_logloss: 1.0987
[300]	training's multi_logloss: 1.04054	valid_1's multi_logloss: 1.097
[400]	training's multi_logloss: 1.02463	valid_1's multi_logloss: 1.09646
[500]	training's multi_logloss: 1.01055	valid_1's multi_logloss: 1.09734
Early stopping, best iteration is:
[378]	training's multi_logloss: 1.0279	valid_1's multi_logloss: 1.09638
Seed-57 | Fold-4 | OOF Score: 1.096377678101434


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.08317	valid_1's multi_logloss: 1.10302
[200]	training's multi_logloss: 1.0594	valid_1's multi_logloss: 1.09717
[300]	training's multi_logloss: 1.04067	valid_1's multi_logloss: 1.09535
[400]	training's multi_logloss: 1.0247	valid_1's multi_logloss: 1.09519
[500]	training's multi_logloss: 1.01086	valid_1's multi_logloss: 1.0956
Early stopping, best iteration is:
[371]	training's multi_logloss: 1.02902	valid_1's multi_logloss: 1.09491
Seed-57 | Fold-5 | OOF Score: 1.0949050619344467


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.08373	valid_1's multi_logloss: 1.10103
[200]	training's multi_logloss: 1.0599	valid_1's multi_logloss: 1.09404
[300]	training's multi_logloss: 1.04111	valid_1's multi_logloss: 1.09107
[400]	training's multi_logloss: 1.02535	valid_1's multi_logloss: 1.09011
[500]	training's multi_logloss: 1.01129	valid_1's multi_logloss: 1.09016
[600]	training's multi_logloss: 0.998545	valid_1's multi_logloss: 1.09096
Early stopping, best iteration is:
[431]	training's multi_logloss: 1.02086	valid_1's multi_logloss: 1.08996
Seed-57 | Fold-6 | OOF Score: 1.0899647213572548


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.08338	valid_1's multi_logloss: 1.10262
[200]	training's multi_logloss: 1.05948	valid_1's multi_logloss: 1.09687
[300]	training's multi_logloss: 1.04067	valid_1's multi_logloss: 1.09469
[400]	training's multi_logloss: 1.02489	valid_1's multi_logloss: 1.09383
[500]	training's multi_logloss: 1.01102	valid_1's multi_logloss: 1.09397
[600]	training's multi_logloss: 0.998171	valid_1's multi_logloss: 1.09502
Early stopping, best iteration is:
[457]	training's multi_logloss: 1.01677	valid_1's multi_logloss: 1.09375
Seed-57 | Fold-7 | OOF Score: 1.0937493790770676


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.08335	valid_1's multi_logloss: 1.10309
[200]	training's multi_logloss: 1.05942	valid_1's multi_logloss: 1.09712
[300]	training's multi_logloss: 1.04047	valid_1's multi_logloss: 1.09518
[400]	training's multi_logloss: 1.02456	valid_1's multi_logloss: 1.09497
[500]	training's multi_logloss: 1.01072	valid_1's multi_logloss: 1.09567
Early stopping, best iteration is:
[372]	training's multi_logloss: 1.02881	valid_1's multi_logloss: 1.09479
Seed-57 | Fold-8 | OOF Score: 1.0947865328343196


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.08335	valid_1's multi_logloss: 1.10222
[200]	training's multi_logloss: 1.05944	valid_1's multi_logloss: 1.09631
[300]	training's multi_logloss: 1.04056	valid_1's multi_logloss: 1.09388
[400]	training's multi_logloss: 1.02463	valid_1's multi_logloss: 1.09324
[500]	training's multi_logloss: 1.01059	valid_1's multi_logloss: 1.09348
Early stopping, best iteration is:
[351]	training's multi_logloss: 1.03216	valid_1's multi_logloss: 1.09319
Seed-57 | Fold-9 | OOF Score: 1.0931941279074453

Seed: 57 | Aggregate OOF Score: 1.0931714809521238




New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.08339	valid_1's multi_logloss: 1.10352
[200]	training's multi_logloss: 1.05951	valid_1's multi_logloss: 1.09783
[300]	training's multi_logloss: 1.04059	valid_1's multi_logloss: 1.09604
[400]	training's multi_logloss: 1.02477	valid_1's multi_logloss: 1.09572
[500]	training's multi_logloss: 1.01096	valid_1's multi_logloss: 1.09639
Early stopping, best iteration is:
[392]	training's multi_logloss: 1.02597	valid_1's multi_logloss: 1.09568
Seed-0 | Fold-0 | OOF Score: 1.0956824270775354


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.08341	valid_1's multi_logloss: 1.10143
[200]	training's multi_logloss: 1.05957	valid_1's multi_logloss: 1.09508
[300]	training's multi_logloss: 1.04076	valid_1's multi_logloss: 1.09231
[400]	training's multi_logloss: 1.02492	valid_1's multi_logloss: 1.09197
[500]	training's multi_logloss: 1.01099	valid_1's multi_logloss: 1.09225
Early stopping, best iteration is:
[380]	training's multi_logloss: 1.02789	valid_1's multi_logloss: 1.09191
Seed-0 | Fold-1 | OOF Score: 1.091912414688964


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.08324	valid_1's multi_logloss: 1.10303
[200]	training's multi_logloss: 1.05938	valid_1's multi_logloss: 1.09704
[300]	training's multi_logloss: 1.04049	valid_1's multi_logloss: 1.09462
[400]	training's multi_logloss: 1.02475	valid_1's multi_logloss: 1.09418
[500]	training's multi_logloss: 1.01078	valid_1's multi_logloss: 1.09452
[600]	training's multi_logloss: 0.998034	valid_1's multi_logloss: 1.09538
Early stopping, best iteration is:
[405]	training's multi_logloss: 1.02401	valid_1's multi_logloss: 1.09414
Seed-0 | Fold-2 | OOF Score: 1.0941413902232886


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.0836	valid_1's multi_logloss: 1.10167
[200]	training's multi_logloss: 1.05982	valid_1's multi_logloss: 1.09544
[300]	training's multi_logloss: 1.04104	valid_1's multi_logloss: 1.09262
[400]	training's multi_logloss: 1.02527	valid_1's multi_logloss: 1.09185
[500]	training's multi_logloss: 1.01142	valid_1's multi_logloss: 1.09244
[600]	training's multi_logloss: 0.998535	valid_1's multi_logloss: 1.09332
Early stopping, best iteration is:
[400]	training's multi_logloss: 1.02527	valid_1's multi_logloss: 1.09185
Seed-0 | Fold-3 | OOF Score: 1.0918546002947083


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.08374	valid_1's multi_logloss: 1.10045
[200]	training's multi_logloss: 1.06002	valid_1's multi_logloss: 1.09316
[300]	training's multi_logloss: 1.04125	valid_1's multi_logloss: 1.08996
[400]	training's multi_logloss: 1.02543	valid_1's multi_logloss: 1.08915
[500]	training's multi_logloss: 1.01153	valid_1's multi_logloss: 1.08946
[600]	training's multi_logloss: 0.998661	valid_1's multi_logloss: 1.09042
Early stopping, best iteration is:
[438]	training's multi_logloss: 1.01999	valid_1's multi_logloss: 1.08909
Seed-0 | Fold-4 | OOF Score: 1.0890860089289418


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.08351	valid_1's multi_logloss: 1.10156
[200]	training's multi_logloss: 1.05975	valid_1's multi_logloss: 1.09505
[300]	training's multi_logloss: 1.04087	valid_1's multi_logloss: 1.0926
[400]	training's multi_logloss: 1.025	valid_1's multi_logloss: 1.09189
[500]	training's multi_logloss: 1.0111	valid_1's multi_logloss: 1.09213
Early stopping, best iteration is:
[392]	training's multi_logloss: 1.02615	valid_1's multi_logloss: 1.09183
Seed-0 | Fold-5 | OOF Score: 1.0918251374647447


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.0834	valid_1's multi_logloss: 1.10241
[200]	training's multi_logloss: 1.05955	valid_1's multi_logloss: 1.09628
[300]	training's multi_logloss: 1.04086	valid_1's multi_logloss: 1.09353
[400]	training's multi_logloss: 1.02517	valid_1's multi_logloss: 1.09268
[500]	training's multi_logloss: 1.01132	valid_1's multi_logloss: 1.09295
Early stopping, best iteration is:
[392]	training's multi_logloss: 1.02634	valid_1's multi_logloss: 1.09264
Seed-0 | Fold-6 | OOF Score: 1.0926443093938198


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.08331	valid_1's multi_logloss: 1.10254
[200]	training's multi_logloss: 1.0595	valid_1's multi_logloss: 1.09647
[300]	training's multi_logloss: 1.04072	valid_1's multi_logloss: 1.09457
[400]	training's multi_logloss: 1.02483	valid_1's multi_logloss: 1.0944
[500]	training's multi_logloss: 1.01087	valid_1's multi_logloss: 1.09511
[600]	training's multi_logloss: 0.997923	valid_1's multi_logloss: 1.0963
Early stopping, best iteration is:
[414]	training's multi_logloss: 1.02273	valid_1's multi_logloss: 1.09434
Seed-0 | Fold-7 | OOF Score: 1.0943361474695368


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.08345	valid_1's multi_logloss: 1.10133
[200]	training's multi_logloss: 1.05957	valid_1's multi_logloss: 1.0949
[300]	training's multi_logloss: 1.04071	valid_1's multi_logloss: 1.09274
[400]	training's multi_logloss: 1.025	valid_1's multi_logloss: 1.09228
[500]	training's multi_logloss: 1.01097	valid_1's multi_logloss: 1.09268
[600]	training's multi_logloss: 0.998207	valid_1's multi_logloss: 1.09341
Early stopping, best iteration is:
[408]	training's multi_logloss: 1.02381	valid_1's multi_logloss: 1.09226
Seed-0 | Fold-8 | OOF Score: 1.0922559293621112


New categorical_feature is [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.08313	valid_1's multi_logloss: 1.1036
[200]	training's multi_logloss: 1.05916	valid_1's multi_logloss: 1.09823
[300]	training's multi_logloss: 1.04034	valid_1's multi_logloss: 1.09628
[400]	training's multi_logloss: 1.02442	valid_1's multi_logloss: 1.09636
[500]	training's multi_logloss: 1.01034	valid_1's multi_logloss: 1.09696
Early stopping, best iteration is:
[345]	training's multi_logloss: 1.03286	valid_1's multi_logloss: 1.09605
Seed-0 | Fold-9 | OOF Score: 1.096048605816949

Seed: 0 | Aggregate OOF Score: 1.0929786970720596


Aggregate OOF Score: 1.0930628282060009


In [7]:
# Persist the OOF predictions, the CV score and the averaged test predictions
# in one compressed archive (presumably consumed by a later stacking step).
np.savez_compressed(
    './LGB_Meta_Features.npz',
    **{
        'y_pred_meta_lgb': y_pred_meta_lgb,
        'oof_score': oof_score,
        'y_pred_final_lgb': y_pred_final_lgb,
    }
)

## Create submission file

In [8]:
# Clip into a separate array instead of rebinding `y_pred_final_lgb`, so the
# averaged test predictions stay as they were when saved and re-running the
# save cell after this one cannot persist a different (clipped) array.
submission_probs = np.clip(y_pred_final_lgb, p_min, p_max)

# Only the id column of the raw test file is needed for the submission.
test_df = pd.read_csv("../input/tabular-playground-series-may-2021/test.csv",
                      usecols=['id'])

# One probability column per class, named Class_1 .. Class_N in column order.
submit_df = pd.DataFrame({'id': test_df['id']})
for class_idx in range(submission_probs.shape[1]):
    submit_df['Class_{}'.format(class_idx + 1)] = submission_probs[:, class_idx]
submit_df.head()

Unnamed: 0,id,Class_1,Class_2,Class_3,Class_4
0,100000,0.09523,0.586324,0.19202,0.126425
1,100001,0.069203,0.653195,0.177344,0.100258
2,100002,0.06625,0.685988,0.157051,0.090711
3,100003,0.081788,0.543207,0.289045,0.08596
4,100004,0.06747,0.663538,0.168722,0.100269


In [9]:
# Write the submission file; the DataFrame index is not part of the output.
submit_df.to_csv(path_or_buf="./LGB_submission.csv", index=False)