## Import libraries

In [1]:
import gc
import pickle
import numpy as np
import pandas as pd
import lightgbm as lgb
from sklearn.metrics import log_loss
from sklearn.model_selection import StratifiedKFold

## Prepare data for model training

In [2]:
# Load the pickled dict that holds the 'train_df' and 'test_df' frames.
# NOTE(review): unpickling can execute arbitrary code embedded in the file;
# only load artifacts that come from a trusted source.
with open("../input/tps-may-data-preprocess/TPS_May_Dataset_w_Quantile.txt", 'rb') as handle:
    # Deserialize directly from the file object so the raw pickle bytes are
    # never kept alive in a separate notebook-level variable next to the
    # decoded frames.
    processed_data = pickle.load(handle)

train_df = processed_data['train_df']
test_df = processed_data['test_df']

# Drop the container dict; the two frames stay reachable through their own names.
del processed_data
gc.collect()

0

In [3]:
# Columns that are handed to LightGBM as categorical features.
cat_cols = ['feature_0','feature_2','feature_5','feature_9','feature_10','feature_11',
            'feature_12','feature_13','feature_17','feature_18','feature_22',
            'feature_29','feature_36','feature_37','feature_44']

# Cast the categorical columns to int in both frames.
train_df[cat_cols] = train_df[cat_cols].astype(int)
test_df[cat_cols] = test_df[cat_cols].astype(int)

# Resolve positions among the *feature* columns only: the training matrix is
# built from every column except 'target', so indices taken from the full
# frame would be shifted if 'target' were not the last column.
feature_cols = train_df.columns[train_df.columns != 'target']
cat_cols_indices = [feature_cols.get_loc(col) for col in cat_cols]
print(cat_cols_indices)

[0, 2, 5, 9, 10, 11, 12, 13, 17, 18, 22, 29, 36, 37, 44]


In [4]:
# Turn the frames into plain arrays: features, integer labels, one-hot labels
# (used later for log-loss scoring) and the test feature matrix.
Ytrain = train_df['target'].values
Ytrain_oh = pd.get_dummies(train_df['target']).values
Xtrain = train_df.drop(columns='target').values
Xtest = test_df.values

# Report the shape of every array in one formatted message.
print("Xtrain: {} \nYtrain: {} \nYtrain_oh: {} \nXtest: {}".format(
    *(arr.shape for arr in (Xtrain, Ytrain, Ytrain_oh, Xtest))
))

# The frames are no longer needed once the arrays exist; release them.
del train_df, test_df
gc.collect()

Xtrain: (99918, 951) 
Ytrain: (99918,) 
Ytrain_oh: (99918, 4) 
Xtest: (50000, 951)


20

## Build and validate the model

In [5]:
# LightGBM training configuration, declared in one literal (key order kept).
# GPU training ("device_type": 'gpu') and a depth cap ("max_depth": 10) were
# tried as commented-out options in the original cell and remain disabled.
params = {
    # Task definition
    "objective": 'multiclass',
    'metric': 'multi_logloss',
    'boosting': 'gbdt',
    'num_class': 4,
    # NOTE(review): LightGBM's parameter docs list is_unbalance for the
    # binary / multiclassova objectives -- confirm it has any effect here.
    'is_unbalance': True,
    # Learning rate and regularisation
    "learning_rate": 0.02,
    "lambda_l2": 0.0256,
    # Tree shape
    "num_leaves": 52,
    # Column / row subsampling
    "feature_fraction": 0.503,
    "bagging_fraction": 0.741,
    "bagging_freq": 12,
    "bagging_seed": 10,
    "min_data_in_leaf": 10,
    # Logging
    "verbosity": -1,
}

# Upper bound on boosting rounds passed to lgb.train.
num_rounds = 5000

In [6]:
# Repeated stratified K-fold training.
#
# For every fold-shuffling seed, FOLD LightGBM models are trained. Each model
# contributes (a) out-of-fold probabilities for its validation rows and
# (b) probabilities for the whole test set. After the loops:
#   * y_pred_meta_lgb  -- OOF probabilities averaged over the NUM_SEED repeats
#   * y_pred_final_lgb -- test probabilities averaged over all trained models
#   * oof_score        -- mean per-fold log-loss (computed on clipped OOF values)
FOLD = 10
NUM_SEED = 3

# Prediction Clipping Thresholds
p_min = 0.025
p_max = 1 - p_min

# Reproducible draw of the seeds. Each seed only drives the fold shuffling
# below; LightGBM's own randomness is configured through `params`.
np.random.seed(3)
seeds = np.random.randint(0, 100, size=NUM_SEED)

# Width of every probability matrix, taken from the model configuration
# instead of repeating the literal class count.
n_classes = params['num_class']

oof_score = 0
y_pred_meta_lgb = np.zeros((Ytrain.shape[0], n_classes))
y_pred_final_lgb = np.zeros((Xtest.shape[0], n_classes))
counter = 0  # number of models trained so far (seeds x folds)


for seed in seeds:
    seed_score = 0

    kfold = StratifiedKFold(n_splits=FOLD, shuffle=True, random_state=seed)

    for idx, (train, val) in enumerate(kfold.split(Xtrain, Ytrain)):
        counter += 1

        train_x, train_y = Xtrain[train], Ytrain[train]
        val_x, val_y, val_y_oh = Xtrain[val], Ytrain[val], Ytrain_oh[val]

        # Categorical columns are declared on the Dataset itself; the
        # validation set is tied to the training set through `reference`.
        lgtrain = lgb.Dataset(train_x, label=train_y.ravel(),
                              categorical_feature=cat_cols_indices)
        lgvalidation = lgb.Dataset(val_x, label=val_y.ravel(), reference=lgtrain)

        # NOTE(review): `early_stopping_rounds` / `verbose_eval` are keyword
        # arguments of older LightGBM releases; newer releases expect the
        # lgb.early_stopping / lgb.log_evaluation callbacks instead --
        # confirm against the installed version before upgrading.
        model = lgb.train(params, lgtrain, num_rounds,
                          valid_sets=[lgtrain, lgvalidation],
                          early_stopping_rounds=200, verbose_eval=100)

        # Accumulate *raw* probabilities for both the OOF and the test
        # meta-features so the two arrays receive identical treatment.
        val_pred_raw = model.predict(val_x, num_iteration=model.best_iteration)
        y_pred_meta_lgb[val] += val_pred_raw
        y_pred_final_lgb += model.predict(Xtest, num_iteration=model.best_iteration)

        # Clipping is applied only to the copy used for scoring.
        y_pred = np.clip(val_pred_raw, p_min, p_max)
        score = log_loss(val_y_oh, y_pred)
        oof_score += score
        seed_score += score
        print("Seed-{} | Fold-{} | OOF Score: {}".format(seed, idx, score))

    print("\nSeed: {} | Aggregate OOF Score: {}\n\n".format(seed, (seed_score / FOLD)))


# Every training row is predicted once per seed; every test row once per model.
y_pred_meta_lgb = y_pred_meta_lgb / float(NUM_SEED)
y_pred_final_lgb = y_pred_final_lgb / float(counter)
oof_score /= float(counter)
print("Aggregate OOF Score: {}".format(oof_score))

New categorical_feature is [0, 2, 5, 9, 10, 11, 12, 13, 17, 18, 22, 29, 36, 37, 44]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07308	valid_1's multi_logloss: 1.09851
[200]	training's multi_logloss: 1.04479	valid_1's multi_logloss: 1.09374
[300]	training's multi_logloss: 1.02227	valid_1's multi_logloss: 1.09248
[400]	training's multi_logloss: 1.00283	valid_1's multi_logloss: 1.09263
[500]	training's multi_logloss: 0.985113	valid_1's multi_logloss: 1.0931
Early stopping, best iteration is:
[306]	training's multi_logloss: 1.02101	valid_1's multi_logloss: 1.09239
Seed-24 | Fold-0 | OOF Score: 1.0923912573461216


New categorical_feature is [0, 2, 5, 9, 10, 11, 12, 13, 17, 18, 22, 29, 36, 37, 44]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07299	valid_1's multi_logloss: 1.09891
[200]	training's multi_logloss: 1.04471	valid_1's multi_logloss: 1.09334
[300]	training's multi_logloss: 1.02221	valid_1's multi_logloss: 1.09152
[400]	training's multi_logloss: 1.00258	valid_1's multi_logloss: 1.09146
[500]	training's multi_logloss: 0.985229	valid_1's multi_logloss: 1.09151
[600]	training's multi_logloss: 0.968644	valid_1's multi_logloss: 1.09131
[700]	training's multi_logloss: 0.953276	valid_1's multi_logloss: 1.09196
Early stopping, best iteration is:
[546]	training's multi_logloss: 0.977495	valid_1's multi_logloss: 1.09118
Seed-24 | Fold-1 | OOF Score: 1.0911784213439766


New categorical_feature is [0, 2, 5, 9, 10, 11, 12, 13, 17, 18, 22, 29, 36, 37, 44]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.0727	valid_1's multi_logloss: 1.10056
[200]	training's multi_logloss: 1.04427	valid_1's multi_logloss: 1.09657
[300]	training's multi_logloss: 1.0218	valid_1's multi_logloss: 1.09605
[400]	training's multi_logloss: 1.00219	valid_1's multi_logloss: 1.09644
Early stopping, best iteration is:
[297]	training's multi_logloss: 1.02244	valid_1's multi_logloss: 1.09602
Seed-24 | Fold-2 | OOF Score: 1.0960168566564652


New categorical_feature is [0, 2, 5, 9, 10, 11, 12, 13, 17, 18, 22, 29, 36, 37, 44]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07324	valid_1's multi_logloss: 1.09752
[200]	training's multi_logloss: 1.0451	valid_1's multi_logloss: 1.09202
[300]	training's multi_logloss: 1.02271	valid_1's multi_logloss: 1.08991
[400]	training's multi_logloss: 1.00322	valid_1's multi_logloss: 1.08975
[500]	training's multi_logloss: 0.985602	valid_1's multi_logloss: 1.08959
Early stopping, best iteration is:
[328]	training's multi_logloss: 1.01706	valid_1's multi_logloss: 1.08948
Seed-24 | Fold-3 | OOF Score: 1.0894806373797181


New categorical_feature is [0, 2, 5, 9, 10, 11, 12, 13, 17, 18, 22, 29, 36, 37, 44]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07294	valid_1's multi_logloss: 1.0996
[200]	training's multi_logloss: 1.04483	valid_1's multi_logloss: 1.0949
[300]	training's multi_logloss: 1.02235	valid_1's multi_logloss: 1.09373
[400]	training's multi_logloss: 1.00289	valid_1's multi_logloss: 1.09392
[500]	training's multi_logloss: 0.985381	valid_1's multi_logloss: 1.09456
Early stopping, best iteration is:
[342]	training's multi_logloss: 1.01391	valid_1's multi_logloss: 1.09357
Seed-24 | Fold-4 | OOF Score: 1.0935693364828991


New categorical_feature is [0, 2, 5, 9, 10, 11, 12, 13, 17, 18, 22, 29, 36, 37, 44]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07289	valid_1's multi_logloss: 1.10012
[200]	training's multi_logloss: 1.04457	valid_1's multi_logloss: 1.09575
[300]	training's multi_logloss: 1.02216	valid_1's multi_logloss: 1.09436
[400]	training's multi_logloss: 1.00269	valid_1's multi_logloss: 1.09436
[500]	training's multi_logloss: 0.98504	valid_1's multi_logloss: 1.09486
Early stopping, best iteration is:
[389]	training's multi_logloss: 1.00469	valid_1's multi_logloss: 1.09425
Seed-24 | Fold-5 | OOF Score: 1.0942463857715476


New categorical_feature is [0, 2, 5, 9, 10, 11, 12, 13, 17, 18, 22, 29, 36, 37, 44]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07304	valid_1's multi_logloss: 1.09895
[200]	training's multi_logloss: 1.0448	valid_1's multi_logloss: 1.09358
[300]	training's multi_logloss: 1.02229	valid_1's multi_logloss: 1.0922
[400]	training's multi_logloss: 1.00274	valid_1's multi_logloss: 1.09234
[500]	training's multi_logloss: 0.985014	valid_1's multi_logloss: 1.09249
Early stopping, best iteration is:
[367]	training's multi_logloss: 1.00898	valid_1's multi_logloss: 1.09205
Seed-24 | Fold-6 | OOF Score: 1.0920511302521705


New categorical_feature is [0, 2, 5, 9, 10, 11, 12, 13, 17, 18, 22, 29, 36, 37, 44]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07282	valid_1's multi_logloss: 1.09943
[200]	training's multi_logloss: 1.04442	valid_1's multi_logloss: 1.09503
[300]	training's multi_logloss: 1.0219	valid_1's multi_logloss: 1.09321
[400]	training's multi_logloss: 1.00238	valid_1's multi_logloss: 1.09331
[500]	training's multi_logloss: 0.984503	valid_1's multi_logloss: 1.09332
Early stopping, best iteration is:
[329]	training's multi_logloss: 1.01595	valid_1's multi_logloss: 1.09298
Seed-24 | Fold-7 | OOF Score: 1.0929840801439932


New categorical_feature is [0, 2, 5, 9, 10, 11, 12, 13, 17, 18, 22, 29, 36, 37, 44]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07313	valid_1's multi_logloss: 1.09844
[200]	training's multi_logloss: 1.04491	valid_1's multi_logloss: 1.09262
[300]	training's multi_logloss: 1.02251	valid_1's multi_logloss: 1.09091
[400]	training's multi_logloss: 1.00287	valid_1's multi_logloss: 1.09119
[500]	training's multi_logloss: 0.98501	valid_1's multi_logloss: 1.09109
Early stopping, best iteration is:
[302]	training's multi_logloss: 1.02208	valid_1's multi_logloss: 1.09087
Seed-24 | Fold-8 | OOF Score: 1.0908732061816035


New categorical_feature is [0, 2, 5, 9, 10, 11, 12, 13, 17, 18, 22, 29, 36, 37, 44]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07271	valid_1's multi_logloss: 1.09992
[200]	training's multi_logloss: 1.04424	valid_1's multi_logloss: 1.09542
[300]	training's multi_logloss: 1.02175	valid_1's multi_logloss: 1.0946
[400]	training's multi_logloss: 1.0022	valid_1's multi_logloss: 1.09465
[500]	training's multi_logloss: 0.984427	valid_1's multi_logloss: 1.09528
Early stopping, best iteration is:
[341]	training's multi_logloss: 1.01333	valid_1's multi_logloss: 1.09453
Seed-24 | Fold-9 | OOF Score: 1.0945258599928105

Seed: 24 | Aggregate OOF Score: 1.0927317171551305




New categorical_feature is [0, 2, 5, 9, 10, 11, 12, 13, 17, 18, 22, 29, 36, 37, 44]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.0729	valid_1's multi_logloss: 1.09842
[200]	training's multi_logloss: 1.04466	valid_1's multi_logloss: 1.09307
[300]	training's multi_logloss: 1.02209	valid_1's multi_logloss: 1.09156
[400]	training's multi_logloss: 1.00266	valid_1's multi_logloss: 1.0916
[500]	training's multi_logloss: 0.984888	valid_1's multi_logloss: 1.09158
[600]	training's multi_logloss: 0.968585	valid_1's multi_logloss: 1.09235
Early stopping, best iteration is:
[446]	training's multi_logloss: 0.994053	valid_1's multi_logloss: 1.09138
Seed-3 | Fold-0 | OOF Score: 1.0913817739612948


New categorical_feature is [0, 2, 5, 9, 10, 11, 12, 13, 17, 18, 22, 29, 36, 37, 44]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07314	valid_1's multi_logloss: 1.0991
[200]	training's multi_logloss: 1.04486	valid_1's multi_logloss: 1.0938
[300]	training's multi_logloss: 1.02226	valid_1's multi_logloss: 1.09268
[400]	training's multi_logloss: 1.00286	valid_1's multi_logloss: 1.09271
[500]	training's multi_logloss: 0.985198	valid_1's multi_logloss: 1.09288
Early stopping, best iteration is:
[335]	training's multi_logloss: 1.01512	valid_1's multi_logloss: 1.09244
Seed-3 | Fold-1 | OOF Score: 1.092441349619549


New categorical_feature is [0, 2, 5, 9, 10, 11, 12, 13, 17, 18, 22, 29, 36, 37, 44]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07265	valid_1's multi_logloss: 1.09972
[200]	training's multi_logloss: 1.04431	valid_1's multi_logloss: 1.09496
[300]	training's multi_logloss: 1.02211	valid_1's multi_logloss: 1.09397
[400]	training's multi_logloss: 1.00235	valid_1's multi_logloss: 1.09397
[500]	training's multi_logloss: 0.984772	valid_1's multi_logloss: 1.09427
Early stopping, best iteration is:
[313]	training's multi_logloss: 1.01942	valid_1's multi_logloss: 1.09382
Seed-3 | Fold-2 | OOF Score: 1.0938175472818445


New categorical_feature is [0, 2, 5, 9, 10, 11, 12, 13, 17, 18, 22, 29, 36, 37, 44]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07277	valid_1's multi_logloss: 1.10004
[200]	training's multi_logloss: 1.04442	valid_1's multi_logloss: 1.09534
[300]	training's multi_logloss: 1.02192	valid_1's multi_logloss: 1.09438
[400]	training's multi_logloss: 1.00241	valid_1's multi_logloss: 1.09458
Early stopping, best iteration is:
[297]	training's multi_logloss: 1.02255	valid_1's multi_logloss: 1.09434
Seed-3 | Fold-3 | OOF Score: 1.0943353446600463


New categorical_feature is [0, 2, 5, 9, 10, 11, 12, 13, 17, 18, 22, 29, 36, 37, 44]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07301	valid_1's multi_logloss: 1.09836
[200]	training's multi_logloss: 1.04452	valid_1's multi_logloss: 1.09348
[300]	training's multi_logloss: 1.02172	valid_1's multi_logloss: 1.09244
[400]	training's multi_logloss: 1.00237	valid_1's multi_logloss: 1.09261
[500]	training's multi_logloss: 0.984731	valid_1's multi_logloss: 1.09293
Early stopping, best iteration is:
[304]	training's multi_logloss: 1.02086	valid_1's multi_logloss: 1.09233
Seed-3 | Fold-4 | OOF Score: 1.0923300297694016


New categorical_feature is [0, 2, 5, 9, 10, 11, 12, 13, 17, 18, 22, 29, 36, 37, 44]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.0727	valid_1's multi_logloss: 1.09981
[200]	training's multi_logloss: 1.04437	valid_1's multi_logloss: 1.0952
[300]	training's multi_logloss: 1.02197	valid_1's multi_logloss: 1.09414
[400]	training's multi_logloss: 1.00232	valid_1's multi_logloss: 1.09455
[500]	training's multi_logloss: 0.984713	valid_1's multi_logloss: 1.09522
Early stopping, best iteration is:
[303]	training's multi_logloss: 1.02134	valid_1's multi_logloss: 1.0941
Seed-3 | Fold-5 | OOF Score: 1.0941015193586543


New categorical_feature is [0, 2, 5, 9, 10, 11, 12, 13, 17, 18, 22, 29, 36, 37, 44]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07305	valid_1's multi_logloss: 1.09884
[200]	training's multi_logloss: 1.04469	valid_1's multi_logloss: 1.09372
[300]	training's multi_logloss: 1.02226	valid_1's multi_logloss: 1.09282
[400]	training's multi_logloss: 1.00272	valid_1's multi_logloss: 1.09315
Early stopping, best iteration is:
[283]	training's multi_logloss: 1.02585	valid_1's multi_logloss: 1.09273
Seed-3 | Fold-6 | OOF Score: 1.0927292167485625


New categorical_feature is [0, 2, 5, 9, 10, 11, 12, 13, 17, 18, 22, 29, 36, 37, 44]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07278	valid_1's multi_logloss: 1.09958
[200]	training's multi_logloss: 1.04475	valid_1's multi_logloss: 1.09511
[300]	training's multi_logloss: 1.02252	valid_1's multi_logloss: 1.09341
[400]	training's multi_logloss: 1.00302	valid_1's multi_logloss: 1.09303
[500]	training's multi_logloss: 0.985219	valid_1's multi_logloss: 1.09321
[600]	training's multi_logloss: 0.968755	valid_1's multi_logloss: 1.09384
Early stopping, best iteration is:
[425]	training's multi_logloss: 0.998449	valid_1's multi_logloss: 1.09288
Seed-3 | Fold-7 | OOF Score: 1.0928762069053837


New categorical_feature is [0, 2, 5, 9, 10, 11, 12, 13, 17, 18, 22, 29, 36, 37, 44]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07317	valid_1's multi_logloss: 1.09892
[200]	training's multi_logloss: 1.04496	valid_1's multi_logloss: 1.09368
[300]	training's multi_logloss: 1.0226	valid_1's multi_logloss: 1.09225
[400]	training's multi_logloss: 1.00298	valid_1's multi_logloss: 1.09223
[500]	training's multi_logloss: 0.985533	valid_1's multi_logloss: 1.09247
[600]	training's multi_logloss: 0.969374	valid_1's multi_logloss: 1.09315
Early stopping, best iteration is:
[406]	training's multi_logloss: 1.00186	valid_1's multi_logloss: 1.09219
Seed-3 | Fold-8 | OOF Score: 1.0921854740825463


New categorical_feature is [0, 2, 5, 9, 10, 11, 12, 13, 17, 18, 22, 29, 36, 37, 44]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07297	valid_1's multi_logloss: 1.09962
[200]	training's multi_logloss: 1.0446	valid_1's multi_logloss: 1.09472
[300]	training's multi_logloss: 1.02216	valid_1's multi_logloss: 1.0931
[400]	training's multi_logloss: 1.00252	valid_1's multi_logloss: 1.09323
[500]	training's multi_logloss: 0.984821	valid_1's multi_logloss: 1.09421
Early stopping, best iteration is:
[380]	training's multi_logloss: 1.00629	valid_1's multi_logloss: 1.09299
Seed-3 | Fold-9 | OOF Score: 1.092986801855593

Seed: 3 | Aggregate OOF Score: 1.0929185264242876




New categorical_feature is [0, 2, 5, 9, 10, 11, 12, 13, 17, 18, 22, 29, 36, 37, 44]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07267	valid_1's multi_logloss: 1.0989
[200]	training's multi_logloss: 1.04447	valid_1's multi_logloss: 1.09378
[300]	training's multi_logloss: 1.02199	valid_1's multi_logloss: 1.09247
[400]	training's multi_logloss: 1.00219	valid_1's multi_logloss: 1.09194
[500]	training's multi_logloss: 0.98447	valid_1's multi_logloss: 1.09242
Early stopping, best iteration is:
[393]	training's multi_logloss: 1.00348	valid_1's multi_logloss: 1.09189
Seed-56 | Fold-0 | OOF Score: 1.091892315007627


New categorical_feature is [0, 2, 5, 9, 10, 11, 12, 13, 17, 18, 22, 29, 36, 37, 44]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07297	valid_1's multi_logloss: 1.09981
[200]	training's multi_logloss: 1.04462	valid_1's multi_logloss: 1.09466
[300]	training's multi_logloss: 1.02224	valid_1's multi_logloss: 1.09367
[400]	training's multi_logloss: 1.00243	valid_1's multi_logloss: 1.09359
[500]	training's multi_logloss: 0.984883	valid_1's multi_logloss: 1.09379
Early stopping, best iteration is:
[356]	training's multi_logloss: 1.01077	valid_1's multi_logloss: 1.09345
Seed-56 | Fold-1 | OOF Score: 1.0934496840569374


New categorical_feature is [0, 2, 5, 9, 10, 11, 12, 13, 17, 18, 22, 29, 36, 37, 44]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07292	valid_1's multi_logloss: 1.09906
[200]	training's multi_logloss: 1.04463	valid_1's multi_logloss: 1.09457
[300]	training's multi_logloss: 1.02195	valid_1's multi_logloss: 1.09386
[400]	training's multi_logloss: 1.00242	valid_1's multi_logloss: 1.0938
[500]	training's multi_logloss: 0.984675	valid_1's multi_logloss: 1.09471
Early stopping, best iteration is:
[380]	training's multi_logloss: 1.00617	valid_1's multi_logloss: 1.09355
Seed-56 | Fold-2 | OOF Score: 1.093549450340459


New categorical_feature is [0, 2, 5, 9, 10, 11, 12, 13, 17, 18, 22, 29, 36, 37, 44]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07277	valid_1's multi_logloss: 1.09967
[200]	training's multi_logloss: 1.04425	valid_1's multi_logloss: 1.09502
[300]	training's multi_logloss: 1.02161	valid_1's multi_logloss: 1.09439
[400]	training's multi_logloss: 1.00205	valid_1's multi_logloss: 1.09459
Early stopping, best iteration is:
[288]	training's multi_logloss: 1.02421	valid_1's multi_logloss: 1.09435
Seed-56 | Fold-3 | OOF Score: 1.094346844056189


New categorical_feature is [0, 2, 5, 9, 10, 11, 12, 13, 17, 18, 22, 29, 36, 37, 44]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07336	valid_1's multi_logloss: 1.09681
[200]	training's multi_logloss: 1.04509	valid_1's multi_logloss: 1.09056
[300]	training's multi_logloss: 1.02271	valid_1's multi_logloss: 1.08907
[400]	training's multi_logloss: 1.00291	valid_1's multi_logloss: 1.08822
[500]	training's multi_logloss: 0.985266	valid_1's multi_logloss: 1.08858
[600]	training's multi_logloss: 0.968793	valid_1's multi_logloss: 1.08912
Early stopping, best iteration is:
[431]	training's multi_logloss: 0.997245	valid_1's multi_logloss: 1.0881
Seed-56 | Fold-4 | OOF Score: 1.0880967980648584


New categorical_feature is [0, 2, 5, 9, 10, 11, 12, 13, 17, 18, 22, 29, 36, 37, 44]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07315	valid_1's multi_logloss: 1.0971
[200]	training's multi_logloss: 1.04519	valid_1's multi_logloss: 1.09173
[300]	training's multi_logloss: 1.02264	valid_1's multi_logloss: 1.09033
[400]	training's multi_logloss: 1.00319	valid_1's multi_logloss: 1.09033
[500]	training's multi_logloss: 0.985615	valid_1's multi_logloss: 1.09087
Early stopping, best iteration is:
[320]	training's multi_logloss: 1.01865	valid_1's multi_logloss: 1.09023
Seed-56 | Fold-5 | OOF Score: 1.0902299256673373


New categorical_feature is [0, 2, 5, 9, 10, 11, 12, 13, 17, 18, 22, 29, 36, 37, 44]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.0728	valid_1's multi_logloss: 1.09989
[200]	training's multi_logloss: 1.04446	valid_1's multi_logloss: 1.09512
[300]	training's multi_logloss: 1.02211	valid_1's multi_logloss: 1.09369
[400]	training's multi_logloss: 1.00246	valid_1's multi_logloss: 1.09325
[500]	training's multi_logloss: 0.984612	valid_1's multi_logloss: 1.09367
[600]	training's multi_logloss: 0.968158	valid_1's multi_logloss: 1.09383
Early stopping, best iteration is:
[412]	training's multi_logloss: 1.00027	valid_1's multi_logloss: 1.09311
Seed-56 | Fold-6 | OOF Score: 1.0931118947959229


New categorical_feature is [0, 2, 5, 9, 10, 11, 12, 13, 17, 18, 22, 29, 36, 37, 44]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.0728	valid_1's multi_logloss: 1.10026
[200]	training's multi_logloss: 1.04465	valid_1's multi_logloss: 1.09497
[300]	training's multi_logloss: 1.02221	valid_1's multi_logloss: 1.09294
[400]	training's multi_logloss: 1.0027	valid_1's multi_logloss: 1.09206
[500]	training's multi_logloss: 0.985155	valid_1's multi_logloss: 1.09238
[600]	training's multi_logloss: 0.968801	valid_1's multi_logloss: 1.09266
Early stopping, best iteration is:
[455]	training's multi_logloss: 0.992901	valid_1's multi_logloss: 1.09186
Seed-56 | Fold-7 | OOF Score: 1.0918572694985857


New categorical_feature is [0, 2, 5, 9, 10, 11, 12, 13, 17, 18, 22, 29, 36, 37, 44]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07317	valid_1's multi_logloss: 1.09872
[200]	training's multi_logloss: 1.0447	valid_1's multi_logloss: 1.09325
[300]	training's multi_logloss: 1.02209	valid_1's multi_logloss: 1.09192
[400]	training's multi_logloss: 1.00256	valid_1's multi_logloss: 1.09194
[500]	training's multi_logloss: 0.98487	valid_1's multi_logloss: 1.09224
Early stopping, best iteration is:
[344]	training's multi_logloss: 1.01322	valid_1's multi_logloss: 1.09173
Seed-56 | Fold-8 | OOF Score: 1.0917323486550343


New categorical_feature is [0, 2, 5, 9, 10, 11, 12, 13, 17, 18, 22, 29, 36, 37, 44]
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


Training until validation scores don't improve for 200 rounds
[100]	training's multi_logloss: 1.07259	valid_1's multi_logloss: 1.10132
[200]	training's multi_logloss: 1.04436	valid_1's multi_logloss: 1.09711
[300]	training's multi_logloss: 1.02194	valid_1's multi_logloss: 1.09619
[400]	training's multi_logloss: 1.00225	valid_1's multi_logloss: 1.09661
Early stopping, best iteration is:
[292]	training's multi_logloss: 1.02357	valid_1's multi_logloss: 1.09611
Seed-56 | Fold-9 | OOF Score: 1.0961073562129975

Seed: 56 | Aggregate OOF Score: 1.0924373886355947


Aggregate OOF Score: 1.0926958774050042


In [7]:
# Persist the out-of-fold predictions, the aggregate OOF score and the averaged
# test predictions in a single compressed archive, keyed by variable name.
meta_arrays = {
    'y_pred_meta_lgb': y_pred_meta_lgb,
    'oof_score': oof_score,
    'y_pred_final_lgb': y_pred_final_lgb,
}
np.savez_compressed('./LGB_Meta_Features.npz', **meta_arrays)

## Create submission file

In [8]:
# Keep every submitted probability inside [p_min, p_max].
y_pred_final_lgb = np.clip(y_pred_final_lgb, p_min, p_max)

# The ids come from the competition's test file.
# NOTE(review): this assumes the CSV rows are in the same order as the rows of
# Xtest -- confirm against the preprocessing step.
test_df = pd.read_csv("../input/tabular-playground-series-may-2021/test.csv")

# One 'Class_<k>' column per probability column, in order, after the id.
class_columns = {
    'Class_{}'.format(k + 1): y_pred_final_lgb[:, k]
    for k in range(4)
}
submit_df = pd.DataFrame({'id': test_df['id'], **class_columns})
submit_df.head()

Unnamed: 0,id,Class_1,Class_2,Class_3,Class_4
0,100000,0.084972,0.607259,0.185489,0.12228
1,100001,0.077911,0.668733,0.173727,0.079629
2,100002,0.08517,0.637511,0.175553,0.101767
3,100003,0.079365,0.540842,0.2654,0.114392
4,100004,0.076234,0.608782,0.211749,0.103235


In [9]:
# Write the submission table to disk without the DataFrame index column.
submit_df.to_csv(path_or_buf="./LGB_submission.csv", index=False)