In [None]:
!pip install catboost -q
!pip install GPUtil -q
!pip install --upgrade --force-reinstall xlrd -q
!pip install pytorch-tabnet -q

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import seaborn as sns

from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor

from imblearn.over_sampling import BorderlineSMOTE
from sklearn.preprocessing import StandardScaler

from catboost import CatBoostClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

from keras.models import Sequential, load_model
from keras.layers import Dense, Activation, Conv1D, MaxPooling1D, Flatten, Dropout
from keras.callbacks import ModelCheckpoint, EarlyStopping


from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from sklearn.metrics import classification_report
from sklearn.metrics import RocCurveDisplay
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import KFold

from pytorch_tabnet.tab_model import TabNetClassifier
from pytorch_tabnet.metrics import Metric

import gc
import os
from numpy import mean
from numpy import std

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
#from sklearn.model_selection import GridSearchCV
#from scipy.stats import ks_2samp

In [None]:
from GPUtil import showUtilization as gpu_usage
from numba import cuda

def free_gpu_cache():
    """Print GPU utilisation, release GPU memory, then print utilisation again.

    Empties PyTorch's CUDA cache and then closes and re-selects CUDA device 0
    through numba. Utilisation is reported with GPUtil before and after.
    """
    print("Initial GPU Usage")
    gpu_usage() 
    torch.cuda.empty_cache()
    # Select device 0, close the numba CUDA context, then select it again.
    # NOTE(review): closing the context may also invalidate GPU state held by
    # other frameworks in this kernel (torch / TensorFlow) -- confirm that the
    # models trained after this call still run on the GPU.
    cuda.select_device(0)
    cuda.close()
    cuda.select_device(0)

    print("GPU Usage after emptying the cache")
    gpu_usage()

In [None]:
# Load the raw spreadsheet; the first column is used as the row index.
df = pd.read_excel('../input/taiwancd/default of credit card clients.xls', index_col = 0)

In [None]:
df.head()

In [None]:
######### Pre-processing #########

In [None]:
# Give the label column a short name that is used by the rest of the notebook.
df = df.rename(columns={'default payment next month': 'target'})

In [None]:
# Global outlier removal: keep only the rows IsolationForest labels as inliers (+1).
# NOTE(review): the detector is fitted on every column of df, label included.
ISF = IsolationForest(random_state=0)
isf_inlier_mask = ISF.fit_predict(df) == 1
df = df[isf_inlier_mask]

In [None]:
# Local outlier removal: keep only the rows LocalOutlierFactor labels as inliers (+1).
# NOTE(review): the detector is fitted on every column of df, label included.
LOF = LocalOutlierFactor(n_neighbors=2)
lof_inlier_mask = LOF.fit_predict(df) == 1
df = df[lof_inlier_mask]

In [None]:
# Columns that are one-hot encoded in the next cell.
cat_cols=['SEX','EDUCATION','MARRIAGE','PAY_0','PAY_2','PAY_3','PAY_4','PAY_5','PAY_6']

In [None]:
# One-hot encoding: replace every column in cat_cols with indicator columns.
df = pd.get_dummies(df, columns = cat_cols)

In [None]:
df.shape

In [None]:
# Split the label off: y holds the target, df keeps only the feature columns.
y = df.pop('target')

In [None]:
# Standardise every feature column; X is the scaled array used downstream.
# NOTE(review): the scaler is fitted on all rows, i.e. before the train/test split.
scaler = StandardScaler().fit(df)
X = scaler.transform(df)

In [None]:
# Oversampling: rebalance the classes with BorderlineSMOTE.
# NOTE(review): resampling happens before train_test_split, so synthetic rows
# built from samples that later land in the test set can end up in the training
# set; the reported test metrics are therefore likely optimistic.
# NOTE(review): recent imbalanced-learn releases deprecate the n_jobs argument
# -- confirm against the installed version.
sm = BorderlineSMOTE(random_state=42, n_jobs = -1)
X, y = sm.fit_resample(X, y)

In [None]:
gc.collect()

In [None]:
############# Base model ############

In [None]:
# Stratified 80/20 hold-out split; the test part is only used for evaluation
# and for the averaged test-side meta-features.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0, shuffle=True, stratify=y
)
# Each shape is labelled with the array it actually belongs to.
print('Size of train features: ', X_train.shape)
print('Size of train labels: ', y_train.shape)
print('Size of test features: ', X_test.shape)
print('Size of test labels: ', y_test.shape)

In [None]:
n_fold = 5

In [None]:
############### Random Forest model #############

In [None]:
# Random forest base learner (all cores, progress output enabled).
rf = RandomForestClassifier(
    n_estimators=361,
    n_jobs=-1,
    verbose=1,
    random_state=42,
)

In [None]:
# K-fold training of the random forest.
# Produces:
#   meta_train_rf  (n_train, 1)  out-of-fold P(class 1) for every training row
#   meta_test_rf   (n_test, 1)   P(class 1) on the test set, averaged over folds
#   acc/auc/f1 lists             per-fold scores on the hold-out test set
# shuffle=False keeps folds contiguous; OOF rows are written by index so they
# always line up with X_train.
kf = KFold(n_splits=n_fold, shuffle=False)
acc_score_rf, auc_score_rf, f1_rf = [], [], []
meta_train_rf = np.zeros((X_train.shape[0], 1))
meta_test_rf = np.zeros((X_test.shape[0], 1))
for i, (train_index, valid_index) in enumerate(kf.split(X_train, y_train), start=1):
    print('KFold {} of {}'.format(i, kf.n_splits))
    train_X, train_y = X_train[train_index], y_train.iloc[train_index]
    val_X = X_train[valid_index]
    rf.fit(train_X, train_y)

    # Meta-features: out-of-fold for train, running sum for test.
    meta_train_rf[valid_index, 0] = rf.predict_proba(val_X)[:, 1]
    test_proba = rf.predict_proba(X_test)[:, 1]
    meta_test_rf[:, 0] += test_proba

    # sklearn metrics take (y_true, y_pred); ROC-AUC needs scores, not labels.
    yhat = rf.predict(X_test)
    acc_score_rf.append(accuracy_score(y_test, yhat))
    auc_score_rf.append(roc_auc_score(y_test, test_proba))
    f1_rf.append(f1_score(y_test, yhat))
meta_test_rf /= n_fold

In [None]:
# Summary of the per-fold test scores, followed by a detailed report for the
# model left over from the last fold.
print("###Random Forest Classifier###\n")
print("Accuracy: " + str(mean(acc_score_rf)) + " +- "+ str(std(acc_score_rf)))
print("ROC-AUC: " + str(mean(auc_score_rf)) + " +- " + str(std(auc_score_rf)))
print("F1-Score: " + str(mean(f1_rf)) +" +- "+ str(std(f1_rf)))

yhat = rf.predict(X_test)
print(classification_report(y_test, yhat))
# (y_true, y_pred) order: rows of the matrix are true labels, columns predictions.
cm = confusion_matrix(y_test, yhat, labels=[0,1])
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=[0,1])
disp.plot()
# The ROC curve is drawn from predicted probabilities, not thresholded labels.
RocCurveDisplay.from_predictions(y_test, rf.predict_proba(X_test)[:, 1])
plt.show()

In [None]:
gc.collect()

In [None]:
######### LightGBM model ###########

In [None]:
# LightGBM base learner: gradient-boosted trees with a binary objective.
lgb = LGBMClassifier(
    objective='binary',
    boosting_type='gbdt',
    n_estimators=450,
    learning_rate=0.05,
    max_depth=10,
    random_state=42,
)

In [None]:
# K-fold training of LightGBM.
# Produces:
#   meta_train_lgb  (n_train, 1)  out-of-fold P(class 1) for every training row
#   meta_test_lgb   (n_test, 1)   P(class 1) on the test set, averaged over folds
#   acc/auc/f1 lists              per-fold scores on the hold-out test set
# shuffle=False keeps folds contiguous; OOF rows are written by index so they
# always line up with X_train.
kf = KFold(n_splits=n_fold, shuffle=False)
acc_score_lgb, auc_score_lgb, f1_lgb = [], [], []
meta_train_lgb = np.zeros((X_train.shape[0], 1))
meta_test_lgb = np.zeros((X_test.shape[0], 1))
for i, (train_index, valid_index) in enumerate(kf.split(X_train, y_train), start=1):
    print('KFold {} of {}'.format(i, kf.n_splits))
    train_X, val_X = X_train[train_index], X_train[valid_index]
    train_y, val_y = y_train.iloc[train_index], y_train.iloc[valid_index]
    # NOTE(review): newer LightGBM releases expect early stopping / verbosity
    # to be passed as callbacks -- confirm against the installed version.
    lgb.fit(train_X, train_y, eval_set=[(val_X, val_y)], early_stopping_rounds=200, verbose = 0)

    # Meta-features: out-of-fold for train, running sum for test.
    meta_train_lgb[valid_index, 0] = lgb.predict_proba(val_X)[:, 1]
    test_proba = lgb.predict_proba(X_test)[:, 1]
    meta_test_lgb[:, 0] += test_proba

    # sklearn metrics take (y_true, y_pred); ROC-AUC needs scores, not labels.
    yhat = lgb.predict(X_test)
    acc_score_lgb.append(accuracy_score(y_test, yhat))
    auc_score_lgb.append(roc_auc_score(y_test, test_proba))
    f1_lgb.append(f1_score(y_test, yhat))
meta_test_lgb /= n_fold


In [None]:
# Summary of the per-fold test scores, followed by a detailed report for the
# model left over from the last fold.
print("###LightGBM Classifier###\n")
print("Accuracy: " + str(mean(acc_score_lgb)) + " +- "+ str(std(acc_score_lgb)))
print("ROC-AUC: " + str(mean(auc_score_lgb)) + " +- " + str(std(auc_score_lgb)))
print("F1-Score: " + str(mean(f1_lgb)) +" +- "+ str(std(f1_lgb)))

yhat = lgb.predict(X_test)
print(classification_report(y_test, yhat))
# (y_true, y_pred) order: rows of the matrix are true labels, columns predictions.
cm = confusion_matrix(y_test, yhat, labels=[0,1])
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=[0,1])
disp.plot()
# The ROC curve is drawn from predicted probabilities, not thresholded labels.
RocCurveDisplay.from_predictions(y_test, lgb.predict_proba(X_test)[:, 1])
plt.show()

In [None]:
# Feature importances of the LightGBM model fitted in the last fold
# (the estimator is refitted every fold, so nothing is averaged here).
feature_imp = pd.DataFrame(
    {'Value': lgb.feature_importances_, 'Feature': df.columns}
).sort_values(by="Value", ascending=False)
plt.figure(figsize=(20, 10))
sns.barplot(x="Value", y="Feature", data=feature_imp)
plt.title('LightGBM Features (last fold model)')
plt.tight_layout()
plt.show()

In [None]:
gc.collect()

In [None]:
######## XGBoost model ##########

In [None]:
# XGBoost base learner trained on GPU 0.
# NOTE(review): newer XGBoost releases replace tree_method='gpu_hist' / gpu_id
# with tree_method='hist' and device='cuda' -- confirm against the installed version.
xgb = XGBClassifier(
    objective='binary:logistic',
    n_estimators=410,
    learning_rate=0.05,
    max_depth=18,
    min_child_weight=4,
    subsample=0.5,
    tree_method='gpu_hist',
    gpu_id=0,
    random_state=42,
)

In [None]:
# K-fold training of XGBoost.
# Produces:
#   meta_train_xgb  (n_train, 1)  out-of-fold P(class 1) for every training row
#   meta_test_xgb   (n_test, 1)   P(class 1) on the test set, averaged over folds
#   acc/auc/f1 lists              per-fold scores on the hold-out test set
# shuffle=False keeps folds contiguous; OOF rows are written by index so they
# always line up with X_train.
kf = KFold(n_splits=n_fold, shuffle=False)
acc_score_xgb, auc_score_xgb, f1_xgb = [], [], []
meta_train_xgb = np.zeros((X_train.shape[0], 1))
meta_test_xgb = np.zeros((X_test.shape[0], 1))
for i, (train_index, valid_index) in enumerate(kf.split(X_train, y_train), start=1):
    print('KFold {} of {}'.format(i, kf.n_splits))
    train_X, val_X = X_train[train_index], X_train[valid_index]
    train_y, val_y = y_train.iloc[train_index], y_train.iloc[valid_index]
    # NOTE(review): newer XGBoost releases take early_stopping_rounds in the
    # constructor rather than in fit() -- confirm against the installed version.
    xgb.fit(train_X, train_y, eval_set=[(val_X, val_y)], early_stopping_rounds=200, verbose = 0)

    # Meta-features: out-of-fold for train, running sum for test.
    meta_train_xgb[valid_index, 0] = xgb.predict_proba(val_X)[:, 1]
    test_proba = xgb.predict_proba(X_test)[:, 1]
    meta_test_xgb[:, 0] += test_proba

    # sklearn metrics take (y_true, y_pred); ROC-AUC needs scores, not labels.
    yhat = xgb.predict(X_test)
    acc_score_xgb.append(accuracy_score(y_test, yhat))
    auc_score_xgb.append(roc_auc_score(y_test, test_proba))
    f1_xgb.append(f1_score(y_test, yhat))
meta_test_xgb /= n_fold

In [None]:
# Summary of the per-fold test scores, followed by a detailed report for the
# model left over from the last fold.
print("###XGBoost Classifier###\n")
print("Accuracy: " + str(mean(acc_score_xgb)) + " +- "+ str(std(acc_score_xgb)))
print("ROC-AUC: " + str(mean(auc_score_xgb)) + " +- " + str(std(auc_score_xgb)))
print("F1-Score: " + str(mean(f1_xgb)) +" +- "+ str(std(f1_xgb)))

yhat = xgb.predict(X_test)
print(classification_report(y_test, yhat))
# (y_true, y_pred) order: rows of the matrix are true labels, columns predictions.
cm = confusion_matrix(y_test, yhat, labels=[0,1])
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=[0,1])
disp.plot()
# The ROC curve is drawn from predicted probabilities, not thresholded labels.
RocCurveDisplay.from_predictions(y_test, xgb.predict_proba(X_test)[:, 1])
plt.show()

In [None]:
# Feature importances of the XGBoost model fitted in the last fold
# (the estimator is refitted every fold, so nothing is averaged here).
feature_imp = pd.DataFrame(
    {'Value': xgb.feature_importances_, 'Feature': df.columns}
).sort_values(by="Value", ascending=False)
plt.figure(figsize=(20, 10))
sns.barplot(x="Value", y="Feature", data=feature_imp)
plt.title('XGBoost Features (last fold model)')
plt.tight_layout()
plt.show()

In [None]:
gc.collect()

In [None]:
############ CatBoost ################

In [None]:
# CatBoost base learner trained on the GPU.
catb = CatBoostClassifier(
    task_type="GPU",
    iterations=450,
    depth=10,
    learning_rate=0.05,
)

In [None]:
# K-fold training of CatBoost.
# Produces:
#   meta_train_catb  (n_train, 1)  out-of-fold P(class 1) for every training row
#   meta_test_catb   (n_test, 1)   P(class 1) on the test set, averaged over folds
#   acc/auc/f1 lists               per-fold scores on the hold-out test set
# shuffle=False keeps folds contiguous; OOF rows are written by index so they
# always line up with X_train.
kf = KFold(n_splits=n_fold, shuffle=False)
acc_score_catb, auc_score_catb, f1_catb = [], [], []
meta_train_catb = np.zeros((X_train.shape[0], 1))
meta_test_catb = np.zeros((X_test.shape[0], 1))
for i, (train_index, valid_index) in enumerate(kf.split(X_train, y_train), start=1):
    print('KFold {} of {}'.format(i, kf.n_splits))
    train_X, val_X = X_train[train_index], X_train[valid_index]
    train_y, val_y = y_train.iloc[train_index], y_train.iloc[valid_index]
    catb.fit(train_X, train_y, eval_set=[(val_X, val_y)], early_stopping_rounds=200, verbose = 0)

    # Meta-features: out-of-fold for train, running sum for test.
    meta_train_catb[valid_index, 0] = catb.predict_proba(val_X)[:, 1]
    test_proba = catb.predict_proba(X_test)[:, 1]
    meta_test_catb[:, 0] += test_proba

    # sklearn metrics take (y_true, y_pred); ROC-AUC needs scores, not labels.
    yhat = catb.predict(X_test)
    acc_score_catb.append(accuracy_score(y_test, yhat))
    auc_score_catb.append(roc_auc_score(y_test, test_proba))
    f1_catb.append(f1_score(y_test, yhat))
meta_test_catb /= n_fold

In [None]:
# Summary of the per-fold test scores, followed by a detailed report for the
# model left over from the last fold.
print("###Catboost Classifier###\n")
print("Accuracy: " + str(mean(acc_score_catb)) + " +- "+ str(std(acc_score_catb)))
print("ROC-AUC: " + str(mean(auc_score_catb)) + " +- " + str(std(auc_score_catb)))
print("F1-Score: " + str(mean(f1_catb)) +" +- "+ str(std(f1_catb)))

yhat = catb.predict(X_test)
print(classification_report(y_test, yhat))
# (y_true, y_pred) order: rows of the matrix are true labels, columns predictions.
cm = confusion_matrix(y_test, yhat, labels=[0,1])
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=[0,1])
disp.plot()
# The ROC curve is drawn from predicted probabilities, not thresholded labels.
RocCurveDisplay.from_predictions(y_test, catb.predict_proba(X_test)[:, 1])
plt.show()

In [None]:
# Feature importances of the CatBoost model fitted in the last fold
# (the estimator is refitted every fold, so nothing is averaged here).
feature_imp = pd.DataFrame(
    {'Value': catb.feature_importances_, 'Feature': df.columns}
).sort_values(by="Value", ascending=False)
plt.figure(figsize=(20, 10))
sns.barplot(x="Value", y="Feature", data=feature_imp)
plt.title('CatBoost Features (last fold model)')
plt.tight_layout()
plt.show()

In [None]:
gc.collect()
free_gpu_cache()

In [None]:
######### GBDT #############

In [None]:
# scikit-learn gradient boosting base learner (stochastic: subsample < 1).
gbdt = GradientBoostingClassifier(
    n_estimators=438,
    learning_rate=0.06,
    max_depth=10,
    subsample=0.55,
    random_state=0,
    verbose=1,
)

In [None]:
# K-fold training of the scikit-learn gradient boosting model.
# Produces:
#   meta_train_gbdt  (n_train, 1)  out-of-fold P(class 1) for every training row
#   meta_test_gbdt   (n_test, 1)   P(class 1) on the test set, averaged over folds
#   acc/auc/f1 lists               per-fold scores on the hold-out test set
# shuffle=False keeps folds contiguous; OOF rows are written by index so they
# always line up with X_train.
kf = KFold(n_splits=n_fold, shuffle=False)
acc_score_gbdt, auc_score_gbdt, f1_gbdt = [], [], []
meta_train_gbdt = np.zeros((X_train.shape[0], 1))
meta_test_gbdt = np.zeros((X_test.shape[0], 1))
for i, (train_index, valid_index) in enumerate(kf.split(X_train, y_train), start=1):
    print('KFold {} of {}'.format(i, kf.n_splits))
    train_X, train_y = X_train[train_index], y_train.iloc[train_index]
    val_X = X_train[valid_index]
    gbdt.fit(train_X, train_y)

    # Meta-features: out-of-fold for train, running sum for test.
    meta_train_gbdt[valid_index, 0] = gbdt.predict_proba(val_X)[:, 1]
    test_proba = gbdt.predict_proba(X_test)[:, 1]
    meta_test_gbdt[:, 0] += test_proba

    # sklearn metrics take (y_true, y_pred); ROC-AUC needs scores, not labels.
    yhat = gbdt.predict(X_test)
    acc_score_gbdt.append(accuracy_score(y_test, yhat))
    auc_score_gbdt.append(roc_auc_score(y_test, test_proba))
    f1_gbdt.append(f1_score(y_test, yhat))
# Average over the actual number of folds instead of a hard-coded 5.
meta_test_gbdt /= n_fold

In [None]:
# Summary of the per-fold test scores, followed by a detailed report for the
# model left over from the last fold.
print("###GBDT Classifier###\n")
print("Accuracy: " + str(mean(acc_score_gbdt)) + " +- "+ str(std(acc_score_gbdt)))
print("ROC-AUC: " + str(mean(auc_score_gbdt)) + " +- " + str(std(auc_score_gbdt)))
print("F1-Score: " + str(mean(f1_gbdt)) +" +- "+ str(std(f1_gbdt)))

yhat = gbdt.predict(X_test)
print(classification_report(y_test, yhat))
# (y_true, y_pred) order: rows of the matrix are true labels, columns predictions.
cm = confusion_matrix(y_test, yhat, labels=[0,1])
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=[0,1])
disp.plot()
# The ROC curve is drawn from predicted probabilities, not thresholded labels.
RocCurveDisplay.from_predictions(y_test, gbdt.predict_proba(X_test)[:, 1])
plt.show()

In [None]:
gc.collect()

In [None]:
########### 1D-CNN model #############

In [None]:
def EarlyStop(patience):
    """Build a Keras EarlyStopping callback that watches validation accuracy.

    Args:
        patience: value forwarded to EarlyStopping's ``patience`` argument.

    Returns:
        A new EarlyStopping instance monitoring ``val_accuracy``.
    """
    stopper = EarlyStopping(
        monitor="val_accuracy",
        mode="auto",
        patience=patience,
        min_delta=0,
        verbose=0,
    )
    return stopper

def ModelCheckpointFull(model_name):
    """Build a Keras ModelCheckpoint callback that keeps the best weights.

    Args:
        model_name: file path the weights are written to.

    Returns:
        A new ModelCheckpoint that checks every epoch and saves weights only
        when ``val_accuracy`` improves.
    """
    # NOTE(review): some Keras versions require weight-only checkpoints to use
    # a '.weights.h5' suffix -- confirm against the installed version.
    checkpoint = ModelCheckpoint(
        filepath=model_name,
        monitor='val_accuracy',
        save_best_only=True,
        save_weights_only=True,
        save_freq='epoch',
        verbose=1,
    )
    return checkpoint

In [None]:
# Define 1D - CNN model
def create_model(input_shape):
    """Build and compile the 1D-CNN binary classifier.

    Args:
        input_shape: length of the input sequence (number of features); the
            network expects samples shaped ``(input_shape, 1)``.

    Returns:
        A compiled Sequential model (binary cross-entropy loss, Adam optimizer,
        accuracy metric) with a single sigmoid output unit.
    """
    # The model is instantiated once; the original created it twice.
    model = Sequential([
        Conv1D(filters=512, kernel_size=3, activation='relu', input_shape=(input_shape, 1)),
        Conv1D(filters=256, kernel_size=3, activation='relu'),
        Conv1D(filters=64, kernel_size=3, activation='relu'),
        Dropout(0.2),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(64, activation='relu'),
        Dense(1, activation='sigmoid'),
    ])
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model




# Conv1D expects (samples, steps, channels): add a trailing channel axis.
# Each array is reshaped using its own feature count.
X_train_cnn = X_train.reshape(-1, X_train.shape[1], 1)
X_test_cnn = X_test.reshape(-1, X_test.shape[1], 1)

# 1D-CNN model
cnn1d = create_model(input_shape=X_train_cnn.shape[1])


In [None]:
# K-fold training of the 1D-CNN.
# Produces:
#   meta_train_cnn1d  (n_train, 1)  out-of-fold P(class 1) for every training row
#   meta_test_cnn1d   (n_test, 1)   P(class 1) on the test set, averaged over folds
#   acc/auc/f1 lists                per-fold scores on the hold-out test set
# shuffle=False keeps folds contiguous; OOF rows are written by index so they
# always line up with X_train.
kf = KFold(n_splits=n_fold, shuffle=False)
acc_score_cnn1d, auc_score_cnn1d, f1_cnn1d = [], [], []
meta_train_cnn1d = np.zeros((X_train_cnn.shape[0], 1))
meta_test_cnn1d = np.zeros((X_test_cnn.shape[0], 1))
for i, (train_index, valid_index) in enumerate(kf.split(X_train_cnn, y_train), start=1):
    print('KFold {} of {}'.format(i, kf.n_splits))
    train_X, val_X = X_train_cnn[train_index], X_train_cnn[valid_index]
    train_y, val_y = y_train.iloc[train_index], y_train.iloc[valid_index]

    # A Keras model keeps its weights across fit() calls. Start every fold from
    # a freshly initialised network so the validation fold has never been seen
    # during training; otherwise the out-of-fold predictions are leaked.
    cnn1d = create_model(input_shape=X_train_cnn.shape[1])
    # NOTE(review): the checkpoint file is written but never loaded back;
    # predictions below use the weights from the final epoch.
    cnn1d.fit(train_X, train_y, validation_data=(val_X, val_y), epochs = 500,
            batch_size = 256,
            callbacks = [EarlyStop(30), ModelCheckpointFull('./cnn1d.h5')],
            verbose = 1)

    # Meta-features: out-of-fold for train, running sum for test.
    meta_train_cnn1d[valid_index] = cnn1d.predict(val_X)
    test_proba = cnn1d.predict(X_test_cnn)
    meta_test_cnn1d += test_proba

    # sklearn metrics take (y_true, y_pred); ROC-AUC needs scores, not labels.
    yhat = test_proba.round().ravel()
    acc_score_cnn1d.append(accuracy_score(y_test, yhat))
    auc_score_cnn1d.append(roc_auc_score(y_test, test_proba.ravel()))
    f1_cnn1d.append(f1_score(y_test, yhat))
# Average over the actual number of folds instead of a hard-coded 5.
meta_test_cnn1d /= n_fold

In [None]:
# Summary of the per-fold test scores, followed by a detailed report for the
# network currently held in cnn1d.
print("###1D-CNN Classifier###\n")
print("Accuracy: " + str(mean(acc_score_cnn1d)) + " +- "+ str(std(acc_score_cnn1d)))
print("ROC-AUC: " + str(mean(auc_score_cnn1d)) + " +- " + str(std(auc_score_cnn1d)))
print("F1-Score: " + str(mean(f1_cnn1d)) +" +- "+ str(std(f1_cnn1d)))

test_proba = cnn1d.predict(X_test_cnn).ravel()
yhat = test_proba.round()
print(classification_report(y_test, yhat))
# (y_true, y_pred) order: rows of the matrix are true labels, columns predictions.
cm = confusion_matrix(y_test, yhat, labels=[0,1])
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=[0,1])
disp.plot()
# The ROC curve is drawn from predicted probabilities, not thresholded labels.
RocCurveDisplay.from_predictions(y_test, test_proba)
plt.show()

In [None]:
gc.collect()
free_gpu_cache()

In [None]:
########### TabNet #############

In [None]:
class roc_auc(Metric):
    """Custom TabNet evaluation metric: ROC-AUC, flagged as higher-is-better."""

    def __init__(self):
        self._maximize = True
        self._name = "roc_auc"

    def __call__(self, y_true, y_score):
        # Column 1 of y_score is used as the score passed to roc_auc_score.
        positive_scores = y_score[:, 1]
        return roc_auc_score(y_true, positive_scores)

In [None]:
# TabNet base learner: Adam optimizer with a cosine-annealing warm-restart
# learning-rate schedule and entmax attention masks.
tabnet = TabNetClassifier(
    n_d=64,
    n_a=64,
    n_steps=3,
    gamma=1.3,
    n_independent=1,
    n_shared=2,
    momentum=0.02,
    clip_value=None,
    lambda_sparse=1e-3,
    mask_type='entmax',
    optimizer_fn=torch.optim.Adam,
    optimizer_params={'lr': 1e-3, 'weight_decay': 1e-3},
    scheduler_fn=torch.optim.lr_scheduler.CosineAnnealingWarmRestarts,
    scheduler_params=dict(T_0=5, eta_min=1e-4, T_mult=1, last_epoch=-1),
    verbose=1,
)

In [None]:
# K-fold training of TabNet.
# Produces:
#   meta_train_tabnet  (n_train, 1)  out-of-fold P(class 1) for every training row
#   meta_test_tabnet   (n_test, 1)   P(class 1) on the test set, averaged over folds
#   acc/auc/f1 lists                 per-fold scores on the hold-out test set
# shuffle=False keeps folds contiguous; OOF rows are written by index so they
# always line up with X_train.
kf = KFold(n_splits=n_fold, shuffle=False)
acc_score_tabnet, auc_score_tabnet, f1_tabnet = [], [], []
meta_train_tabnet = np.zeros((X_train.shape[0], 1))
meta_test_tabnet = np.zeros((X_test.shape[0], 1))
for i, (train_index, valid_index) in enumerate(kf.split(X_train, y_train), start=1):
    print('KFold {} of {}'.format(i, kf.n_splits))
    train_X, val_X = X_train[train_index], X_train[valid_index]
    train_y, val_y = y_train.iloc[train_index], y_train.iloc[valid_index]
    # TabNet is given plain 1-D numpy label arrays; to_numpy() replaces the
    # deprecated Series.ravel().
    tabnet.fit(train_X, train_y.to_numpy(),
      eval_set=[(val_X, val_y.to_numpy())],
      eval_metric=[roc_auc, "accuracy"],
      max_epochs = 500,
      patience = 50,
      batch_size = 256)

    # Meta-features: out-of-fold for train, running sum for test.
    meta_train_tabnet[valid_index, 0] = tabnet.predict_proba(val_X)[:, 1]
    test_proba = tabnet.predict_proba(X_test)[:, 1]
    meta_test_tabnet[:, 0] += test_proba

    # sklearn metrics take (y_true, y_pred); ROC-AUC needs scores, not labels.
    yhat = tabnet.predict(X_test)
    acc_score_tabnet.append(accuracy_score(y_test, yhat))
    auc_score_tabnet.append(roc_auc_score(y_test, test_proba))
    f1_tabnet.append(f1_score(y_test, yhat))
# Average over the actual number of folds instead of a hard-coded 5.
meta_test_tabnet /= n_fold

In [None]:
# Summary of the per-fold test scores, followed by a detailed report for the
# model left over from the last fold.
print("###TabNet Classifier###\n")
print("Accuracy: " + str(mean(acc_score_tabnet)) + " +- "+ str(std(acc_score_tabnet)))
print("ROC-AUC: " + str(mean(auc_score_tabnet)) + " +- " + str(std(auc_score_tabnet)))
print("F1-Score: " + str(mean(f1_tabnet)) +" +- "+ str(std(f1_tabnet)))


yhat = tabnet.predict(X_test)
print(classification_report(y_test, yhat))
# (y_true, y_pred) order: rows of the matrix are true labels, columns predictions.
cm = confusion_matrix(y_test, yhat, labels=[0,1])
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=[0,1])
disp.plot()
# The ROC curve is drawn from predicted probabilities, not thresholded labels.
RocCurveDisplay.from_predictions(y_test, tabnet.predict_proba(X_test)[:, 1])
plt.show()

In [None]:
# Feature importances of the TabNet model fitted in the last fold
# (nothing is averaged across folds here).
feature_imp = pd.DataFrame(
    {'Value': tabnet.feature_importances_, 'Feature': df.columns}
).sort_values(by="Value", ascending=False)
plt.figure(figsize=(20, 10))
sns.barplot(x="Value", y="Feature", data=feature_imp)
plt.title('TabNet Features (last fold model)')
plt.tight_layout()
plt.show()

In [None]:
gc.collect()
free_gpu_cache()

In [None]:
################# STACKING #####################

In [None]:
# Level-1 training frame: the scaled features followed by one column of
# out-of-fold predictions per base model.
train_meta_columns = {
    'RF': meta_train_rf,
    'LightGBM': meta_train_lgb,
    'XGBoost': meta_train_xgb,
    'CatBoost': meta_train_catb,
    'GBDT': meta_train_gbdt,
    '1DCNN': meta_train_cnn1d,
    'TabNet': meta_train_tabnet,
}
train_frames = [pd.DataFrame(X_train, columns=df.columns)]
for model_name, oof_pred in train_meta_columns.items():
    train_frames.append(pd.DataFrame(oof_pred, columns=[model_name]))
meta_train = pd.concat(train_frames, axis=1)

In [None]:
# Level-1 test frame: the scaled features followed by one column of
# fold-averaged test predictions per base model.
test_meta_columns = {
    'RF': meta_test_rf,
    'LightGBM': meta_test_lgb,
    'XGBoost': meta_test_xgb,
    'CatBoost': meta_test_catb,
    'GBDT': meta_test_gbdt,
    '1DCNN': meta_test_cnn1d,
    'TabNet': meta_test_tabnet,
}
test_frames = [pd.DataFrame(X_test, columns=df.columns)]
for model_name, test_pred in test_meta_columns.items():
    test_frames.append(pd.DataFrame(test_pred, columns=[model_name]))
meta_test = pd.concat(test_frames, axis=1)

In [None]:
meta_train.head()

In [None]:
meta_test.head()

In [None]:
# Persist the level-1 frames and the matching labels for the meta-learner.
artifacts = [
    (meta_train, 'meta_train.pkl'),
    (meta_test, 'meta_test.pkl'),
    (y_train, 'y_train.pkl'),
    (y_test, 'y_test.pkl'),
]
for artifact, pickle_path in artifacts:
    artifact.to_pickle(pickle_path)