# About

Here I use the PyTorch implementation of [TabNet](https://github.com/dreamquark-ai/tabnet), an attentive, interpretable tabular learning model. It has served me well in other competitions, so I am giving it a try on this one as well. It trains on GPU 😀

The TabNet paper can be found [here](https://arxiv.org/pdf/1908.07442.pdf).

In this example I am not using aggregated features; only the last row of each customer is kept.



# Installs/Imports

In [None]:
! pip install pytorch-tabnet -q

In [None]:
import os
import random
import time
import psutil
import gc

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import KFold, StratifiedKFold

import torch
from pytorch_tabnet.tab_model import TabNetClassifier
from pytorch_tabnet.metrics import Metric

# Read

In [None]:
# Load the training rows and parse the S_2 column as nanosecond datetimes.
train = pd.read_parquet('../input/amex-data-integer-dtypes-parquet-format/train.parquet')
train['S_2'] = pd.to_datetime(train['S_2']).astype('datetime64[ns]')

# Keep only the final row of each customer_ID (in file order), renumber the
# rows from zero, and mark every missing value with -1.
train = (
    train
    .groupby('customer_ID')
    .tail(1)
    .reset_index(drop=True)
    .fillna(-1)
)

print(train.shape)
display(train.head())

# CFG

In [None]:
class CFG:
    """Notebook-wide settings, read as plain class attributes."""

    # When True, run_training stops after the first fold.
    DEBUG = True
    # Label printed in the training banner and in the importance plot title.
    model = 'tabnet'
    # Number of cross-validation folds.
    N_folds = 5
    # Seed shared by the RNG setup, the fold splitter and the model.
    seed = 42

In [None]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch,
    and exports ``PYTHONHASHSEED``.

    Parameters
    ----------
    seed : int
        Value used for all generators.
    """
    random.seed(seed)
    # NOTE: hash randomisation is fixed at interpreter start-up, so this only
    # takes effect in Python processes launched after this point.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    # torch is the library that actually trains the model, so seed it as well.
    torch.manual_seed(seed)

# Seed the random number generators once, up front, with the configured seed.
seed_everything(seed = CFG.seed)

In [None]:
# Show the system memory usage percentage reported by psutil after loading the data.
psutil.virtual_memory().percent

# Features and Target

In [None]:
# Read the label file and report its size.
target = pd.read_csv('../input/amex-default-prediction/train_labels.csv')
print('target shape: ', target.shape)

# Attach the labels to the training rows by matching on customer_ID
# (default inner join), then report the resulting size.
train = pd.merge(train, target, on = 'customer_ID')
print('train shape: ', train.shape)

In [None]:
# Every column except the label, the customer key and the S_2 date column
# is used as a model input; column order follows train.columns.
_excluded = ('target', 'customer_ID', 'S_2')
all_features = [name for name in train.columns if name not in _excluded]

n_features = len(all_features)
print('n features: ', n_features)

# Categorical features

In [None]:
# Columns treated as categorical; their positions are later handed to TabNet as cat_idxs.
cat_features = ['B_30', 'B_38', 'D_114', 'D_116', 'D_117', 'D_120', 'D_126', 'D_63', 'D_64', 'D_66', 'D_68']

In [None]:
# Positions of the categorical columns inside the feature matrix.
# The model is fit on train[all_features], so each index must be relative to
# all_features. Looking the name up in train.columns would be shifted by the
# non-feature columns (customer_ID, S_2) that train still carries.
cat_index = [all_features.index(cat) for cat in cat_features]

print(len(cat_index))

# Metric

I will use [Vopani's implementation](https://www.kaggle.com/code/rohanrao/amex-competition-metric-implementations).

In [None]:
def amex_metric_numpy(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    """Compute the AMEX competition metric with NumPy.

    The score is the mean of two parts: the normalized weighted Gini
    coefficient and the share of positives captured in the top 4% of
    predictions by cumulative weight. Negatives carry weight 20 and
    positives weight 1.

    Parameters
    ----------
    y_true : array-like of shape (n,)
        Binary labels (1 = positive).
    y_pred : array-like of shape (n,)
        Predicted scores; higher means more likely positive.

    Returns
    -------
    float
        ``0.5 * (gini_normalized + capture_rate_at_4pct)``.
    """
    # Coerce to plain arrays so lists work and pandas Series are indexed by
    # position rather than by label in the reordering step below.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    # count of positives and negatives
    n_pos = y_true.sum()
    n_neg = y_true.shape[0] - n_pos

    # sorting by descending prediction values
    indices = np.argsort(y_pred)[::-1]
    target = y_true[indices]

    # filter the top 4% by cumulative row weights
    weight = 20.0 - target * 19.0
    cum_norm_weight = (weight / weight.sum()).cumsum()
    four_pct_filter = cum_norm_weight <= 0.04

    # default rate captured at 4%
    d = target[four_pct_filter].sum() / n_pos

    # weighted gini coefficient
    lorentz = (target / n_pos).cumsum()
    gini = ((lorentz - cum_norm_weight) * weight).sum()

    # max weighted gini coefficient
    gini_max = 10 * n_neg * (1 - 19 / (n_pos + 20 * n_neg))

    # normalized weighted gini coefficient
    g = gini / gini_max

    return 0.5 * (g + d)

In [None]:
# Adapter that lets TabNet report the AMEX metric during evaluation.
class Amex_tabnet(Metric):
    """TabNet metric wrapper around ``amex_metric_numpy``."""

    def __init__(self):
        # Higher is better; the name is what TabNet uses to label this metric.
        self._maximize = True
        self._name = 'amex_tabnet'

    def __call__(self, y_true, y_pred):
        """Score column 1 of ``y_pred`` against ``y_true``, floored at zero."""
        # Column 1 is presumably the positive-class probability - confirm.
        second_column = y_pred[:, 1]
        score = amex_metric_numpy(y_true, second_column)
        return max(score, 0.)

# Training

In [None]:
def run_training(X = train[all_features], y = train['target'],
                 nfolds = CFG.N_folds):
    """Train one TabNet classifier per stratified fold and return them.

    Parameters
    ----------
    X : pd.DataFrame
        Feature matrix. The default, ``train[all_features]``, is evaluated
        once when this function is defined.
    y : pd.Series
        Binary target aligned row-for-row with ``X``.
    nfolds : int
        Number of stratified folds to split the data into.

    Returns
    -------
    list
        Fitted ``TabNetClassifier`` objects in fold order. When
        ``CFG.DEBUG`` is truthy only the first fold is trained.
    """

    print('\n ', '-'*50)
    print('\nTraining: ', CFG.model)
    print('\n ', '-'*50)

    print('\nSeed: ', CFG.seed)
    print('N folds: ', nfolds)
    print('train shape: ', X.shape)
    print('targets shape: ', y.shape)


    print('\nN features: ', X.shape[1])
    print('\n')

    models = list()

    # Honour the caller's fold count instead of always reading CFG.N_folds.
    kfold = StratifiedKFold(n_splits = nfolds, shuffle=True, random_state = CFG.seed)

    for k, (train_idx, valid_idx) in enumerate(kfold.split(X, y)):

        ## DEBUG MODE
        if CFG.DEBUG and k > 0:
            print('\nDEBUG mode activated: Will train only one fold...\n')
            break

        start = time.time()

        # split() yields positional row numbers, so select with iloc; loc
        # would only be right when the index happens to be 0..n-1.
        X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
        X_valid, y_valid = X.iloc[valid_idx], y.iloc[valid_idx]

        # NOTE(review): cat_idxs is passed without cat_dims; confirm that the
        # installed pytorch-tabnet version accepts this and actually builds
        # embeddings for these columns.
        model = TabNetClassifier(n_d = 32,
                                 n_a = 64,
                                 n_steps = 3,
                                 gamma = 1.3,
                                 cat_idxs = cat_index,
                                 n_independent = 2,
                                 n_shared = 2,
                                 momentum = 0.02,
                                 clip_value = None,
                                 lambda_sparse = 1e-3,
                                 optimizer_fn = torch.optim.Adam,
                                 scheduler_fn = torch.optim.lr_scheduler.CosineAnnealingLR,
                                 scheduler_params = {"T_max" : 6},
                                 mask_type = 'sparsemax',
                                 seed = CFG.seed)

        ## train
        model.fit(np.array(X_train),
                  np.array(y_train.values.ravel()),
                  eval_set = [(np.array(X_valid), np.array(y_valid.values.ravel()))],
                  max_epochs = 50,
                  patience = 10,
                  batch_size = 2048,
                  eval_metric = ['auc', 'accuracy', Amex_tabnet])

        models.append(model)

        end = time.time()
        time_delta = np.round((end - start)/60, 2)

        print(f'\nFold {k+1}/{nfolds} | {time_delta:.2f} min')

        ### free memory
        del X_train, y_train
        del X_valid, y_valid
        gc.collect()

    return models

In [None]:
%%time
### RUN TRAINING: fit with the default data and fold settings, keep the returned models
models = run_training()

In [None]:
# Display the list of fitted models returned by run_training.
models

# Importances

In [None]:
# Importances come from the last model in the list only.
features_importances = models[-1].feature_importances_
argsort = np.argsort(features_importances)
features_importances_sorted = features_importances[argsort]

# Build the name index straight from the feature list; selecting
# train[all_features] just to read .columns would copy the whole feature frame.
feature_names = pd.Index(all_features)
features_sorted = feature_names[argsort]

# plot feature importances
plt.figure(figsize = (12, 16))

### n features to plot
n = 50

# argsort is ascending, so the last n entries are the n most important features.
plt.barh(features_sorted[-n:], features_importances_sorted[-n:])
plt.title(f"Feature Importances: {CFG.model}");

In [None]:
# Show the system memory usage percentage reported by psutil again, now that training is done.
psutil.virtual_memory().percent