### Installing Packages and Dependencies

```
conda create -n interpretabnet python=3.10
conda activate interpretabnet
```

```
pip install -r requirements.txt
```

### Importing Libraries

In [5]:
# Data generation function import
from syn_data_generation import generate_data
from matplotlib import pyplot as plt
from pytorch_tabnet.tab_model import TabNetClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, average_precision_score, roc_auc_score
import pandas as pd
import numpy as np
from pathlib import Path

### Data Processing - Synthetic Data from L2X (https://arxiv.org/abs/1802.07814)

In [6]:
# Synthetic benchmark selection: `idx` picks one entry of `data_sets`.
data_sets = [f'Syn{k}' for k in range(1, 7)]
idx = 2
data_type = data_sets[idx]

# Label format: either binary (Y) or probability (Prob); the first is used.
data_out_sets = ['Y', 'Prob']
data_out = data_out_sets[0]

# Sample counts for the training and testing splits.
train_N = test_N = 10000

# The two splits are generated from different seeds.
train_seed, test_seed = 0, 1

In [7]:
def create_data(data_type, data_out):
    """Generate the training and testing splits for one synthetic dataset.

    Both splits come from ``generate_data``; the sample counts and seeds are
    read from the module-level ``train_N``/``test_N`` and
    ``train_seed``/``test_seed`` settings.

    Args:
        data_type: Dataset name forwarded as ``data_type``.
        data_out: Output format forwarded as ``out``.

    Returns:
        A 6-tuple ``(x_train, y_train, g_train, x_test, y_test, g_test)``.
        Presumably ``x`` are features, ``y`` labels and ``g`` the ground-truth
        relevant-feature indicators — confirm against ``syn_data_generation``.
    """
    features_tr, labels_tr, truth_tr = generate_data(
        n=train_N,
        data_type=data_type,
        seed=train_seed,
        out=data_out,
    )
    features_te, labels_te, truth_te = generate_data(
        n=test_N,
        data_type=data_type,
        seed=test_seed,
        out=data_out,
    )
    return features_tr, labels_tr, truth_tr, features_te, labels_te, truth_te

# Build both splits using the dataset name and label format chosen above.
(x_train, y_train, g_train,
 x_test, y_test, g_test) = create_data(data_type, data_out)

# binary: keep only column 0 of the label arrays as the 1-D target
# (assumes generate_data returns 2-D labels — TODO confirm the column meaning).
y_train, y_test = y_train[:, 0], y_test[:, 0]

# Empty lists: no categorical feature indices/dimensions are passed to TabNet.
cat_idxs, cat_dims = [], []

### Tuning Hyperparameters

In [8]:
# Candidate values for each hyperparameter. They are tuned one at a time in the
# order listed here; each stage reuses the winners of the earlier stages and
# the first grid entry for hyperparameters that have not been tuned yet.
nd_na = [16, 32, 128]
gammas = [1.0, 1.2, 1.5, 2.0]
lambda_sparses = [0.001, 0.01, 0.1, 0.3]
learn_r = [0.005, 0.01, 0.02, 0.025]

# Starting values; each one is replaced as soon as a candidate of its stage
# scores a ROC AUC above 0.
opt_ndna = 32
opt_gamma = 1.5
opt_lambda = 0.001
opt_lr = 0.025

# Held fixed for the whole search.
opt_nsteps = 4
opt_reg_m = 4750


def fit_and_score(ndna, gamma, lambda_sparse, lr):
    """Train one TabNetClassifier for 10 epochs and score it on the test split.

    The number of steps, ``reg_m``, the categorical settings and the data
    arrays are read from the notebook's module-level variables.

    Args:
        ndna: Value used for both ``n_d`` and ``n_a``.
        gamma: Value passed as ``gamma``.
        lambda_sparse: Value passed as ``lambda_sparse``.
        lr: Learning rate passed through ``optimizer_params``.

    Returns:
        ``(model, auc)``: the fitted classifier and the ROC AUC of the second
        ``predict_proba`` column on ``(x_test, y_test)``.
    """
    model = TabNetClassifier(
        n_d=ndna,
        n_a=ndna,
        n_steps=opt_nsteps,
        gamma=gamma,
        lambda_sparse=lambda_sparse,
        cat_idxs=cat_idxs,
        cat_dims=cat_dims,
        optimizer_params=dict(lr=lr),
        reg_m=opt_reg_m,
        mask_type='softmax'
    )

    # 'auc' is monitored in every stage so the metric tracked during training
    # matches the ROC AUC used to rank candidates below (three of the four
    # stages previously monitored 'accuracy').
    model.fit(
        X_train=x_train, y_train=y_train,
        eval_set=[(x_train, y_train), (x_test, y_test)],
        eval_name=['train', 'valid'], batch_size=256,
        virtual_batch_size=256,
        max_epochs=10, eval_metric=['auc']
    )

    proba = model.predict_proba(x_test)
    return model, roc_auc_score(y_true=y_test, y_score=proba[:, 1])


def report_optimum(label):
    """Print the current best hyperparameters in one fixed six-field layout."""
    print(label, [opt_ndna, opt_nsteps, opt_gamma, opt_lambda, opt_lr, opt_reg_m])


# NOTE(review): the 'valid' eval set is the test split and candidates are
# ranked on that same split, so the selected values are tuned on test data.
# The *_test_acc trackers below hold ROC AUC despite their names.

# Stage 1: width of the decision/attention layers (n_d = n_a).
ndna_test_acc = 0
for ndna in nd_na:
    clf, test_acc = fit_and_score(ndna, gammas[0], lambda_sparses[0], learn_r[0])
    if test_acc > ndna_test_acc:
        opt_ndna = ndna
        ndna_test_acc = test_acc
        report_optimum("Optimum Hyperparameters Training")

# Stage 2: gamma, using the best width.
gams_test_acc = 0
for gams in gammas:
    clf, test_acc = fit_and_score(opt_ndna, gams, lambda_sparses[0], learn_r[0])
    if test_acc > gams_test_acc:
        opt_gamma = gams
        gams_test_acc = test_acc
        report_optimum("Optimum Hyperparameters Training")

# Stage 3: lambda_sparse, using the best width and gamma.
lamb_test_acc = 0
for lambs in lambda_sparses:
    clf, test_acc = fit_and_score(opt_ndna, opt_gamma, lambs, learn_r[0])
    if test_acc > lamb_test_acc:
        opt_lambda = lambs
        lamb_test_acc = test_acc
        report_optimum("Optimum Hyperparameters Training")

# Stage 4: learning rate, using every previously selected value.
lr_test_accuracy = 0
for lr in learn_r:
    clf, test_acc = fit_and_score(opt_ndna, opt_gamma, opt_lambda, lr)
    if test_acc > lr_test_accuracy:
        opt_lr = lr
        lr_test_accuracy = test_acc
        report_optimum("Optimum Hyperparameters Training")

report_optimum("Optimum Hyperparameters")

Device used cuda: 0


epoch 0  | loss: 20.96395| train_auc: 0.85258 | valid_auc: 0.84815 |  0:00:13s
epoch 1  | loss: 20.98631| train_auc: 0.88572 | valid_auc: 0.88042 |  0:00:27s
epoch 2  | loss: 20.85886| train_auc: 0.89681 | valid_auc: 0.88959 |  0:00:41s
epoch 3  | loss: 21.04312| train_auc: 0.89998 | valid_auc: 0.89502 |  0:00:54s
epoch 4  | loss: 20.82679| train_auc: 0.9063  | valid_auc: 0.89823 |  0:01:08s
epoch 5  | loss: 20.80269| train_auc: 0.90816 | valid_auc: 0.89915 |  0:01:21s
epoch 6  | loss: 20.82927| train_auc: 0.90968 | valid_auc: 0.8987  |  0:01:35s
epoch 7  | loss: 20.96574| train_auc: 0.91086 | valid_auc: 0.89971 |  0:01:49s
epoch 8  | loss: 20.83868| train_auc: 0.91103 | valid_auc: 0.89865 |  0:02:03s
epoch 9  | loss: 20.87111| train_auc: 0.91326 | valid_auc: 0.89962 |  0:02:16s
Stop training because you reached max_epochs = 10 with best_epoch = 7 and best_valid_auc = 0.89971
Best weights from best epoch are automatically used!
Optimum Hyperparameters Training [16, 1.5, 0.001, 0.025, 4

KeyboardInterrupt: 

In [34]:
# Optimized hyperparameter values: n_d/n_a width and number of steps, then
# gamma, lambda_sparse and learning rate.
opt_ndna, opt_nsteps = 16, 4
opt_gamma, opt_lambda, opt_lr = 1.2, 0.001, 0.025

### Optimized Run with tuned $r_M = 4750$ for Syn3

In [12]:
# Final run: train once per r_M value in `rm_lst` and report the test ROC AUC.
# Alternative sweep grid (disabled): [10, 100, 1000, 10000, 100000]
rm_lst = [4750]

for rm in rm_lst:
    # Tuned hyperparameters; only reg_m changes between iterations.
    opt_ndna, opt_nsteps = 16, 4
    opt_gamma, opt_lambda, opt_lr = 1.2, 0.001, 0.025
    opt_reg_m = rm

    tabnet_params = dict(
        n_d=opt_ndna,
        n_a=opt_ndna,
        n_steps=opt_nsteps,
        gamma=opt_gamma,
        lambda_sparse=opt_lambda,
        cat_idxs=cat_idxs,
        cat_dims=cat_dims,
        optimizer_params=dict(lr=opt_lr),
        mask_type='softmax',
        reg_m=opt_reg_m,
    )
    clf = TabNetClassifier(**tabnet_params)

    # Train for up to 50 epochs, tracking AUC on the train split and on the
    # test split (which is reported under the name 'valid').
    clf.fit(
        X_train=x_train,
        y_train=y_train,
        eval_set=[(x_train, y_train), (x_test, y_test)],
        eval_name=['train', 'valid'],
        max_epochs=50,
        eval_metric=['auc'],
    )

    # `test_acc` holds a ROC AUC computed from the second probability column.
    y_pred = clf.predict_proba(x_test)
    test_acc = roc_auc_score(y_true=y_test, y_score=y_pred[:, 1])
    print(f"FINAL ROC AUC SCORE FOR {data_type} : {test_acc}")

Device used cuda: 0


epoch 0  | loss: 21.11836| train_auc: 0.84381 | valid_auc: 0.84349 |  0:00:05s
epoch 1  | loss: 20.8597 | train_auc: 0.87899 | valid_auc: 0.87941 |  0:00:10s
epoch 2  | loss: 21.01706| train_auc: 0.89204 | valid_auc: 0.88811 |  0:00:15s
epoch 3  | loss: 20.92565| train_auc: 0.8967  | valid_auc: 0.89135 |  0:00:20s
epoch 4  | loss: 20.91524| train_auc: 0.90092 | valid_auc: 0.89249 |  0:00:25s
epoch 5  | loss: 20.8521 | train_auc: 0.90385 | valid_auc: 0.89471 |  0:00:31s
epoch 6  | loss: 20.79781| train_auc: 0.90564 | valid_auc: 0.89469 |  0:00:36s
epoch 7  | loss: 20.84079| train_auc: 0.9049  | valid_auc: 0.89714 |  0:00:41s
epoch 8  | loss: 20.70139| train_auc: 0.90345 | valid_auc: 0.89659 |  0:00:46s
epoch 9  | loss: 20.80239| train_auc: 0.90518 | valid_auc: 0.89802 |  0:00:51s
epoch 10 | loss: 20.80586| train_auc: 0.90446 | valid_auc: 0.89739 |  0:00:57s
epoch 11 | loss: 20.82054| train_auc: 0.90452 | valid_auc: 0.89727 |  0:01:02s
epoch 12 | loss: 20.73434| train_auc: 0.90453 | vali

Full results of the synthetic experiments can be found in Appendix D of our paper: https://arxiv.org/pdf/2406.00426