### Create a simple dataset to test the correctness of our approach

In [6]:
import numpy as np
import torch
from pytorch_tabnet.tab_model import TabNetClassifier

In [7]:
x = torch.randn(100000, 2)
noise = torch.randn(100000,)
y = ((1.0*x[:,0]+2.0*x[:,1]+noise)>0).type(torch.int64)

In [8]:
y_np = y.numpy()
x_np = x.numpy()
y_train, y_test = y_np[:50000], y_np[50000:]
x_train, x_test = x_np[:50000, :], x_np[50000:, :]
from sklearn.linear_model import LogisticRegression
log_reg = LogisticRegression()
log_reg.fit(x_train, y_train)
y_pred = log_reg.predict(x_test)
from sklearn.metrics import accuracy_score
print(accuracy_score(y_test, y_pred))

0.86766


### Using TabNet

#### Network parameters

In [9]:
clf = TabNetClassifier(
    n_d=64, n_a=64, n_steps=5,
    gamma=1.5, n_independent=2, n_shared=2,
    lambda_sparse=1e-4, momentum=0.3, clip_value=2.,
    optimizer_fn=torch.optim.Adam,
    optimizer_params=dict(lr=2e-2),
    scheduler_params = {"gamma": 0.95,
                     "step_size": 20},
    scheduler_fn=torch.optim.lr_scheduler.StepLR, epsilon=1e-15
)

Device used : cpu


In [13]:
max_epochs = 10
clf.fit(X_train=x_train, y_train=y_train,
               patience=5,max_epochs=max_epochs,
               eval_metric=['auc'])

No early stopping will be performed, last training weights will be used.
epoch 0  | loss: 0.31061 |  0:00:45s
epoch 1  | loss: 0.3031  |  0:01:26s
epoch 2  | loss: 0.30349 |  0:02:10s
epoch 3  | loss: 0.30148 |  0:03:24s
epoch 4  | loss: 0.3042  |  0:04:15s
epoch 5  | loss: 0.30104 |  0:04:55s
epoch 6  | loss: 0.29989 |  0:05:34s
epoch 7  | loss: 0.30325 |  0:06:11s
epoch 8  | loss: 0.3019  |  0:06:48s
epoch 9  | loss: 0.30088 |  0:07:24s


In [14]:
ypred = clf.predict(x_test)
print(f'Accuracy score of TabNet model:{accuracy_score(ypred, y_test)}')

Accuracy score of TabNet model:0.86684
