## TabNet Implentation for Tabular Data

TabNet is proposed in [this article] (https://arxiv.org/abs/1908.07442) as a neural network architecture capable of learning a canonical representation of tabular data. This architecture has shown to perform well against the current gold-standard gradient boosting models for learning on tabular data.

TabNet uses a sequential attention mechanism to choose a subset of semantically meaningful
features to process at each decision step. Instance-wise feature selection enables efficient learning as the model capacity is fully used for the most salient features, and also yields
more interpretable decision making via visualization of selection masks. 

**Taken**

This implementation closely follows [the TabNet implementation in PyTorch linked here](https://github.com/dreamquark-ai/tabnet/tree/b6e1ebaf694f37ad40a6ba525aa016fd3cec15da). The description of that implementation is [explained in this helpful video by Sebastian Fischman](https://www.youtube.com/watch?v=ysBaZO8YmX8).

<img src="images/tabnet_schematic.jpg" width="1000" height="800" align="center"/>

In [1]:
import multiprocessing as mp
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow_addons.activations import sparsemax
from typing import Optional, Union, Tuple

import global_variables as gv
import utilities

from sklearn.model_selection import train_test_split

In [2]:
target = gv.outcomes[2]

In [None]:
# import preprocessed dataset 
  
df0 = pd.read_csv(gv.tabnet_data)
df0.drop('Unnamed: 0', axis=1, inplace=True)

In [None]:
df_inputs = df0.iloc[:,:61]

In [3]:
df1 = pd.read_csv(gv.data_link)
pd.set_option('display.max_columns', None)
df1.drop('Unnamed: 0', axis=1, inplace=True)
df1.head()

Unnamed: 0,30850-0.0,30780-0.0,30690-0.0,30790-0.0,23101-0.0,23099-0.0,48-0.0,23100-0.0,30710-0.0,30760-0.0,30640-0.0,30750-0.0,49-0.0,30770-0.0,30740-0.0,30630-0.0,30870-0.0,21001-0.0,1488-0.0,4079-0.0,1299-0.0,21003-0.0,1160-0.0,1438-0.0,4080-0.0,1458-0.0,1528-0.0,1319-0.0,845-0.0,1289-0.0,1309-0.0,1418-0.0,1329-0.0,1220-0.0,1428-0.0,1249-0.0,1349-0.0,1369-0.0,20117-0.0,2100-0.0,2654-0.0,1339-0.0,21000-0.0,2050-0.0,1408-0.0,1200-0.0,1538-0.0,31-0.0,6138-0.0,1359-0.0,1389-0.0,1478-0.0,2090-0.0,1508-0.0,1379-0.0,6142-0.0,1468-0.0,1548-0.0,1239-0.0,1448-0.0,hypertension,outcome_cardiomyopathies,outcome_ischemic_heart_disease,outcome_heart_failure,outcome_myocardial_infarction,outcome_peripheral_vascular_disease,outcome_cardiac_arrest,outcome_cerebral_infarction,outcome_arrhythmia,multi-labels,age,gender,race
0,0.508,3.888,6.477,65.1984,45.2,35.6,74.0,25.0,0.34,1.706,1.211,35.065,102.0,26.339,5.622,1.593,0.977,24.579,6.0,77.0,10.0,54.0,7.0,10.0,110.0,3.73,2.0,0.0,23.52,6.0,2.0,3,2,0,0,1,1,1,2,1,6,2,0,2,1,3,2,0,1,2,1,1,1,3,1,1,3,2,0,3,0,0,0,0,0,0,0,0,1,"[0, 0, 0, 0, 0, 0, 0, 1]",54,Female,British
1,13.088,3.52,5.512,15.4,74.6,36.5,120.0,42.9,3.94,1.173,1.019,40.9,113.0,10.701,5.052,1.39,2.358,35.0861,2.0,91.0,2.0,65.0,9.0,12.0,166.0,7.0,2.4,0.0,16.0,2.0,1.0,2,2,0,1,1,4,2,2,0,7,2,0,1,3,2,0,1,3,3,1,1,0,2,2,1,5,2,0,1,1,0,1,0,1,0,0,0,0,"[1, 0, 1, 0, 0, 0, 0, 0]",65,Male,British
2,9.73364,4.10892,6.47949,50.8588,71.7,29.7,112.0,30.3,3.88,1.58546,1.22432,84.1,107.0,18.763,13.71763,1.74423,2.78764,30.7934,0.0,99.0,2.0,55.0,7.0,10.0,135.0,7.0,2.0,0.0,21.0,3.0,1.0,2,1,0,0,1,2,1,2,0,7,2,0,1,2,2,1,1,3,3,2,1,0,2,2,1,4,2,0,3,1,0,1,0,0,0,1,1,1,"[0, 0, 1, 0, 0, 1, 1, 1]",55,Male,British
3,1.788,2.887,5.565,56.5183,40.2,29.8,67.0,17.0,0.87,2.115,0.81,36.4,91.0,31.672,4.827,1.891,1.157,20.7577,0.0,71.0,5.0,49.0,8.0,14.0,116.0,5.0,3.0,1.0,18.0,5.0,1.0,2,2,0,0,4,1,2,2,0,7,2,2,1,2,1,2,0,6,2,2,1,0,2,2,1,3,2,0,3,0,0,0,0,0,0,0,0,1,"[0, 0, 0, 0, 0, 0, 0, 1]",49,Female,Irish
4,0.756,2.67,4.68,4.77,46.5,30.1,85.0,20.0,0.18,1.493,0.733,34.2,105.0,42.209,5.063,1.869,1.677,25.9766,7.0,73.0,4.0,61.0,7.0,2.0,113.0,7.0,4.0,2.0,16.0,3.0,3.0,3,2,1,1,4,1,1,2,0,7,3,0,1,3,1,0,0,3,3,1,2,0,1,1,1,4,2,0,3,1,0,0,0,1,0,0,0,0,"[1, 0, 0, 0, 0, 0, 0, 0]",61,Female,British


## Build TabNet Architecture

#### GLU Block

Gated Linear Units act as an attention mechanism where the gates formed involve taking two dense layer outputs, applying a sigmoid to one of them, and then multiplying them together

Following GLU blcok contains two dense layers, two ghost batch normalization layers, identity and sigmoid activation functions and multiplication operation.

In [25]:
class GLUBlock(tf.keras.layers.Layer):
    def __init__(self, units: Optional[int] = None,
                 virtual_batch_size: Optional[int] = 128, 
                 momentum: Optional[float] = 0.02):
        super(GLUBlock, self).__init__()
        self.units = units
        self.virtual_batch_size = virtual_batch_size
        self.momentum = momentum
        
    def build(self, input_shape: tf.TensorShape):
        if self.units is None:
            self.units = input_shape[-1]
            
        self.fc_outout = tf.keras.layers.Dense(self.units, 
                                               use_bias=False)
        self.bn_outout = tf.keras.layers.BatchNormalization(virtual_batch_size=self.virtual_batch_size, 
                                                            momentum=self.momentum)
        
        self.fc_gate = tf.keras.layers.Dense(self.units, 
                                             use_bias=False)
        self.bn_gate = tf.keras.layers.BatchNormalization(virtual_batch_size=self.virtual_batch_size, 
                                                          momentum=self.momentum)
        
    def call(self, inputs: Union[tf.Tensor, np.ndarray], training: Optional[bool] = None):
        output = self.bn_outout(self.fc_outout(inputs), 
                                training=training)
        gate = self.bn_gate(self.fc_gate(inputs), 
                            training=training)
    
        return output * tf.keras.activations.sigmoid(gate) # GLU  

### Feature Transformer Block

Builds two GLU blocks with a skip connection from the output of the first

<img src="images/tabnet_feature_transformer.jpg" width="700" height="500" align="center"/>

In [5]:
class FeatureTransformerBlock(tf.keras.layers.Layer):
    def __init__(self, units: Optional[int] = None, virtual_batch_size: Optional[int]=128, 
                 momentum: Optional[float] = 0.02, skip=False):
        super(FeatureTransformerBlock, self).__init__()
        self.units = units
        self.virtual_batch_size = virtual_batch_size
        self.momentum = momentum
        self.skip = skip
        
    def build(self, input_shape: tf.TensorShape):
        if self.units is None:
            self.units = input_shape[-1]
        
        self.initial = GLUBlock(units = self.units, 
                                virtual_batch_size=self.virtual_batch_size, 
                                momentum=self.momentum)
        self.residual =  GLUBlock(units = self.units, 
                                  virtual_batch_size=self.virtual_batch_size, 
                                  momentum=self.momentum)
        
    def call(self, inputs: Union[tf.Tensor, np.ndarray], training: Optional[bool] = None):
        initial = self.initial(inputs, training=training)
        
        if self.skip == True:
            initial += inputs

        residual = self.residual(initial, training=training) # skip
        
        return (initial + residual) * np.sqrt(0.5) # decision and attention dimension

#### Attentive Transformer Block

Use TabNet prior as an input to layer and reserve to handle prior updates in TabNet step layer

> *prior is used to encourage orthogonal feature selection across decision steps, tell us what we know about features and how we have used them in the previous step

<img src="images/tabnet_attentive_transformer.jpg" width="200" height="200" align="center"/>

In [6]:
class AttentiveTransformer(tf.keras.layers.Layer):
    def __init__(self, units: Optional[int] = None, virtual_batch_size: Optional[int] = 128, 
                 momentum: Optional[float] = 0.02):
        super(AttentiveTransformer, self).__init__()
        self.units = units
        self.virtual_batch_size = virtual_batch_size
        self.momentum = momentum
        
    def build(self, input_shape: tf.TensorShape):
        if self.units is None:
            self.units = input_shape[-1]
            
        self.fc = tf.keras.layers.Dense(self.units, 
                                        use_bias=False)
        self.bn = tf.keras.layers.BatchNormalization(virtual_batch_size=self.virtual_batch_size, 
                                                     momentum=self.momentum)
        
    def call(self, inputs: Union[tf.Tensor, np.ndarray], priors: Optional[Union[tf.Tensor, np.ndarray]] = None, training: Optional[bool] = None) -> tf.Tensor:
        feature = self.bn(self.fc(inputs), 
                          training=training)
        if priors is None:
            output = feature
        else:
            output = feature * priors
        
        return tfa.activations.sparsemax(output)

### TabNetStep

Inputs are batch normalized features, output a shared feature transformer, and priors of the current step. Block outputs the feature embedding at our split point, the masked feature to used in teh shared feature transformer blcok of the next step, and the mask used in our attention operation. Block is built from the Feature Transformer block and AttentiveTransformer block

> Mask provided local and global feature attributions for each output

In [7]:
class TabNetStep(tf.keras.layers.Layer):
    def __init__(self, units: Optional[int] = None, virtual_batch_size: Optional[int]=128, 
                 momentum: Optional[float] =0.02):
        super(TabNetStep, self).__init__()
        self.units = units
        self.virtual_batch_size = virtual_batch_size
        self.momentum = momentum
        
    def build(self, input_shape: tf.TensorShape):
        if self.units is None:
            self.units = input_shape[-1]
        
        self.unique = FeatureTransformerBlock(units = self.units, 
                                              virtual_batch_size=self.virtual_batch_size, 
                                              momentum=self.momentum,
                                              skip=True)
        self.attention = AttentiveTransformer(units = input_shape[-1], 
                                              virtual_batch_size=self.virtual_batch_size, 
                                              momentum=self.momentum)
        
    def call(self, inputs, shared, priors, training=None) -> Tuple[tf.Tensor]:  
        split = self.unique(shared, training=training)
        keys = self.attention(split, priors, training=training)
        masked = keys * inputs
        
        return split, masked, keys

### TabNet Encoder

Entire model architecture as a single layer. Accumulates feature embeddings at each desicion step, updates priors, and computes entropy loss to limit the frequency of feature use across steps.

In [8]:
class TabNetEncoder(tf.keras.layers.Layer):
    def __init__(self, units: int =1, 
                 n_steps: int = 3, 
                 n_features: int = 8,
                 outputs: int = 1, 
                 gamma: float = 1.3,
                 epsilon: float = 1e-8, 
                 sparsity: float = 1e-5, 
                 virtual_batch_size: Optional[int]=128, 
                 momentum: Optional[float] =0.02):
        super(TabNetEncoder, self).__init__()
        
        self.units = units
        self.n_steps = n_steps
        self.n_features = n_features
        self.virtual_batch_size = virtual_batch_size
        self.gamma = gamma
        self.epsilon = epsilon
        self.momentum = momentum
        self.sparsity = sparsity
        
    def build(self, input_shape: tf.TensorShape):            
        self.bn = tf.keras.layers.BatchNormalization(virtual_batch_size=self.virtual_batch_size,
        momentum=self.momentum)
        self.shared_block = FeatureTransformerBlock(units = self.n_features, 
                                                    virtual_batch_size=self.virtual_batch_size, 
                                                    momentum=self.momentum)        
        self.initial_step = TabNetStep(units = self.n_features, 
                                       virtual_batch_size=self.virtual_batch_size, 
                                       momentum=self.momentum)
        self.steps = [TabNetStep(units = self.n_features, 
                                 virtual_batch_size=self.virtual_batch_size, 
                                 momentum=self.momentum) for _ in range(self.n_steps)]
        self.final = tf.keras.layers.Dense(units = self.units, 
                                           use_bias=False) 
        
    def call(self, X: Union[tf.Tensor, np.ndarray], training: Optional[bool] = None) -> Tuple[tf.Tensor]:        
        entropy_loss = 0.
        encoded = 0.
        output = 0.
        importance = 0.
        prior = tf.reduce_mean(tf.ones_like(X), axis=0)
        
        B = prior * self.bn(X, training=training)
        shared = self.shared_block(B, training=training)
        _, masked, keys = self.initial_step(B, shared, prior, training=training)

        for step in self.steps:
            entropy_loss += tf.reduce_mean(tf.reduce_sum(-keys * tf.math.log(keys + self.epsilon), axis=-1)) / tf.cast(self.n_steps, tf.float32)
            prior *= (self.gamma - tf.reduce_mean(keys, axis=0))
            importance += keys
            
            shared = self.shared_block(masked, training=training)
            split, masked, keys = step(B, shared, prior, training=training)
            features = tf.keras.activations.relu(split)
            
            output += features
            encoded += split
            
        self.add_loss(self.sparsity * entropy_loss)
          
        prediction = self.final(output)
        return prediction, encoded, importance
            

### apply to dataset

In [9]:
df1.loc[:, gv.numerical_cols+gv.continuous_cols] = df1.loc[:, gv.numerical_cols+gv.continuous_cols].astype(np.float32)
df1.loc[:, gv.categorical_cols] = df1.loc[:, gv.categorical_cols].astype(str)

In [10]:
def get_labels(x: pd.Series) -> pd.Series:
    """
    Converts strings to unqiue ints for use in Pytorch Embedding
    """
    labels, levels = pd.factorize(x)
    return pd.Series(labels, name=x.name, index=x.index)

X, E, y = (df1
           .loc[:, gv.numerical_cols+gv.continuous_cols]
           .astype('float32')
           .join(pd.get_dummies(df1.loc[:, gv.categorical_cols])),
           df1
           .loc[:, gv.numerical_cols+gv.continuous_cols]
           .astype('float32')
           .join(df1.loc[:, gv.categorical_cols].apply(get_labels).add(1).astype('int32')),
           df1[target] == 'Y')

X_train, X_valid, E_train, E_valid, y_train, y_valid = train_test_split(X.to_numpy(), E, y.to_numpy(), test_size=0.2)

In [17]:
def get_feature(x: pd.DataFrame, dimension=1) -> Union[tf.feature_column.numeric_column,tf.feature_column.embedding_column]:
    if x.dtype == np.float32:
        return tf.feature_column.numeric_column(x.name)
    else:
        return tf.feature_column.embedding_column(
        tf.feature_column.categorical_column_with_identity(x.name, num_buckets=x.max() + 1, default_value=0),
        dimension=dimension)
    
def df_to_dataset(X: pd.DataFrame, y: pd.Series, shuffle=False, batch_size=50000):
    ds = tf.data.Dataset.from_tensor_slices((dict(X.copy()), y.copy()))
    if shuffle:
        ds = ds.shuffle(buffer_size=len(X))
    ds = ds.batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE)
    return ds

columns = [get_feature(f) for k, f in E_train.iteritems()]
feature_column = tf.keras.layers.DenseFeatures(columns, trainable=True)

train, valid = df_to_dataset(E_train, y_train), df_to_dataset(E_valid, y_valid)

In [24]:
valid

<PrefetchDataset element_spec=({'1488-0.0': TensorSpec(shape=(None,), dtype=tf.float32, name=None), '4079-0.0': TensorSpec(shape=(None,), dtype=tf.float32, name=None), '1299-0.0': TensorSpec(shape=(None,), dtype=tf.float32, name=None), '21003-0.0': TensorSpec(shape=(None,), dtype=tf.float32, name=None), '1160-0.0': TensorSpec(shape=(None,), dtype=tf.float32, name=None), '1438-0.0': TensorSpec(shape=(None,), dtype=tf.float32, name=None), '4080-0.0': TensorSpec(shape=(None,), dtype=tf.float32, name=None), '1458-0.0': TensorSpec(shape=(None,), dtype=tf.float32, name=None), '1528-0.0': TensorSpec(shape=(None,), dtype=tf.float32, name=None), '1319-0.0': TensorSpec(shape=(None,), dtype=tf.float32, name=None), '845-0.0': TensorSpec(shape=(None,), dtype=tf.float32, name=None), '1289-0.0': TensorSpec(shape=(None,), dtype=tf.float32, name=None), '1309-0.0': TensorSpec(shape=(None,), dtype=tf.float32, name=None), '30850-0.0': TensorSpec(shape=(None,), dtype=tf.float32, name=None), '30780-0.0': Te

In [21]:
class TabNetClassifier(tf.keras.Model):
    def __init__(self, outputs: int = 1, 
                 n_steps: int = 3, 
                 n_features: int = 61,
                 gamma: float = 1.3, 
                 epsilon: float = 1e-8, 
                 sparsity: float = 1e-5, 
                 feature_column: Optional[tf.keras.layers.DenseFeatures] = None, 
                 pretrained_encoder: Optional[tf.keras.layers.Layer] = None,
                 virtual_batch_size: Optional[int] = 128, 
                 momentum: Optional[float] = 0.02):
        super(TabNetClassifier, self).__init__()
        
        self.outputs = outputs
        self.n_steps = n_steps
        self.n_features = n_features
        self.feature_column = feature_column
        self.pretrained_encoder = pretrained_encoder
        self.virtual_batch_size = virtual_batch_size
        self.gamma = gamma
        self.epsilon = epsilon
        self.momentum = momentum
        self.sparsity = sparsity
        
        if feature_column is None:
            self.feature = tf.keras.layers.Lambda(identity)
        else:
            self.feature = feature_column
        if pretrained_encoder is None:
            self.encoder = TabNetEncoder(units=outputs, 
                                        n_steps=n_steps, 
                                        n_features = n_features,
                                        outputs=outputs, 
                                        gamma=gamma, 
                                        epsilon=epsilon, 
                                        sparsity=sparsity,
                                        virtual_batch_size=self.virtual_batch_size, 
                                        momentum=momentum)
        else:
            self.encoder = pretrained_encoder

    def forward(self, X: Union[tf.Tensor, np.ndarray], training: Optional[bool] = None) -> Tuple[tf.Tensor]:
        X = self.feature(X)
        output, encoded, importance = self.encoder(X)
          
        prediction = tf.keras.activations.sigmoid(output)
        return prediction, encoded, importance
    
    def call(self, X: Union[tf.Tensor, np.ndarray], training: Optional[bool] = None) -> tf.Tensor:
        prediction, _, _ = self.forward(X)
        return prediction
    
    def transform(self, X: Union[tf.Tensor, np.ndarray], training: Optional[bool] = None) -> tf.Tensor:
        _, encoded, _ = self.forward(X)
        return encoded
    
    def explain(self, X: Union[tf.Tensor, np.ndarray], training: Optional[bool] = None) -> tf.Tensor:
        _, _, importance = self.forward(X)
        return importance    

In [26]:
m = TabNetClassifier(outputs=1, n_steps=3, n_features = 2, feature_column=feature_column, virtual_batch_size=250)
m.compile(tf.keras.optimizers.Adam(learning_rate=0.025), tf.keras.losses.binary_crossentropy)
m.fit(train, epochs=100)

Epoch 1/100


NameError: in user code:

    File "C:\Users\anali\AppData\Roaming\Python\Python38\site-packages\keras\engine\training.py", line 1051, in train_function  *
        return step_function(self, iterator)
    File "C:\Users\anali\AppData\Roaming\Python\Python38\site-packages\keras\engine\training.py", line 1040, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\anali\AppData\Roaming\Python\Python38\site-packages\keras\engine\training.py", line 1030, in run_step  **
        outputs = model.train_step(data)
    File "C:\Users\anali\AppData\Roaming\Python\Python38\site-packages\keras\engine\training.py", line 889, in train_step
        y_pred = self(x, training=True)
    File "C:\Users\anali\AppData\Roaming\Python\Python38\site-packages\keras\utils\traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "C:\Users\anali\AppData\Local\Temp\__autograph_generated_filefime2xxa.py", line 10, in tf__call
        (prediction, _, _) = ag__.converted_call(ag__.ld(self).forward, (ag__.ld(X),), None, fscope)
    File "C:\Users\anali\AppData\Local\Temp\__autograph_generated_fileoqzqbhwa.py", line 11, in tf__forward
        (output, encoded, importance) = ag__.converted_call(ag__.ld(self).encoder, (ag__.ld(X),), None, fscope)
    File "C:\Users\anali\AppData\Local\Temp\__autograph_generated_filevqecsale.py", line 17, in tf__call
        (_, masked, keys) = ag__.converted_call(ag__.ld(self).initial_step, (ag__.ld(B), ag__.ld(shared), ag__.ld(prior)), dict(training=ag__.ld(training)), fscope)
    File "C:\Users\anali\AppData\Local\Temp\__autograph_generated_file__tv5qyc.py", line 11, in tf__call
        keys = ag__.converted_call(ag__.ld(self).attention, (ag__.ld(split), ag__.ld(priors)), dict(training=ag__.ld(training)), fscope)
    File "C:\Users\anali\AppData\Local\Temp\__autograph_generated_filetda3k_81.py", line 30, in tf__call
        retval_ = ag__.converted_call(ag__.ld(tfa).activations.sparsemax, (ag__.ld(output),), None, fscope)

    NameError: Exception encountered when calling layer "tab_net_classifier_3" (type TabNetClassifier).
    
    in user code:
    
        File "C:\Users\anali\AppData\Local\Temp/ipykernel_21548/1448419760.py", line 50, in call  *
            prediction, _, _ = self.forward(X)
        File "C:\Users\anali\AppData\Local\Temp/ipykernel_21548/1448419760.py", line 44, in forward  *
            output, encoded, importance = self.encoder(X)
        File "C:\Users\anali\AppData\Roaming\Python\Python38\site-packages\keras\utils\traceback_utils.py", line 67, in error_handler  **
            raise e.with_traceback(filtered_tb) from None
        File "C:\Users\anali\AppData\Local\Temp\__autograph_generated_filevqecsale.py", line 17, in tf__call
            (_, masked, keys) = ag__.converted_call(ag__.ld(self).initial_step, (ag__.ld(B), ag__.ld(shared), ag__.ld(prior)), dict(training=ag__.ld(training)), fscope)
        File "C:\Users\anali\AppData\Local\Temp\__autograph_generated_file__tv5qyc.py", line 11, in tf__call
            keys = ag__.converted_call(ag__.ld(self).attention, (ag__.ld(split), ag__.ld(priors)), dict(training=ag__.ld(training)), fscope)
        File "C:\Users\anali\AppData\Local\Temp\__autograph_generated_filetda3k_81.py", line 30, in tf__call
            retval_ = ag__.converted_call(ag__.ld(tfa).activations.sparsemax, (ag__.ld(output),), None, fscope)
    
        NameError: Exception encountered when calling layer "tab_net_encoder_3" (type TabNetEncoder).
        
        in user code:
        
            File "C:\Users\anali\AppData\Local\Temp/ipykernel_21548/52959058.py", line 46, in call  *
                _, masked, keys = self.initial_step(B, shared, prior, training=training)
            File "C:\Users\anali\AppData\Roaming\Python\Python38\site-packages\keras\utils\traceback_utils.py", line 67, in error_handler  **
                raise e.with_traceback(filtered_tb) from None
            File "C:\Users\anali\AppData\Local\Temp\__autograph_generated_file__tv5qyc.py", line 11, in tf__call
                keys = ag__.converted_call(ag__.ld(self).attention, (ag__.ld(split), ag__.ld(priors)), dict(training=ag__.ld(training)), fscope)
            File "C:\Users\anali\AppData\Local\Temp\__autograph_generated_filetda3k_81.py", line 30, in tf__call
                retval_ = ag__.converted_call(ag__.ld(tfa).activations.sparsemax, (ag__.ld(output),), None, fscope)
        
            NameError: Exception encountered when calling layer "tab_net_step" (type TabNetStep).
            
            in user code:
            
                File "C:\Users\anali\AppData\Local\Temp/ipykernel_21548/828386580.py", line 23, in call  *
                    keys = self.attention(split, priors, training=training)
                File "C:\Users\anali\AppData\Roaming\Python\Python38\site-packages\keras\utils\traceback_utils.py", line 67, in error_handler  **
                    raise e.with_traceback(filtered_tb) from None
                File "C:\Users\anali\AppData\Local\Temp\__autograph_generated_filetda3k_81.py", line 30, in tf__call
                    retval_ = ag__.converted_call(ag__.ld(tfa).activations.sparsemax, (ag__.ld(output),), None, fscope)
            
                NameError: Exception encountered when calling layer "attentive_transformer" (type AttentiveTransformer).
                
                in user code:
                
                    File "C:\Users\anali\AppData\Local\Temp/ipykernel_21548/525209892.py", line 26, in call  *
                        return tfa.activations.sparsemax(output)
                
                    NameError: name 'tfa' is not defined
                
                
                Call arguments received by layer "attentive_transformer" (type AttentiveTransformer):
                  • inputs=tf.Tensor(shape=(None, 2), dtype=float32)
                  • priors=tf.Tensor(shape=(61,), dtype=float32)
                  • training=True
            
            
            Call arguments received by layer "tab_net_step" (type TabNetStep):
              • inputs=tf.Tensor(shape=(None, 61), dtype=float32)
              • shared=tf.Tensor(shape=(None, 2), dtype=float32)
              • priors=tf.Tensor(shape=(61,), dtype=float32)
              • training=True
        
        
        Call arguments received by layer "tab_net_encoder_3" (type TabNetEncoder):
          • X=tf.Tensor(shape=(None, 61), dtype=float32)
          • training=True
    
    
    Call arguments received by layer "tab_net_classifier_3" (type TabNetClassifier):
      • X={'1488-0.0': 'tf.Tensor(shape=(None,), dtype=float32)', '4079-0.0': 'tf.Tensor(shape=(None,), dtype=float32)', '1299-0.0': 'tf.Tensor(shape=(None,), dtype=float32)', '21003-0.0': 'tf.Tensor(shape=(None,), dtype=float32)', '1160-0.0': 'tf.Tensor(shape=(None,), dtype=float32)', '1438-0.0': 'tf.Tensor(shape=(None,), dtype=float32)', '4080-0.0': 'tf.Tensor(shape=(None,), dtype=float32)', '1458-0.0': 'tf.Tensor(shape=(None,), dtype=float32)', '1528-0.0': 'tf.Tensor(shape=(None,), dtype=float32)', '1319-0.0': 'tf.Tensor(shape=(None,), dtype=float32)', '845-0.0': 'tf.Tensor(shape=(None,), dtype=float32)', '1289-0.0': 'tf.Tensor(shape=(None,), dtype=float32)', '1309-0.0': 'tf.Tensor(shape=(None,), dtype=float32)', '30850-0.0': 'tf.Tensor(shape=(None,), dtype=float32)', '30780-0.0': 'tf.Tensor(shape=(None,), dtype=float32)', '30690-0.0': 'tf.Tensor(shape=(None,), dtype=float32)', '30790-0.0': 'tf.Tensor(shape=(None,), dtype=float32)', '23101-0.0': 'tf.Tensor(shape=(None,), dtype=float32)', '23099-0.0': 'tf.Tensor(shape=(None,), dtype=float32)', '48-0.0': 'tf.Tensor(shape=(None,), dtype=float32)', '23100-0.0': 'tf.Tensor(shape=(None,), dtype=float32)', '30710-0.0': 'tf.Tensor(shape=(None,), dtype=float32)', '30760-0.0': 'tf.Tensor(shape=(None,), dtype=float32)', '30640-0.0': 'tf.Tensor(shape=(None,), dtype=float32)', '30750-0.0': 'tf.Tensor(shape=(None,), dtype=float32)', '49-0.0': 'tf.Tensor(shape=(None,), dtype=float32)', '30770-0.0': 'tf.Tensor(shape=(None,), dtype=float32)', '30740-0.0': 'tf.Tensor(shape=(None,), dtype=float32)', '30630-0.0': 'tf.Tensor(shape=(None,), dtype=float32)', '30870-0.0': 'tf.Tensor(shape=(None,), dtype=float32)', '21001-0.0': 'tf.Tensor(shape=(None,), dtype=float32)', '1418-0.0': 'tf.Tensor(shape=(None,), dtype=int32)', '1329-0.0': 'tf.Tensor(shape=(None,), dtype=int32)', '1220-0.0': 'tf.Tensor(shape=(None,), dtype=int32)', '1428-0.0': 'tf.Tensor(shape=(None,), dtype=int32)', '1249-0.0': 'tf.Tensor(shape=(None,), dtype=int32)', '1349-0.0': 'tf.Tensor(shape=(None,), dtype=int32)', '1369-0.0': 'tf.Tensor(shape=(None,), dtype=int32)', '20117-0.0': 'tf.Tensor(shape=(None,), dtype=int32)', '2100-0.0': 'tf.Tensor(shape=(None,), dtype=int32)', '2654-0.0': 'tf.Tensor(shape=(None,), dtype=int32)', '1339-0.0': 'tf.Tensor(shape=(None,), dtype=int32)', '21000-0.0': 'tf.Tensor(shape=(None,), dtype=int32)', '2050-0.0': 'tf.Tensor(shape=(None,), dtype=int32)', '1408-0.0': 'tf.Tensor(shape=(None,), dtype=int32)', '1200-0.0': 'tf.Tensor(shape=(None,), dtype=int32)', '1538-0.0': 'tf.Tensor(shape=(None,), dtype=int32)', '31-0.0': 'tf.Tensor(shape=(None,), dtype=int32)', '6138-0.0': 'tf.Tensor(shape=(None,), dtype=int32)', '1359-0.0': 'tf.Tensor(shape=(None,), dtype=int32)', '1389-0.0': 'tf.Tensor(shape=(None,), dtype=int32)', '1478-0.0': 'tf.Tensor(shape=(None,), dtype=int32)', '2090-0.0': 'tf.Tensor(shape=(None,), dtype=int32)', '1508-0.0': 'tf.Tensor(shape=(None,), dtype=int32)', '1379-0.0': 'tf.Tensor(shape=(None,), dtype=int32)', '6142-0.0': 'tf.Tensor(shape=(None,), dtype=int32)', '1468-0.0': 'tf.Tensor(shape=(None,), dtype=int32)', '1548-0.0': 'tf.Tensor(shape=(None,), dtype=int32)', '1239-0.0': 'tf.Tensor(shape=(None,), dtype=int32)', '1448-0.0': 'tf.Tensor(shape=(None,), dtype=int32)', 'hypertension': 'tf.Tensor(shape=(None,), dtype=int32)'}
      • training=True
