In [1]:
import math
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_addons as tfa
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, average_precision_score
from tensorflow.keras.callbacks import EarlyStopping

from tabtransformertf.models.tabtransformer import TabTransformer
from tabtransformertf.utils.preprocessing import df_to_dataset, build_categorical_prep


 The versions of TensorFlow you are currently using is 2.7.0 and is not supported. 
Some things might work, some things might not.
If you were to encounter a bug, do not file an issue.
If you want to make sure you're using a tested and supported configuration, either change the TensorFlow version or the TensorFlow Addons's version. 
You can find the compatibility matrix in TensorFlow Addon's readme:
https://github.com/tensorflow/addons


## Download Data

In [2]:
CSV_HEADER = [
    "age",
    "workclass",
    "fnlwgt",
    "education",
    "education_num",
    "marital_status",
    "occupation",
    "relationship",
    "race",
    "gender",
    "capital_gain",
    "capital_loss",
    "hours_per_week",
    "native_country",
    "income_bracket",
]

train_data_url = (
    "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data"
)
train_data = pd.read_csv(train_data_url, header=None, names=CSV_HEADER)

test_data_url = (
    "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test"
)
test_data = pd.read_csv(test_data_url, header=None, names=CSV_HEADER)

print(f"Train dataset shape: {train_data.shape}")
print(f"Test dataset shape: {test_data.shape}")

Train dataset shape: (32561, 15)
Test dataset shape: (16282, 15)


In [3]:
train_data.head()

Unnamed: 0,age,workclass,fnlwgt,education,education_num,marital_status,occupation,relationship,race,gender,capital_gain,capital_loss,hours_per_week,native_country,income_bracket
0,39,State-gov,77516,Bachelors,13,Never-married,Adm-clerical,Not-in-family,White,Male,2174,0,40,United-States,<=50K
1,50,Self-emp-not-inc,83311,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,13,United-States,<=50K
2,38,Private,215646,HS-grad,9,Divorced,Handlers-cleaners,Not-in-family,White,Male,0,0,40,United-States,<=50K
3,53,Private,234721,11th,7,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0,0,40,United-States,<=50K
4,28,Private,338409,Bachelors,13,Married-civ-spouse,Prof-specialty,Wife,Black,Female,0,0,40,Cuba,<=50K


## Preprocess

In [4]:
# Column information
NUMERIC_FEATURES = train_data.select_dtypes(include=np.number).columns
CATEGORICAL_FEATURES = train_data.select_dtypes(exclude=np.number).columns[:-1] # exclude label column and DT

FEATURES = list(NUMERIC_FEATURES) + list(CATEGORICAL_FEATURES)
LABEL = 'income_bracket'

In [5]:
# encoding as binary target
train_data[LABEL] = train_data[LABEL].apply(lambda x: int(x == ' >50K')) 
test_data[LABEL] = test_data[LABEL].apply(lambda x: int(x == ' >50K.'))
train_data[LABEL].mean(), test_data[LABEL].mean()

(0.2408095574460244, 0.23621176759611842)

In [6]:
test_data = test_data.iloc[1:, :] # drop invalid row

In [7]:
# Set data types
train_data[CATEGORICAL_FEATURES] = train_data[CATEGORICAL_FEATURES].astype(str)
test_data[CATEGORICAL_FEATURES] = test_data[CATEGORICAL_FEATURES].astype(str)

train_data[NUMERIC_FEATURES] = train_data[NUMERIC_FEATURES].astype(float)
test_data[NUMERIC_FEATURES] = test_data[NUMERIC_FEATURES].astype(float)

In [8]:
# Train/test split
X_train, X_val = train_test_split(train_data, test_size=0.2)

In [9]:
def tf_interp(x, xs, y) -> tf.Tensor:
    # determine the output data type
    ys = tf.convert_to_tensor(ys)
    dtype = ys.dtype
    
    # normalize data types
    ys = tf.cast(ys, tf.float64)
    xs = tf.cast(xs, tf.float64)
    x = tf.cast(x, tf.float64)

    # pad control points for extrapolation
    xs = tf.concat([[xs.dtype.min], xs, [xs.dtype.max]], axis=0)
    ys = tf.concat([ys[:1], ys, ys[-1:]], axis=0)

    # compute slopes, pad at the edges to flatten
    ms = (ys[1:] - ys[:-1]) / (xs[1:] - xs[:-1])
    ms = tf.pad(ms[:-1], [(1, 1)])

    # solve for intercepts
    bs = ys - ms*xs

    # search for the line parameters at each input data point
    # create a grid of the inputs and piece breakpoints for thresholding
    # rely on argmax stopping on the first true when there are duplicates,
    # which gives us an index into the parameter vectors
    i = tf.math.argmax(xs[..., tf.newaxis, :] > x[..., tf.newaxis], axis=-1)
    m = tf.gather(ms, i, axis=-1)
    b = tf.gather(bs, i, axis=-1)

    # apply the linear mapping at each input data point
    y = m*x + b
    return tf.cast(tf.reshape(y, tf.shape(x)), dtype)

In [136]:
from itertools import islice

class PLE(tf.keras.layers.Layer):
    def __init__(self, n_bins=10):
        super(PLE, self).__init__()
        self.n_bins = n_bins

    def adapt(self, data):
        interval = 1/self.n_bins
        bins = [np.quantile(data, q) for q in np.arange(0.0, 1 + interval, interval)]
        init = tf.lookup.KeyValueTensorInitializer([i for i in range(len(bins))], bins)
        self.lookup_table = tf.lookup.StaticHashTable(
            init,
            default_value=-1
        )
        self.lookup_size = self.lookup_table.size()
    
    def other_case(f, k, v):
        e = (f - self.lookup_table.lookup(k-1)) / (v - self.lookup_table.lookup(k-1))
        return e
    
    def ple_number(self, f):
        ple = []
        for i in tf.range(self.lookup_size):
            if i != 0:
                i = tf.cast(i, tf.int32)
                v = self.lookup_table.lookup(i)
                print(f)
                zero = (f < self.lookup_table.lookup(i-1)) & (i > 1)
                one = (f >= v) & (i < n_bins)
                e = tf.cond(zero, lambda: 0., lambda: tf.cond(one, lambda: 1., lambda: other_case(f, i, v)))
                ple.append(e)

        ple = tf.stack(ple, axis=0)
        return ple
                                                              

    def call(self, x):
        x = tf.keras.layers.Flatten(x)
        final_ple = []
        for f in x:
            final_ple.append(self.ple_number(f))

        return tf.stack(final_ple)




In [32]:
d.as_numpy_iterator().next()

array([29., 21., 42., 52., 39., 27., 66., 27., 22., 19., 72., 45., 48.,
       30., 24., 73., 31., 44., 47., 23., 40., 38., 50., 20., 34., 45.,
       42., 31., 33., 33., 17., 44., 37., 49., 21., 53., 25., 53., 17.,
       45., 50., 53., 59., 23., 24., 29., 25., 24., 27., 42., 20., 72.,
       40., 39., 21., 24., 40., 26., 22., 37., 25., 18., 31., 45., 23.,
       44., 45., 28., 29., 19., 34., 51., 30., 57., 56., 42., 37., 26.,
       39., 90., 45., 64., 57., 19., 30., 30., 42., 35., 39., 28., 37.,
       57., 19., 40., 47., 32., 61., 22., 39., 22., 18., 27., 39., 54.,
       29., 23., 19., 31., 21., 19., 37., 21., 25., 73., 18., 53., 35.,
       46., 41., 19., 23., 61., 42., 39., 58., 90., 26., 51., 33., 46.,
       20., 33., 32., 37., 44., 69., 36., 38., 31., 50., 37., 35., 40.,
       30., 39., 42., 60., 18., 40., 44., 39., 28., 45., 18., 41., 52.,
       45., 63., 25., 36., 69., 28., 37., 41., 42., 69., 51., 27., 65.,
       35., 40., 39., 62., 38., 30., 30., 22., 45., 41., 49., 73

In [91]:
from itertools import islice

class PLE(tf.keras.layers.Layer):
    def __init__(self, n_bins=10):
        super(PLE, self).__init__()
        self.n_bins = n_bins

    def adapt(self, data):
        interval = 1/self.n_bins
        bins = [tf.cast(np.quantile(data, q), tf.float32) for q in np.arange(0.0, 1 + interval, interval)]
        init = tf.lookup.KeyValueTensorInitializer([i for i in range(len(bins))], bins)
        self.lookup_table = tf.lookup.StaticHashTable(
            init,
            default_value=-1
        )
        self.lookup_size = self.lookup_table.size()                                                 

    def call(self, x):
        ple_encoding_one = tf.ones((len(x), self.n_bins))
        ple_encoding_zero = tf.zeros((len(x), self.n_bins))
        
        left_masks = tf.zeros([0])
        right_masks = tf.zeros([0])
        other_case = tf.zeros([0])
        
        for i in tf.range(1, self.n_bins+1):
            left_mask = (x < self.lookup_table.lookup(i-1)) & (i > 1)
            right_mask = (x >= self.lookup_table.lookup(i)) & (i < self.n_bins)
            v = (x - self.lookup_table.lookup(i-1)) / (self.lookup_table.lookup(i) - self.lookup_table.lookup(i-1))
            left_masks = tf.concat([left_masks, left_mask], axis=1)
            right_masks = tf.concat([right_masks, right_mask], axis=1)
            other_case = tf.concat([other_case, v], axis=1)
        
        other_mask = right_masks == left_masks # both are false
        other_case = tf.cast(other_case, tf.float32)
        enc = tf.where(left_masks, ple_encoding_zero, ple_encoding_one)
        enc = tf.where(other_mask, other_case, enc)

        return enc


num_emb_layer = PLE(20)
num_emb_layer.adapt(X_train[NUMERIC_FEATURES[0]].astype(np.float32))
b = d.as_numpy_iterator().next()
num_emb_layer(b)[:4, :], b[:4, :]

InvalidArgumentError: Exception encountered when calling layer "ple_31" (type PLE).

cannot compute ConcatV2 as input #1(zero-based) was expected to be a float tensor but is a bool tensor [Op:ConcatV2] name: concat

Call arguments received:
  • x=tf.Tensor(shape=(512, 1), dtype=float32)

In [89]:
my_list = tf.zeros([])
tf.concat([my_list, [2, 2, 3]], axis=0)

<tf.Tensor: shape=(3,), dtype=float32, numpy=array([2., 2., 3.], dtype=float32)>

In [95]:
ta = tf.TensorArray(tf.bool, size=0, dynamic_size=True)

In [69]:
my_list = tf.TensorArray(tf.float32, size=0, dynamic_size=True)
my_list.append(0)

AttributeError: 'TensorArray' object has no attribute 'append'

In [60]:
# from numpy.random import randint
# from sklearn.base import BaseEstimator, TransformerMixin


# class PLE(BaseEstimator, TransformerMixin):
#     def __init__(self, bins):
#         self.bins = bins
        
#     def fit(self, X):
#         interval = 1 / self.bins
#         self.bin_dict = {}
#         for f in X.columns:
#             bins = [np.quantile(X[f], q) for q in np.arange(0.0, 1.05, interval)]
#             self.bin_dict[f] = {i: b for i, b in enumerate(bins)}
        
#         return self
    
#     def ple_embed(self, f, bin_dict):
#         ple = []
#         n_bins = len(bin_dict.items())
#         for k, v in islice(bin_dict.items(), 1, None):    
#             if (f < bin_dict[k-1]) & (k > 1):
#                 ple.append(0.)
#             elif (f >= v) & (k < n_bins):
#                 ple.append(1.)
#             else:
#                 e = np.round((f - bin_dict[k-1]) / (v - bin_dict[k-1]), 5)
#                 ple.append(e)

#         return ple
    
#     def transform(self, X):
#         for f in X.columns:
#             X[f] = X[f].apply(lambda x: self.ple_embed(x, self.bin_dict[f]))

#         return X
    
# ple_transfromer = PLE(bins=20)
# ple_transfromer.fit(X_train[NUMERIC_FEATURES])

In [61]:
# X_train[NUMERIC_FEATURES] = ple_transfromer.transform(X_train[NUMERIC_FEATURES])
# X_val[NUMERIC_FEATURES] = ple_transfromer.transform(X_val[NUMERIC_FEATURES])
# test_data[NUMERIC_FEATURES] = ple_transfromer.transform(test_data[NUMERIC_FEATURES])


## Modelling Prep

In [62]:
# Category preprocessing layers
category_prep_layers = build_categorical_prep(X_train, CATEGORICAL_FEATURES)

numerical_prep_layers = {}
# Numerical prep layers
for f in NUMERIC_FEATURES:
    num_emb_layer = PLE(20)
    num_emb_layer.adapt(X_train[f])
    numerical_prep_layers[f] = num_emb_layer

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00, 235.09it/s]


In [63]:
# To TF Dataset
X_train[NUMERIC_FEATURES] = X_train[NUMERIC_FEATURES].astype(np.float32)
X_val[NUMERIC_FEATURES] = X_val[NUMERIC_FEATURES].astype(np.float32)
test_data[NUMERIC_FEATURES] = test_data[NUMERIC_FEATURES].astype(np.float32)

train_dataset = df_to_dataset(X_train[FEATURES + [LABEL]], LABEL)
val_dataset = df_to_dataset(X_val[FEATURES + [LABEL]], LABEL, shuffle=False)  # No shuffle
test_dataset = df_to_dataset(test_data[FEATURES + [LABEL]], shuffle=False) # No target, no shuffle

## TabTransformer

In [64]:
import tensorflow as tf
from tensorflow.keras.activations import gelu
from tensorflow.keras.layers import (
    Add,
    BatchNormalization,
    Concatenate,
    Dense,
    Dropout,
    Embedding,
    Flatten,
    Layer,
    LayerNormalization,
    MultiHeadAttention,
)
from tabtransformertf.utils.helper import build_mlp
from tabtransformertf.models.tabtransformer import TransformerBlock



class TabTransformerEncoder(tf.keras.Model):
    def __init__(
        self,
        categorical_features: list,
        numerical_features: list,
        categorical_lookup: dict,
        embedding_dim: int = 32,
        depth: int = 4,
        heads: int = 8,
        attn_dropout: float = 0.1,
        ff_dropout: float = 0.1,
        numerical_embeddings: dict = None,
        use_column_embedding: bool = True,
    ):
        """TabTransformer Tensorflow Model
        Args:
            categorical_features (list): names of categorical features
            numerical_features (list): names of numeric features
            categorical_lookup (dict): dictionary with categorical feature names as keys and adapted StringLookup layers as values
            out_dim (int): model output dimensions
            out_activation (str): model output activation
            embedding_dim (int, optional): embedding dimensions. Defaults to 32.
            depth (int, optional): number of transformer blocks. Defaults to 4.
            heads (int, optional): number of attention heads. Defaults to 8.
            attn_dropout (float, optional): dropout rate in transformer. Defaults to 0.1.
            ff_dropout (float, optional): dropout rate in mlps. Defaults to 0.1.
            mlp_hidden_factors (list[int], optional): numbers by which we divide dimensionality. Defaults to [2, 4].
            numerical_discretisers (dict, optional): dictionary with numerical feature names as keys and adapted Discretizer and IntegerLookup layers as values. Defaults to None.
            use_column_embedding (bool, optional): flag to use fixed column positional embeddings. Defaults to True.
        """

        super(TabTransformerEncoder, self).__init__()
        self.numerical = numerical_features
        self.categorical = categorical_features
        self.embed_numeric = numerical_embeddings is not None
        self.num_categories = [
            categorical_lookup[c].vocabulary_size() for c in self.categorical
        ]

        # ---------- Numerical Input -----------
        if len(self.numerical) > 0:
            # If we want to quantise numeric features
            if self.embed_numeric:
                # Layers to transform numeric into embedding
                self.numerical_embeddings = numerical_embeddings
                # Linear layer after embedding
                self.numerical_embedding_linear = [
                    Dense(embedding_dim, activation='relu') for n in self.numerical
                ]
                
            else:
                # If not quantising, then simply normalise and concatenate
                self.continuous_normalization = LayerNormalization()
                self.numerical_concatenation = Concatenate(axis=1)

        # ---------- Categorical Input -----------

        # String lookups for categorical
        self.categorical_lookups = [categorical_lookup[c] for c in self.categorical]

        # Categorical input embedding
        self.cat_embedding_layers = []
        for number_of_classes in self.num_categories:
            category_embedding = Embedding(
                input_dim=number_of_classes, output_dim=embedding_dim
            )
            self.cat_embedding_layers.append(category_embedding)

        # Column embedding
        self.use_column_embedding = use_column_embedding
        if use_column_embedding:
            num_columns = len(self.categorical)
            if self.embed_numeric:
                num_columns += len(self.numerical)
            self.column_embedding = Embedding(
                input_dim=num_columns, output_dim=embedding_dim
            )
            self.column_indices = tf.range(start=0, limit=num_columns, delta=1)

        # Embedding concatenation layer
        self.embedded_concatenation = Concatenate(axis=1)

        # adding transformers
        self.transformers = []
        for _ in range(depth):
            self.transformers.append(
                TransformerBlock(
                    embedding_dim,
                    heads,
                    embedding_dim,
                    att_dropout=attn_dropout,
                    ff_dropout=ff_dropout,
                )
            )
        self.flatten_transformer_output = Flatten()

        # MLP
        self.pre_mlp_concatenation = Concatenate()

    def call(self, inputs):
        numerical_feature_list = []
        categorical_feature_list = []
        print(self.numerical_embeddings)

        if len(self.numerical) > 0:
            # Each numeric feature needs to be binned, looked up, and embedded
            for i, n in enumerate(self.numerical):
                if self.embed_numeric:
                    print(self.numerical_embeddings[n])
                    num_embedded = self.numerical_embeddings[n](inputs[n])
                    print(num_embedded)
                    num_embedded = self.numerical_embedding_linear[i](num_embedded)
                    numerical_feature_list.append(num_embedded)
                else:
                    # Otherwise we pass it as it is
                    numerical_feature_list.append(inputs[n])
        
        print('passed numeric')

        for i, c in enumerate(self.categorical):
            cat_encoded = self.categorical_lookups[i](inputs[c])
            cat_embedded = self.cat_embedding_layers[i](cat_encoded)
            categorical_feature_list.append(cat_embedded)

        if self.embed_numeric:
            # Stack categorical embeddings for the Tansformer.
            transformer_inputs = self.embedded_concatenation(
                numerical_feature_list + categorical_feature_list
            )
        else:
            transformer_inputs = self.embedded_concatenation(categorical_feature_list)

        if self.use_column_embedding:
            # Add column embeddings
            transformer_inputs += self.column_embedding(self.column_indices)

        for transformer in self.transformers:
            transformer_inputs = transformer(transformer_inputs)

        # Flatten the "contextualized" embeddings of the features.
        mlp_input = self.flatten_transformer_output(transformer_inputs)

        # In case we don't quantize, we want to normalise and concatenate numerical features with embeddings
        if (self.embed_numeric is False) and (len(self.numerical) > 0):
            numerical_inputs = self.numerical_concatenation(numerical_feature_list)
            numerical_inputs = self.continuous_normalization(numerical_inputs)
            mlp_input = self.pre_mlp_concatenation([mlp_input, numerical_inputs])

        return mlp_input



In [65]:
tab = TabTransformerEncoder(
    numerical_features = NUMERIC_FEATURES,
    categorical_features = CATEGORICAL_FEATURES,
    categorical_lookup=category_prep_layers,
    numerical_embeddings=numerical_prep_layers,
    embedding_dim=16
)
preds = tab.predict(val_dataset)

{'age': <__main__.PLE object at 0x7fd890841310>, 'fnlwgt': <__main__.PLE object at 0x7fd88ff7c850>, 'education_num': <__main__.PLE object at 0x7fd8912a2700>, 'capital_gain': <__main__.PLE object at 0x7fd8900261f0>, 'capital_loss': <__main__.PLE object at 0x7fd890834880>, 'hours_per_week': <__main__.PLE object at 0x7fd890834b80>}
<__main__.PLE object at 0x7fd890841310>
[<tf.Tensor 'tab_transformer_encoder_1/ple_25/while/and:0' shape=(None, 1) dtype=bool>]


InaccessibleTensorError: in user code:

    File "/Users/antonsruberts/miniconda/envs/blog/lib/python3.9/site-packages/keras/engine/training.py", line 1621, in predict_function  *
        return step_function(self, iterator)
    File "/Users/antonsruberts/miniconda/envs/blog/lib/python3.9/site-packages/keras/engine/training.py", line 1611, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Users/antonsruberts/miniconda/envs/blog/lib/python3.9/site-packages/keras/engine/training.py", line 1604, in run_step  **
        outputs = model.predict_step(data)
    File "/Users/antonsruberts/miniconda/envs/blog/lib/python3.9/site-packages/keras/engine/training.py", line 1572, in predict_step
        return self(x, training=False)
    File "/Users/antonsruberts/miniconda/envs/blog/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None

    InaccessibleTensorError: Exception encountered when calling layer "tab_transformer_encoder_1" (type TabTransformerEncoder).
    
    in user code:
    
        File "/var/folders/66/1klxbkpn5vdgpvqwt_hmtn5c0000gn/T/ipykernel_39366/3156529361.py", line 129, in call  *
            num_embedded = self.numerical_embeddings[n](inputs[n])
        File "/Users/antonsruberts/miniconda/envs/blog/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 67, in error_handler  **
            raise e.with_traceback(filtered_tb) from None
    
        InaccessibleTensorError: Exception encountered when calling layer "ple_25" (type PLE).
        
        in user code:
        
            File "/var/folders/66/1klxbkpn5vdgpvqwt_hmtn5c0000gn/T/ipykernel_39366/528994165.py", line 35, in call  *
                left_masks = tf.concat(left_masks, axis=1)
        
            InaccessibleTensorError: tf.Graph captured an external symbolic tensor. The symbolic tensor <tf.Tensor 'tab_transformer_encoder_1/ple_25/while/and:0' shape=(None, 1) dtype=bool> is captured by FuncGraph(name=predict_function, id=140568045454720), but it is defined at FuncGraph(name=tab_transformer_encoder_1_ple_25_while_body_17721, id=140568118298656). A tf.Graph is not allowed to capture symoblic tensors from another graph. Use return values, explicit Python locals or TensorFlow collections to access it. Please see https://www.tensorflow.org/guide/function#all_outputs_of_a_tffunction_must_be_return_values for more information.
            
        
        
        Call arguments received:
          • x=tf.Tensor(shape=(None, 1), dtype=float32)
    
    
    Call arguments received:
      • inputs={'age': 'tf.Tensor(shape=(None, 1), dtype=float32)', 'fnlwgt': 'tf.Tensor(shape=(None, 1), dtype=float32)', 'education_num': 'tf.Tensor(shape=(None, 1), dtype=float32)', 'capital_gain': 'tf.Tensor(shape=(None, 1), dtype=float32)', 'capital_loss': 'tf.Tensor(shape=(None, 1), dtype=float32)', 'hours_per_week': 'tf.Tensor(shape=(None, 1), dtype=float32)', 'workclass': 'tf.Tensor(shape=(None, 1), dtype=string)', 'education': 'tf.Tensor(shape=(None, 1), dtype=string)', 'marital_status': 'tf.Tensor(shape=(None, 1), dtype=string)', 'occupation': 'tf.Tensor(shape=(None, 1), dtype=string)', 'relationship': 'tf.Tensor(shape=(None, 1), dtype=string)', 'race': 'tf.Tensor(shape=(None, 1), dtype=string)', 'gender': 'tf.Tensor(shape=(None, 1), dtype=string)', 'native_country': 'tf.Tensor(shape=(None, 1), dtype=string)'}


In [11]:
tabtransformer = TabTransformer(
    numerical_features = NUMERIC_FEATURES,
    categorical_features = CATEGORICAL_FEATURES,
    categorical_lookup=category_prep_layers,
    numerical_embeddings=
    numerical_discretisers=None, # simply passing the numeric features
    embedding_dim=32,
    out_dim=1,
    out_activation='sigmoid',
    depth=4,
    heads=8,
    attn_dropout=0.2,
    ff_dropout=0.2,
    mlp_hidden_factors=[2, 4],
    use_column_embedding=True,
)

In [12]:
LEARNING_RATE = 0.0001
WEIGHT_DECAY = 0.0001
NUM_EPOCHS = 1000

optimizer = tfa.optimizers.AdamW(
        learning_rate=LEARNING_RATE, weight_decay=WEIGHT_DECAY
    )

tabtransformer.compile(
    optimizer = optimizer,
    loss = tf.keras.losses.BinaryCrossentropy(),
    metrics= [tf.keras.metrics.AUC(name="PR AUC", curve='PR')],
)

In [13]:
early = EarlyStopping(monitor="val_loss", mode="min", patience=10, restore_best_weights=True)
callback_list = [early]

history = tabtransformer.fit(
    train_dataset, 
    epochs=NUM_EPOCHS, 
    validation_data=val_dataset,
    callbacks=callback_list
)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000


In [14]:
test_preds = tabtransformer.predict(test_dataset)

In [15]:
print("Test ROC AUC:", np.round(roc_auc_score(test_data[LABEL], test_preds.ravel()), 4))
print("Test PR AUC:", np.round(average_precision_score(test_data[LABEL], test_preds.ravel()), 4))

Test ROC AUC: 0.8956
Test PR AUC: 0.7343
