<a href="https://colab.research.google.com/github/dmnk1308/DubAir/blob/main/TabNet_Colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import tensorflow as tf
import pandas as pd
import numpy as np
from typing import Optional, Union, Tuple
import tensorflow_addons as tfa

class GLUBlock(tf.keras.layers.Layer):
    def __init__(self, units: Optional[int] = None,
                 virtual_batch_size: Optional[int] = 128, 
                 momentum: Optional[float] = 0.02):
        super(GLUBlock, self).__init__()
        self.units = units
        self.virtual_batch_size = virtual_batch_size
        self.momentum = momentum
        
    def build(self, input_shape: tf.TensorShape):
        if self.units is None:
            self.units = input_shape[-1]
            
        self.fc_outout = tf.keras.layers.Dense(self.units, 
                                               use_bias=False)
        self.bn_outout = tf.keras.layers.BatchNormalization(virtual_batch_size=self.virtual_batch_size, 
                                                            momentum=self.momentum)
        
        self.fc_gate = tf.keras.layers.Dense(self.units, 
                                             use_bias=False)
        self.bn_gate = tf.keras.layers.BatchNormalization(virtual_batch_size=self.virtual_batch_size, 
                                                          momentum=self.momentum)
        
    def call(self, inputs: Union[tf.Tensor, np.ndarray], training: Optional[bool] = None):
        output = self.bn_outout(self.fc_outout(inputs), 
                                training=training)
        gate = self.bn_gate(self.fc_gate(inputs), 
                            training=training)
    
        return output * tf.keras.activations.sigmoid(gate) # GLU

class FeatureTransformerBlock(tf.keras.layers.Layer):
    def __init__(self, units: Optional[int] = None, virtual_batch_size: Optional[int]=128, 
                 momentum: Optional[float] = 0.02, skip=False):
        super(FeatureTransformerBlock, self).__init__()
        self.units = units
        self.virtual_batch_size = virtual_batch_size
        self.momentum = momentum
        self.skip = skip
        
    def build(self, input_shape: tf.TensorShape):
        if self.units is None:
            self.units = input_shape[-1]
        
        self.initial = GLUBlock(units = self.units, 
                                virtual_batch_size=self.virtual_batch_size, 
                                momentum=self.momentum)
        self.residual =  GLUBlock(units = self.units, 
                                  virtual_batch_size=self.virtual_batch_size, 
                                  momentum=self.momentum)
        
    def call(self, inputs: Union[tf.Tensor, np.ndarray], training: Optional[bool] = None):
        initial = self.initial(inputs, training=training)
        
        if self.skip == True:
            initial += inputs

        residual = self.residual(initial, training=training) # skip
        
        return (initial + residual) * np.sqrt(0.5)

class AttentiveTransformer(tf.keras.layers.Layer):
    def __init__(self, units: Optional[int] = None, virtual_batch_size: Optional[int] = 128, 
                 momentum: Optional[float] = 0.02):
        super(AttentiveTransformer, self).__init__()
        self.units = units
        self.virtual_batch_size = virtual_batch_size
        self.momentum = momentum
        
    def build(self, input_shape: tf.TensorShape):
        if self.units is None:
            self.units = input_shape[-1]
            
        self.fc = tf.keras.layers.Dense(self.units, 
                                        use_bias=False)
        self.bn = tf.keras.layers.BatchNormalization(virtual_batch_size=self.virtual_batch_size, 
                                                     momentum=self.momentum)
        
    def call(self, inputs: Union[tf.Tensor, np.ndarray], priors: Optional[Union[tf.Tensor, np.ndarray]] = None, training: Optional[bool] = None) -> tf.Tensor:
        feature = self.bn(self.fc(inputs), 
                          training=training)
        if priors is None:
            output = feature
        else:
            output = feature * priors
        
        return tfa.activations.sparsemax(output)

class TabNetStep(tf.keras.layers.Layer):
    def __init__(self, units: Optional[int] = None, virtual_batch_size: Optional[int]=128, 
                 momentum: Optional[float] =0.02):
        super(TabNetStep, self).__init__()
        self.units = units
        self.virtual_batch_size = virtual_batch_size
        self.momentum = momentum
        
    def build(self, input_shape: tf.TensorShape):
        if self.units is None:
            self.units = input_shape[-1]
        
        self.unique = FeatureTransformerBlock(units = self.units, 
                                              virtual_batch_size=self.virtual_batch_size, 
                                              momentum=self.momentum,
                                              skip=True)
        self.attention = AttentiveTransformer(units = input_shape[-1], 
                                              virtual_batch_size=self.virtual_batch_size, 
                                              momentum=self.momentum)
        
    def call(self, inputs, shared, priors, training=None) -> Tuple[tf.Tensor]:  
        split = self.unique(shared, training=training)
        keys = self.attention(split, priors, training=training)
        masked = keys * inputs
        
        return split, masked, keys

class TabNetEncoder(tf.keras.layers.Layer):
    def __init__(self, units: int =1, 
                 n_steps: int = 3, 
                 n_features: int = 8,
                 outputs: int = 1, 
                 gamma: float = 1.3,
                 epsilon: float = 1e-8, 
                 sparsity: float = 1e-5, 
                 virtual_batch_size: Optional[int]=128, 
                 momentum: Optional[float] =0.02):
        super(TabNetEncoder, self).__init__()
        
        self.units = units
        self.n_steps = n_steps
        self.n_features = n_features
        self.virtual_batch_size = virtual_batch_size
        self.gamma = gamma
        self.epsilon = epsilon
        self.momentum = momentum
        self.sparsity = sparsity
        
    def build(self, input_shape: tf.TensorShape):            
        self.bn = tf.keras.layers.BatchNormalization(virtual_batch_size=self.virtual_batch_size, 
                                                     momentum=self.momentum)
        self.shared_block = FeatureTransformerBlock(units = self.n_features, 
                                                    virtual_batch_size=self.virtual_batch_size, 
                                                    momentum=self.momentum)        
        self.initial_step = TabNetStep(units = self.n_features, 
                                       virtual_batch_size=self.virtual_batch_size, 
                                       momentum=self.momentum)
        self.steps = [TabNetStep(units = self.n_features, 
                                 virtual_batch_size=self.virtual_batch_size, 
                                 momentum=self.momentum) for _ in range(self.n_steps)]
        self.final = tf.keras.layers.Dense(units = self.units, 
                                           use_bias=False)
    

    def call(self, X: Union[tf.Tensor, np.ndarray], training: Optional[bool] = None) -> Tuple[tf.Tensor]:        
        entropy_loss = 0.
        encoded = 0.
        output = 0.
        importance = 0.
        prior = tf.reduce_mean(tf.ones_like(X), axis=0)
        
        B = prior * self.bn(X, training=training)
        shared = self.shared_block(B, training=training)
        _, masked, keys = self.initial_step(B, shared, prior, training=training)

        for step in self.steps:
            entropy_loss += tf.reduce_mean(tf.reduce_sum(-keys * tf.math.log(keys + self.epsilon), axis=-1)) / tf.cast(self.n_steps, tf.float32)
            prior *= (self.gamma - tf.reduce_mean(keys, axis=0))
            importance += keys
            
            shared = self.shared_block(masked, training=training)
            split, masked, keys = step(B, shared, prior, training=training)
            features = tf.keras.activations.relu(split)
            
            output += features
            encoded += split
            
        self.add_loss(self.sparsity * entropy_loss)
          
        prediction = self.final(output)
        return prediction, encoded, importance

class TabNetRegression(tf.keras.Model):
    def __init__(self, outputs: int = 1, 
                 n_steps: int = 3, 
                 n_features: int = 8,
                 gamma: float = 1.3, 
                 epsilon: float = 1e-8, 
                 sparsity: float = 1e-5, 
                 feature_column: Optional[tf.keras.layers.DenseFeatures] = None, 
                 pretrained_encoder: Optional[tf.keras.layers.Layer] = None,
                 virtual_batch_size: Optional[int] = 128, 
                 momentum: Optional[float] = 0.02):
        super(TabNetRegression, self).__init__()
        
        self.outputs = outputs
        self.n_steps = n_steps
        self.n_features = n_features
        self.feature_column = feature_column
        self.pretrained_encoder = pretrained_encoder
        self.virtual_batch_size = virtual_batch_size
        self.gamma = gamma
        self.epsilon = epsilon
        self.momentum = momentum
        self.sparsity = sparsity
        
        if feature_column is None:
            self.feature = tf.keras.layers.Lambda(identity)
        else:
            self.feature = feature_column
            
        if pretrained_encoder is None:
            self.encoder = TabNetEncoder(units=outputs, 
                                        n_steps=n_steps, 
                                        n_features = n_features,
                                        outputs=outputs, 
                                        gamma=gamma, 
                                        epsilon=epsilon, 
                                        sparsity=sparsity,
                                        virtual_batch_size=self.virtual_batch_size, 
                                        momentum=momentum)
        else:
            self.encoder = pretrained_encoder

    def forward(self, X: Union[tf.Tensor, np.ndarray], training: Optional[bool] = None) -> Tuple[tf.Tensor]:
        X = self.feature(X)
        output, encoded, importance = self.encoder(X)
          
        prediction = output
        return prediction, encoded, importance
    
    def call(self, X: Union[tf.Tensor, np.ndarray], training: Optional[bool] = None) -> tf.Tensor:
        prediction, _, _ = self.forward(X)
        return prediction
    
    def transform(self, X: Union[tf.Tensor, np.ndarray], training: Optional[bool] = None) -> tf.Tensor:
        _, encoded, _ = self.forward(X)
        return encoded
    
    def explain(self, X: Union[tf.Tensor, np.ndarray], training: Optional[bool] = None) -> tf.Tensor:
        _, _, importance = self.forward(X)
        return importance    






In [11]:
len(listings.columns)

24

In [12]:
# Load data
listings = pd.read_csv("/content/drive/MyDrive/Dubair/colab.csv")
listings.drop(listings.columns[0], axis = 1, inplace=True)
price = listings["log_price"]
listings = listings.drop("log_price", axis = 1)

#X_train, X_test, y_train, y_test = train_test_split(listings, price["log_price"], random_state = 123, test_size = 0.2)
model_score = []

listings.rename(columns = {"property_type_Private room in residential home": "property_type_private_room_residential_home","property_type_Entire rental unit": "property_type_entire_rental_units"}, inplace = True)
listings["property_type_private_room_residential_home"] = listings["property_type_private_room_residential_home"].values.astype(np.int8)
listings["property_type_entire_rental_units"] = listings["property_type_entire_rental_units"].values.astype(np.int8)

def R_squared(y, y_pred):
  residual = tf.reduce_sum(tf.square(tf.subtract(y, y_pred)))
  total = tf.reduce_sum(tf.square(tf.subtract(y, tf.reduce_mean(y))))
  r2 = tf.subtract(1.0, tf.math.divide(residual, total))
  return r2


train_size = 6000
batch_size = 600

def transform(ds):
    features = tf.unstack(ds["features"])
    prices = ds["price"]

    x = dict(zip(col_names, features))
    y = prices
    return x, y

bin_col = [col for col in listings if np.isin(listings[col].unique(), [0, 1]).all()]
num_col = [col for col in listings if ~np.isin(listings[col].unique(), [0, 1]).all()]
col_names = bin_col + num_col

data = tf.data.Dataset.from_tensor_slices({"features": listings, "price": price})


data = data.shuffle(6000, seed = 13)
train_dataset = data.take(train_size)
train_dataset = train_dataset.map(transform)
train_dataset = train_dataset.batch(batch_size)
test_dataset = data.take(len(listings)-train_size)
test_dataset = test_dataset.map(transform)
test_dataset = test_dataset.batch(batch_size)

feature_columns = []

for col in col_names:
    feature_columns.append(tf.feature_column.numeric_column(col))

feature_column = tf.keras.layers.DenseFeatures(feature_columns, trainable = True)

model = TabNetRegression(n_features = 24, feature_column =feature_column, virtual_batch_size=600)


lr = tf.keras.optimizers.schedules.ExponentialDecay(0.01, decay_steps=400, decay_rate=0.9, staircase=False)
optimizer = tf.keras.optimizers.Adam(lr)
model.compile(optimizer, loss='mse', metrics=[R_squared])

model.fit(train_dataset, epochs=600, validation_data=test_dataset, verbose=1)



Epoch 1/600

InvalidArgumentError: ignored