In [1]:
from typing import Optional, Union, Tuple

import numpy as np
import tensorflow as tf
# import tensorflow_probability as tfp
import tensorflow_addons as tfa
import pandas as pd
from sklearn.metrics import accuracy_score


@tf.function
def identity(x):
    """Return ``x`` unchanged; wrapped with ``tf.function``."""
    return x


TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 



In [2]:
import pandas as pd
import numpy as np

# Seed NumPy's legacy global RNG so the synthetic data below is reproducible.
np.random.seed(42)

num_samples = 1000

# Twelve uniform [0, 1) feature columns, drawn one column at a time so the
# random stream is consumed in column order (Feature_1 first, Feature_12 last).
data = {
    f"Feature_{idx}": np.random.rand(num_samples)
    for idx in range(1, 13)
}

df = pd.DataFrame(data)
# Binary target, drawn after every feature column has been generated.
df['Target'] = np.random.randint(0, 2, size=num_samples)

print(df.head())
# Every column except the trailing 'Target' is treated as a feature.
feature_columns = df.columns[:-1].tolist()

print(f"Số lượng mẫu: {len(df)}, Số lượng cột: {len(df.columns)}")



   Feature_1  Feature_2  Feature_3  Feature_4  Feature_5  Feature_6  \
0   0.374540   0.185133   0.261706   0.672703   0.571996   0.393636   
1   0.950714   0.541901   0.246979   0.796681   0.805432   0.473436   
2   0.731994   0.872946   0.906255   0.250468   0.760161   0.854547   
3   0.598658   0.732225   0.249546   0.624874   0.153900   0.340004   
4   0.156019   0.806561   0.271950   0.571746   0.149249   0.869650   

   Feature_7  Feature_8  Feature_9  Feature_10  Feature_11  Feature_12  Target  
0   0.648257   0.038799   0.720268    0.913578    0.373641    0.533031       1  
1   0.172386   0.186773   0.687283    0.525360    0.332912    0.137899       1  
2   0.872395   0.831246   0.095754    0.724910    0.176154    0.591243       0  
3   0.613116   0.766768   0.922572    0.436048    0.607267    0.314786       0  
4   0.157204   0.350643   0.568472    0.630035    0.476624    0.052349       0  
Số lượng mẫu: 1000, Số lượng cột: 13


In [3]:
class GLUBlock(tf.keras.layers.Layer):
    """Gated linear unit built from two batch-normalised dense projections.

    The same input is projected twice with bias-free ``Dense`` layers, each
    followed by ``BatchNormalization``. One branch supplies the values, the
    other is passed through a sigmoid and used as a multiplicative gate.

    Args:
        units: Width of both projections. ``None`` means the size of the
            input's last axis, resolved in ``build``.
        virtual_batch_size: Forwarded to both ``BatchNormalization`` layers.
        momentum: Forwarded to both ``BatchNormalization`` layers.
    """

    def __init__(self, units: Optional[int] = None,
                 virtual_batch_size: Optional[int] = 128,
                 momentum: Optional[float] = 0.02):
        super().__init__()
        self.units = units
        self.virtual_batch_size = virtual_batch_size
        self.momentum = momentum

    def build(self, input_shape: tf.TensorShape):
        """Create the value and gate sub-layers once the input width is known."""
        if self.units is None:
            self.units = input_shape[-1]

        bn_options = dict(virtual_batch_size=self.virtual_batch_size,
                          momentum=self.momentum)

        # Value branch. Attribute names are kept exactly as they were.
        self.fc_outout = tf.keras.layers.Dense(self.units, use_bias=False)
        self.bn_outout = tf.keras.layers.BatchNormalization(**bn_options)

        # Gate branch.
        self.fc_gate = tf.keras.layers.Dense(self.units, use_bias=False)
        self.bn_gate = tf.keras.layers.BatchNormalization(**bn_options)

    def call(self, inputs: Union[tf.Tensor, np.ndarray], training: Optional[bool] = None):
        """Return ``values * sigmoid(gate)`` for the given inputs."""
        values = self.bn_outout(self.fc_outout(inputs), training=training)
        gate_logits = self.bn_gate(self.fc_gate(inputs), training=training)
        gate = tf.keras.activations.sigmoid(gate_logits)

        return values * gate  # GLU

In [4]:
class FeatureTransformerBlock(tf.keras.layers.Layer):
    """Two stacked ``GLUBlock`` layers combined through a scaled residual sum.

    Args:
        units: Width of both GLU blocks. ``None`` means the size of the
            input's last axis, resolved in ``build``.
        virtual_batch_size: Forwarded to both GLU blocks.
        momentum: Forwarded to both GLU blocks.
        skip: When truthy, the layer input is added to the output of the first
            GLU block, so the input must be shape-compatible with that output.
    """

    def __init__(self, units: Optional[int] = None, virtual_batch_size: Optional[int] = 128,
                 momentum: Optional[float] = 0.02, skip: bool = False):
        super(FeatureTransformerBlock, self).__init__()
        self.units = units
        self.virtual_batch_size = virtual_batch_size
        self.momentum = momentum
        self.skip = skip

    def build(self, input_shape: tf.TensorShape):
        """Create the two GLU blocks once the input width is known."""
        if self.units is None:
            self.units = input_shape[-1]

        self.initial = GLUBlock(units=self.units,
                                virtual_batch_size=self.virtual_batch_size,
                                momentum=self.momentum)
        self.residual = GLUBlock(units=self.units,
                                 virtual_batch_size=self.virtual_batch_size,
                                 momentum=self.momentum)

    def call(self, inputs: Union[tf.Tensor, np.ndarray], training: Optional[bool] = None):
        """Apply both GLU blocks and return ``(initial + residual) * sqrt(0.5)``."""
        initial = self.initial(inputs, training=training)

        # Optional skip connection around the first GLU block.
        if self.skip:
            initial += inputs

        residual = self.residual(initial, training=training)

        # Scale the residual sum by sqrt(0.5).
        return (initial + residual) * np.sqrt(0.5)

In [5]:
class AttentiveTransformer(tf.keras.layers.Layer):
    """Produce a sparse attention mask via dense -> batch norm -> sparsemax.

    Args:
        units: Width of the dense projection (and therefore of the mask).
            ``None`` means the size of the input's last axis, resolved in
            ``build``.
        virtual_batch_size: Forwarded to the ``BatchNormalization`` layer.
        momentum: Forwarded to the ``BatchNormalization`` layer.
    """

    def __init__(self, units: Optional[int] = None, virtual_batch_size: Optional[int] = 128,
                 momentum: Optional[float] = 0.02):
        super().__init__()
        self.units = units
        self.virtual_batch_size = virtual_batch_size
        self.momentum = momentum

    def build(self, input_shape: tf.TensorShape):
        """Create the projection and normalisation sub-layers."""
        if self.units is None:
            self.units = input_shape[-1]

        self.fc = tf.keras.layers.Dense(self.units, use_bias=False)
        self.bn = tf.keras.layers.BatchNormalization(
            virtual_batch_size=self.virtual_batch_size,
            momentum=self.momentum,
        )

    def call(self, inputs: Union[tf.Tensor, np.ndarray], priors: Optional[Union[tf.Tensor, np.ndarray]] = None, training: Optional[bool] = None) -> tf.Tensor:
        """Return the sparsemax of the projected inputs, scaled by ``priors`` if given."""
        logits = self.bn(self.fc(inputs), training=training)

        # Priors, when supplied, rescale the logits element-wise before sparsemax.
        if priors is not None:
            logits = logits * priors

        return tfa.activations.sparsemax(logits)

In [6]:
class TabNetStep(tf.keras.layers.Layer):
    """One TabNet decision step: feature transformer followed by attention.

    Args:
        units: Width of the step's ``FeatureTransformerBlock``. ``None`` means
            the size of the last axis of ``inputs``, resolved in ``build``.
        virtual_batch_size: Forwarded to both sub-layers.
        momentum: Forwarded to both sub-layers.
    """

    def __init__(self, units: Optional[int] = None, virtual_batch_size: Optional[int] = 128,
                 momentum: Optional[float] = 0.02):
        super(TabNetStep, self).__init__()
        self.units = units
        self.virtual_batch_size = virtual_batch_size
        self.momentum = momentum

    def build(self, input_shape: tf.TensorShape):
        """Create the step's feature transformer and attentive transformer."""
        if self.units is None:
            self.units = input_shape[-1]

        # The step-specific transformer is built with its skip connection enabled.
        self.unique = FeatureTransformerBlock(units=self.units,
                                              virtual_batch_size=self.virtual_batch_size,
                                              momentum=self.momentum,
                                              skip=True)
        # The attention width follows input_shape so the mask can be multiplied
        # element-wise with `inputs` in `call`.
        self.attention = AttentiveTransformer(units=input_shape[-1],
                                              virtual_batch_size=self.virtual_batch_size,
                                              momentum=self.momentum)

    def call(self, inputs, shared, priors, training=None) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
        """Run one decision step.

        Args:
            inputs: Tensor that the attention mask is applied to.
            shared: Tensor fed to the step's feature transformer.
            priors: Prior scales forwarded to the attentive transformer.
            training: Forwarded to both sub-layers.

        Returns:
            A ``(split, masked, keys)`` tuple: the feature-transformer output,
            ``keys * inputs``, and the attention mask itself.
        """
        split = self.unique(shared, training=training)
        keys = self.attention(split, priors, training=training)
        masked = keys * inputs

        return split, masked, keys

In [7]:
class TabNetEncoder(tf.keras.layers.Layer):
    """TabNet encoder: an initial step followed by ``n_steps`` decision steps.

    Args:
        units: Width of the final bias-free dense projection (the prediction).
        n_steps: Number of decision steps run after the initial step.
        n_features: Width of the shared and per-step feature transformers.
        outputs: Accepted for signature compatibility; not used by this layer.
        gamma: Each step multiplies the prior by ``gamma - mean(keys, axis=0)``.
        epsilon: Added to the mask inside the logarithm of the entropy term.
        sparsity: Weight of the entropy term registered through ``add_loss``.
        virtual_batch_size: Forwarded to every batch-normalised sub-layer.
        momentum: Forwarded to every batch-normalised sub-layer.
    """

    def __init__(self, units: int = 1,
                 n_steps: int = 3,
                 n_features: int = 8,
                 outputs: int = 1,
                 gamma: float = 1.3,
                 epsilon: float = 1e-8,
                 sparsity: float = 1e-5,
                 virtual_batch_size: Optional[int] = 128,
                 momentum: Optional[float] = 0.02):
        super(TabNetEncoder, self).__init__()

        self.units = units
        self.n_steps = n_steps
        self.n_features = n_features
        self.virtual_batch_size = virtual_batch_size
        self.gamma = gamma
        self.epsilon = epsilon
        self.momentum = momentum
        self.sparsity = sparsity

    def build(self, input_shape: tf.TensorShape):
        """Create the input batch norm, shared block, decision steps and output head."""
        self.bn = tf.keras.layers.BatchNormalization(virtual_batch_size=self.virtual_batch_size,
                                                     momentum=self.momentum)
        # A single shared block is reused before every step.
        self.shared_block = FeatureTransformerBlock(units=self.n_features,
                                                    virtual_batch_size=self.virtual_batch_size,
                                                    momentum=self.momentum)
        self.initial_step = TabNetStep(units=self.n_features,
                                       virtual_batch_size=self.virtual_batch_size,
                                       momentum=self.momentum)
        self.steps = [TabNetStep(units=self.n_features,
                                 virtual_batch_size=self.virtual_batch_size,
                                 momentum=self.momentum) for _ in range(self.n_steps)]
        self.final = tf.keras.layers.Dense(units=self.units,
                                           use_bias=False)

    def call(self, X: Union[tf.Tensor, np.ndarray], training: Optional[bool] = None) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
        """Encode ``X`` and register the sparsity (entropy) loss.

        Returns:
            A ``(prediction, encoded, importance)`` tuple: the dense projection
            of the summed ReLU step outputs, the sum of the raw step outputs,
            and the sum of the attention masks accumulated in the loop.
        """
        entropy_loss = 0.
        encoded = 0.
        output = 0.
        importance = 0.
        # Loop-invariant divisor for averaging the entropy over the steps.
        n_steps = tf.cast(self.n_steps, tf.float32)
        # Start from an all-ones prior with one entry per input column.
        prior = tf.reduce_mean(tf.ones_like(X), axis=0)

        B = prior * self.bn(X, training=training)
        shared = self.shared_block(B, training=training)
        _, masked, keys = self.initial_step(B, shared, prior, training=training)

        for step in self.steps:
            # Entropy, prior and importance all use the mask produced by the
            # previous step, so the mask returned by the last step in
            # `self.steps` is not included in any of them.
            mask_entropy = tf.reduce_mean(
                tf.reduce_sum(-keys * tf.math.log(keys + self.epsilon), axis=-1))
            entropy_loss += mask_entropy / n_steps
            prior *= (self.gamma - tf.reduce_mean(keys, axis=0))
            importance += keys

            shared = self.shared_block(masked, training=training)
            split, masked, keys = step(B, shared, prior, training=training)
            features = tf.keras.activations.relu(split)

            output += features
            encoded += split

        self.add_loss(self.sparsity * entropy_loss)

        prediction = self.final(output)
        return prediction, encoded, importance

In [8]:
class TabNetDecoder(tf.keras.layers.Layer):
    """Decoder that sums ``n_steps`` dense read-outs of transformed features.

    Args:
        units: Width of each per-step ``Dense`` read-out.
        n_steps: Number of (feature transformer, dense) pairs.
        n_features: Width of the shared and per-step feature transformers.
        outputs: Accepted but not used by this layer.
        gamma: Accepted but not used by this layer.
        epsilon: Accepted but not used by this layer.
        sparsity: Accepted but not used by this layer.
        virtual_batch_size: Forwarded to every feature transformer.
        momentum: Forwarded to every feature transformer.
    """

    def __init__(self, units=1,
                 n_steps = 3,
                 n_features = 8,
                 outputs = 1,
                 gamma = 1.3,
                 epsilon = 1e-8,
                 sparsity = 1e-5,
                 virtual_batch_size=128,
                 momentum=0.02):
        super().__init__()

        self.units = units
        self.n_steps = n_steps
        self.n_features = n_features
        self.virtual_batch_size = virtual_batch_size
        self.momentum = momentum

    def build(self, input_shape: tf.TensorShape):
        """Create the shared block, the per-step blocks and the read-out layers."""
        ftb_options = dict(units=self.n_features,
                           virtual_batch_size=self.virtual_batch_size,
                           momentum=self.momentum)

        self.shared_block = FeatureTransformerBlock(**ftb_options)
        self.steps = [FeatureTransformerBlock(**ftb_options) for _ in range(self.n_steps)]
        self.fc = [tf.keras.layers.Dense(units=self.units) for _ in range(self.n_steps)]

    def call(self, X: Union[tf.Tensor, np.ndarray], training: Optional[bool] = None) -> tf.Tensor:
        """Return the sum over steps of ``fc(step(shared_block(X)))``."""
        decoded = 0.

        for transformer, readout in zip(self.steps, self.fc):
            # The shared block is applied afresh to X on every iteration.
            shared = self.shared_block(X, training=training)
            decoded += readout(transformer(shared, training=training))

        return decoded

In [22]:
import tensorflow as tf
import numpy as np
from typing import Optional, Union, Tuple

class TabNetAutoencoder(tf.keras.Model):
    """TabNet encoder/decoder pair with a sigmoid prediction head.

    ``call`` returns the sigmoid of the encoder output and registers a
    reconstruction loss (via ``add_loss``) computed from the decoder output.

    Args:
        outputs: Width of the encoder's prediction output.
        inputs: Width of the decoder's reconstruction.
        n_steps: Number of steps used by both the encoder and the decoder.
        n_features: Width of the feature transformers in both halves.
        gamma: Forwarded to the encoder.
        epsilon: Forwarded to the encoder.
        sparsity: Forwarded to the encoder.
        feature_column: Optional layer applied to the raw input first; when
            ``None`` an identity ``Lambda`` layer is used.
        virtual_batch_size: Forwarded to the encoder and the decoder.
        momentum: Forwarded to the encoder, the decoder and the input batch norm.
    """

    def __init__(self, outputs: int = 1,
                 inputs: int = 12,
                 n_steps: int = 3,
                 n_features: int = 8,
                 gamma: float = 1.3,
                 epsilon: float = 1e-8,
                 sparsity: float = 1e-5,
                 feature_column: Optional[tf.keras.layers.DenseFeatures] = None,
                 virtual_batch_size: Optional[int] = None,
                 momentum: Optional[float] = 0.02):
        super(TabNetAutoencoder, self).__init__()

        # NOTE(review): `inputs` / `outputs` are also attribute names that
        # tf.keras.Model appears to use for its own bookkeeping - confirm that
        # overwriting them with integers is harmless (e.g. for saving).
        self.outputs = outputs
        self.inputs = inputs
        self.n_steps = n_steps
        self.n_features = n_features
        self.feature_column = feature_column
        self.virtual_batch_size = virtual_batch_size
        self.gamma = gamma
        self.epsilon = epsilon
        self.momentum = momentum
        self.sparsity = sparsity

        if feature_column is None:
            self.feature = tf.keras.layers.Lambda(lambda x: x)
        else:
            self.feature = feature_column

        self.encoder = TabNetEncoder(units=outputs,
                                     n_steps=n_steps,
                                     n_features=n_features,
                                     gamma=gamma,
                                     epsilon=epsilon,
                                     sparsity=sparsity,
                                     virtual_batch_size=virtual_batch_size,
                                     momentum=momentum)

        self.decoder = TabNetDecoder(units=inputs,
                                     n_steps=n_steps,
                                     n_features=n_features,
                                     virtual_batch_size=virtual_batch_size,
                                     momentum=momentum)

        self.bn = tf.keras.layers.BatchNormalization(momentum=momentum)

        self.do = tf.keras.layers.Dropout(0.25)

    def _forward(self, X: Union[tf.Tensor, np.ndarray], training: Optional[bool] = None):
        """Run feature layer -> batch norm -> dropout mask -> encoder.

        Shared by ``call``, ``transform`` and ``explain`` so that all three see
        the encoder applied to identically pre-processed input.

        Returns:
            ``(prediction, encoded, importance, X, M)`` where ``X`` is the
            batch-normalised input and ``M`` is the dropout mask applied to it.
        """
        X = self.feature(X)
        X = self.bn(X, training=training)

        # Dropout applied to a tensor of ones yields the feature mask.
        M = self.do(tf.ones_like(X), training=training)
        D = X * M

        output, encoded, importance = self.encoder(D, training=training)
        prediction = tf.keras.activations.sigmoid(output)

        return prediction, encoded, importance, X, M

    def call(self, X: Union[tf.Tensor, np.ndarray], training: Optional[bool] = None) -> tf.Tensor:
        """Return the sigmoid prediction and register the reconstruction loss."""
        prediction, encoded, _, X, M = self._forward(X, training=training)

        T = X * (1 - M)
        reconstruction = self.decoder(encoded, training=training)

        # NOTE(review): the squared error is kept only where M != 0, and the
        # target there is X * (1 - M). Keras Dropout is understood to rescale
        # kept entries rather than leave them at 1 - confirm this selection
        # and target implement the intended masked-reconstruction objective.
        loss = tf.reduce_mean(tf.where(M != 0., tf.square(T - reconstruction), tf.zeros_like(reconstruction)))
        self.add_loss(loss)

        return prediction

    def transform(self, X: Union[tf.Tensor, np.ndarray], training: Optional[bool] = None) -> tf.Tensor:
        """Return the encoder's ``encoded`` output for ``X``."""
        _, encoded, _, _, _ = self._forward(X, training=training)
        return encoded

    def explain(self, X: Union[tf.Tensor, np.ndarray], training: Optional[bool] = None) -> tf.Tensor:
        """Return the encoder's accumulated attention-mask ``importance`` for ``X``."""
        _, _, importance, _, _ = self._forward(X, training=training)
        return importance

feature_column = None

ae = TabNetAutoencoder(
    outputs=1,
    inputs=12,
    n_steps=3,
    n_features=2,
    feature_column=feature_column,
    virtual_batch_size=None,
)


@tf.function
def dummy_loss(y, t):
    """Placeholder loss that ignores both arguments and returns zero."""
    return 0.


ae.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.005),
    loss='binary_crossentropy',
)

# Random stand-in data: 100 rows of 12 standard-normal features, binary labels.
X_train = np.random.randn(100, 12)
y_train = np.random.randint(0, 2, size=(100,))

# Train the model
ae.fit(X_train, y_train, epochs=100)
ae.summary()


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [None]:
# class TabNetAutoencoder(tf.keras.Model):
#     def __init__(self, outputs: int = 1, 
#                  inputs: int = 12,
#                  n_steps: int  = 3, 
#                  n_features: int  = 8,
#                  gamma: float = 1.3, 
#                  epsilon: float = 1e-8, 
#                  sparsity: float = 1e-5, 
#                  feature_column: Optional[tf.keras.layers.DenseFeatures] = None, 
#                  virtual_batch_size: Optional[int] = 128, 
#                  momentum: Optional[float] = 0.02):
#         super(TabNetAutoencoder, self).__init__()
        
#         self.outputs = outputs
#         self.inputs = inputs
#         self.n_steps = n_steps
#         self.n_features = n_features
#         self.feature_column = feature_column
#         self.virtual_batch_size = virtual_batch_size
#         self.gamma = gamma
#         self.epsilon = epsilon
#         self.momentum = momentum
#         self.sparsity = sparsity
        
#         if feature_column is None:
#             self.feature = tf.keras.layers.Lambda(identity)
#         else:
#             self.feature = feature_column
            
#         self.encoder = TabNetEncoder(units=outputs, 
#                                     n_steps=n_steps, 
#                                     n_features = n_features,
#                                     outputs=outputs, 
#                                     gamma=gamma, 
#                                     epsilon=epsilon, 
#                                     sparsity=sparsity,
#                                     virtual_batch_size=self.virtual_batch_size, 
#                                     momentum=momentum)
        
#         self.decoder = TabNetDecoder(units=inputs, 
#                                      n_steps=n_steps, 
#                                      n_features = n_features,
#                                      virtual_batch_size=self.virtual_batch_size, 
#                                      momentum=momentum)
        
#         self.bn = tf.keras.layers.BatchNormalization(virtual_batch_size=self.virtual_batch_size, 
#                                                      momentum=momentum)
        
#         self.do = tf.keras.layers.Dropout(0.25)

#     def forward(self, X: Union[tf.Tensor, np.ndarray], training: Optional[bool] = None) -> Tuple[tf.Tensor]:
#         X = self.feature(X)
#         X = self.bn(X)
        
#         # training mask
#         M = self.do(tf.ones_like(X), training=training)
#         D = X*M
        
#         #encoder
#         output, encoded, importance = self.encoder(D)
#         prediction = tf.keras.activations.sigmoid(output)        
        
#         return prediction, encoded, importance, X, M
    
#     def call(self, X: Union[tf.Tensor, np.ndarray], training: Optional[bool] = None) -> tf.Tensor:
#         # encode
#         prediction, encoded, _, X, M = self.forward(X)
#         T = X * (1 - M)

#         #decode
#         reconstruction = self.decoder(encoded)
        
#         #loss
#         loss  = tf.reduce_mean(tf.where(M != 0., tf.square(T-reconstruction), tf.zeros_like(reconstruction)))
        
#         self.add_loss(loss)
        
#         return prediction
    
#     def transform(self, X: Union[tf.Tensor, np.ndarray], training: Optional[bool] = None) -> tf.Tensor:
#         _, encoded, _, _, _ = self.forward(X)
#         return encoded
    
#     def explain(self, X: Union[tf.Tensor, np.ndarray], training: Optional[bool] = None) -> tf.Tensor:
#         _, _, importance, _, _ = self.forward(X)
#         return importance