In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Lambda, Layer
from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras import backend as K

In [2]:
EPOCHS = 100

In [3]:
# List all available devices detected by TensorFlow
print("Available devices:")
devices = tf.config.list_physical_devices()
for device in devices:
    print(device)

Available devices:
PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')


## Load and prepare data

In [4]:
# Load the data
data = pd.read_csv("../dataset.csv")

In [5]:
# Separate features and target
X = data.drop("Label", axis=1)
y = data["Label"]

In [6]:
# Convert categorical data to numeric if necessary
X = pd.get_dummies(X, sparse=True)

In [7]:
# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [8]:
# Standardize the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

  if np.may_share_memory(array, array_orig):
  if np.may_share_memory(array, array_orig):


## Build an AutoEncoder

In [9]:
# Sampling layer
def sampling(args):
    z_mean, z_log_var = args
    batch = K.shape(z_mean)[0]
    dim = K.int_shape(z_mean)[1]
    epsilon = K.random_normal(shape=(batch, dim))
    return z_mean + K.exp(0.5 * z_log_var) * epsilon

In [10]:
class VAELossLayer(Layer):
    """Custom layer to add VAE loss to the model."""

    def __init__(self, **kwargs):
        super(VAELossLayer, self).__init__(**kwargs)

    def call(self, inputs):
        x, x_decoded_mean, z_mean, z_log_var = inputs
        xent_loss = binary_crossentropy(x, x_decoded_mean) * X_train_scaled.shape[1]
        kl_loss = -0.5 * K.sum(
            1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1
        )
        vae_loss = K.mean(xent_loss + kl_loss)
        self.add_loss(vae_loss)
        return x_decoded_mean

In [11]:
# Modify the autoencoder architecture
input_layer = Input(shape=(X_train_scaled.shape[1],))
x = Dense(128, activation="relu")(input_layer)
z_mean = Dense(64, activation="linear")(x)
z_log_var = Dense(64, activation="linear")(x)
z = Lambda(sampling, output_shape=(64,))([z_mean, z_log_var])

In [12]:
# Decoder
decoder_h = Dense(128, activation="relu")
decoder_mean = Dense(X_train_scaled.shape[1], activation="sigmoid")
h_decoded = decoder_h(z)
x_decoded_mean = decoder_mean(h_decoded)
autoencoder = Model(input_layer, x_decoded_mean)

In [13]:
# Use custom VAELossLayer
output_with_loss = VAELossLayer()([input_layer, x_decoded_mean, z_mean, z_log_var])

In [14]:
autoencoder = Model(input_layer, output_with_loss)
autoencoder.compile(optimizer="adam")

In [15]:
# Prediction model
predictor = Dense(1, activation="sigmoid")(z)
prediction_model = Model(input_layer, predictor)
prediction_model.compile(
    optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"]
)

In [16]:
# Model summaries
autoencoder.summary()
prediction_model.summary()

## Train and evaluate

In [17]:
# Training
autoencoder.fit(X_train_scaled, epochs=EPOCHS, batch_size=256, validation_split=0.2)

Epoch 1/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 76ms/step - loss: 9447.6348 - val_loss: 847.5374
Epoch 2/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 64ms/step - loss: 102190.8281 - val_loss: 413.4688
Epoch 3/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 62ms/step - loss: 610.2144 - val_loss: 719.1501
Epoch 4/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 63ms/step - loss: -203.7273 - val_loss: 1181.0654
Epoch 5/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 63ms/step - loss: -893.4562 - val_loss: 3208.1660
Epoch 6/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 64ms/step - loss: -3690.7971 - val_loss: 15886.2402
Epoch 7/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 62ms/step - loss: -22547.4883 - val_loss: 76123.3984
Epoch 8/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 67ms/step - loss: -133198.5938 - val_lo

<keras.src.callbacks.history.History at 0x30da9c2d0>

In [18]:
prediction_model.fit(
    X_train_scaled, y_train, epochs=EPOCHS, batch_size=256, validation_split=0.2
)

Epoch 1/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step - accuracy: 0.5021 - loss: 266.9033 - val_accuracy: 0.4981 - val_loss: 0.8430
Epoch 2/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.5134 - loss: 25.3730 - val_accuracy: 0.4931 - val_loss: 0.8596
Epoch 3/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.5777 - loss: 6.0933 - val_accuracy: 0.5156 - val_loss: 0.8143
Epoch 4/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.6618 - loss: 3.7484 - val_accuracy: 0.5369 - val_loss: 0.8071
Epoch 5/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.7213 - loss: 2.0359 - val_accuracy: 0.5238 - val_loss: 0.8397
Epoch 6/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - accuracy: 0.7307 - loss: 1.7813 - val_accuracy: 0.5394 - val_loss: 0.7865
Epoch 7/100
[1m25/25[0m

<keras.src.callbacks.history.History at 0x31a2c54d0>

In [19]:
# Evaluate
test_loss, test_acc = prediction_model.evaluate(X_test_scaled, y_test)
print("Test Accuracy: {:.2f}%".format(test_acc * 100))

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9181 - loss: 0.2633
Test Accuracy: 91.35%


In [20]:
# Pickle the models
autoencoder.save("checkpoints/vae.h5")
prediction_model.save("checkpoints/vae_prediction.h5")

