# Variational Autoencoder (VAE) for Anomaly Detection

In [10]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
from sklearn.preprocessing import StandardScaler

In [17]:
df = pd.read_csv("creditcard_1.csv")

X = df.drop(columns=['Class'])
y = df['Class']

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

normal_data = X_scaled[y == 0]

In [18]:
input_dim = normal_data.shape[1]
latent_dim = 2

encoder_inputs = tf.keras.Input(shape=(input_dim,))
x = layers.Dense(16, activation='relu')(encoder_inputs)
z_mean = layers.Dense(latent_dim)(x)
z_log_var = layers.Dense(latent_dim)(x)

def sampling(args):
    z_mean, z_log_var = args
    epsilon = tf.random.normal(shape=tf.shape(z_mean))
    return z_mean + tf.exp(0.5 * z_log_var) * epsilon

z = layers.Lambda(sampling)([z_mean, z_log_var])

encoder = tf.keras.Model(encoder_inputs, [z_mean, z_log_var, z])

In [19]:
latent_inputs = tf.keras.Input(shape=(latent_dim,))
x = layers.Dense(16, activation='relu')(latent_inputs)
decoder_outputs = layers.Dense(input_dim)(x)

decoder = tf.keras.Model(latent_inputs, decoder_outputs)

In [22]:
def train_step(self, data):
    with tf.GradientTape() as tape:
        z_mean, z_log_var, z = self.encoder(data)
        reconstruction = self.decoder(z)

        reconstruction_loss = tf.reduce_mean(
            tf.keras.losses.mse(data, reconstruction)
        )

        kl_loss = -0.5 * tf.reduce_mean(
            tf.reduce_sum(
                1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var),
                axis=1
            )
        )

        total_loss = reconstruction_loss + kl_loss

    grads = tape.gradient(total_loss, self.trainable_weights)
    self.optimizer.apply_gradients(zip(grads, self.trainable_weights))

    return {
        "loss": total_loss,
        "reconstruction_loss": reconstruction_loss,
        "kl_loss": kl_loss,
    }

In [23]:
vae = VAE(encoder, decoder)
vae.compile(optimizer=tf.keras.optimizers.Adam())

vae.fit(normal_data, epochs=20, batch_size=256)

Epoch 1/20


ValueError: Invalid reduction dimension 1 for input with 1 dimensions. for '{{node Sum}} = Sum[T=DT_FLOAT, Tidx=DT_INT32, keep_dims=false](Mean, Sum/reduction_indices)' with input shapes: [?], [] and with computed input tensors: input[1] = <1>.