# Anomaly Detection Using a Variational AutoEncoder

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split

In [2]:
np.random.seed(42)
tf.random.set_seed(42)

In [3]:
normal_data = np.random.normal(loc = 0, scale = 1, size = (5000, 10))
anomalies = np.random.uniform(low = -5, high = 5, size = (100, 10))
data = np.vstack((normal_data, anomalies))

In [5]:
# labels (0 for normal, 1 for anomalies)
labels = np.zeros(len(data))
labels[len(normal_data):] = 1

X_train, X_test, y_train, y_test = train_test_split(data, labels,
                                                    test_size = 0.2,
                                                    random_state = 42)

In [7]:
# autoencoder anomaly detection model
input_dim = X_train.shape[1]

model = keras.Sequential([
    keras.layers.Input(shape=(input_dim,)),
    keras.layers.Dense(128, activation = 'relu'),
    keras.layers.Dense(128, activation = 'relu'),
    keras.layers.Dense(128, activation = 'relu'),
    keras.layers.Dense(128, activation = 'relu'),
    keras.layers.Dense(128, activation = 'relu'),
    keras.layers.Dense(128, activation = 'relu'),
    keras.layers.Dense(input_dim, activation = 'sigmoid')])

model.compile(optimizer = 'adam', loss = 'mean_squared_error')

In [8]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_7 (Dense)             (None, 128)               1408      
                                                                 
 dense_8 (Dense)             (None, 128)               16512     
                                                                 
 dense_9 (Dense)             (None, 128)               16512     
                                                                 
 dense_10 (Dense)            (None, 128)               16512     
                                                                 
 dense_11 (Dense)            (None, 128)               16512     
                                                                 
 dense_12 (Dense)            (None, 128)               16512     
                                                                 
 dense_13 (Dense)            (None, 10)               

In [11]:
model.fit(X_train, X_train, epochs = 20, batch_size = 16,
          validation_data = (X_test, X_test))

reconstructed_data = model.predict(X_test)
reconstruction_errors = np.mean(np.square(X_test - reconstructed_data),
                                axis = 1)

ANOMALY_DETECTION_THRESHOLD = 99
threshold = np.percentile(reconstruction_errors, ANOMALY_DETECTION_THRESHOLD)
y_pred = (reconstruction_errors > threshold).astype(int)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [12]:
from sklearn.metrics import accuracy_score, precision_score, recall_score

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)

Accuracy: 0.9882352941176471
Precision: 1.0
Recall: 0.4782608695652174
