### Outlier Detection Tutorial


In [None]:
import numpy as np
import tensorflow as tf
import tensorflow.keras.datasets as tfds

from daml.metrics.outlier_detection import AE

# The seed is named so it can be changed in one place; keeping it fixed makes
# repeated runs of this notebook comparable.
SEED = 408
tf.keras.utils.set_random_seed(SEED)

#### Load the data
We will use the MNIST dataset, loaded through TensorFlow's Keras datasets API, for this tutorial on outlier detection.

In [None]:
# Fetch MNIST; load_data() hands back a (train, test) pair of (images, labels) tuples
train_split, test_split = tfds.mnist.load_data()
images, labels = train_split
test_images, test_labels = test_split

In [None]:
# Summarize the training split: sample count, per-image shape, and label counts
summary_rows = (
    ("Number of training samples: ", len(images)),
    ("Image shape:", images[0].shape),
    ("Label counts: ", np.unique(labels, return_counts=True)),
)
for caption, value in summary_rows:
    print(caption, value)

#### Initialize the model
Now, let's look at how to use DAML's outlier detection methods.  
We will focus on a simple autoencoder network from our Alibi Detect provider.

First, let's initialize our outlier detection model with an input image size of 28x28x1.

In [None]:
# Build the autoencoder-based outlier detector from alibi-detect,
# sized for 28x28 single-channel images
input_shape = (28, 28, 1)
metric = AE()
metric.initialize_detector(input_shape)
print("Outlier Detection Model:", metric.detector)

For this tutorial, we are going to trim the data down to only the labels 1, 4, and 9.  
We will also take at most 5000 images of each label.

In [None]:
# Keep only 1, 4, and 9
def get_subset(X, y, label, limit=5000):
    """Select the samples whose label equals ``label``.

    Parameters
    ----------
    X : array of samples, indexed in step with ``y``.
    y : array of labels.
    label : label value to keep.
    limit : maximum number of matching samples to return (the first ones found).

    Returns
    -------
    (X_sub, y_sub) : the selected samples with one extra trailing axis of
        length 1 appended, and their labels.
    """
    matches = np.where(y == label)
    head = slice(None, limit)  # keep only the first `limit` matches
    X_sub = np.expand_dims(X[matches][head], axis=-1)
    return X_sub, y[matches][head]

# Pull the (images, labels) subset for each digit we keep
(
    (images_one, labels_one),
    (images_four, labels_four),
    (images_nine, labels_nine),
) = (get_subset(images, labels, digit) for digit in (1, 4, 9))

# Stack the per-digit subsets into a single training set
images_subset = np.concatenate((images_one, images_four, images_nine))
labels_subset = np.concatenate((labels_one, labels_four, labels_nine))

print("Image count:", len(images_subset))
print("Image shape:", images_subset[0].shape)
print("Label counts:", np.unique(labels_subset, return_counts=True))

#### Train the model
Next, we will train the detector on the dataset of digits 1, 4, and 9.  
For better results, the number of epochs can be increased.

In [None]:
# Train the detector on the set of images.
# The epoch count is named so it is easy to find and raise for better results.
EPOCHS = 20
metric.fit_dataset(dataset=images_subset, epochs=EPOCHS, verbose=False)

#### Test for outliers
We have trained our detector on a dataset of digits 1, 4, 9.  
What happens when we give it images of digit 0 (which we expect to be "Outliers")?

In [106]:
# Only keep images with a label of 0. Reuse get_subset (default limit of 5000)
# so the outlier set is built exactly the same way as the training subsets.
images_zero, labels_zero = get_subset(images, labels, 0)

print(images_zero.shape)
print(np.unique(labels_zero, return_counts=True))

(5000, 28, 28, 1)
(array([0], dtype=uint8), array([5000]))


Now we evaluate both datasets using the trained model.

In [112]:
# Evaluate the digits the detector was trained on and report the share flagged as outliers
result_in = metric.evaluate(images_subset)
preds_in = result_in.is_outlier
print(f"Digits 1, 4, and 9 outliers: {np.mean(preds_in)*100}%")

Digits 1, 4, and 9 outliers: 20.66%


In [113]:
# Evaluate the digit-0 images (not in the training set) and report the share flagged as outliers
result_zeros = metric.evaluate(images_zero)
preds_zeros = result_zeros.is_outlier
print(f"Digit 0 outliers:{np.mean(preds_zeros)*100}%")

Digit 0 outliers:100.0%


##### Results
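We identify all of the 0s as outliers!
Note that in the run shown above the detector also flagged about 20% of the digits it was trained on (1, 4, and 9), so some in-distribution images are reported as outliers as well.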