In [None]:
import numpy as np
import matplotlib.pyplot as plt
from keras.datasets import mnist
from keras.models import Model
from keras.layers import Input, Dense
from keras.callbacks import EarlyStopping


In [None]:
# Load the MNIST images. The digit labels are discarded: the autoencoder
# is trained to reproduce its own input, so no targets are needed.
(x_train, _), (x_test, _) = mnist.load_data()

# Convert to float32, divide by 255 and flatten every image into a
# 784-element vector (the dense layers of an MLP expect flat inputs).
x_train = (x_train.astype('float32') / 255.).reshape(len(x_train), 784)
x_test = (x_test.astype('float32') / 255.).reshape(len(x_test), 784)

In [None]:
# Build the autoencoder with the Keras functional API (instead of Sequential):
# https://keras.io/guides/functional_api/

# Encoder: 784 -> 128 -> 64
input_img = Input(shape=(784,))
encoded1 = Dense(units=128, activation='relu')(input_img)
encoded2 = Dense(units=64, activation='relu')(encoded1)

# Decoder: 64 -> 128 -> 784 (sigmoid on the output layer)
decoded1 = Dense(units=128, activation='relu')(encoded2)
output = Dense(units=784, activation='sigmoid')(decoded1)

autoencoder = Model(inputs=input_img, outputs=output)
autoencoder.compile(loss='binary_crossentropy', optimizer='adam')

# Stop once the validation loss has not improved for 3 epochs in a row and
# roll the model back to the weights of the best epoch.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

# Train on the training set: the input is also the target, and 20% of the
# samples are held out to compute the validation loss.
autoencoder.fit(
    x_train, x_train,
    epochs=50,
    batch_size=256,
    shuffle=True,
    validation_split=0.2,
    callbacks=[early_stopping],
)


In [None]:
# Create the "anomalous" dataset: the original test images plus zero-mean
# Gaussian noise added independently to every element.
NOISE_STD = 0.1   # standard deviation of the added noise
NOISE_SEED = 42   # fixed seed so the noisy set is identical on every run

# A dedicated, seeded Generator makes this cell reproducible under
# "Restart & Run All" without touching the global NumPy random state.
noise_rng = np.random.default_rng(NOISE_SEED)
gaussian_noise = noise_rng.normal(loc=0.0, scale=NOISE_STD, size=x_test.shape)

# Clip so every value stays within [0, 1]: anything below 0 becomes 0 and
# anything above 1 becomes 1. The noise is float64, so the sum is promoted;
# cast back so the noisy set has the same dtype as x_test.
x_test_noise = np.clip(x_test + gaussian_noise, 0., 1.).astype(x_test.dtype)


In [None]:

# Visualize an image, its reconstruction and the corresponding reconstruction error
def visualize_reconstruction(image, autoencoder):
    """Plot an image, its reconstruction and the squared error side by side.

    Args:
        image: array that can be reshaped to 28x28; it is passed to the
            model as a batch of one via ``np.expand_dims(image, axis=0)``.
        autoencoder: object exposing ``predict``; its output for the single
            image is reshaped to 28x28.

    Returns:
        None. The three-panel figure is displayed with ``plt.show()``.
    """
    _, axes = plt.subplots(1, 3, figsize=(8, 4))

    input_pixels = image.reshape(28, 28)
    reconstructed_pixels = autoencoder.predict(np.expand_dims(image, axis=0)).reshape(28, 28)
    # Per-pixel squared difference between the input and its reconstruction
    squared_error = np.square(input_pixels - reconstructed_pixels)

    # (pixels, colormap, title) for each panel, left to right
    panels = (
        (input_pixels, 'gray', 'Original Image'),
        (reconstructed_pixels, 'gray', 'Reconstructed Image'),
        (squared_error, 'hot', 'Reconstruction Error'),
    )
    for ax, (pixels, colormap, title) in zip(axes, panels):
        ax.imshow(pixels, cmap=colormap)
        ax.set_title(title)
        ax.axis('off')

    plt.show()

# Pick one random test position and plot the reconstruction of both the
# normal image and its noised (anomalous) counterpart at that position.
index = np.random.randint(0, len(x_test))
for images in (x_test, x_test_noise):
    visualize_reconstruction(images[index], autoencoder)


In [None]:
# Plot the reconstruction errors of a sample of normal and anomalous records.
# Records are sorted by reconstruction error and the two classes are drawn
# in different colors.

n_samples = 30  # number of records drawn from each of the two sets

# Draw the record positions independently (without replacement) for each set
original_indices = np.random.choice(len(x_test), size=n_samples, replace=False)
anomalous_indices = np.random.choice(len(x_test_noise), size=n_samples, replace=False)


def _reconstruction_mse(batch):
    """Return the mean squared reconstruction error of each row of ``batch``."""
    return np.mean(np.square(batch - autoencoder.predict(batch)), axis=1)


# Compute the reconstruction errors
reconstruction_error_original = _reconstruction_mse(x_test[original_indices])
reconstruction_error_noise = _reconstruction_mse(x_test_noise[anomalous_indices])

# Combine the errors and compute the order that sorts them ascending
errors = np.concatenate((reconstruction_error_original, reconstruction_error_noise))
labels = ['Original MNIST'] * n_samples + ['Anomalous MNIST'] * n_samples
indices = np.argsort(errors)

# Plot the sorted reconstruction errors; the color encodes "is anomalous"
is_anomalous = np.array(labels)[indices] == 'Anomalous MNIST'
fig, ax = plt.subplots(figsize=(8, 6))
points = ax.scatter(range(len(errors)), errors[indices], c=is_anomalous, cmap='coolwarm', marker='o', s=10)
ax.set_xlabel('Sample index')
ax.set_ylabel('Reconstruction error')
ax.set_title('Reconstruction Error on Test Dataset')
fig.colorbar(points, ax=ax, label='Anomalous')
plt.show()


In [None]:
# HOMEWORK 1: complete the code to compute the confusion matrix for the binary problem of assigning
#            the labels ANOMALOUS/NORMAL based on the reconstruction error (you have to set the threshold).


In [None]:
# HOMEWORK 2: complete the code to compute the ROC curve for the binary problem of assigning
#            the labels ANOMALOUS/NORMAL based on the reconstruction error.
