# Autoencoder

The objective of this notebook is to evaluate Autoencoder (inkl. LSTM Autoencoders) for anomaly detection on streaming data

Code based on:

<https://blog.keras.io/building-autoencoders-in-keras.html>

In [3]:
from keras.layers import Input, Dense
from keras.models import Model
import numpy as np

Using TensorFlow backend.


Random Number generator

# MNIST

In [30]:
# this is the size of our encoded representations
encoding_dim = 32  # 32 floats -> compression of factor 24.5, assuming the input is 784 floats

# this is our input placeholder
input_img = Input(shape=(784,))
# "encoded" is the encoded representation of the input
encoded = Dense(encoding_dim, activation='relu')(input_img)
# "decoded" is the lossy reconstruction of the input
decoded = Dense(784, activation='sigmoid')(encoded)

# this model maps an input to its reconstruction
autoencoder = Model(input_img, decoded)

In [31]:
# create a placeholder for an encoded (32-dimensional) input
encoded_input = Input(shape=(encoding_dim,))
# retrieve the last layer of the autoencoder model
decoder_layer = autoencoder.layers[-1]
# create the decoder model
decoder = Model(encoded_input, decoder_layer(encoded_input))

In [32]:
autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')

In [12]:
from keras.datasets import mnist
import numpy as np
(x_train, _), (x_test, _) = mnist.load_data()


x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255.
x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:])))
x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:])))
print(x_train.shape)
print(x_test.shape)

(60000, 784)
(10000, 784)


In [13]:
(x_train, _), (x_test, _) = mnist.load_data()

In [14]:
x_train.shape

(60000, 28, 28)

In [None]:
autoencoder.fit(x_train, x_train,
                epochs=50,
                batch_size=256,
                shuffle=True,
                validation_data=(x_test, x_test))

# Time Series LSTM Autoencoder

Generate 1 Mio Data Points

In [8]:
def get_random_cluster_points(number_points, number_dim):
    mu = np.random.randn()
    sigma = np.random.randn()
    p = sigma * np.random.randn(number_points, number_dim) + mu
    return p

In [9]:
timesteps = 1048576
input_dim = 2
latent_dim = 2

In [10]:
p = get_random_cluster_points(timesteps, input_dim)
print("Before: {}".format(p.shape))
p=p[np.newaxis]
print("After reshape: {}".format(p.shape))

Before: (1048576, 2)
After reshape: (1, 1048576, 2)


## Build Autoencoder model

In [11]:
from keras.layers import Input, LSTM, RepeatVector
from keras.models import Model

inputs = Input(shape=(timesteps, input_dim))
encoded = LSTM(latent_dim)(inputs)

decoded = RepeatVector(timesteps)(encoded)
decoded = LSTM(input_dim, return_sequences=True)(decoded)

sequence_autoencoder = Model(inputs, decoded)
encoder = Model(inputs, encoded)

In [12]:
%%time
sequence_autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')

CPU times: user 48 s, sys: 1.35 s, total: 49.3 s
Wall time: 49.7 s


In [1]:
%%time
sequence_autoencoder.fit(p, p,
                epochs=50,
                batch_size=256,
                shuffle=True,
                validation_data=None)

NameError: name 'sequence_autoencoder' is not defined