# Autoencoder (compression)
--- 

In this notebook we will train a fully connected (dense) autoencoder on the NASA data set.

The Keras code was inspired by https://blog.keras.io/building-autoencoders-in-keras.html

In [None]:
# imports
import keras
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow.keras import optimizers
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.layers import Input, Dense, GaussianNoise
from tensorflow.keras.models import Model, Sequential

from util import plot_spectrogram_features
from util import plot_reconstruction_error
from util import load_data
from util import scale
%matplotlib inline
# Apply matplotlib's 'ggplot' style sheet to every figure drawn after this point.
plt.style.use('ggplot')


In [None]:
# Report the TensorFlow version used by this kernel.
# `tf` comes from the imports cell at the top of the notebook, so every
# dependency is declared in one place.
print(tf.__version__)

In [None]:
# NOTE(review): this is the version of the standalone `keras` package. The model
# code in this notebook imports from `tensorflow.keras`, whose version may differ.
print(keras.__version__)

In [None]:
# Directory handed to the TensorBoard callback as `log_dir` during training
# ("./" is the current working directory).
tensorboard_path = "./"

## Load NASA data

In [None]:
# Location of the feature file, relative to the current working directory.
path = "./features_nasa.pickle"
# NOTE(review): `load_data` presumably unpickles this file — only load files that
# come from a trusted source.
# Both splits are later indexed as [:, :, nb_sensor], i.e. they are used as 3-D arrays.
X_train, X_test = load_data(path)

## Build Model

In [None]:
# input dim

# Width of one input vector; it is passed to the first Dense layer as `input_shape`.
# NOTE(review): intentionally left as a placeholder to fill in. The training data is
# later built from X_train[:, :, nb_sensor], so this is presumably X_train.shape[1] —
# confirm that `scale` leaves the feature axis unchanged.
n_features = ... # provide the correct number of features

# number of hidden units (size of the encoded representation)
encoding_dim = 4 # 4 floats --> compression of factor 25, assuming the input is 100 floats

print("Number of features:", n_features)
print("Number of hidden units:", encoding_dim)

In [None]:
# Create model: a dense autoencoder with a single hidden (encoding) layer.
model = Sequential()
# Encoder: maps the n_features inputs onto encoding_dim sigmoid units.
model.add(Dense(input_shape=(n_features,), units=encoding_dim, activation='sigmoid'))
# now the model will take as input arrays of shape (*, n_features)
# and output arrays of shape (*, encoding_dim)
# Decoder: maps the encoding back to the input space. The model is fitted with
# x_train as both input and target, so the output width must match the input width.
# NOTE(review): a sigmoid output only produces values in (0, 1) — presumably `scale`
# maps the features into that range; confirm.
model.add(Dense(units=..., activation='sigmoid')) # Provide the correct number of output units

In [None]:
# Print the layer-by-layer overview of the model (output shapes and parameter counts).
model.summary()

In [None]:
# Optimizer: stochastic gradient descent with Nesterov momentum.
# The `decay` keyword is rejected or ignored by the current (non-legacy) Keras
# optimizers, so the same time-based decay, lr = 0.2 / (1 + 1e-6 * step), is
# expressed as a learning-rate schedule instead.
lr_schedule = optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.2,
    decay_steps=1,
    decay_rate=1e-6,
)
sgd = optimizers.SGD(learning_rate=lr_schedule, momentum=0.9, nesterov=True)

In [None]:
# Compile model: train with the `sgd` optimizer and use the mean squared error
# between the model output and its target as the loss.
model.compile(optimizer=sgd, loss='mean_squared_error')

### Prepare data

In [None]:
# Normalization: pick a single sensor channel and scale both splits.
nb_sensor = 0
sensor_train = X_train[:, :, nb_sensor]
sensor_test = X_test[:, :, nb_sensor]

x_train = scale(sensor_train)
# The test split is scaled with `samples` set to the number of training rows.
n_train_rows = x_train.shape[0]
x_test = scale(sensor_test, samples=n_train_rows)

# Report the resulting array shapes.
print("Shape of traing set scaled: \t {}".format(x_train.shape))
print("Shape of test set scaled: \t {}".format(x_test.shape))

In [None]:
### Train Model

In [None]:
# Train the autoencoder: x_train is both the input and the target, so the network
# learns to reconstruct its own input. Training events are logged for TensorBoard.
tensorboard_cb = TensorBoard(log_dir=tensorboard_path)
fit_options = dict(
    epochs=50,
    batch_size=20,
    shuffle=True,
    callbacks=[tensorboard_cb],
)
model.fit(x_train, x_train, **fit_options)

### Reconstruction Error

In [None]:
# Evaluate the model: per-sample reconstruction error on the test set.
# One batched predict call replaces a separate predict call for every sample,
# which avoids the per-call overhead of invoking the model in a Python loop.
# Results match the per-sample loop up to floating-point rounding.
test_inputs = np.asarray(x_test)
reconstructions = model.predict(test_inputs)

# Mean squared error over the feature axis -> one cost value per test sample.
# Stored as float64, matching the np.zeros buffer the per-sample loop filled.
costs = np.mean((test_inputs - reconstructions) ** 2, axis=1).astype(np.float64)

plot_reconstruction_error(scale(costs))