In [None]:
# Deep learning, numerics, and standard-library utilities
import tensorflow as tf
import numpy as np
import os
import random
# Data handling and plotting
import pandas as pd
import seaborn as sns
from datetime import datetime
import matplotlib.pyplot as plt
plt.rc('font', size=16)  # default matplotlib font size for every figure
# Feature scaling utilities
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
import warnings
warnings.filterwarnings('ignore')  # silence all Python warnings
tf.get_logger().setLevel('ERROR')  # only show TensorFlow log records at ERROR level or above

# Short aliases for the Keras namespaces used throughout the notebook
tfk = tf.keras
tfkl = tf.keras.layers
print(tf.__version__)  # record the TensorFlow version in the cell output

In [None]:
# Pin every source of randomness used by the notebook to one shared seed
# so that repeated runs draw the same random numbers.
seed = 42

# Python-level sources: hash seed environment variable and the `random` module
os.environ['PYTHONHASHSEED'] = str(seed)
random.seed(seed)

# NumPy global generator
np.random.seed(seed)

# TensorFlow: both the TF2 entry point and the TF1 compatibility one
tf.random.set_seed(seed)
tf.compat.v1.set_random_seed(seed)

In [None]:
# Mount Google Drive inside the Colab runtime at /gdrive
from google.colab import drive
drive.mount('/gdrive')
# Move into the project folder so the relative path below resolves there
%cd /gdrive/My Drive/Project2

# Load the CSV from the current working directory into a DataFrame
dataset = pd.read_csv('Training.csv')

In [None]:
# Number of trailing rows held out as the test split
test_size = 6510

# Step 1 - standardize. The scaler is fitted on the training rows only
# (everything except the last `test_size` rows) and then re-used on the
# held-out rows, so no test statistics are used for fitting.
scaler = StandardScaler()
X_train_raw = scaler.fit_transform(dataset.iloc[:-test_size])
X_test_raw = scaler.transform(dataset.iloc[-test_size:])

# Step 2 - min-max rescale the standardized values, again fitting on the
# training split only. All columns are rescaled, so the values later used
# as labels are rescaled together with the input features.
min_Scaler = MinMaxScaler()
X_train_raw = min_Scaler.fit_transform(X_train_raw)
X_test_raw = min_Scaler.transform(X_test_raw)

In [None]:
# Input window length (time steps per sample) and the step between the
# starts of consecutive windows. `window` must be a multiple of `stride`.
window, stride = 400, 25

In [None]:
def build_sequences(df, target_labels=['Sponginess'], window=200, stride=20, telescope=1):
    """Cut a 2-D time series into (input window, future target) pairs.

    Parameters
    ----------
    df : 2-D array-like with shape (time steps, features)
        The series to slice. It must support ``.copy()``, ``len()`` and
        ``.shape``; it is never modified.
    target_labels : sequence
        Accepted for interface compatibility but currently ignored: the
        targets always contain every column of ``df``.
    window : int
        Number of time steps in each input sample. Must be a multiple of
        ``stride``.
    stride : int
        Distance, in time steps, between the starts of consecutive windows.
    telescope : int
        Number of time steps following each window that form its target.

    Returns
    -------
    dataset : numpy.ndarray
        Stacked input windows, shape (samples, window, features).
    labels : numpy.ndarray
        Stacked targets, shape (samples, telescope, features).
        Both arrays are empty (shape ``(0,)``) when the series is too short
        to produce a single (window, target) pair.

    Raises
    ------
    AssertionError
        If ``window`` is not a multiple of ``stride``.
    """
    # Sanity check to avoid runtime errors
    assert window % stride == 0

    dataset = []
    labels = []
    temp_df = df.copy()
    temp_label = df.copy()

    remainder = len(df) % window
    if remainder != 0:
        # Left-pad with zero rows so the series length becomes a multiple of
        # `window`. Inputs and targets come from the same columns, so one
        # padding block serves both.
        padding_len = window - remainder
        padding = np.zeros((padding_len, temp_df.shape[1]), dtype='float64')
        temp_df = np.concatenate((padding, df))
        temp_label = np.concatenate((padding, temp_label))
        assert len(temp_df) % window == 0

    # The last admissible start is len - window - telescope (its target ends
    # exactly at the end of the series). np.arange excludes its stop value,
    # hence the +1; without it that final window was silently dropped.
    last_start = len(temp_df) - window - telescope
    for idx in np.arange(0, last_start + 1, stride):
        dataset.append(temp_df[idx:idx + window])
        labels.append(temp_label[idx + window:idx + window + telescope])

    dataset = np.array(dataset)
    labels = np.array(labels)
    return dataset, labels

In [None]:
# Forecast horizon: number of future time steps predicted per model call
telescope = 20
# Every column of the loaded dataset is treated as a prediction target
target_labels = dataset.columns

In [None]:
# Slice the scaled train and test series into (input window, target) pairs
# using the shared window / stride / telescope settings.
X_train, y_train = build_sequences(
    X_train_raw,
    target_labels=target_labels,
    window=window,
    stride=stride,
    telescope=telescope,
)
X_test, y_test = build_sequences(
    X_test_raw,
    target_labels=target_labels,
    window=window,
    stride=stride,
    telescope=telescope,
)

In [None]:
def build_LSTM_model(input_shape, output_shape):
    """Build and compile a Conv1D + bidirectional-LSTM encoder/decoder.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input sample, passed directly to ``Input(shape=...)``.
    output_shape : tuple
        ``output_shape[0]`` is the number of time steps the decoder emits,
        ``output_shape[1]`` the number of units of the final Dense layer.

    Returns
    -------
    A Keras ``Model`` compiled with mean-squared-error loss, the Adam
    optimizer (default settings) and ``'mse'`` as an extra metric.
    """
    horizon = output_shape[0]
    n_outputs = output_shape[1]

    # ---------------- Encoder ----------------
    net_input = tfkl.Input(shape=input_shape)
    conv_features = tfkl.Conv1D(128, 3, padding='same', activation='relu')(net_input)
    pooled = tfkl.MaxPool1D()(conv_features)

    # First recurrent stage: returns the full sequence followed by the
    # final states of both directions.
    stage1 = tfkl.Bidirectional(
        tfkl.LSTM(256, return_sequences=True, return_state=True)
    )(pooled)
    stage1_sequence = stage1[0]
    stage1_states = stage1[1:]

    # Second recurrent stage: returns only its last output plus final states.
    stage2_layer = tfkl.Bidirectional(tfkl.LSTM(128, return_state=True))
    stage2 = stage2_layer(stage1_sequence)
    stage2_summary = stage2[0]
    stage2_states = stage2[1:]

    # ---------------- Decoder ----------------
    # Repeat the encoder summary once per forecast step, then decode with two
    # bidirectional LSTMs initialised from the matching encoder stage states.
    repeated = tfkl.RepeatVector(horizon)(stage2_summary)
    decoded1 = tfkl.Bidirectional(tfkl.LSTM(256, return_sequences=True))(
        repeated, initial_state=stage1_states
    )
    decoded2 = tfkl.Bidirectional(tfkl.LSTM(128, return_sequences=True))(
        decoded1, initial_state=stage2_states
    )
    regularised = tfkl.Dropout(0.2)(decoded2)
    # ReLU on the per-step Dense head keeps every predicted value non-negative
    forecast = tfkl.TimeDistributed(tfkl.Dense(n_outputs, activation='relu'))(regularised)

    # ---------------- Model ----------------
    model = tf.keras.models.Model(net_input, forecast)
    model.compile(
        loss=tf.keras.losses.MeanSquaredError(),
        optimizer=tf.keras.optimizers.Adam(),
        metrics=['mse'],
    )
    return model

In [None]:
# Training hyper-parameters
epochs = 100
batch_size = 128

# Per-sample tensor shapes fed to / produced by the model.
# NOTE(review): 7 is presumably the number of columns in the dataset - confirm.
output_shape = (telescope, 7)
input_shape = (window, 7)

In [None]:
# Build the compiled model and print its layer-by-layer summary
model = build_LSTM_model(input_shape, output_shape)
model.summary()
# Kept as the last expression of the cell so the notebook displays the value returned by plot_model
tfk.utils.plot_model(model, expand_nested=True)

In [None]:
# Train the model, holding out 10% of the training samples for validation.
#  - EarlyStopping: stop after 10 epochs without a better val_loss and
#    restore the best weights seen.
#  - ReduceLROnPlateau: halve the learning rate after 5 epochs without a
#    better val_loss, never going below 1e-5.
model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=.1,
    callbacks=[
        tfk.callbacks.EarlyStopping(
            monitor='val_loss',
            mode='min',
            patience=10,
            restore_best_weights=True,
        ),
        tfk.callbacks.ReduceLROnPlateau(
            monitor='val_loss',
            mode='min',
            patience=5,
            factor=0.5,
            min_lr=1e-5,
        ),
    ],
)

# Persist the trained model under a fixed name in the working directory
model.save('model_cnn_lstm_dropout_400_25_20')

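Visualize the predictions: the cells below forecast autoregressively from one test window and plot the forecast against the ground truth.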

In [None]:
# Total number of future steps to forecast autoregressively
reg_telescope = 900
# Re-slice the test series so each window is paired with a target that is
# `reg_telescope` steps long.
X_test_reg, y_test_reg = build_sequences(
    X_test_raw,
    target_labels,
    window=window,
    stride=stride,
    telescope=reg_telescope,
)

In [None]:
# Autoregressive forecasting: predict `telescope` steps at a time and feed
# each prediction back as input until `reg_telescope` steps are covered.

# Keep only the window at position 200; slicing (instead of indexing)
# preserves the leading batch axis.
X_test_reg = X_test_reg[200:201, :, :]
y_test_reg = y_test_reg[200:201, :, :]

_pred_chunks = []
X_temp = X_test_reg
for reg in range(0, reg_telescope, telescope):
    pred_temp = model.predict(X_temp)
    _pred_chunks.append(pred_temp)
    # Slide the input forward: drop the oldest `telescope` steps and append
    # the steps that were just predicted.
    X_temp = np.concatenate((X_temp[:, telescope:, :], pred_temp), axis=1)

# Join all partial forecasts along the time axis in a single concatenation
reg_predictions = np.concatenate(_pred_chunks, axis=1) if _pred_chunks else np.array([])

In [None]:
def inspect_multivariate_prediction(X, y, pred, columns, telescope, idx=None):
    """Plot one input window with its true and predicted continuation.

    One subplot is drawn per entry of ``columns``. The input window is drawn
    in the default colour, the ground truth in orange and the prediction in
    green; the y-axis of every subplot is fixed to [0, 1].

    Parameters
    ----------
    X : numpy.ndarray, indexed as X[sample, time, feature]
        Input windows.
    y : numpy.ndarray, indexed as y[sample, time, feature]
        Ground-truth continuation; ``telescope`` time steps per sample.
    pred : numpy.ndarray, indexed like ``y``
        Predicted continuation.
    columns : sequence
        Subplot titles, one per feature, in feature order.
    telescope : int
        Number of forecast time steps to place on the x-axis.
    idx : int, optional
        Sample to plot. A random sample is drawn when omitted.
    """
    # `is None` rather than `== None`: identity is the reliable test here
    if idx is None:
        idx = np.random.randint(0, len(X))

    # Derive the x-axis from the X that was passed in, not from a global
    # array, so the function works for any window length.
    window_len = X.shape[1]
    past_steps = np.arange(window_len)
    future_steps = np.arange(window_len, window_len + telescope)

    # squeeze=False keeps `axs` two-dimensional even for a single column,
    # so the indexing below never fails.
    figs, axs = plt.subplots(len(columns), 1, sharex=True, figsize=(17, 17), squeeze=False)
    for i, col in enumerate(columns):
        ax = axs[i, 0]
        ax.plot(past_steps, X[idx, :, i])
        ax.plot(future_steps, y[idx, :, i], color='orange')
        ax.plot(future_steps, pred[idx, :, i], color='green')
        ax.set_title(col)
        ax.set_ylim(0, 1)
    plt.show()

In [None]:
# Plot the selected test window, its true continuation and the
# autoregressive forecast for every target column.
inspect_multivariate_prediction(
    X=X_test_reg,
    y=y_test_reg,
    pred=reg_predictions,
    columns=target_labels,
    telescope=reg_telescope,
)