# Multivariate time series prediction for one feature

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import os
from utils_plot import plot_series, plot_train_history, plot_prediction
import matplotlib.pyplot as plt
print(tf.__version__)

In [None]:
# Directory from which the train/test CSV and .npy file paths are built.
CONFIG_PATH = '/root/work/artwork_sequence/train_test_configuration'
# Directory where the figures produced by this notebook are saved.
PLOT_PATH = '/root/work/artwork_sequence/plots'

In [None]:
# File name of each dataset artefact, keyed by the name used to look it up.
_sequence_files = {
    'x_train': 'X_train.csv',
    'x_test': 'X_test.csv',
    'x_train_matrix': 'X_train_matrix.npy',
    'x_test_matrix': 'X_test_matrix.npy',
}
# Resolve every file name against CONFIG_PATH.
museum_sequence_path = {
    key: os.path.join(CONFIG_PATH, file_name)
    for key, file_name in _sequence_files.items()
}
museum_sequence_path

## Load data

In [None]:
# CSV form of both splits; the first CSV column is used as the index.
df_x_train, df_x_test = (
    pd.read_csv(museum_sequence_path[split], index_col=0)
    for split in ('x_train', 'x_test')
)
# NumPy arrays stored in the .npy files of both splits.
x_train_matrix, x_test_matrix = (
    np.load(museum_sequence_path[split])
    for split in ('x_train_matrix', 'x_test_matrix')
)
df_x_train.head()

# Configure the data to fit the model input


Because the **Prediction feature model** splits the data into training and validation datasets itself, all the data must be passed to it as a single block.

**Define timeline**

In [None]:
# One time index per row of the training and test matrices combined.
n_train_rows, n_test_rows = len(x_train_matrix), len(x_test_matrix)
time = np.arange(n_train_rows + n_test_rows)
time.shape

## Split dataset

**Define window size**

In [None]:
# Training and test rows are stacked into one block; split_time marks the
# index at which the test rows begin.
X = np.concatenate((x_train_matrix, x_test_matrix))
split_time = len(x_train_matrix)

# Features fed to the model: the first three columns.
x_feature = X[:, :3]

# Number of time steps per input window
# (original note: the mean length of the tours).
window_size = 4

# Settings passed to the windowed dataset.
shuffle_buffer_size = 300
batch_size = 128

In [None]:
# Number of dimensions of the feature array.
x_feature.ndim

**Create windowed dataset**

In [None]:
from Prediction_model_feature import Windowed_Dataset

In [None]:
# Windowing options; training and validation use the same batch size.
windowing_options = {
    'split_time': split_time,
    'window_size': window_size,
    'shuffle_buffer': shuffle_buffer_size,
    'train_batch_size': batch_size,
    'val_batch_size': batch_size,
}
dataset = Windowed_Dataset(x_feature, **windowing_options)

In [None]:
# Training set first, then validation set, both taken from the windowed dataset.
train_set, val_set = dataset.get_train_dataset(), dataset.get_val_dataset()

**Plot example**

In [None]:
# Inspect the first sample of the first training batch.
for x, y in train_set.take(1):
    sample_window = x[0].numpy()
    sample_target = y[0].numpy()
    print(sample_window.shape)
    print(sample_target)
    # Only the first feature column of the window is plotted next to the target.
    plot = plot_prediction([sample_window[:, 0], sample_target], 'Sample example')
    # To keep this figure, save it before showing it:
    #   plot.savefig(os.path.join(PLOT_PATH, 'sample_example.png'))
    plot.show()
    

## Define model

In [None]:
def define_model():
    """Build the uncompiled Conv1D + stacked-LSTM regressor.

    Every call clears the Keras backend session and sets both the
    TensorFlow and NumPy seeds to 51 before the layers are created.

    The input length is read from the module-level ``window_size``; each
    time step carries 3 features.

    Returns:
        A ``tf.keras.models.Sequential`` named ``"Sequence_Feature_1"``
        whose single Dense output is multiplied by 400.
    """
    seed = 51
    tf.keras.backend.clear_session()
    tf.random.set_seed(seed)
    np.random.seed(seed)

    layers = tf.keras.layers
    stack = [
        layers.Conv1D(
            filters=32,
            kernel_size=5,
            strides=1,
            padding="causal",
            activation="relu",
            input_shape=[window_size, 3],
        ),
        # First LSTM keeps the full sequence so the second one can consume it.
        layers.LSTM(32, return_sequences=True),
        layers.LSTM(16),
        layers.Dense(16, activation="relu"),
        layers.Dense(8, activation="relu"),
        layers.Dense(1, name="prediction"),
        # Rescale the network output by a fixed factor of 400.
        layers.Lambda(lambda x: x * 400),
    ]

    return tf.keras.models.Sequential(stack, name="Sequence_Feature_1")

In [None]:
# Build a fresh model and print its layer summary.
model = define_model()
model.summary()

**Train with different learning rates**

In [None]:
# Learning-rate sweep: start at 1e-8 and multiply the rate by 10 every
# 20 epochs, so the loss can be inspected as a function of the rate.
lr_schedule = tf.keras.callbacks.LearningRateScheduler(
    lambda epoch: 1e-8 * 10**(epoch / 20))
# `learning_rate` is the supported keyword; `lr` is only a deprecated alias
# and is rejected by newer Keras releases.
optimizer = tf.keras.optimizers.SGD(learning_rate=1e-8, momentum=0.9)
model.compile(loss=tf.keras.losses.Huber(),
              optimizer=optimizer,
              metrics=["mae"])

In [None]:
# Train for 100 epochs with the learning-rate scheduler attached; the
# returned history holds the per-epoch values plotted afterwards.
history = model.fit(train_set, epochs=100, callbacks=[lr_schedule])

**Find best learning rate**

In [None]:
import matplotlib.pyplot as plt

# The scheduler callback records the rate under "lr" in tf.keras 2.x and
# under "learning_rate" in Keras 3; use whichever key is present.
lr_key = "lr" if "lr" in history.history else "learning_rate"

# Loss against learning rate on a logarithmic x axis.
plt.semilogx(history.history[lr_key], history.history["loss"])
plt.axis([1e-8, 1e-4, 0, 60])
plt.title('Learning rate history')
plt.xlabel("Learning rate")
plt.ylabel("Loss")

# Save before show(): the figure is written to disk first.
plt.savefig(os.path.join(PLOT_PATH, 'learning_rate.png'))
plt.show()

**Train with the best learning rate**

In [None]:
# Start again from a freshly built model so the sweep's weights are not reused.
model = define_model()

# `learning_rate` is the supported keyword; `lr` is only a deprecated alias
# and is rejected by newer Keras releases.
optimizer = tf.keras.optimizers.SGD(learning_rate=1.5e-6, momentum=0.9)
model.compile(loss=tf.keras.losses.Huber(),
              optimizer=optimizer,
              metrics=["mae"])
# Train with a fixed rate and evaluate on the validation set after each epoch.
history = model.fit(train_set,
                    epochs=30,
                    validation_data=val_set)

In [None]:
# Plot the training history, save the figure, then display it.
plot = plot_train_history(history, "Train history")
plot.savefig(os.path.join(PLOT_PATH, 'train_history.png'))
plot.show()

## Evaluate model

In [None]:
# Compare the prediction with the true value for the first sample of the
# first validation batch.
for x, y in val_set.take(1):
    # Predict the whole batch and keep the output for its first sample.
    prediction = model.predict(x)[0]
    first_feature_window = x[0][:, 0].numpy()
    true_value = y[0].numpy()
    plot = plot_prediction(
        [first_feature_window, true_value, prediction[0]],
        'Feature prediction model',
    )
    plot.savefig(os.path.join(PLOT_PATH, 'prediction_single_point.png'))
    plot.show()

**Forecast over the whole series**

In [None]:
def model_forecast(model, series, window_size, batch_size):
    """Run ``model`` over every sliding window of ``series``.

    Args:
        model: Object whose ``predict`` accepts a ``tf.data.Dataset``.
        series: Array-like that ``from_tensor_slices`` cuts along its first
            axis, giving one element per time step.
        window_size: Number of consecutive time steps per window.
        batch_size: Number of windows per batch handed to ``predict``.

    Returns:
        Whatever ``model.predict`` returns for the batched windows. Windows
        are built with ``shift=1`` and ``drop_remainder=True``, so window
        ``i`` covers steps ``i .. i + window_size - 1`` and there is one
        window per start index ``0 .. len(series) - window_size``.
    """
    ds = tf.data.Dataset.from_tensor_slices(series)
    ds = ds.window(window_size, shift=1, drop_remainder=True)
    # Each window is itself a dataset; batching it with window_size
    # collapses it into a single tensor.
    ds = ds.flat_map(lambda w: w.batch(window_size))
    ds = ds.batch(batch_size)
    return model.predict(ds)

In [None]:
# Predict on every sliding window of the full feature array
# (training and test rows together).
rnn_forecast = model_forecast(model, x_feature, window_size, batch_size)

In [None]:
# Window i covers steps i .. i+window_size-1. Assuming the model is trained
# to predict the step right after its window (confirm against
# Windowed_Dataset), its output is the forecast for step i+window_size.
# Start at the window whose target is split_time and drop the final window,
# whose target lies past the end of the series, so that the result lines up
# one-to-one with the rows from split_time onward.
rnn_forecast = rnn_forecast[split_time - window_size:-1, -1]

In [None]:
# Sanity check: the plots below pair this forecast with time[split_time:],
# so its length should match.
rnn_forecast.shape

**Plot series**

In [None]:
# Plot the forecast alone over the time steps from split_time onward.
plot = plot_series(time[split_time:], [(rnn_forecast, 'rnn')])
plot.show()

In [None]:
# Rows from split_time onward, i.e. the test rows stacked after the training rows.
x_valid = x_feature[split_time:]
# Overlay the first feature of those rows with the forecast.
plot = plot_series(time[split_time:], [(x_valid[:,0], 'x_valid'), (rnn_forecast, 'rnn')])
# Title spelling fixed ("Forescast" -> "Forecast"). The file name below is
# left unchanged so existing references to the saved figure keep working.
plot.title('Forecast artwork sequence')
plot.savefig(os.path.join(PLOT_PATH, 'forescast_sequence.png'))

In [None]:
# Mean absolute error between the first feature of the validation rows
# and the forecast.
tf.keras.metrics.mean_absolute_error(x_valid[:,0], rnn_forecast).numpy().mean()