<a href="https://colab.research.google.com/github/FrancLis/Multivariate-Time-Series-Forecasting/blob/main/Copia_di_Multivariate_Timeefssssseries_Forecasting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install fast_ml
!pip install talos
!pip install kats
!pip install scipy

In [3]:
# Seed value
# Apparently you may use different seed values at each stage
seed_value = 0

# 1. Set the `PYTHONHASHSEED` environment variable at a fixed value
import os
os.environ['PYTHONHASHSEED'] = str(seed_value)

import numpy as np
import tensorflow as tf
import random as python_random

# The below is necessary for starting Numpy generated random numbers
# in a well-defined initial state.
np.random.seed(123)

# The below is necessary for starting core Python generated random numbers
# in a well-defined state.
python_random.seed(123)

# The below set_seed() will make random number generation
# in the TensorFlow backend have a well-defined initial state.
# For further details, see:
# https://www.tensorflow.org/api_docs/python/tf/random/set_seed
tf.random.set_seed(1234)

import seaborn as sns
import pandas as pd
import talos as ta
from matplotlib import pyplot as plt
from fast_ml.model_development import train_valid_test_split
from tensorflow import keras
from sklearn.preprocessing import MinMaxScaler, PowerTransformer, StandardScaler
from sklearn.metrics import mean_squared_error, r2_score, max_error, mean_absolute_error
from tensorflow.keras import Sequential, layers, callbacks
from tensorflow.keras.layers import Input, Dense, LSTM, Dropout, GRU, Bidirectional, SimpleRNN, Conv1D, MaxPooling1D, Flatten


## 1. Data Acquisition

In [4]:
# Read Csv
file = r"/content/PG.csv"
df = pd.read_csv(file, parse_dates=['Date'], index_col='Date')
plt.style.use('seaborn')

## 2. Data Visualization...

## 3. Data prepocessing

#### 2.2 Data Splitting

In [13]:
# Fast_ml library
X_train, y_train, X_valid, y_valid, X_test, y_test = train_valid_test_split(df, target='Close', method='sorted',
                                                                            sort_by_col='Date', train_size=0.6,
                                                                            valid_size=0.2, test_size=0.2)
print('X_train.shape:', X_train.shape, 'y_train.shape:', y_train.shape)
print('X_valid.shape:', X_valid.shape, 'y_valid.shape:', y_valid.shape)
print('X_test.shape:', X_test.shape, 'y_test.shape:', y_test.shape)

X_train.shape: (7887, 5) y_train.shape: (7887,)
X_valid.shape: (2629, 5) y_valid.shape: (2629,)
X_test.shape: (2629, 5) y_test.shape: (2629,)


#### 2.3 Data Transformation

In [14]:
# Normalization
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_valid = scaler.transform(X_valid)
X_test = scaler.transform(X_test)

'''
# Other transformers
# StandardScaler
st_scaler = StandardScaler()
X_train = st_scaler.fit_transform(X_train)
X_test = st_scaler.transform(X_test)

# PowerTransformer
pt = PowerTransformer()
X_train = pt.fit_transform(X_train)
X_test = pt.transform(X_test)
'''


'\n# Other transformers\n# StandardScaler\nst_scaler = StandardScaler()\nX_train = st_scaler.fit_transform(X_train)\nX_test = st_scaler.transform(X_test)\n\n# PowerTransformer\npt = PowerTransformer()\nX_train = pt.fit_transform(X_train)\nX_test = pt.transform(X_test)\n'

#### 2.4 Set "***Window size***"

In [None]:
# Create a 3D input
def create_dataset(X, y, time_steps=1):
    Xs, ys = [], []
    for i in range(len(X) - time_steps):
        v = X[i:i + time_steps, :]
        Xs.append(v)
        ys.append(y[i + time_steps])
    return np.array(Xs), np.array(ys)


TIME_STEPS = 10
X_test, y_test = create_dataset(X_test, y_test, TIME_STEPS)
X_train, y_train = create_dataset(X_train, y_train, TIME_STEPS)
X_valid, y_valid = create_dataset(X_valid, y_valid, TIME_STEPS)

print('All shapes are: (batch, time, features)')
print('X_train.shape:', X_train.shape, 'y_train.shape:', y_train.shape)
print('X_valid.shape:', X_valid.shape, 'y_valid.shape:', y_valid.shape)
print('X_test.shape:', X_test.shape, 'y_test.shape:', y_test.shape)

All shapes are: (batch, time, features)
X_train.shape: (1490, 10, 5) y_train.shape: (1490,)
X_valid.shape: (490, 10, 5) y_valid.shape: (490,)
X_test.shape: (490, 10, 5) y_test.shape: (490,)


## 3. Model Choice and Learning

In [None]:
# from talos.utils import hidden_layers

In [None]:
def network_shape(params, last_neuron, network_type):

    '''Provides the ability to include network shape in experiments. If params
    dictionary for the round contains float value for params['shapes'] then
    a linear contraction towards the last_neuron value. The higher the value,
    the fewer layers it takes to reach lesser than last_neuron.
    Supports three inbuilt shapes 'brick', 'funnel', and 'triangle'.
    params : dict
         Scan() params for a single roundself.
    last_neuron : int
         Number of neurons on the output layer in the Keras model.
    '''
    import numpy as np
    from talos.utils.exceptions import TalosParamsError

    layers = params['hidden_layers']
    shape = params['shapes']
    # network_type == 0 --> SimpleRNN
    # network_type == 1 --> GRU
    # network_type == 2 --> LSTM
    # network_type == 3 --> CONV1D
    if network_type == 3:
      first_neuron = params['first_filter']
    else:
      first_neuron = params['first_neuron']

    out = []
    n = first_neuron

    # the case where hidden_layers is zero
    if layers == 0:
        return [0]

    # the cases where an angle is applied
    if isinstance(shape, float):

        for i in range(layers):

            n *= 1 - shape

            if n > last_neuron:
                out.append(int(n))
            else:
                out.append(last_neuron)

    # the case where a rectantular shape is used
    elif shape == 'brick':
        out = [first_neuron] * layers

    elif shape == 'funnel':
        for i in range(layers + 1):
            n -= int((first_neuron - last_neuron) / layers)
            out.append(n)
        out.pop(-1)

    elif shape == 'triangle':
        out = np.linspace(first_neuron,
                          last_neuron,
                          layers+2,
                          dtype=int).tolist()

        out.pop(0)
        out.pop(-1)
        out.reverse()

    else:
        message = "'shapes' must be float or in ['funnel', 'brick', 'triangle']"
        raise TalosParamsError(message)

    return out


def hidden_layers(model, params, last_neuron, network_type):
    '''HIDDEN LAYER Generator

    NOTE: 'shapes', 'first_neuron', 'dropout', and 'hidden_layers' need
    to be present in the params dictionary.

    Hidden layer generation for the cases where number
    of layers is used as a variable in the optimization process.
    Handles things in a way where any number of layers can be tried
    with matching hyperparameters.'''

    # check for the params that are required for hidden_layers

    from tensorflow.keras.layers import Dense, Dropout, Conv1D, MaxPooling1D, SimpleRNN, GRU, LSTM
    # from .network_shape import network_shape
    from talos.utils.exceptions import TalosParamsError

    if network_type == 0:
      required = ['shapes', 'first_neuron', 'dropout', 'hidden_layers', 'rnn_units']
    elif network_type == 1:
      required = ['shapes', 'first_neuron', 'dropout', 'hidden_layers', 'gru_units']
    elif network_type == 2:
      required = ['shapes', 'first_neuron', 'dropout', 'hidden_layers', 'lstm_units']
    elif network_type == 3:
      required = ['shapes', 'first_filter', 'dropout', 'hidden_layers', 'kernel_size', 'activation', 'filter']



    for param in required:
        if param not in params:
            message = "hidden_layers requires '" + param + "' in params"
            raise TalosParamsError(message)

    layer_neurons = network_shape(params, last_neuron, network_type)
    # network_type == 0 --> SimpleRNN
    # network_type == 1 --> GRU
    # network_type == 2 --> LSTM
    # network_type == 3 --> CONV1D

    if network_type == 0:
        for i in range(params['hidden_layers']):
            if params['hidden_layers'] == 0:
                model.add(SimpleRNN(layer_neurons[i], return_sequences=False,))
            else:
                if i == params['hidden_layers'] - 1:
                    model.add(SimpleRNN(layer_neurons[i], return_sequences=False,))
                else:
                    model.add(SimpleRNN(layer_neurons[i], return_sequences=True,))
    elif network_type == 1:
         for i in range(params['hidden_layers']):
          model.add(GRU(layer_neurons[i], return_sequences=True))
    elif network_type == 2:
         for i in range(params['hidden_layers']):
          model.add(LSTM(layer_neurons[i], return_sequences=True))
    elif network_type == 3:
      for i in range(params['hidden_layers']):

          model.add(Conv1D(layer_neurons[i],
                          kernel_size=params.get('kernel_size'),
                           padding = 'same', activation='relu'))
          model.add(MaxPooling1D(pool_size=2, padding='same'))
          model.add(Dropout(params['dropout']))
    else:
     message = "Model not supported"
     raise TalosParamsError(message)



Topology shapes are package-specific names where ‘brick’ assigns the
same number of neurons in each layer, ‘triangle’ decreases the number of
neurons by a constant number with each layer so that the shape resembles a
triangle, and ‘funnel’ decreases the number of neurons by the floor of the
difference between the specified number of neurons in the first layer and last
layer divided by the number of desired hidden layers, resulting in a funnel
shape.

#### 3.1 Simple Recurrent Neural Network (RNN)

In [None]:
def SimpleRNN_fn(x_train, y_train, x_val, y_val, params):
	  # Step 1: reset the tensorflow backend session.
    tf.keras.backend.clear_session()
    # Step 2: Define the model with variable hyperparameters.
    dropout = float(params['dropout'])
    lr = float(params['lr'])
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.SimpleRNN(params['rnn_units'], return_sequences=True,
                     input_shape=(X_train.shape[1], X_train.shape[2])))
    hidden_layers(model, params, 1, 0)
    model.add(tf.keras.layers.Dropout(dropout))
    model.add(tf.keras.layers.Dense(1, activation='linear',))
    
    model.compile(
        loss='mse',
        optimizer = params['optimiz'](learning_rate=lr),
        # metrics=['mse']
    )

    stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', 
                                                  patience=5)
    
    history = model.fit(
        x_train, y_train, 
        epochs=params['epochs'], 
        batch_size=params['batch_size'], 
        verbose=0,
        validation_data=[x_val, y_val],
        callbacks=[stop_early], 
    )

    return history, model, 

from tensorflow.keras.optimizers import Adam, Nadam
para = {
    'batch_size': [1250],  # 100, 500, 1250,
    'epochs': [2],  # 100, 200
    'shapes': ['funnel', 'brick', 'triangle'],  # <<< required
    'first_neuron': [64],  # 64,                   # <<< required
    'hidden_layers': [2, 4],  # 2, 3                   # <<< required
    'dropout': [0, ],  # 0.25                         # <<< required
    'rnn_units': [128],  # 128                       # <<< required
    'optimiz': [Nadam, Adam],
    'lr': [0.001]
}

scan_results_SimpleRNN = ta.Scan(x=X_train,
                 y=y_train,
                 params=para,
                 model=SimpleRNN_fn,
                 experiment_name='HyperparameterTuning_SimpleRNN',
                 x_val=X_valid,
                 y_val=y_valid,
                 disable_progress_bar=False, print_params=True)

## 4. Model Selection

In [None]:
scan_results_SimpleRNN.details

experiment_name        HyperparameterTuning_SimpleRNN
random_method                        uniform_mersenne
reduction_method                                 None
reduction_interval                                 50
reduction_window                                   20
reduction_threshold                               0.2
reduction_metric                              val_acc
complete_time                          02/07/22/17:54
x_shape                                 (1490, 10, 5)
y_shape                                       (1490,)
dtype: object

In [None]:
ta.Reporting(scan_results_SimpleRNN)

<talos.commands.analyze.Analyze at 0x7febe81c13d0>

In [None]:
r = ta.Reporting('/content/HyperparameterTuning_SimpleRNN/020722111217.csv')

# returns the results dataframe
r.data.sort_values(by=['val_loss'], ascending=True)
#r.plot_kde

# returns the highest value for 'val_fmeasure'
#r.high('mse')

# returns the number of rounds it took to find best model
#r.rounds2high()

# draws a histogram for 'val_acc'
#r.plot_hist()


FileNotFoundError: ignored

In [None]:
# get correlation for hyperparameters against a metric
r.correlate('val_loss', ['epochs', 'loss', 'val_loss'])

In [None]:
r.plot_corr('val_loss', ['loss', 'val_loss', 'epochs', 'rnn_units', 'dropout'])

In [None]:
# Get the best model index with highest 'val_categorical_accuracy' 
model_id = scan_results_SimpleRNN.data['val_loss'].astype('float').argmin() - 0

model_id

In [None]:
# Clear any previous TensorFlow session.
tf.keras.backend.clear_session()

# Load the model parameters from the scanner.
from tensorflow.keras.models import model_from_json
model = model_from_json(scan_results_SimpleRNN.saved_models[model_id])
model.set_weights(scan_results_SimpleRNN.saved_weights[model_id])
model.summary()
model.save('./best_model')

# Load that model
# my_tf_saved_model = tf.keras.models.load_model('./saved_models/my_tf_model')
# my_tf_saved_model.summary()

In [None]:
from keras.utils.vis_utils import plot_model
plot_model(model, to_file='model.png', show_shapes=True)

In [None]:
model_history = scan_results_SimpleRNN.round_history[model_id]

# LOSS CURVE
# Plot train loss and validation loss
def plot_loss(history):
    plt.figure(figsize=(10, 6))
    plt.plot(history['loss'])
    plt.plot(history['val_loss'])
    plt.ylabel('Loss')
    plt.xlabel('epoch')
    plt.legend(['Train loss', 'Validation loss'], loc='upper right')
    # plt.show()


plot_loss(model_history)

## 5. Model Prediction

In [None]:
# Make prediction
def prediction(model):
    prediction = model.predict(X_test)
    return prediction


prediction_simpleRNN = prediction(model)

# prediction_simpleRNN.shape

# Plot true future vs prediction
def plot_future(prediction, y_test):
    plt.figure(figsize=(10, 6))
    range_future = len(prediction)
    plt.plot(np.arange(range_future), np.array(y_test),
             label='True Future')
    plt.plot(np.arange(range_future), np.array(prediction),
             label='Prediction')
    plt.legend(loc='upper left')
    plt.xlabel('Time (day)')
    plt.ylabel('Daily water consumption ($m^3$/capita.day)')
    # plt.show()


plot_future(prediction_simpleRNN, y_test)

In [None]:
# Define a function to calculate MAE and RMSE
def evaluate_prediction(predictions, actual, model_name):
    rsme = np.sqrt((mean_squared_error(predictions, actual)))
    mae = mean_absolute_error(actual, predictions)
    r2 = r2_score(actual, predictions)
    max_err = max_error(actual, predictions)

    print(model_name + ':')
    print('R^2: {:.4f}'.format(r2))
    print('Mean Absolute Error: {:.4f}'.format(mae))
    print('Root Mean Square Error: {:.4f}'.format(rsme))
    print('Max_error: {:.4f}'.format(max_err))
    print('')


evaluate_prediction(prediction_simpleRNN, y_test, 'LSTM')