# About
A univariate time series is a dataset consisting of a single series of observations with a temporal ordering. A model must learn from the series of past observations in order to predict the next value in the sequence.

## Note:
* The chosen configuration of the models is arbitrary and not optimized for each problem; that was not the goal.

# Libraries

In [1]:
%run "/home/cesar/Python_NBs/HDL_Project/HDL_Project/global_fv.ipynb"

In [2]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import mysql.connector

import numpy as np 
import pandas as pd

from sklearn.model_selection import train_test_split

from numpy import array
from tensorflow.keras import Sequential
from tensorflow.keras.layers import LSTM, Dense, Bidirectional

from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

# User-Defined Functions

In [3]:
def split_sequence(sequence, n_steps):
    """
    Turn a univariate series into supervised-learning samples.

    A window of ``n_steps`` consecutive observations is slid over ``sequence``
    one position at a time. Each window becomes one input sample and the
    observation immediately following it becomes the matching output for
    one-step-ahead prediction.

    Parameters
    ----------
    sequence : indexable, sliceable series of observations
    n_steps  : number of time steps in each input window

    Returns
    -------
    (X, y) : two numpy arrays; X holds the input windows, y the value that
             follows each window. Both are empty when the series is too short
             to yield a single window plus its target.
    """
    total = len(sequence)
    windows = []
    targets = []

    start = 0
    # Keep sliding while the target index (start + n_steps) is still inside the series
    while start < total and start + n_steps < total:
        stop = start + n_steps
        windows.append(sequence[start:stop])
        targets.append(sequence[stop])
        start += 1

    return array(windows), array(targets)

In [4]:
def reshaping_1D(X, n_features):
    """
    Reshape ``X`` into a 3-axis array.

    The first two axis lengths of ``X`` are kept and ``n_features`` is used as
    the length of the third axis, i.e. the result has shape
    ``(X.shape[0], X.shape[1], n_features)``.
    """
    n_samples = X.shape[0]
    n_timesteps = X.shape[1]
    target_shape = (n_samples, n_timesteps, n_features)
    return X.reshape(target_shape)

In [5]:
def quick_test(model, test_id, X, n_steps, n_features):
    """
    Predict a single sample with ``model``.

    The sample ``X[test_id]`` is reshaped to ``(1, n_steps, n_features)``
    (a batch holding one sample) and passed to ``model.predict`` with
    ``verbose=0``. The prediction (yhat) is returned unchanged.
    """
    single_batch = X[test_id].reshape((1, n_steps, n_features))
    return model.predict(single_batch, verbose=0)

# Data

## Input data and parameters

In [7]:
# ----------------------
# ----- Parameters -----
# ----------------------
# Number of time steps per input window, and number of features
# (1 feature: univariate example)
n_steps, n_features = 5, 1
# Input series: the `Monterrey` column of the `HDL_PM2d5` table, fetched with qdata.
# NOTE(review): qdata is not defined in this notebook; presumably it comes from
# the global_fv.ipynb notebook executed at the top — confirm its return type.
raw_seq = qdata("SELECT Monterrey FROM `HDL_PM2d5`")

## Data preparation

In [8]:
# ----------------------
# ------ Command -------
# ----------------------
# Splitting the series into supervised samples, then into train/test subsets
X, y = split_sequence(raw_seq, n_steps)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=101)

# Reshaping X from [samples, timesteps] into [samples, timesteps, features].
# Both subsets are reshaped so that the test data has the same 3D layout the
# model is trained on, instead of relying on implicit shape handling at
# prediction time.
X_train = reshaping_1D(X_train, n_features)
X_test = reshaping_1D(X_test, n_features)
# ----------------------
# --- Visualization ----
# ----------------------
#for i in range(2):
#    print(X[i], y[i])

# Bi-LSTM training

In [16]:
# ----------------------
# ------ Command -------
# ----------------------
# Bidirectional LSTM model for univariate time series forecasting:
# one Bi-LSTM layer (50 units, tanh) followed by a single-unit Dense output.
model = Sequential([
    Bidirectional(LSTM(50, activation='tanh'), input_shape=(n_steps, n_features)),
    Dense(1),
])
model.compile(optimizer='adam', loss='mse')

# Train silently for 1000 epochs on the training subset
model.fit(X_train, y_train, epochs=1000, verbose=0)

<keras.callbacks.History at 0x7fa4bc400e80>

In [17]:
# Index of the test sample used for a single-prediction sanity check
test_id = 0
    
quick_test(model, test_id, X_test, n_steps, n_features)    

array([[29.84586]], dtype=float32)

# Error Metrics

In [18]:
# Predictions for the whole test subset; reused by the error metrics below
test = model.predict(X_test, verbose=0)

## RMSE

In [19]:
# Root mean squared error between the true test targets and the predictions.
# The square root is taken explicitly because the `squared=False` argument of
# mean_squared_error is deprecated in recent scikit-learn releases and later removed.
np.sqrt(mean_squared_error(y_test, test))

5.895223235234297

## MAE

In [20]:
# Mean absolute error between the true test targets and the predictions
mean_absolute_error(y_test, test)

2.370067773316069