# IV Workshop on Data and Knowledge Engineering - INFONOR 2020
## **Título:** Redes neuronales recurrentes de corto y largo plazo


**Expositor:** Dr. Juan Bekios Calfa
**Ayudante:** Sr. Ricardo Pizarro


<pre>Universidad Católica del Norte
Departamento de Ingeniería de Sistemas y Computación
Núcleo de Investigación en Inteligencia Artificial y Data Science
</pre>

Referencia:

https://machinelearningmastery.com/time-series-prediction-lstm-recurrent-neural-networks-python-keras/

## 1. Obtener base de datos

Obtenemos la base de datos sugerida en la referencia.

In [None]:
!wget https://raw.githubusercontent.com/jbrownlee/Datasets/master/airline-passengers.csv

## 2. Cargamos las librerías necesarias para resolver el problema

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler

## 3. Revisamos los datos de pasajeros

Cargamos los datos desde el archivo ``.csv`` y listamos los primeros cinco elementos.



In [None]:
# Read the airline passengers CSV into a DataFrame.
training_set = pd.read_csv(filepath_or_buffer='airline-passengers.csv')

# Display the first rows as a quick look at the data (head returns five rows
# when called with n=5, which is also its default).
training_set.head(n=5)

Visualizamos los datos para ver su comportamiento en el tiempo

In [None]:
# Keep only the second column as a 2-D NumPy array (one column, one row per
# observation); `training_set` is rebound from the DataFrame to that array.
training_set = training_set.iloc[:, 1:2].to_numpy()

# Plot the raw series against its row index.
plt.plot(training_set)
plt.suptitle('Cantidad de pasajeros')
plt.show()

Creamos una ventana deslizante: dada una secuencia de tamaño n, se desea predecir el siguiente punto.

In [None]:
def sliding_windows(data, seq_length):
    """Split a series into fixed-length input windows and next-step targets.

    Args:
        data: Sequence or array indexed by time step along its first axis.
        seq_length: Number of consecutive observations in each input window.

    Returns:
        A tuple ``(x, y)`` of NumPy arrays where ``x[i]`` is
        ``data[i:i + seq_length]`` and ``y[i]`` is ``data[i + seq_length]``.
        Both are empty when ``data`` has no more than ``seq_length`` items.
    """
    # One window for every start index that still has a following observation
    # to use as target. The previous bound, len(data) - seq_length - 1,
    # dropped the last valid window, so the final data point was never a target.
    n_windows = len(data) - seq_length

    x = [data[i:i + seq_length] for i in range(n_windows)]
    y = [data[i + seq_length] for i in range(n_windows)]

    return np.array(x), np.array(y)
# Scale the data with a min-max scaler; `sc` is kept so that predictions can
# be mapped back to the original units later.
sc = MinMaxScaler()
sc.fit(training_set)
training_data = sc.transform(training_set)

# Each sample uses `seq_length` consecutive observations to predict the next.
seq_length = 4
x, y = sliding_windows(training_data, seq_length)

## 4. Seleccionamos los datos de entrenamiento y validación

In [None]:
# Run on the first CUDA device when one is available, otherwise on the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Chronological split without shuffling: the first 67% of the windows are
# used for training and the remaining ones for validation.
train_size = int(len(y) * 0.67)
val_size = len(y) - train_size

x_train = torch.tensor(x[:train_size], device=device)
y_train = torch.tensor(y[:train_size], device=device)

# The validation windows start where the training windows end. Slicing from
# `val_size` (the number of validation samples) instead of `train_size` made
# the validation set overlap the training data.
x_val = torch.tensor(x[train_size:], device=device)
y_val = torch.tensor(y[train_size:], device=device)

# Whole series as tensors; `x` and `y` are rebound from NumPy arrays to
# tensors on `device`.
x = torch.tensor(x, device=device)
y = torch.tensor(y, device=device)

## 5. Modelo

Implementamos el modelo neuronal para una red LSTM.

In [None]:


class LSTM(nn.Module):
    """LSTM followed by a linear layer applied to the final hidden state.

    Args:
        num_classes: Number of output features produced by the linear layer.
        input_size: Number of features per time step of the input.
        hidden_size: Number of features in the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        seq_length: Optional window length. It is only stored on the instance
            for reference; the network does not use it, since nn.LSTM accepts
            sequences of any length. Previously this attribute was read from
            a module-level global, which made the class fail with NameError
            when that global was not defined.
    """

    def __init__(self, num_classes, input_size, hidden_size, num_layers,
                 seq_length=None):
        super().__init__()
        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.seq_length = seq_length

        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True)

        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Predict one output vector per input sequence.

        Args:
            x: Input tensor of shape (batch, seq_len, input_size), because the
                LSTM is built with ``batch_first=True``.

        Returns:
            Tensor of shape (batch, num_classes).
        """
        # nn.LSTM returns (output, (h_n, c_n)):
        #   output: (batch, seq_len, hidden_size) - last layer's h_t for each t.
        #   h_n:    (num_layers, batch, hidden_size) - hidden state at the last
        #           time step of every layer. batch_first does NOT apply to
        #           h_n / c_n, so the layer axis always comes first.
        #   c_n:    same shape as h_n, holding the cell state.
        _, (h_n, _) = self.lstm(x)

        # Use only the top layer's final hidden state. Flattening the whole
        # h_n with view(-1, hidden_size) mixed the layer axis into the batch
        # axis and returned num_layers * batch rows when num_layers > 1.
        last_hidden = h_n[-1].reshape(-1, self.hidden_size)

        return self.fc(last_hidden)



## 6. Entrenamos el modelo

In [None]:
# Optimisation settings.
num_epochs = 2000
learning_rate = 0.01

# Network dimensions passed to the LSTM model.
input_size = 1
hidden_size = 2
num_layers = 1
num_classes = 1

# The network is moved to `device` and cast to double precision.
# NOTE(review): the original note attributes the cast to a half-precision LSTM
# bug in PyTorch 1.6.0. PyTorch modules default to float32, so the cast is
# presumably needed because the input tensors are float64 - confirm.
lstm = LSTM(num_classes, input_size, hidden_size, num_layers)
lstm = lstm.to(device).double()

criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate)

# Training: one full-batch gradient step per epoch, reporting the loss every
# 100 epochs.
for epoch in range(num_epochs):
    optimizer.zero_grad()
    outputs = lstm(x_train)
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()
    if epoch % 100 == 0:
        print("Epoch: %d, loss: %1.5f" % (epoch, loss.item()))

## 7. Evaluamos los resultados

In [None]:
# Switch the model to evaluation mode and predict over the whole series.
lstm.eval()

# Inference only: disabling autograd avoids building a computation graph
# that is never used for a backward pass.
with torch.no_grad():
    train_predict = lstm(x)

# Move to the CPU and convert to NumPy. detach() replaces the discouraged
# `.data` accessor.
data_predict = train_predict.detach().cpu().numpy()
dataY_plot = y.detach().cpu().numpy()

# Undo the scaling so both curves are shown in the original units.
data_predict = sc.inverse_transform(data_predict)
dataY_plot = sc.inverse_transform(dataY_plot)

# Dashed red vertical line marking where the validation set begins.
plt.axvline(x=train_size, c='r', linestyle='--')

plt.plot(dataY_plot)
plt.plot(data_predict)
plt.suptitle('Prediccion de la serie de tiempo')
plt.show()