In [None]:
# Mount Google Drive when the notebook runs inside Google Colab.
# The import is guarded so that a fresh "Restart & Run All" also works in a
# local Jupyter kernel, where the `google.colab` package is not installed.
try:
  from google.colab import drive
except ImportError:
  print('google.colab is not available - skipping the Google Drive mount')
else:
  drive.mount('/content/drive')

In [None]:
import os
import pandas as pd
import numpy as np

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM
from tensorflow.keras.callbacks import EarlyStopping

import matplotlib.pyplot as plt

import time

# Root folder that contains the `preprocessed/` CSV directory.
path_to_data = 'data'

# File names inside `<path_to_data>/preprocessed/`.
all_data_file = 'alldata.csv'
# NOTE(review): the name `target` is rebound later in the notebook to the
# windowed label arrays, so the CSV-loading cell cannot be re-run after that
# point without re-running this cell first.
target = 'catalao_A034.csv'

In [None]:
# Both CSVs live in the same folder and are parsed with the same options:
# ';' separated, first column used as the index, index parsed as dates.
_preprocessed_prefix = path_to_data + '/preprocessed/'
_read_options = dict(sep=';', index_col=0, low_memory=False, parse_dates=True)

data_all = pd.read_csv(_preprocessed_prefix + all_data_file, **_read_options)
data_target = pd.read_csv(_preprocessed_prefix + target, **_read_options)

In [None]:
# Inspect the column labels available in the target frame.
data_target.columns

In [None]:
# Columns of the target frame that are kept; every other column is dropped.
select_columns = [
    'Precipitação',
    'Temperatura máxima',
    'Temperatura mínima',
    'Umidade relativa máxima',
    'Umidade relativa mínima',
    'Vento rajada máxima',
]

# Selecting with a list of labels returns a new frame with exactly these
# columns, in this order.
data_target = data_target[select_columns]

In [None]:
# Display the target frame (rich notebook output of the last expression).
data_target

In [None]:
# Scale each frame into the range [-1, 1] with its own scaler.
# NOTE(review): both scalers are fitted on the complete frames, i.e. before
# any train/test split is made further down in the notebook.

# All-data frame.
scaler_all = MinMaxScaler(feature_range=(-1, 1))
scaled_data_all = scaler_all.fit_transform(data_all)

# Target frame; `scaler_target` is used later to invert the scaling of
# predictions and labels.
scaler_target = MinMaxScaler(feature_range=(-1, 1))
scaled_data_target = scaler_target.fit_transform(data_target)

In [None]:
def windowing(data, target, size=2, horizons=(1,)):
  """Build sliding-window inputs and one label array per forecast horizon.

  For every index ``i`` in ``range(size, target.shape[0] - (max(horizons) - 1))``
  the input sample is ``data[i-size:i, :]`` and, for each horizon ``h``, the
  label is ``target[i+h-1, :]``. All horizons share the same input windows, so
  every label array has as many rows as the returned input array.

  Args:
    data: 2-D array indexed as ``[row, column]``; source of the input windows.
    target: 2-D array indexed as ``[row, column]``; source of the labels.
    size: number of consecutive rows in each input window.
    horizons: iterable of step offsets; ``1`` selects the row right after the
      window. Repeated values are collapsed to a single entry.

  Returns:
    Tuple ``(X, y)``: ``X`` is ``np.array`` of the stacked windows and ``y`` is
    a dict mapping each horizon to ``np.array`` of its labels.

  Raises:
    ValueError: if ``horizons`` is empty (raised by ``max``).
  """
  # Materialise once so generators work and the caller's object is never
  # shared with a default argument.
  horizons = list(horizons)

  # Stop early enough that the largest horizon still indexes inside `target`.
  stop = target.shape[0] - (max(horizons) - 1)
  anchors = range(size, stop)

  X_data = [data[i - size:i, :] for i in anchors]

  # Building each horizon independently keeps labels aligned with X_data even
  # when `horizons` contains the same value more than once.
  y_data = {
      horizon: np.array([target[i + horizon - 1, :] for i in anchors])
      for horizon in horizons
  }

  return np.array(X_data), y_data

In [None]:
# Windowing configuration: rows per input window and the forecast offsets.
horizons = [1, 2, 3, 4, 5, 6, 12, 24, 48]
window_size = 18

# `data` receives the stacked input windows and `target` a dict that maps each
# horizon to its label array (this rebinds `target`, which held the target CSV
# file name until now).
data, target = windowing(
    scaled_data_all,
    scaled_data_target,
    size=window_size,
    horizons=horizons,
)

In [None]:
# Shape of the stacked input windows returned by windowing().
data.shape

In [None]:
# Shape of the label array for the first configured horizon.
target[horizons[0]].shape

In [None]:
# Column labels of the target frame; looked up as a module-level name by
# NeuralNetwork.print_error and NeuralNetwork.print_graphs.
column_names = list(data_target.columns)

In [None]:
class NeuralNetwork:
  """Stacked-LSTM regressor for a single forecast horizon.

  Intended call order: ``preprocessing()`` -> ``train()`` -> ``predict()`` ->
  ``print_error()`` / ``print_graphs()``. Each step stores its result on the
  instance for the next one.

  Args:
    data: array of input samples; ``train()`` uses ``shape[1]`` and
      ``shape[2]`` of the training part as the LSTM input shape.
    target: array of labels aligned with ``data`` on the first axis;
      ``shape[1]`` of the training part sets the number of output units.
    scaler: optional fitted scaler exposing ``inverse_transform``, used to map
      predictions and labels back from the scaled space. When omitted, the
      notebook-level ``scaler_target`` is used.
    column_names: optional list of labels, one per output column, used in the
      error report and plot titles. When omitted, the notebook-level
      ``column_names`` is used.
  """

  def __init__(self, data, target, scaler=None, column_names=None):
    self.data = data
    self.target = target
    self.scaler = scaler
    self.column_names = column_names
    # Filled by train(): the History object returned by Keras `fit`.
    self.history = None

  def _target_scaler(self):
    """Return the injected scaler, else the notebook-level `scaler_target`."""
    if self.scaler is not None:
      return self.scaler
    return scaler_target

  def _target_columns(self):
    """Return the injected labels, else the notebook-level `column_names`."""
    if self.column_names is not None:
      return self.column_names
    return column_names

  def preprocessing(self, horizon=1):
    """Split the samples 80/20 into train and test parts.

    Args:
      horizon: unused; kept so existing callers that pass it keep working.

    The four parts are stored in ``self.data_splited``.
    """
    # NOTE(review): the split is shuffled. If the samples are overlapping
    # sliding windows, neighbouring windows end up on both sides of the split;
    # consider a chronological split for an unbiased test error.
    data_train, data_test, target_train, target_test = train_test_split(
        self.data, self.target, shuffle=True, train_size=0.8, random_state=41)
    self.data_splited = {
        'data_train': data_train,
        'data_test': data_test,
        'target_train': target_train,
        'target_test': target_test,
    }

  def train(self):
    """Build and fit the LSTM on the training part.

    Requires ``preprocessing()`` to have been called. Stores the fitted model
    in ``self.regressor`` and the Keras training history in ``self.history``.
    """
    data_train = self.data_splited['data_train']
    target_train = self.data_splited['target_train']

    regressor = Sequential([
        LSTM(units=128, return_sequences=True,
             input_shape=(data_train.shape[1], data_train.shape[2])),
        Dropout(0.2),
        LSTM(units=512, return_sequences=True),
        Dropout(0.2),
        LSTM(units=256),
        Dropout(0.2),
        # One linear output unit per target column.
        Dense(units=target_train.shape[1]),
    ])

    regressor.compile(optimizer='adam', loss='mean_squared_error',
                      metrics=['mean_absolute_error'])

    callbacks = [
        # Watch the held-out loss produced by `validation_split` below.
        # Monitoring the training loss would leave the validation data unused
        # and restore the weights that fit the training set best.
        EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True),
    ]

    # Keep the history so loss curves can be inspected after training.
    self.history = regressor.fit(
        data_train, target_train,
        epochs=200, batch_size=128, shuffle=True,
        callbacks=callbacks, verbose=0, validation_split=0.1)

    self.regressor = regressor

  def predict(self):
    """Predict the test part and undo the target scaling.

    Requires ``train()`` to have been called. Stores the inverse-transformed
    predictions in ``self.predicted`` and the inverse-transformed labels in
    ``self.target_test``.
    """
    data_test = self.data_splited['data_test']
    target_test = self.data_splited['target_test']

    predicted = self.regressor.predict(data_test)

    scaler = self._target_scaler()
    self.predicted = scaler.inverse_transform(predicted)
    self.target_test = scaler.inverse_transform(target_test)

  def print_error(self):
    """Print the mean absolute error of every output column.

    Requires ``predict()`` to have been called.
    """
    print('MAE')
    for index, column in enumerate(self._target_columns()):
      mae = mean_absolute_error(self.target_test[:, index], self.predicted[:, index])
      print(column+': '+str(mae))

  def print_graphs(self, size=0.5):
    """Plot real versus predicted values, one figure per output column.

    Args:
      size: fraction of the test rows (taken from the start) that is plotted.

    Requires ``predict()`` to have been called.
    """
    target = self.target_test
    predicted = self.predicted
    # The slice lengths do not depend on the column, so compute them once.
    target_size = round(target.shape[0] * size)
    predicted_size = round(predicted.shape[0] * size)

    for index, column in enumerate(self._target_columns()):
      plt.figure(figsize=(30,6))
      plt.plot(target[:target_size, index], color = 'black', label = 'Real')
      plt.plot(predicted[:predicted_size, index], color = 'blue', label = 'Predito')
      plt.title(column)
      plt.xlabel('Time')
      plt.ylabel(column)
      plt.legend()
      plt.show()


In [None]:
# One NeuralNetwork per forecast horizon, keyed by the horizon value. Every
# network gets the same input windows and the label array of its own horizon.
networks = {horizon: NeuralNetwork(data, target[horizon]) for horizon in horizons}

# Create the train/test split of each network before any training starts.
for horizon, network in networks.items():
  network.preprocessing(horizon=horizon)

In [None]:
# Train the network of every horizon in turn and report how long each fit
# took, in whole seconds of wall-clock time.
for horizon in horizons:
  network = networks[horizon]
  print(f'Training Horizon {horizon}')
  start_time = time.time()
  network.train()
  end_time = time.time()
  print(f'Trained in: {int(end_time-start_time)}s')

In [None]:
# For every horizon: predict on the test part, then print the per-column MAE.
# predict() must run before print_error(), which reads the stored predictions.
for horizon in horizons:
  network = networks[horizon]
  print(f'Horizon {horizon}')
  network.predict()
  network.print_error()
  # Visual separator between horizons.
  print('\n', '-'*50, '\n')


In [None]:
# For every horizon: plot real versus predicted values for the first 30% of
# the test rows. Relies on predict() having been run for each network.
for horizon in horizons:
  network = networks[horizon]
  print(f'Horizon {horizon}')
  network.print_graphs(0.3)
  # Visual separator between horizons.
  print('\n', '-'*100, '\n')