# Table of contents
1. [Requirements](#Requirements)
2. [Introduction](#Introduction)
3. [Imports](#Imports)
    1. [Libraries](#Libraries)
    2. [Data](#Data)
4. [Data Exploration](#Data-Exploration)
5. [Modelling](#Modelling)
    1. [Baseline](#Baseline)
    2. [LSTM](#LSTM)
6. [Results Analysis](#Results-Analysis)

# Requirements

In [1]:
!pip install keras-tuner

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting keras-tuner
  Downloading keras_tuner-1.1.3-py3-none-any.whl (135 kB)
[K     |████████████████████████████████| 135 kB 4.6 MB/s 
Collecting kt-legacy
  Downloading kt_legacy-1.0.4-py3-none-any.whl (9.6 kB)
Collecting jedi>=0.10
  Downloading jedi-0.18.2-py2.py3-none-any.whl (1.6 MB)
[K     |████████████████████████████████| 1.6 MB 41.2 MB/s 
Installing collected packages: jedi, kt-legacy, keras-tuner
Successfully installed jedi-0.18.2 keras-tuner-1.1.3 kt-legacy-1.0.4


## Introduction

## Imports

### Libraries


In [2]:
import sympy
import pandas as pd
import tensorflow as tf
from keras.layers import LSTM, Dense, Dropout
from keras.models import Sequential
import numpy as np
import kerastuner as kt
from kerastuner.engine.hyperparameters import HyperParameters
from google.colab import drive
from datetime import datetime

  import kerastuner as kt


### Data

In [18]:
# Make Google Drive visible to the Colab runtime.
drive.mount("/content/drive")

# Folder holding the dataset CSVs; it must point at the dataset directory on Drive.
path = "/content/drive/MyDrive/IMS DLNN/Projeto/"

# Read the hourly consumption and weather tables from that folder.
hourly_energy_consumption = pd.read_csv(f"{path}consumption.csv")
hourly_weather = pd.read_csv(f"{path}weather.csv")



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Data Exploration

In [19]:
# Parse the timestamp strings into datetimes.
hourly_energy_consumption["utc_timestamp"] = pd.to_datetime(hourly_energy_consumption["utc_timestamp"])

# Per the original note, the following columns are entirely NaN and therefore not useful.
# Dropping into a new frame with errors="ignore" (instead of inplace=True) keeps this
# cell re-runnable: a second execution no longer raises KeyError for columns that
# were already removed.
hourly_energy_consumption = hourly_energy_consumption.drop(
    columns=[
        "cet_cest_timestamp",
        "PT_wind_generation_actual",
        "PT_wind_offshore_generation_actual",
    ],
    errors="ignore",
)

# Back-fill gaps in the actual load with the next valid observation.
hourly_energy_consumption["PT_load_actual_entsoe_transparency"] = hourly_energy_consumption["PT_load_actual_entsoe_transparency"].bfill()

In [20]:
# Dimensions (rows, columns) of the consumption frame.
hourly_energy_consumption.shape

(43823, 5)

In [21]:
# Preview the first rows of the consumption frame.
hourly_energy_consumption.head()

Unnamed: 0,utc_timestamp,PT_load_actual_entsoe_transparency,PT_load_forecast_entsoe_transparency,PT_solar_generation_actual,PT_wind_onshore_generation_actual
0,2015-01-01 00:00:00+00:00,5123.9,,,
1,2015-01-01 01:00:00+00:00,5123.9,4820.0,,551.0
2,2015-01-01 02:00:00+00:00,4771.1,4521.0,,596.5
3,2015-01-01 03:00:00+00:00,4443.5,4250.0,,706.3
4,2015-01-01 04:00:00+00:00,4234.9,4083.0,,720.5


In [22]:
# Parse the weather timestamps into datetimes, mirroring the conversion applied to
# the consumption frame's "utc_timestamp" column.
hourly_weather["utc_timestamp"] = pd.to_datetime(hourly_weather["utc_timestamp"])

In [23]:
# Dimensions (rows, columns) of the weather frame.
hourly_weather.shape

(43824, 4)

In [24]:
# Preview the first rows of the weather frame.
hourly_weather.head()

Unnamed: 0,utc_timestamp,PT_temperature,PT_radiation_direct_horizontal,PT_radiation_diffuse_horizontal
0,2015-01-01 00:00:00+00:00,5.584,0.0,0.0
1,2015-01-01 01:00:00+00:00,5.219,0.0,0.0
2,2015-01-01 02:00:00+00:00,5.034,0.0,0.0
3,2015-01-01 03:00:00+00:00,4.708,0.0,0.0
4,2015-01-01 04:00:00+00:00,4.353,0.0,0.0


In [None]:
def load_data(
    energy_cons,
    weather,
    feature_cols=None,
    target_col="PT_load_actual_entsoe_transparency",
    n_timesteps=24,
    horizon=24,
    val_fraction=0.2,
):
    """Merge consumption and weather data and build windowed train/validation sets.

    The two frames are inner-joined on "utc_timestamp" and sorted chronologically.
    Each sample pairs ``n_timesteps`` consecutive rows of the feature columns with
    the ``horizon`` values of the target column that immediately follow them.
    The split is chronological: the earliest windows form the training set and the
    latest ones the validation set (no shuffling).

    Args:
        energy_cons: DataFrame with a "utc_timestamp" column and the target column.
        weather: DataFrame with a "utc_timestamp" column and the weather features.
        feature_cols: Columns used as model inputs. Defaults to the target column,
            "PT_temperature" and "PT_radiation_direct_horizontal".
            NOTE(review): the choice of the direct (rather than diffuse) radiation
            column as the default is an assumption -- confirm.
        target_col: Column to forecast.
        n_timesteps: Length of each input window, in rows.
        horizon: Number of future target values predicted per sample.
        val_fraction: Fraction of the windows reserved for validation; must lie
            strictly between 0 and 1.

    Returns:
        Tuple ``(X_train, y_train, X_val, y_val)`` of NumPy arrays. ``X_*`` has shape
        (samples, n_timesteps, len(feature_cols)); ``y_*`` has shape (samples, horizon).

    Raises:
        ValueError: If ``val_fraction`` is out of range or the merged data is too
            short to yield at least one training and one validation window.
    """
    if not 0 < val_fraction < 1:
        raise ValueError("val_fraction must be strictly between 0 and 1")
    if feature_cols is None:
        feature_cols = [target_col, "PT_temperature", "PT_radiation_direct_horizontal"]

    final_df = (
        pd.merge(energy_cons, weather, on="utc_timestamp")
        .sort_values("utc_timestamp")
        .reset_index(drop=True)
    )

    # Fill any remaining gaps from neighbouring rows so no window contains NaN
    # (a single NaN would turn the training loss into NaN).
    features = final_df[list(feature_cols)].ffill().bfill().to_numpy(dtype=float)
    target = final_df[target_col].ffill().bfill().to_numpy(dtype=float)

    n_windows = len(final_df) - n_timesteps - horizon + 1
    if n_windows < 2:
        raise ValueError(
            "not enough rows to build at least one training and one validation window"
        )

    # Row i of the index matrices selects the rows belonging to window i.
    starts = np.arange(n_windows)[:, None]
    X = features[starts + np.arange(n_timesteps)]
    y = target[starts + n_timesteps + np.arange(horizon)]

    # Chronological split, clamped so that both sides are non-empty.
    split = int(n_windows * (1 - val_fraction))
    split = min(max(split, 1), n_windows - 1)
    return X[:split], y[:split], X[split:], y[split:]


# Split the data: load_data is expected to return, in this order, the training
# inputs/targets followed by the validation inputs/targets.
X_train, y_train, X_val, y_val = load_data(hourly_energy_consumption, hourly_weather)

## Modelling

In [None]:
# Prepare the variables needed by the deep-learning modelling step.

# Number of time steps (consecutive rows) in each input window
n_timesteps = 24

# Columns used as model inputs: load, temperature and radiation.
# NOTE(review): the weather table has both a direct and a diffuse radiation column;
# the direct one is assumed here -- confirm.
feature_columns = [
    "PT_load_actual_entsoe_transparency",
    "PT_temperature",
    "PT_radiation_direct_horizontal",
]
# Number of features
n_features = len(feature_columns)

# Align both tables on the shared timestamp and keep chronological order.
# (The previous version assigned whole DataFrames and undefined names into X.)
merged_features = (
    pd.merge(hourly_energy_consumption, hourly_weather, on="utc_timestamp")
    .sort_values("utc_timestamp")[feature_columns]
    .to_numpy(dtype=float)
)

# Number of samples: one per complete window of n_timesteps consecutive rows
n_samples = len(merged_features) - n_timesteps + 1

# Reshape the data into a 3D array (samples, time steps, features) to feed the
# neural networks; row i of window_index lists the rows that make up window i.
window_index = np.arange(n_samples)[:, None] + np.arange(n_timesteps)
X = merged_features[window_index]

### Baseline
#### Persistence

A persistência é o método de baseline mais considerado e que, como o nome indica, considera que o valor para o futuro é igual à última observação. Pode ser denotado pela seguinte equação:

$T_{t+1} = T_{t}$

In [None]:
# Build the persistence forecast matrix: every column of a row repeats the value
# found in that row's last column of dfmerged.
# dfmerged is expected to have one row per forecast to produce and 24 columns.
# NOTE(review): dfmerged is not defined in the visible cells -- confirm where it is built.
last_observed = dfmerged.iloc[:, -1].to_numpy(dtype=float)
# NumPy arrays have no .iloc accessor (the previous loop raised AttributeError),
# so the matrix is built in one vectorised step instead.
persistence_forecasts = np.repeat(last_observed[:, np.newaxis], dfmerged.shape[1], axis=1)

### LSTM

In [None]:
# Define the model
def build_model(hp):
    """Build and compile the LSTM network for one hyperparameter configuration.

    The network is LSTM -> Dropout -> Dense(24). The LSTM input shape is taken from
    the module-level ``X_train`` (its second and third dimensions).

    Args:
        hp: Hyperparameter container exposing ``Int``, ``Float`` and ``Choice``.
            The hyperparameters read are 'units', 'dropout', 'optimizer' and
            'learning_rate'.

    Returns:
        The compiled model, using mean squared error as the loss.
    """
    lstm_units = hp.Int('units', min_value=32, max_value=256, step=32)
    window_shape = (X_train.shape[1], X_train.shape[2])
    dropout_rate = hp.Float('dropout', min_value=0.0, max_value=0.5, step=0.1)

    model = Sequential()
    model.add(LSTM(units=lstm_units, input_shape=window_shape))
    model.add(Dropout(dropout_rate))
    model.add(Dense(24))

    # Pick the optimizer class by name; any name other than 'adam' or 'sgd'
    # falls back to RMSprop.
    optimizer_name = hp.Choice('optimizer', ['adam', 'sgd','rmsprop'])
    learning_rate = hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='log')
    optimizer_classes = {
        'adam': tf.keras.optimizers.Adam,
        'sgd': tf.keras.optimizers.SGD,
    }
    optimizer_cls = optimizer_classes.get(optimizer_name, tf.keras.optimizers.RMSprop)

    # Compile the model
    model.compile(optimizer=optimizer_cls(learning_rate), loss='mean_squared_error')
    return model
# Define the search space for Keras Tuner
hps = HyperParameters()
hps.Choice('batch_size', [32, 64, 128, 256])
# NOTE: an 'activation' choice used to be declared here, but build_model never reads
# it, so it only added a no-op dimension to the search. Declare it again once
# build_model consumes it.


class _BatchSizeTunedModel(kt.HyperModel):
    """Hypermodel that builds with build_model and trains with the tuned batch size."""

    def build(self, hp):
        return build_model(hp)

    def fit(self, hp, model, *args, **kwargs):
        # Feed the sampled 'batch_size' into training; previously a fixed
        # batch_size=1 was passed to search(), so the declared choice had no effect.
        return model.fit(*args, batch_size=hp.get('batch_size'), **kwargs)


# Use the Keras Tuner to search for the best set of hyperparameters.
# The second positional argument of Hyperband is `objective`, so the search space
# must be passed by keyword. Hyperband decides the epochs of each trial itself,
# so the epoch budget is given as max_epochs rather than search(epochs=...).
tuner = kt.Hyperband(
    _BatchSizeTunedModel(),
    objective='val_loss',
    max_epochs=100,
    hyperparameters=hps,
)
tuner.search(X_train, y_train, validation_data=(X_val, y_val))
# Get the best model from the search (get_best_models returns a list)
best_model = tuner.get_best_models(num_models=1)[0]
# Use the best model to make predictions.
# NOTE(review): no X_test is created in the visible cells (load_data only yields
# training and validation data), so the validation inputs are used here.
# TODO: switch to a held-out test set once one exists.
y_pred = best_model.predict(X_val)


## Results Analysis