In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import os
import glob

from RNN import RNN
#from CNN import CNN
from Transformer import Transformer

from utils import series_to_supervised
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)

In [2]:
dataset = pd.read_csv('df_final.csv', header=0, infer_datetime_format=True, parse_dates=['datetime'], index_col=['datetime'])

# resample data to daily
daily_groups = dataset.resample('H')
daily_data = daily_groups.sum()

# We choose to keep only Global_active_power
to_drop = ['Wind Direction (deg)','h_max','Barometric Pressure (hPa)','Sea Level Pressure (hPa)','Precipitation (mm)']
daily_data.drop(columns=to_drop, inplace=True)

In [3]:
daily_data.head

<bound method NDFrame.head of                      Temperature (°C)  Wind Speed ​​(m/s)  Humidity (%)  \
datetime                                                                  
2021-01-01 00:00:00             -4.30                4.70         57.00   
2021-01-01 01:00:00             -4.80                3.60         57.05   
2021-01-01 02:00:00             -5.20                3.15         59.35   
2021-01-01 03:00:00             -5.10                4.05         60.45   
2021-01-01 04:00:00             -4.80                4.05         59.30   
...                               ...                 ...           ...   
2022-06-30 19:00:00             29.45                3.40         66.20   
2022-06-30 20:00:00             27.70                3.70         72.35   
2022-06-30 21:00:00             26.60                2.80         76.50   
2022-06-30 22:00:00             26.00                3.40         77.85   
2022-06-30 23:00:00             24.75                1.90         83.9

In [4]:

# add calendar-related features
daily_data['hour'] = pd.DatetimeIndex(daily_data.index).hour
daily_data['day'] = pd.DatetimeIndex(daily_data.index).day
daily_data['weekday'] = ((pd.DatetimeIndex(daily_data.index).dayofweek) // 5 == 1).astype(float)
daily_data['season'] = [month%12 // 3 + 1 for month in pd.DatetimeIndex(daily_data.index).month]

# summarize
print(daily_data.info())
#print(daily_data.head())


<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 13104 entries, 2021-01-01 00:00:00 to 2022-06-30 23:00:00
Freq: H
Data columns (total 10 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Temperature (°C)              13104 non-null  float64
 1   Wind Speed ​​(m/s)            13104 non-null  float64
 2   Humidity (%)                  13104 non-null  float64
 3   Insolation (MJ/m^2) per hour  13104 non-null  float64
 4   Sunlight (Sec) per hour       13104 non-null  float64
 5   P(kWh)                        13104 non-null  float64
 6   hour                          13104 non-null  int64  
 7   day                           13104 non-null  int64  
 8   weekday                       13104 non-null  float64
 9   season                        13104 non-null  int64  
dtypes: float64(7), int64(3)
memory usage: 1.1 MB
None


In [5]:
look_back = 168
n_features = daily_data.shape[1]
n_features

10

In [6]:

# Walk-forward data split to avoid data leakage
X_train, y_train, X_test, y_test, scale_X = series_to_supervised(daily_data, train_size=0.8, n_in=look_back, n_out=24, target_column='P(kWh)', dropnan=True, scale_X=True)

# reshape input to be 3D [samples, timesteps, features]
X_train_reshaped = X_train.values.reshape((-1,look_back,n_features))
X_test_reshaped = X_test.values.reshape((-1,look_back,n_features))

y_train_reshaped = y_train.values
y_test_reshaped = y_test.values

In [7]:
X_train_reshaped.shape

(10330, 168, 10)

In [8]:
rnn = RNN()
rnn.train(X_train_reshaped,y_train_reshaped)



Search: Running Trial #1

Value             |Best Value So Far |Hyperparameter
184               |?                 |input_unit
6                 |?                 |num_layers
248               |?                 |units_0
relu              |?                 |act_0
120               |?                 |units_1
tanh              |?                 |act_1
376               |?                 |layer_2_neurons
0.3               |?                 |Dropout_rate

Epoch 1/5

In [None]:
_, rmse_result, mae_result, smape_result, r2_result = rnn.evaluate(X_test_reshaped,y_test_reshaped)



In [None]:

print('Result \n RMSE = %.2f [kWh] \n MAE = %.2f [kWh]\n R2 = %.1f [%%]' % (rmse_result,
                                                                            mae_result,
                                                                            r2_result*100))

Result 
 RMSE = 24.63 [kWh] 
 MAE = 18.62 [kWh]
 R2 = 17.8 [%]
