In [60]:
import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import seaborn as sns
import custom_module as cm

In [61]:
# Loading the dataset
data = pd.read_csv("/Users/ahsan/Dropbox/My Mac (Alnoors-MBP-2.hub)/Downloads/combined_data.csv", parse_dates=True)

In [62]:
data["SETTLEMENTDATE"] = pd.to_datetime(data["SETTLEMENTDATE"])
data.index = data["SETTLEMENTDATE"]
data.drop(columns="SETTLEMENTDATE", inplace=True)

In [63]:
# The Real Test Data Set To Test Later #
X_test = data["2019-01-01":"2019-06-30"].copy()
X_test = X_test["RRP5MIN"]

In [64]:
# replace outliers by outlier threshold
data = cm.replace_outliers(data, 'RRP5MIN', 5)

In [65]:
data

Unnamed: 0_level_0,RRP5MIN,RESIDUAL_DEMAND
SETTLEMENTDATE,Unnamed: 1_level_1,Unnamed: 2_level_1
2010-10-20 00:00:00,1.963450,1296.50
2010-10-20 00:05:00,1.955553,1275.46
2010-10-20 00:10:00,1.946951,1268.40
2010-10-20 00:15:00,1.953207,1267.73
2010-10-20 00:20:00,1.835252,1261.69
...,...,...
2019-06-30 23:40:00,5.998145,546.95
2019-06-30 23:45:00,5.961220,560.19
2019-06-30 23:50:00,5.066510,559.87
2019-06-30 23:55:00,5.002401,547.07


In [66]:
# Scaling the RRP between 0 and 1 as required by the NN
scaler = MinMaxScaler()
data["RRP5MIN"] = scaler.fit_transform(data["RRP5MIN"].values.reshape(-1,1))

In [67]:
train = data.loc["2017":].copy()

In [68]:
# include time lags of timeseries data for 3 days = 864
# We will use 3 days data to identify patterns to predict the next day

lags = 864
for i in range(1,lags+1):
    train["l_{}".format(i)] = train["RRP5MIN"].shift(i)

In [69]:
# Drop NANS
train.dropna(inplace=True)
train.tail(5)

Unnamed: 0_level_0,RRP5MIN,RESIDUAL_DEMAND,l_1,l_2,l_3,l_4,l_5,l_6,l_7,l_8,...,l_855,l_856,l_857,l_858,l_859,l_860,l_861,l_862,l_863,l_864
SETTLEMENTDATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-06-30 23:40:00,0.532301,546.95,0.535514,0.50441,0.500036,0.497298,0.517292,0.504057,0.498771,0.472195,...,0.475404,0.0,0.385198,0.459936,0.459342,0.459893,0.437942,0.468562,0.467259,0.478711
2019-06-30 23:45:00,0.531433,560.19,0.532301,0.535514,0.50441,0.500036,0.497298,0.517292,0.504057,0.498771,...,0.464692,0.475404,0.0,0.385198,0.459936,0.459342,0.459893,0.437942,0.468562,0.467259
2019-06-30 23:50:00,0.510394,559.87,0.531433,0.532301,0.535514,0.50441,0.500036,0.497298,0.517292,0.504057,...,0.463457,0.464692,0.475404,0.0,0.385198,0.459936,0.459342,0.459893,0.437942,0.468562
2019-06-30 23:55:00,0.508886,547.07,0.510394,0.531433,0.532301,0.535514,0.50441,0.500036,0.497298,0.517292,...,0.477038,0.463457,0.464692,0.475404,0.0,0.385198,0.459936,0.459342,0.459893,0.437942
2019-07-01 00:00:00,0.510313,570.31,0.508886,0.510394,0.531433,0.532301,0.535514,0.50441,0.500036,0.497298,...,0.467286,0.477038,0.463457,0.464692,0.475404,0.0,0.385198,0.459936,0.459342,0.459893


In [70]:
# create feature and label dataframes
prelim_features = train.drop(['RRP5MIN', 'RESIDUAL_DEMAND'], axis=1)
prelim_labels = pd.DataFrame(train[['RRP5MIN']])


In [71]:
# format labels to 24 hour output range
for i in range(0, 288):
    prelim_labels['t_{}'.format(i)] = prelim_labels['RRP5MIN'].shift(-i)
prelim_labels.drop(['RRP5MIN'], axis=1, inplace=True)

# apply one-day discretization to the data
labels = prelim_labels[prelim_labels.index.minute == 0]
labels = labels[labels.index.hour == 0]
features = prelim_features[prelim_features.index.minute == 0]
features = features[features.index.hour == 0]

features_train = features[:'2018']
features_test = features['2019':'2019-06-30']
labels_train = labels[:'2018']

samples_train = len(features_train)
samples_test = len(features_test)
timesteps = 864

# convert pandas data frames to numpy ndarrays
features_train = features_train.to_numpy().reshape(samples_train, timesteps, 1)
features_test = features_test.to_numpy().reshape(samples_test, timesteps, 1)
labels_train = labels_train.to_numpy()

# check for correct data shape
features_train.shape, labels_train.shape

((727, 864, 1), (727, 288))

In [72]:
from keras.models import Model, load_model
from keras.layers.convolutional import Conv1D
from keras.callbacks import ModelCheckpoint
from keras.regularizers import l1_l2

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
import json

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout

In [73]:
# split into training and validation data
X_train, X_valid, y_train, y_valid = train_test_split(features_train, labels_train, test_size=0.2, random_state=7)

In [74]:
###################### DESIGNING THE NN ###################
##########################################################
## 1D Convolution layer to avoid overfitting
## 3 layers of LSTM considering the complexity of the dataset

# Initialising
rnn = Sequential()
# Adding Conv1D Layer
#rnn.add(Conv1D(64, kernel_size=288, strides=288, padding='valid', input_shape=(X_train.shape[1],1)))
# Add LSTM layer 1st
rnn.add(LSTM(128, recurrent_activation='relu', return_sequences=True))
rnn.add(Dropout(0.1))
# Add LSTM layer 2nd
rnn.add(LSTM(64, recurrent_activation='relu'))
rnn.add(Dropout(0.1))
rnn.add(Dense(units=288))
rnn.compile(optimizer='adam', loss='mse')

In [None]:
# train the model and calculate the performance on the test set
results, hist = cm.train_predict_evaluate(rnn, X_train, X_valid, y_train, y_valid, features_test,
                                       X_test.to_numpy().flatten(), X_test.index, scaler, 32, 200, 
                                       'simple_neural_network.hdf5', verbose=1)

Epoch 1/200

In [None]:
f, ax = plt.subplots(figsize=(12, 6))
results.loc['2019-01-01':'2019-01-04'].plot(ax=ax);
ax.set_ylabel('Day-Ahead price in $/MWh', fontsize=14)
ax.set_xlabel('Date', fontsize=14)
ax.set_title('Short Term predictive Performance', fontsize=14);

In [None]:
results

In [None]:
cm.quantify_performance(results)