In [121]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras

# Public-holiday calendar: every column is read as text, then 'Date' is
# converted to datetime64[ns] so the `.dt` accessor can be used on it.
holiday_path = '../data/australian-public-holidays-combined-2021-2024.csv'
holiday_df = pd.read_csv(holiday_path, dtype='str')
holiday_df = holiday_df.assign(
    Date=holiday_df['Date'].astype('datetime64[ns]'))

# Keep only the rows whose jurisdiction code is 'sa', and only the two
# columns needed to flag holidays.
sa_rows = holiday_df['Jurisdiction'].eq('sa')
holidays = holiday_df.loc[sa_rows, ['Date', 'Holiday Name']]

# Load the merged dataset and derive calendar features from its timestamp.
data_path = '../data/sa/merged.csv'
df = pd.read_csv(data_path)

# pd.to_datetime rather than .astype('datetime64'): the unit-less
# 'datetime64' cast raises a TypeError on pandas >= 2.0.
df['datetime'] = pd.to_datetime(df['datetime'])

# One calendar component per column, all read from the same `.dt` accessor.
dt = df['datetime'].dt
df['year'] = dt.year
df['month'] = dt.month
df['day'] = dt.day
df['hour'] = dt.hour
df['minute'] = dt.minute
df['day_of_week'] = dt.day_of_week
# ISO week number: the first days of January can belong to week 52/53 of the
# previous ISO year (e.g. 2021-01-01 is week 53).
df['week'] = dt.isocalendar().week
# 1 when the row's calendar date appears in `holidays`, else 0.
df['holiday'] = dt.date.isin(holidays['Date'].dt.date).astype('int')

# Drop everything before 2021 and renumber rows from 0 so positional and
# label-based indexing agree afterwards.
df = df[df['datetime'] >= '2021-01-01'].reset_index(drop=True)
df.head()

Unnamed: 0,datetime,tempc,cloud8,windk,wdir,humid,rainmm,radkjm2,pv_est,net_load,total_load,year,month,day,hour,minute,day_of_week,week,holiday
0,2021-01-01 00:00:00,18.2,0.0,8.0,110.0,61.0,0.0,0.0,0.0,1409.0,1181.115,2021,1,1,0,0,4,53,1
1,2021-01-01 01:00:00,17.9,1.0,7.0,100.0,61.0,0.0,0.0,0.0,1430.0,1203.7,2021,1,1,1,0,4,53,1
2,2021-01-01 02:00:00,18.4,2.0,2.0,40.0,54.0,0.0,0.0,0.0,1297.5,1089.315,2021,1,1,2,0,4,53,1
3,2021-01-01 03:00:00,16.0,4.0,5.0,290.0,68.0,0.0,0.0,0.0,1235.5,1055.67,2021,1,1,3,0,4,53,1
4,2021-01-01 04:00:00,17.6,5.0,2.0,280.0,75.0,0.0,0.0,0.0,1185.5,1033.13,2021,1,1,4,0,4,53,1


In [122]:
from sklearn.preprocessing import StandardScaler

# Index label of the first row dated on/after 2023-01-01.
# NOTE(review): presumably rows before this label form the training set -
# confirm against the code that consumes `train_split`. If `df` has no rows
# on/after 2023-01-01 this evaluates to NaN rather than an integer.
train_split = df.index[df['datetime'] >= '2023-01-01'].min()
learning_rate = 0.001
batch_size = 24*7  # 168 samples; one week if the data is hourly - TODO confirm
epochs = 10

# Columns to standardise; 'net_load' is deliberately first, so it ends up as
# column 0 of `features`.
selected_features = [ 'net_load',
    'tempc', 'cloud8', 'windk', 'wdir', 'humid', 
    'rainmm', 'radkjm2', 'year', 'month', 'day',
    'hour', 'day_of_week', 'week']

# Cast to float and replace missing values with 0 before scaling.
features = df[selected_features].astype('float').fillna(0)

# StandardScaler must be instantiated and then fitted: passing the data to the
# constructor (as the previous code did) never scales anything and fails.
# NOTE(review): the scaler is fitted on every row, including those at/after
# `train_split`; consider fitting on the training rows only to avoid leakage.
scaled_values = StandardScaler().fit_transform(features.values)

# Rebuild a frame with positional integer column labels 0..len-1, then append
# the unscaled 0/1 holiday flag as the next column (label 14 for the current
# feature list).
features = pd.DataFrame(scaled_values)
features[len(selected_features)] = df['holiday'].values
features.head()


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
0,0.221351,0.312995,-1.879236,-0.304198,-0.494113,-0.223635,-0.257309,-0.70294,-1.0,-1.602745,-1.673503,-1.661325,0.498202,1.755417,1
1,0.27695,0.257788,-1.465537,-0.510099,-0.594978,-0.223635,-0.257309,-0.70294,-1.0,-1.602745,-1.673503,-1.516862,0.498202,1.755417,1
2,-0.073852,0.3498,-1.051839,-1.539606,-1.200169,-0.605058,-0.257309,-0.70294,-1.0,-1.602745,-1.673503,-1.372399,0.498202,1.755417,1
3,-0.238001,-0.091856,-0.224441,-0.921902,1.321461,0.157788,-0.257309,-0.70294,-1.0,-1.602745,-1.673503,-1.227936,0.498202,1.755417,1
4,-0.370379,0.202581,0.189258,-1.539606,1.220596,0.539211,-0.257309,-0.70294,-1.0,-1.602745,-1.673503,-1.083473,0.498202,1.755417,1
