In [23]:
from func.datePoint import datePoint
from func.util import getProfile, getDateList, getDateInput, readGridDataFromFile
from func.valueMaps import ValueMap

from datetime import datetime
import json
import math
import random
import time

import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from joblib import dump, load

In [19]:
def calculateRMSE(predicted, observed):
    if len(predicted) != len(observed):
        print("ERROR, two lists aren't the same length")
        
    RMSE = 0
    for valIndex, val in enumerate(predicted):
        RMSE = float(RMSE + (predicted[valIndex] - observed[valIndex])**2)
        
    RMSE = RMSE/len(predicted)
    return RMSE ** 0.5

In [3]:
def getTrainingRow(datePoint):
    row = [
        datePoint.dayoftheyear,
        datePoint.temperature,
        datePoint.irradiance,
        datePoint.pressure,
        datePoint.rainfall,
        datePoint.energyData["au.nem.nsw1.demand.energy (GWh)"]
    ]
    return row

In [4]:
def predict(xValue, scalerX, scalerY, svr):
    transformedVal = scalerX.transform(xValue)
    predictedVal = svr.predict(transformedVal)
    val = scalerY.inverse_transform(predictedVal)
    return val

In [5]:
%matplotlib qt 

In [6]:
profileName = "SRRPeriod"

with open("./config/download.json", "r", encoding="utf-8") as dataProfileFile:
    configData = json.load(dataProfileFile)

configData = configData[profileName]
dateList = getDateList(datetime.strptime(configData["startDate"], "%d/%m/%Y"), datetime.strptime(configData["endDate"], "%d/%m/%Y"))

for dateIndex, date in enumerate(dateList):
    dateList[dateIndex] = datePoint(date)

dataTypeCount = len(configData["dataTypes"])

In [7]:
startTime = time.time()
# Load data about the energy grid
print("Loading Grid Data")
if "grid" in configData["dataTypes"]:
    dataTypeCount = dataTypeCount - 1
    with open(f"./data/processed/grid/{profileName}.csv", "r") as gridDataFile:
        gridLines = gridDataFile.readlines()
        gridLabels = gridLines[0].split(",")[1:]
        

        for date in dateList:
            for rowIndex, row in enumerate(gridLines):
                if row.split(",")[0] == date.getDateStr():
                    dataArr = {}
                    for valIndex, val in enumerate(gridLabels):
                        dataArr[val] = float(row.split(",")[valIndex+1])
                    date.energyData = dataArr
                    gridLines.remove(row)
                    break
            # print(f"Loading Grid Data {str(round((rowIndex+1)/len(gridLines), 2) * 100)[:4]}%", end="\r", flush=True)
# Load Climate Data
print("Loading Climate Data")
if dataTypeCount > 0:
    with open(f"./data/processed/climate/{profileName}.csv", "r") as climateFile:
        climateData = climateFile.readlines()
        climateLabels = climateData[0].split(",")
        
        # Automatically determine the column each datatype is in
        dataIndexes = {}
        for dataType in configData["dataTypes"]:
            for labelIndex, label in enumerate(climateLabels):
                if dataType in label:
                    dataIndexes[dataType] = labelIndex
                    break
        
        # Validate that start and end dates are the same
        if not (climateData[1].split(",")[0] == dateList[0].getDateStr() and climateData[-1].split(",")[0] == dateList[-1].getDateStr()):
            print(f"FATAL ERROR\nThe Climate data and studied period do not have the same date range")
            print(f"Study Period date Range: {dateList[0].getDateStr()} - {dateList[-1].getDateStr()}")
            print(f"Climate Data date Range: {climateData[1].split(',')[0]} - {climateData[-1].split(',')[0]}")
            exit()

        # Load data
        climateData = climateData[1:] # Remove labels
        for rowIndex, row in enumerate(climateData):
            if row.split(",")[0] == dateList[rowIndex].getDateStr():
                for dataType in configData["dataTypes"]:
                    if dataType != "grid":
                        exec(f"dateList[{rowIndex}].{dataType} = float(row.split(',')[dataIndexes[dataType]])")

# Load Wind Data
print("Loading Wind Data")
if configData["windFile"] != "":
    with open(f"./data/processed/wind/{configData['windFile']}", "r") as windFile:
        windData = windFile.readlines()

        # Validate that start and end dates are the same
        if not(windData[1].split(",")[0] == dateList[0].getDateStr() and windData[-1].split(",")[0] == dateList[-1].getDateStr()):
            print(f"FATAL ERROR\nThe Wind data and studied period do not have the same date range")
            print(f"Study Period date Range: {dateList[0].getDateStr()} - {dateList[-1].getDateStr()}")
            print(f"Wind Data date Range: {windData[1].split(',')[0]} - {windData[-1].split(',')[0]}")
            exit()

        windData = windData[1:]
        for rowIndex, row in enumerate(windData):
            row = row.split(",")
            if row[0] == dateList[rowIndex].getDateStr():
                dateList[rowIndex].windspeed = float(row[1])
                dateList[rowIndex].windangle = float(row[4])

print(f"Loaded in {round(time.time() - startTime,3)} seconds")

Loading Grid Data
Loading Climate Data
Loading Wind Data
Loaded in 1.305 seconds


In [8]:
# Create Model to predict energy demand
testValCount = 400 # The number of data points that are EXCLUDED and used to assess the model
cVal = 1
gammaVal = 0.1
inputData = np.empty((len(dateList), 6))
for dateIndex, date in enumerate(dateList):
    inputData[dateIndex] = getTrainingRow(date)
    
np.random.shuffle(inputData)
trainingData, testData = inputData[:len(dateList)-testValCount,:], inputData[len(dateList)-testValCount:,:]

In [9]:
scalerX = StandardScaler()
scalerY = StandardScaler()
X = scalerX.fit_transform(trainingData[:, :5])
Y = scalerY.fit_transform(trainingData[:,5].reshape(-1, 1))

In [10]:
# create an SVR model with 
demandSVR = SVR(kernel='rbf', C=cVal, gamma=gammaVal) 

# train the model on the data 
demandSVR.fit(X, Y.ravel()) 

SVR(C=1, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=0.1,
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [11]:
x = []
predicted = []
observed = []

for rowIndex, row in enumerate(trainingData):
    x.append(row[0])
    observed.append(row[5])
    predictedVal = predict(
        row[:5].reshape(1, -1), 
        scalerX, 
        scalerY, 
        demandSVR
    )
    predicted.append(predictedVal)
print(calculateRMSE(predicted, observed))

9.855203022291082


In [13]:
testX = scalerX.fit_transform(testData[:, :5])
testY = scalerY.fit_transform(testData[:,5].reshape(-1, 1))

demandSVR.score(testX, testY)

0.6788775566848949

In [17]:
plt.scatter(x, observed, label="Observed")
plt.scatter(x, predicted, label="Predicted")
plt.xlabel("Day of the Year")
plt.ylabel("Energy Demand")
plt.legend()

<matplotlib.legend.Legend at 0x200db002bc8>

In [16]:
plt.scatter(observed, predicted)
plt.xlabel("Observed")
plt.ylabel("Predicted")

Text(0, 0.5, 'Predicted')

In [24]:
# Export Data
labels = [
    "Day of the Year",
    "Temperature",
    "Irradiance",
    "Pressure",
    "Rainfall",
    "Observed Demand",
    "Predicted Demand",
    "Delta"
]

with open(f"./data/SVM/{profileName}prediction.csv", "w") as predFile:
    predFile.write(",".join(labels)+"\n")
    for row in inputData:
        predictedVal = predict(
            row[:5].reshape(1, -1), 
            scalerX, 
            scalerY, 
            demandSVR
        )
        csvRow = ''
        for val in row:
            csvRow = csvRow + str(val) + ","
        csvRow = csvRow + str(predictedVal[0]) + ","
        csvRow = csvRow + str(predictedVal[0]-row[5]) + ","
        predFile.write(csvRow + "\n")

In [35]:
# Export Model
dump(demandSVR, f"data/SVM/{profileName}.joblib")
dump(scalerX, f"data/SVM/{profileName}scalerX.joblib")
dump(scalerY, f"data/SVM/{profileName}scalerY.joblib")

['data/SVM/SRRPeriodscalerY.joblib']

In [36]:
svr = load(f"data/SVM/{profileName}.joblib")

xScale = load(f"data/SVM/{profileName}scalerX.joblib")
yScale = load(f"data/SVM/{profileName}scalerY.joblib")

b = np.empty((1,5))
b[0] = getTrainingRow(dateList[0])[:5]
predict(
    b,
    xScale,
    yScale,
    svr
)

array([204.47853625])