In [2]:
from func.datePoint import datePoint
from func.util import getProfile, getDateList, getDateInput, readGridDataFromFile
from func.valueMaps import ValueMap

from datetime import datetime
import json
import math
import random
import time

import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR

In [3]:
def calculateRMSE(predicted, observed):
    if len(predicted) != len(observed):
        print("ERROR, two lists aren't the same length")
        
    RMSE = 0
    for valIndex, val in enumerate(predicted):
        RMSE = float(RMSE + (predicted[valIndex] - observed[valIndex])**2)
        
    RMSE = RMSE/len(predicted)
    return RMSE ** 0.5

In [4]:
def getTrainingRow(datePoint):
    row = [
        datePoint.dayoftheyear,
        datePoint.temperature,
        datePoint.irradiance,
        datePoint.pressure,
        datePoint.rainfall,
        datePoint.energyData["au.nem.nsw1.demand.energy (GWh)"]
    ]
    return row

In [19]:
def predict(xValue, scalerX, scalerY, svr):
    print(xValue)
    transformedVal = scalerX.transform(xValue)
    predictedVal = svr.predict(transformedVal)
    val = scalerY.inverse_transform(predictedVal)
    return val

In [6]:
%matplotlib qt 

In [7]:
profileName = "SRRPeriod"

with open("./config/download.json", "r", encoding="utf-8") as dataProfileFile:
    configData = json.load(dataProfileFile)

configData = configData[profileName]
dateList = getDateList(datetime.strptime(configData["startDate"], "%d/%m/%Y"), datetime.strptime(configData["endDate"], "%d/%m/%Y"))

for dateIndex, date in enumerate(dateList):
    dateList[dateIndex] = datePoint(date)

dataTypeCount = len(configData["dataTypes"])

In [8]:
startTime = time.time()
# Load data about the energy grid
print("Loading Grid Data")
if "grid" in configData["dataTypes"]:
    dataTypeCount = dataTypeCount - 1
    with open(f"./data/processed/grid/{profileName}.csv", "r") as gridDataFile:
        gridLines = gridDataFile.readlines()
        gridLabels = gridLines[0].split(",")[1:]
        

        for date in dateList:
            for rowIndex, row in enumerate(gridLines):
                if row.split(",")[0] == date.getDateStr():
                    dataArr = {}
                    for valIndex, val in enumerate(gridLabels):
                        dataArr[val] = float(row.split(",")[valIndex+1])
                    date.energyData = dataArr
                    gridLines.remove(row)
                    break
            # print(f"Loading Grid Data {str(round((rowIndex+1)/len(gridLines), 2) * 100)[:4]}%", end="\r", flush=True)
# Load Climate Data
print("Loading Climate Data")
if dataTypeCount > 0:
    with open(f"./data/processed/climate/{profileName}.csv", "r") as climateFile:
        climateData = climateFile.readlines()
        climateLabels = climateData[0].split(",")
        
        # Automatically determine the column each datatype is in
        dataIndexes = {}
        for dataType in configData["dataTypes"]:
            for labelIndex, label in enumerate(climateLabels):
                if dataType in label:
                    dataIndexes[dataType] = labelIndex
                    break
        
        # Validate that start and end dates are the same
        if not (climateData[1].split(",")[0] == dateList[0].getDateStr() and climateData[-1].split(",")[0] == dateList[-1].getDateStr()):
            print(f"FATAL ERROR\nThe Climate data and studied period do not have the same date range")
            print(f"Study Period date Range: {dateList[0].getDateStr()} - {dateList[-1].getDateStr()}")
            print(f"Climate Data date Range: {climateData[1].split(',')[0]} - {climateData[-1].split(',')[0]}")
            exit()

        # Load data
        climateData = climateData[1:] # Remove labels
        for rowIndex, row in enumerate(climateData):
            if row.split(",")[0] == dateList[rowIndex].getDateStr():
                for dataType in configData["dataTypes"]:
                    if dataType != "grid":
                        exec(f"dateList[{rowIndex}].{dataType} = float(row.split(',')[dataIndexes[dataType]])")

# Load Wind Data
print("Loading Wind Data")
if configData["windFile"] != "":
    with open(f"./data/processed/wind/{configData['windFile']}", "r") as windFile:
        windData = windFile.readlines()

        # Validate that start and end dates are the same
        if not(windData[1].split(",")[0] == dateList[0].getDateStr() and windData[-1].split(",")[0] == dateList[-1].getDateStr()):
            print(f"FATAL ERROR\nThe Wind data and studied period do not have the same date range")
            print(f"Study Period date Range: {dateList[0].getDateStr()} - {dateList[-1].getDateStr()}")
            print(f"Wind Data date Range: {windData[1].split(',')[0]} - {windData[-1].split(',')[0]}")
            exit()

        windData = windData[1:]
        for rowIndex, row in enumerate(windData):
            row = row.split(",")
            if row[0] == dateList[rowIndex].getDateStr():
                dateList[rowIndex].windspeed = float(row[1])
                dateList[rowIndex].windangle = float(row[4])

print(f"Loaded in {round(time.time() - startTime,3)} seconds")

Loading Grid Data
Loading Climate Data
Loading Wind Data
Loaded in 0.235 seconds


In [9]:
# Create Model to predict energy demand
testValCount = 400 # The number of data points that are EXCLUDED and used to assess the model
cVal = 1
gammaVal = 0.1
inputData = np.empty((len(dateList), 6))
for dateIndex, date in enumerate(dateList):
    inputData[dateIndex] = getTrainingRow(date)
    
np.random.shuffle(inputData)
trainingData, testData = inputData[:len(dateList)-testValCount,:], inputData[len(dateList)-testValCount:,:]

In [10]:
scalerX = StandardScaler()
scalerY = StandardScaler()
X = scalerX.fit_transform(trainingData[:, :5])
Y = scalerY.fit_transform(trainingData[:,5].reshape(-1, 1))

In [11]:
# create an SVR model with 
demandSVR = SVR(kernel='rbf', C=cVal, gamma=gammaVal) 

# train the model on the data 
demandSVR.fit(X, Y.ravel()) 

SVR(C=1, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=0.1,
  kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [12]:
x = []
predicted = []
observed = []

for rowIndex, row in enumerate(trainingData):
    x.append(row[0])
    observed.append(row[5])
    predictedVal = predict(
        row[:5].reshape(1, -1), 
        scalerX, 
        scalerY, 
        demandSVR
    )
    predicted.append(predictedVal)
print(calculateRMSE(predicted, observed))

9.980325156790848


In [13]:
testX = scalerX.fit_transform(testData[:, :5])
testY = scalerY.fit_transform(testData[:,5].reshape(-1, 1))

demandSVR.score(testX, testY)

0.7121056580528614

In [14]:
plt.scatter(x, observed, label="Observed")
plt.scatter(x, predicted, label="Predicted")
plt.xlabel("Day of the Year")
plt.ylabel("Energy Demand")
plt.legend()

<matplotlib.legend.Legend at 0x28eeaa45198>

In [15]:
plt.scatter(observed, predicted)
plt.xlabel("Observed")
plt.ylabel("Predicted")

Text(38.2222,0.5,'Predicted')

In [20]:
# Export Data
labels = [
    "Day of the Year",
    "Temperature",
    "Irradiance",
    "Pressure",
    "Rainfall",
    "Observed Demand",
    "Predicted Demand",
    "Delta"
]

with open(f"./data/SVM/{profileName}prediction.csv", "w") as predFile:
    predFile.write(",".join(labels)+"\n")
    for row in inputData:
        predictedVal = predict(
            row[:5].reshape(1, -1), 
            scalerX, 
            scalerY, 
            demandSVR
        )
        csvRow = ''
        for val in row:
            csvRow = csvRow + str(val) + ","
        csvRow = csvRow + str(predictedVal[0]) + ","
        csvRow = csvRow + str(predictedVal[0]-row[5]) + ","
        predFile.write(csvRow + "\n")

[[2.78000000e+02 2.03314356e+01 2.25331479e+01 8.62744598e+00
  2.45572673e-01]]
[[3.48000000e+02 2.80647501e+01 2.73046167e+01 1.69548052e+01
  6.26083612e-02]]
[[214.          17.43971466  11.22389224  10.59958961   0.31234412]]
[[94.         26.54936237 17.55835286 13.72994548  0.63075727]]
[[1.03000000e+02 2.19256024e+01 1.67796014e+01 1.07652182e+01
  2.70905856e-03]]
[[68.         25.95458267 10.32980741 19.77412753  0.25558186]]
[[99.         19.43761922  7.5281966  15.90322507  1.44756599]]
[[1.09000000e+02 2.24349792e+01 1.57336093e+01 1.04000676e+01
  4.10972341e-03]]
[[88.         26.77847253 15.32230793 17.25562524  0.13400991]]
[[39.         26.05885323 17.37953302 20.10483568  1.29131993]]
[[2.06000000e+02 1.85339945e+01 1.24610180e+01 7.92923538e+00
  2.69999247e-02]]
[[1.75000000e+02 1.79084532e+01 8.64471133e+00 1.02415543e+01
  2.90230470e-02]]
[[158.          15.39345331  10.31544433   8.13430237   0.33602388]]
[[3.60000000e+01 2.77319430e+01 2.88883588e+01 1.2285901

[[173.          17.05766908   7.74397667  12.4269213    8.07405764]]
[[54.         35.25976144 19.99047331 23.27399511  0.11415132]]
[[185.          14.95723985   5.68960944  13.82391753   3.30271236]]
[[285.          17.12469166  13.64784054  12.27917782  12.66654272]]
[[19.         28.12910609 29.39325407 13.12458286  0.33439715]]
[[3.14000000e+02 2.42959764e+01 2.74651582e+01 8.09808028e+00
  1.16661140e-02]]
[[2.77000000e+02 2.74259294e+01 2.32535383e+01 1.33547161e+01
  4.16122476e-03]]
[[2.71000000e+02 2.19506639e+01 1.96238160e+01 1.20094094e+01
  3.92328933e-02]]
[[146.          20.58345476  11.12634398  14.23224198   0.47155259]]
[[244.          21.8526618   17.08184222  11.2950022    1.83727488]]
[[142.          16.96728553   5.71377323  14.02112286   1.06218174]]
[[217.          21.18394649  11.23062194  14.06755449   8.48526727]]
[[317.          24.93501405  12.03232384  19.75206669   3.33189315]]
[[286.          20.33757224  14.0411496   14.57270003   1.35151614]]
[[185.  

[[2.47000000e+02 2.13329762e+01 1.71264750e+01 1.23698567e+01
  5.03140616e-02]]
[[2.35000000e+02 1.81230301e+01 1.50524525e+01 6.72596714e+00
  1.50896705e-02]]
[[59.         25.19582034 24.35670505 12.7497404   0.06900477]]
[[302.          19.8794766   14.79039705  14.18214032   0.96022385]]
[[2.49000000e+02 2.13025732e+01 1.82896969e+01 8.85340698e+00
  8.18395514e-03]]
[[197.          17.45141876   9.9738875   11.82218211   2.59581521]]
[[155.          17.40717194  10.91451185   9.5076833    0.31718891]]
[[3.43000000e+02 2.23933185e+01 3.05298057e+01 8.93874342e+00
  2.16421704e-01]]
[[195.          14.58339061   9.02744307   8.89573811   7.85855217]]
[[ 2.         28.27748579 28.39686815 18.91666629  1.11015509]]
[[22.         33.00108477 29.15006601 18.33804838  0.07637244]]
[[131.          19.63295647  11.86331026   9.11822492   1.49630279]]
[[73.         26.98599104 13.11587991 21.48943372  3.90842791]]
[[159.          14.32171209  10.85608945   7.17293157   0.50838856]]
[[179.

[[2.24000000e+02 2.03198406e+01 1.32546345e+01 9.16493114e+00
  2.14713532e-02]]
[[41.         24.4756983  21.46379909 17.43399121  0.99765758]]
[[89.         25.91256766 16.06102906 19.36111804  0.5510714 ]]
[[69.         30.45754061 19.75155323 21.49547126  2.31763444]]
[[219.          16.2362024   10.05113514  11.42404711   1.57003497]]
[[178.          18.60217404  10.32157806  13.03081069   1.94897121]]
[[12.         32.14595794 30.66722644 22.21564926  5.54413268]]
[[241.          22.12014764  16.3850065   13.03943918   1.51212344]]


In [None]:
# Export Model
dump(demandSVR, f"data/SVM/{profileName}.joblib")
dump(scalerX, f"data/SVM/{profileName}scalerX.joblib")
dump(scalerY, f"data/SVM/{profileName}scalerY.joblib")

In [36]:
svr = load(f"data/SVM/{profileName}.joblib")

xScale = load(f"data/SVM/{profileName}scalerX.joblib")
yScale = load(f"data/SVM/{profileName}scalerY.joblib")

b = np.empty((1,5))
b[0] = getTrainingRow(dateList[0])[:5]
predict(
    b,
    xScale,
    yScale,
    svr
)

array([204.47853625])