In [1]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from numpy.core.fromnumeric import size
from sklearn.svm import SVR
from torch.optim import Adam
from torch.autograd import Variable
from sklearn import preprocessing as pre
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_absolute_percentage_error

# SVR

In [26]:
dataPath = '/Users/cicada/Documents/DTU_resource/Thesis/Incremental-learning-EL/src/data-withtemp.csv'
categoryFeatures=['dayOfYear','hourOfDay', 'dayOfWeek', 'holiday', 'weekend']

#functions
def shiftData(from_, to_, feature):
    for i in range(from_, to_ + 1):
        df['prev_' + feature + str(i)] = df[feature].shift(periods=i)
    return df.dropna().copy()


In [27]:
df = pd.read_csv(dataPath)
df = df.sort_values(by=['datetime'])
del df['Unnamed: 0']
del df['Unnamed: 0.1']
del df['temp']

In [28]:
scaler = pre.StandardScaler() 
scaler.fit(df[df['datetime']< '2020-01-01']['meter'].values.reshape(-1, 1))

meter = scaler.transform(df['meter'].values.reshape(-1, 1))
df['meter'] = list(meter.ravel())

df = shiftData(12, 24, 'meter')
df = df.reset_index()
del df['index']
df.head(2)

Unnamed: 0,datetime,meter,hourOfDay,dayOfWeek,dayOfYear,holiday,prev_meter12,prev_meter13,prev_meter14,prev_meter15,prev_meter16,prev_meter17,prev_meter18,prev_meter19,prev_meter20,prev_meter21,prev_meter22,prev_meter23,prev_meter24
0,2019-01-02 00:00:00,-0.778512,0,2,2,0,0.888003,0.773931,0.338515,0.240032,-0.039775,-0.445899,-0.785941,-0.952268,-1.059915,-0.808519,-0.764492,-0.352465,-0.122258
1,2019-01-02 01:00:00,-1.018446,1,2,2,0,0.811891,0.888003,0.773931,0.338515,0.240032,-0.039775,-0.445899,-0.785941,-0.952268,-1.059915,-0.808519,-0.764492,-0.352465


In [29]:
#split train test
features = [x for x in df.columns if x not in ['meter','datetime']]

data_train = df[df['datetime']< '2020-01-01']
data_test = df[df['datetime'] >= '2020-01-01']

X_train = data_train[features].to_numpy()
y_train = data_train['meter'].to_numpy()

X_test = data_test[features].to_numpy()
y_test = data_test['meter'].to_numpy()


In [30]:
svr = SVR(kernel='linear', C=10, gamma='auto')
svr.fit(X_train, y_train)
y_pred = svr.predict(X_test)
mape = mean_absolute_percentage_error(y_test, y_pred)
print(mape)

1.9260760830394879


# LSTM

In [22]:
class LSTM(nn.Module):

    def __init__(self, num_classes, input_size, hidden_size, num_layers):
        super(LSTM, self).__init__() 
        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True)
        
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
  
        h_0 = Variable(torch.zeros(
            self.num_layers, x.size(0), self.hidden_size))
        
        c_0 = Variable(torch.zeros(
            self.num_layers, x.size(0), self.hidden_size))
        
        # Propagate input through LSTM
        ula, (h_out, _) = self.lstm(x, (h_0, c_0))

        h_out = h_out.view(-1, self.hidden_size)
        
        out = self.fc(h_out)
        
        return out


In [None]:
model = model
lr = learningRate
criterion = torch.nn.MSELoss()    # mean-squared error for regression
optimizer = Adam(model.parameters(), lr=lr)

for e in epoch_num:
    outputs = self.model(X)
    self.optimizer.zero_grad()

    loss = self.criterion(outputs, y)
    loss.backward()
    self.optimizer.step()

In [8]:
def initDeepData(dataPath, seqLength, lagLength):
    df = pd.read_csv(dataPath)
    df = df.sort_values(by=['datetime'])

    trainData = df['meter'].values.reshape(-1, 1)
    scaler = pre.MinMaxScaler().fit(trainData[:])
    trainData = scaler.transform(trainData)

    hourOfDay = df['hourOfDay'].values.reshape(-1, 1)
    hourOfDay = pre.MinMaxScaler().fit_transform(hourOfDay)

    dayOfWeek = df['dayOfWeek'].values.reshape(-1, 1)
    dayOfWeek = pre.MinMaxScaler().fit_transform(dayOfWeek)

    holiday = df['holiday'].values.reshape(-1, 1)
    holiday = pre.MinMaxScaler().fit_transform(holiday)

    dayOfYear = df['dayOfYear'].values.reshape(-1, 1)
    dayOfYear = pre.MinMaxScaler().fit_transform(dayOfYear)

    times = df[seqLength+lagLength:]['datetime'].values
    x, y = toBatches(seqLength, lagLength, trainData, hourOfDay, dayOfWeek, dayOfYear, holiday)
    
    return x, y, times

def toBatches(seqLength, lagLength, data, hourOfDay, dayofWeek, dayOfYear, holiday, useTimeFeature=True):
    x = []
    y = []
    for i in range(len(data)-seqLength-lagLength):
        _x = data[i:(i+seqLength)]
        if useTimeFeature:  
            _hourOfDay = hourOfDay[i:(i+seqLength)]
            _dayofWeek = dayofWeek[i:(i+seqLength)]
            _dayOfYear = dayOfYear[i:(i+seqLength)]
            _holiday = holiday[i:(i+seqLength)]
            _x = np.hstack((_x,_hourOfDay, _dayofWeek, _dayOfYear, _holiday))
        
        _y = data[i+seqLength+lagLength]
        x.append(_x)
        y.append(_y)

    return np.array(x),np.array(y)


In [20]:
X, y, times  = initDeepData(dataPath, 24, 12)

dataX = Variable(torch.Tensor(np.array(X)))
dataY = Variable(torch.Tensor(np.array(y)))
midpoint = int(len(times)/2)+19
trainX = Variable(torch.Tensor(np.array(X[0:midpoint])))
trainY = Variable(torch.Tensor(np.array(y[0:midpoint])))

testX = Variable(torch.Tensor(np.array(X[midpoint:len(X)])))
testY = Variable(torch.Tensor(np.array(y[midpoint:len(y)])))

In [25]:

num_epochs = 50
learning_rate = 0.01

input_size = 5
hidden_size = 128
num_layers = 1

num_classes = 1

lstm = LSTM(num_classes, input_size, hidden_size, num_layers)

criterion = torch.nn.MSELoss()    # mean-squared error for regression
optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate)
#optimizer = torch.optim.SGD(lstm.parameters(), lr=learning_rate)

# Train the model
for epoch in range(num_epochs):
    outputs = lstm(trainX)
    optimizer.zero_grad()
    
    # obtain the loss function
    loss = criterion(outputs, trainY)
    
    loss.backward()
    
    optimizer.step()
    if epoch % 10 == 0:
      print("Epoch: %d, loss: %1.5f" % (epoch, loss.item()))


Epoch: 0, loss: 0.22369
Epoch: 10, loss: 0.09090
Epoch: 20, loss: 0.02535
Epoch: 30, loss: 0.01678
Epoch: 40, loss: 0.01476
