# Electricity price forecasting (EPF) using an LSTM

## Importing packages

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
from torch.autograd import Variable
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error,r2_score

## Data processing and loading

In [None]:
 from google.colab import drive  # NOTE(review): stray leading space; presumably tolerated by the notebook front end, but an IndentationError in a plain .py script
# Mount the Colab drive at /content/drive; the data path used in the next cell
# is relative (assumes the working directory is /content; TODO confirm).
drive.mount('/content/drive')

In [None]:
# Folder that holds the input CSV, relative to the working directory.
# NOTE(review): `dir` shadows the builtin of the same name; the name is kept
# unchanged because other cells may read it.
dir = "drive/MyDrive/Colab_Notebooks/data_"

# Read the price table from <dir>/prices.csv into a DataFrame.
csv_path = dir + "/prices.csv"
prices_df = pd.read_csv(csv_path)

In [None]:
# Display the first rows of the loaded frame as the cell output.
prices_df.head()

In [None]:
# Sanity-check the series length: number of non-missing prices, total number
# of rows, and the number of hours in three 365-day years.
# The first line used to wrap the boolean mask in len(), which always equals
# the row count; summing the mask counts only the non-missing entries.
print(int(prices_df["prices"].notna().sum()))
print(len(prices_df["prices"]))
print(3*365*24)

In [None]:
# Keep the first 92 * 24 rows (92 days if the rows are hourly) for a quick
# visual check of the raw series.
df_test = prices_df.head(92 * 24)
print(df_test.head())

# Price against date for that sample.
df_test.plot(x='dates', y='prices')
plt.show()

In [None]:
# Sanity checks on the 92 * 24 row sample.
l = df_test['dates'].unique()
print(len(df_test['prices'])==92*24)

# Count zero prices in the price column only. Comparing the whole frame
# (`df_test.values`) against 0 also inspected the date column and any other
# column, so the count was not a count of zero prices.
prices_arr = df_test['prices'].values
print(int(np.count_nonzero(prices_arr == 0)))

# Number of distinct date values, then the values themselves.
print(len(l))
print(l)


In [None]:
# Full price series as an array: the frame's third column, selected by position.
prices_arr = prices_df.iloc[:, 2].values
print(type(prices_arr))

# Wide canvas so the complete series stays readable.
plt.figure(figsize=(20, 6), dpi=80)
plt.plot(prices_arr, label='Electricity Prices')
plt.show()

### Data loading

In [None]:
""""def sliding_windows(data, seq_length):
    x = []
    y = []

    for i in range(len(data)-seq_length-1):
        _x = data[i:(i+seq_length)]
        _y = data[i+seq_length]
        x.append(_x)
        y.append(_y)

    return np.array(x),np.array(y)

sc = MinMaxScaler()
training_data = sc.fit_transform(prices_arr.reshape(-1,1))

seq_length = 24
x, y = sliding_windows(training_data, seq_length)

train_size = int(len(y) * 0.67)
test_size = len(y) - train_size

dataX = Variable(torch.Tensor(np.array(x)))
dataY = Variable(torch.Tensor(np.array(y)))

trainX = Variable(torch.Tensor(np.array(x[0:train_size])))
trainY = Variable(torch.Tensor(np.array(y[0:train_size])))

testX = Variable(torch.Tensor(np.array(x[train_size:len(x)])))
testY = Variable(torch.Tensor(np.array(y[train_size:len(y)])))""""

In [None]:
def restructering(data, input_days, day_len=24):
    """Turn a flat series into (past days -> next day) training pairs.

    The series is reshaped into rows of ``day_len`` values (one row per day).
    Sample ``i`` uses rows ``i .. i + input_days - 1`` as input and row
    ``i + input_days`` as target.

    Args:
        data: NumPy array whose length (first axis) is a whole multiple of
            ``day_len`` and that can be reshaped to ``(days, day_len)``,
            e.g. shape ``(n,)`` or ``(n, 1)``.
        input_days: number of consecutive days in every input window.
        day_len: number of values per day (24 by default).

    Returns:
        Tuple ``(x, y)`` of NumPy arrays with shapes
        ``(days - input_days, input_days, day_len)`` and
        ``(days - input_days, day_len)``.

    Raises:
        ValueError: if the series does not contain a whole number of days.
    """
    num_hours = len(data)
    num_of_days, leftover = divmod(num_hours, day_len)
    if leftover:
        # Fail here with an explicit message instead of printing a hint and
        # letting reshape() raise a less helpful ValueError.
        raise ValueError(
            "series length %d is not a multiple of day_len=%d (%d values left over)"
            % (num_hours, day_len, leftover)
        )

    data_ = data.reshape(num_of_days, day_len)
    print(data_.shape)

    # One sample per day that still has `input_days` full days before it.
    num_samples = num_of_days - input_days
    x = [data_[i:i + input_days] for i in range(num_samples)]
    y = [data_[i + input_days] for i in range(num_samples)]

    return np.array(x), np.array(y)

In [None]:

# Scale the prices with a MinMaxScaler fitted on the complete series.
# NOTE(review): fitting on the complete series lets the later test span
# influence the scaling; consider fitting on the training part only.
sc = MinMaxScaler()
training_data = sc.fit_transform(prices_arr.reshape(-1,1))

# Number of past days fed to the model for each predicted day.
input_days = 5

x, y = restructering(training_data, input_days)

# Chronological split: the first 67 % of the samples train, the rest test.
train_size = int(len(y) * 0.67)
test_size = len(y) - train_size

# Build float32 tensors directly. `Variable` is deprecated (it simply returns
# a tensor) and `x` / `y` are already NumPy arrays, so the former
# Variable(torch.Tensor(np.array(...))) wrapping added nothing.
dataX = torch.tensor(x, dtype=torch.float32)
dataY = torch.tensor(y, dtype=torch.float32)

trainX = torch.tensor(x[:train_size], dtype=torch.float32)
trainY = torch.tensor(y[:train_size], dtype=torch.float32)

testX = torch.tensor(x[train_size:], dtype=torch.float32)
testY = torch.tensor(y[train_size:], dtype=torch.float32)

In [None]:
# Full shape of the training inputs, then just the number of samples.
print(trainX.shape)
print(trainX.shape[0])

## Model

In [None]:
class LSTM(nn.Module):
    """LSTM regressor: an ``nn.LSTM`` followed by a linear output layer.

    Args:
        num_classes: size of the output vector produced for each sample.
        input_size: number of features per time step.
        hidden_size: width of the LSTM hidden state.
        num_layers: number of stacked LSTM layers.
    """

    def __init__(self, num_classes, input_size, hidden_size, num_layers):
        super().__init__()

        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size

        # batch_first=True: inputs are (batch, time steps, features).
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True)

        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Map a batch of sequences to one output vector per sequence.

        Args:
            x: tensor of shape ``(batch, time steps, input_size)``.

        Returns:
            Tensor of shape ``(batch, num_classes)``.
        """
        # No initial state is passed, so nn.LSTM starts from zeros. This
        # replaces the hand-built zero tensors, which were always created on
        # the CPU in the default dtype regardless of the input.
        _, (h_out, _) = self.lstm(x)

        # h_out is (num_layers, batch, hidden_size). Use the last layer's
        # final hidden state; flattening with view(-1, hidden_size) mixed the
        # layer axis into the batch axis whenever num_layers > 1.
        out = self.fc(h_out[-1])

        return out

## Training

In [None]:
# --- Hyper-parameters ---------------------------------------------------
num_epochs = 3000
learning_rate = 0.001

# Model dimensions: 24 features per time step in, 24 values out.
input_size = 24
hidden_size = 100
num_layers = 1
num_classes = 24

lstm = LSTM(num_classes, input_size, hidden_size, num_layers)

# Mean-squared error as the regression loss, optimised with Adam.
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate)

# --- Full-batch training --------------------------------------------------
for epoch in range(num_epochs):
    # Clear the gradients left by the previous step, then forward/backward.
    optimizer.zero_grad()
    outputs = lstm(trainX)
    loss = criterion(outputs, trainY)
    loss.backward()
    optimizer.step()

    # Report progress every 100 epochs only.
    if epoch % 100 != 0:
        continue
    print("Epoch: %d, loss: %1.5f" % (epoch, loss.item()))

## Evaluation

In [None]:
# Switch the model to evaluation mode before it is run on the test inputs.
lstm.eval()


In [None]:
# Inference only: run the forward pass without autograd tracking, so no
# computation graph is built or kept alive for the test predictions.
with torch.no_grad():
    pred_y = lstm(testX)

In [None]:
# Shapes of the predictions and of the targets, printed for comparison.
print(pred_y.shape)
print(testY.shape)

In [None]:
# Overlay prediction (drawn first) and target (drawn second) for test
# sample 6; both are still in the scaled units.
for series in (pred_y, testY):
    plt.plot(series[6].detach().numpy())
plt.show()

In [None]:
# 2x2 grid: prediction vs. target for the first four test samples.
# The figure size and dpi are passed to plt.subplots itself; the separate
# plt.figure(...) call only opened an additional, empty figure and left the
# grid at the default size.
fig, axs = plt.subplots(2, 2, figsize=(20, 6), dpi=80)

fig.suptitle('real vs predication')
# axs.flat walks the panels row by row, so panel k shows sample k. The former
# i+j index drew sample 1 in two panels and never reached sample 3.
for k, ax in enumerate(axs.flat):
    ax.plot(pred_y[k].detach().numpy())
    ax.plot(testY[k].detach().numpy())



In [None]:

# 6x6 grid: prediction vs. target in original price units for 36 consecutive
# test samples, starting at sample 5.
fig, axs = plt.subplots(6,6)
fig.set_figheight(15)
fig.set_figwidth(15)
fig.suptitle('real vs predication')

first_sample = 5
# axs.flat walks the panels row by row, so every panel gets its own sample.
# The former 5+i+j index repeated samples along the anti-diagonals and showed
# only 11 distinct samples in 36 panels.
for k, ax in enumerate(axs.flat):
    idx = first_sample + k
    if idx >= len(testY):
        # Fewer test samples than panels: leave the remaining panels blank.
        ax.axis('off')
        continue
    # Undo the min-max scaling; the scaler expects a single-column 2-D array.
    tmp_pred = sc.inverse_transform(pred_y[idx].detach().numpy().reshape(-1,1))
    tmp_real = sc.inverse_transform(testY[idx].detach().numpy().reshape(-1,1))
    ax.plot(tmp_pred)
    ax.plot(tmp_real)

In [None]:
# Per-sample MSE, MAPE and R^2, averaged over the test set.
# NOTE(review): the metrics are computed on the scaled values, not on prices;
# MAPE can become very large wherever a scaled target is close to 0.
total_mse = 0
total_mape = 0
total_r2 = 0
test_len = len(testY)

for target, pred in zip(testY, pred_y):
    y_true = target.detach().numpy()
    y_pred = pred.detach().numpy()
    # scikit-learn metrics take (y_true, y_pred). The arguments used to be
    # swapped, which is harmless for MSE but gives wrong MAPE and R^2 values.
    total_mse += mean_squared_error(y_true, y_pred)
    total_mape += mean_absolute_percentage_error(y_true, y_pred)
    total_r2 += r2_score(y_true, y_pred)

print("mse:" , total_mse/test_len )
print("total_mape:" , total_mape/test_len )
print("total_r2:" , total_r2/test_len )

In [None]:

# Print the last prediction/target pair left over from the grid-plot cell;
# both were converted back with the scaler's inverse transform there.
print(tmp_pred)
print(tmp_real)