In [712]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import matplotlib.pyplot as mlt
import seaborn as sns
from torch.autograd import Variable
from torch import autograd
from datetime import datetime
import matplotlib.pyplot as plt
from datetime import timedelta
import sys
import os
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_squared_log_error, r2_score
from scipy.stats import ks_2samp
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.tsa.seasonal import seasonal_decompose
import json
from types import SimpleNamespace

In [None]:
# Prefer the GPU when PyTorch can see a CUDA device; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [716]:
# Execute the project's helper notebooks with IPython's %run magic.
# All paths are relative, so this cell depends on the current working directory.
# NOTE(review): presumably these notebooks provide the names used further down
# (PreProcessDataset, PrepareDatForInput, GetPcaTransformedData, vanilla_LSTM,
# Bi_LSTM, Seq2SeqConv1D, ModelWrapper, plot_full_dataset,
# compare_predicted_dataset) -- confirm which notebook defines which.
%run ../Data_Process/data_preprocess.ipynb
%run ../Data_Process/prepare_data.ipynb
%run ../Models/lstm.ipynb
%run ../Models/bi_lstm.ipynb
%run ../Models/seq2seq.ipynb
%run wrapper.ipynb
%run visualize.ipynb

## Pre-process dataset

In [None]:
# Read the dataset configuration. The object_hook turns every JSON object into
# a SimpleNamespace, so settings are read as attributes rather than dict keys.
with open("../Config/config_data.json") as json_data:
    data = json.loads(
        json_data.read(),
        object_hook=lambda d: SimpleNamespace(**d),
    )

# One settings namespace per dataset.
parameters_palo_alto = data.palo_alto
parameters_jpl = data.jpl

In [None]:
# Build one pre-processor per raw CSV and invoke it right away.
# Positional arguments: raw CSV path, a dataset tag string, and the matching
# settings namespace.
palo_alto_preprocessor = PreProcessDataset(
    "../Dataset/Raw/palo_alto.csv",
    'palo_alto',
    parameters_palo_alto,
)
palo_alto_preprocessor()

jpl_preprocessor = PreProcessDataset(
    "../Dataset/Raw/jpl.csv",
    'jpl',
    parameters_jpl,
)
jpl_preprocessor()

In [None]:
# Exploratory analysis: pairwise correlation of the features in the processed
# Palo Alto dataset, rendered as a heatmap and saved to disk.

# Not used by this cell; the data-preparation cells assign the same list.
columns = ['Day of week', 'Energy', 'Week Day', 'Year', 'Month', 'Day of month', 'Hour', 'Sum']

df = pd.read_csv("../Dataset/Processed/palo_alto_data_with_zero.csv")

# Parse the timestamps once and reuse them for both derived features.
start_times = pd.to_datetime(df['Start'])
df['Hour'] = start_times.dt.hour
# Running total of Energy within each calendar day.
df['Sum'] = df.groupby(start_times.dt.date)['Energy'].cumsum()
# Drop the raw timestamp so it is not part of the correlation matrix.
df = df.drop(columns=['Start'])
correlation_matrix = df.corr()

# savefig does not create missing directories, so make sure the target exists.
correlation_png = '../Results/eda/correlation.png'
os.makedirs(os.path.dirname(correlation_png), exist_ok=True)

# Visualize the correlation matrix
plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap="coolwarm", fmt=".2f")
plt.title("Correlation Matrix")
plt.savefig(correlation_png)
plt.show()

## Model Parameters

In [717]:
# Read the model configuration. Each JSON object is converted to a
# SimpleNamespace so hyper-parameters are available as attributes.
with open("../Config/config_model.json") as json_data:
    data = json.loads(
        json_data.read(),
        object_hook=lambda d: SimpleNamespace(**d),
    )

# One hyper-parameter namespace per model family.
parameters_lstm = data.LSTM
parameters_bilstm = data.BiLSTM
parameters_seq2seq = data.Seq2Seq

## Data Prepare without PCA

In [None]:
# Build the min-max scaled (non-PCA) feature matrix, split it into train/test
# sets and move the training split onto the selected device.
columns = ['Day of week', 'Energy', 'Week Day', 'Year', 'Month', 'Day of month', 'Hour', 'Sum']

df = pd.read_csv("../Dataset/Processed/palo_alto_data_with_zero.csv")

# Parse the timestamps once and reuse them for both derived features.
start_times = pd.to_datetime(df['Start'])
df['Hour'] = start_times.dt.hour
# Running total of Energy within each calendar day.
df['Sum'] = df.groupby(start_times.dt.date)['Energy'].cumsum()
df = df.drop(columns=['Start'])

# Scale every feature to [0, 1].
# NOTE(review): the scaler is fitted on the full dataset before the train/test
# split, so test-set ranges influence the training data -- consider fitting on
# the training portion only.
scaler = MinMaxScaler(feature_range=(0,1))
df = scaler.fit_transform(df)
# Round-trip through a DataFrame so a mismatch between the scaled columns and
# `columns` raises instead of passing silently.
df = pd.DataFrame(df, columns=columns)
df = np.array(df)

train_ratio = 0.70

data_train, data_test, ground_truth_train, ground_truth_test = PrepareDatForInput(parameters_lstm).get_train_test_dataset_forecasting(df, train_ratio)

# Number of whole batches per epoch. The batch size is read from the same
# configuration that was passed to PrepareDatForInput; the bare `batch_size`
# name used previously is not assigned in this notebook.
step_per_epoch = len(data_train) // parameters_lstm.batch_size

# Converting to tensor
data_train = torch.from_numpy(data_train).float().to(device)
ground_truth_train = torch.from_numpy(ground_truth_train).float().to(device)

## Data Prepare with PCA

In [None]:
# Build the PCA-transformed feature matrix, split it into train/test sets and
# move the training split onto the selected device.
columns = ['Day of week', 'Energy', 'Week Day', 'Year', 'Month', 'Day of month', 'Hour', 'Sum']

df = pd.read_csv("../Dataset/Processed/palo_alto_data_with_zero.csv")

# Parse the timestamps once and reuse them for both derived features.
start_times = pd.to_datetime(df['Start'])
df['Hour'] = start_times.dt.hour
# Running total of Energy within each calendar day.
df['Sum'] = df.groupby(start_times.dt.date)['Energy'].cumsum()
df = df.drop(columns=['Start'])

# Project the features with the project's PCA helper.
pca = GetPcaTransformedData()
pca_transformed_data = pca.transform_data(df, columns)
df = np.array(pca_transformed_data)

train_ratio = 0.70

data_train, data_test, ground_truth_train, ground_truth_test = PrepareDatForInput(parameters_seq2seq).get_train_test_dataset_forecasting(df, train_ratio)

# Number of whole batches per epoch. The batch size is read from the same
# configuration that was passed to PrepareDatForInput; the bare `batch_size`
# name used previously is not assigned in this notebook.
step_per_epoch = len(data_train) // parameters_seq2seq.batch_size

# Converting to tensor
data_train = torch.from_numpy(data_train).float().to(device)
ground_truth_train = torch.from_numpy(ground_truth_train).float().to(device)

## LSTM

In [None]:
# LSTM forecaster, Adam optimizer and MSE objective, configured from
# `parameters_lstm`.
model = vanilla_LSTM(parameters_lstm)
model = model.to(device)

optimizer = torch.optim.Adam(
    model.parameters(),
    lr=parameters_lstm.learning_rate,
    weight_decay=0.005,
)

loss_function_lstm = nn.MSELoss()

In [None]:
# Train/test helper bound to the LSTM settings.
# NOTE(review): `col=5` presumably selects a feature column -- confirm in wrapper.ipynb.
wrapper = ModelWrapper(
    model_parameter=parameters_lstm,
    col=5,
)

In [None]:
# Train the LSTM; the wrapper returns the real values, the generated values
# and the recorded errors.
(
    real_dataset_lstm,
    gen_dataset_lstm,
    errors_generator_lstm,
) = wrapper.train_model(
    model,
    optimizer,
    loss_function_lstm,
    data_train,
    ground_truth_train,
    step_per_epoch,
)


In [None]:
# Plot real vs. generated training values, taking the last index along axis 1
# of each tensor.
# NOTE(review): the numeric arguments presumably are the plotted index range
# and a column index -- confirm in visualize.ipynb. The Bi-LSTM and Seq2seq
# training plots write to the same '../Results/test1.png' path.
plot_full_dataset(
    real_dataset_lstm[:, -1, :].detach().cpu().numpy(),
    gen_dataset_lstm[:, -1, :].detach().cpu().numpy(),
    0,
    50000,
    '../Results/test1.png',
    5,
    'LSTM',
)

Test

In [None]:
# Keep only as many test samples as fit into whole batches.
# The end index is computed with floor division rather than slicing with the
# negated remainder: when the length is already a multiple of the batch size
# the remainder is 0, and `[: -0]` would return an empty array.
data_test = data_test[: (data_test.shape[0] // parameters_lstm.batch_size) * parameters_lstm.batch_size]
ground_truth_test = ground_truth_test[: (ground_truth_test.shape[0] // parameters_lstm.batch_size) * parameters_lstm.batch_size]

In [None]:
# Number of whole batches in the test set.
step_per_epoch = data_test.shape[0] // parameters_lstm.batch_size

In [None]:
# Convert the test split to float tensors on the selected device.
data_test = torch.from_numpy(data_test)
data_test = data_test.float().to(device)

ground_truth_test = torch.from_numpy(ground_truth_test)
ground_truth_test = ground_truth_test.float().to(device)

In [None]:
# Evaluate the LSTM on the test split.
# The generic metric names (smape_loss, mae_loss, ...) are reassigned by the
# Bi-LSTM and Seq2seq evaluation cells.
(
    real_data_lstm,
    predicted_data_lstm,
    loss_lstm,
    smape_loss,
    mae_loss,
    mse_loss,
    rmse_loss,
    r2_loss,
) = wrapper.test_model(
    model,
    data_test,
    ground_truth_test,
    loss_function_lstm,
    step_per_epoch,
)

In [None]:
# Plot real vs. predicted test values, taking the last index along axis 1 of
# each tensor.
# NOTE(review): the numeric arguments presumably are the plotted index range
# and a column index -- confirm in visualize.ipynb.
plot_full_dataset(
    real_data_lstm[:, -1, :].detach().cpu().numpy(),
    predicted_data_lstm[:, -1, :].detach().cpu().numpy(),
    0,
    500,
    '../Results/10_pca_lstm.png',
    5,
    'LSTM',
)


## Bi-LSTM

In [None]:
# Bi-LSTM forecaster, Adam optimizer and MSE objective, configured from
# `parameters_bilstm`. This rebinds `model` and `optimizer`.
model = Bi_LSTM(parameters_bilstm)
model = model.to(device)

optimizer = torch.optim.Adam(
    model.parameters(),
    lr=parameters_bilstm.learning_rate,
    weight_decay=0.005,
)

loss_function_bilstm = nn.MSELoss()

In [None]:
# Train/test helper bound to the Bi-LSTM settings.
# NOTE(review): `col=5` presumably selects a feature column -- confirm in wrapper.ipynb.
wrapper = ModelWrapper(
    model_parameter=parameters_bilstm,
    col=5,
)

In [None]:
# Train the Bi-LSTM; the wrapper returns the real values, the generated values
# and the recorded errors.
(
    real_dataset_bilstm,
    gen_dataset_bilstm,
    errors_generator_bilstm,
) = wrapper.train_model(
    model,
    optimizer,
    loss_function_bilstm,
    data_train,
    ground_truth_train,
    step_per_epoch,
)

In [None]:
# Plot real vs. generated training values, taking the last index along axis 1
# of each tensor.
# NOTE(review): the numeric arguments presumably are the plotted index range
# and a column index -- confirm in visualize.ipynb. The LSTM and Seq2seq
# training plots write to the same '../Results/test1.png' path.
plot_full_dataset(
    real_dataset_bilstm[:, -1, :].detach().cpu().numpy(),
    gen_dataset_bilstm[:, -1, :].detach().cpu().numpy(),
    0,
    5000,
    '../Results/test1.png',
    5,
    'Bilstm',
)

Test

In [None]:
# Keep only as many test samples as fit into whole Bi-LSTM batches.
# Floor division is used for the end index because slicing with the negated
# remainder (`[: -(n % batch_size)]`) returns an empty result whenever the
# length is already a multiple of the batch size.
data_test = data_test[: (data_test.shape[0] // parameters_bilstm.batch_size) * parameters_bilstm.batch_size]
ground_truth_test = ground_truth_test[: (ground_truth_test.shape[0] // parameters_bilstm.batch_size) * parameters_bilstm.batch_size]

# Number of whole batches in the test set.
step_per_epoch = len(data_test) // parameters_bilstm.batch_size

# The LSTM test cells above already turn both variables into torch tensors,
# and torch.from_numpy rejects tensors. torch.as_tensor accepts NumPy arrays
# and tensors alike, so this cell works on a top-to-bottom run and on re-runs.
data_test = torch.as_tensor(data_test).float().to(device)
ground_truth_test = torch.as_tensor(ground_truth_test).float().to(device)

In [None]:
# Evaluate the Bi-LSTM on the test split.
# The generic metric names (smape_loss, mae_loss, ...) overwrite the values
# from the LSTM evaluation cell.
(
    real_data_bilstm,
    predicted_data_bilstm,
    loss_bilstm,
    smape_loss,
    mae_loss,
    mse_loss,
    rmse_loss,
    r2_loss,
) = wrapper.test_model(
    model,
    data_test,
    ground_truth_test,
    loss_function_bilstm,
    step_per_epoch,
)

In [None]:
# Plot real vs. predicted test values, taking the last index along axis 1 of
# each tensor.
# NOTE(review): the numeric arguments presumably are the plotted index range
# and a column index -- confirm in visualize.ipynb.
plot_full_dataset(
    real_data_bilstm[:, -1, :].detach().cpu().numpy(),
    predicted_data_bilstm[:, -1, :].detach().cpu().numpy(),
    0,
    5000,
    '../Results/10_pca_bilstm.png',
    5,
    'Bi-LSTM',
)

## Seq2seq

In [None]:
# Seq2seq forecaster, Adam optimizer and MSE objective, configured from
# `parameters_seq2seq`. This rebinds `model` and `optimizer`.
model = Seq2SeqConv1D(parameters_seq2seq)
model = model.to(device)

optimizer = torch.optim.Adam(
    model.parameters(),
    lr=parameters_seq2seq.learning_rate,
    weight_decay=0.005,
)

loss_function_seq = nn.MSELoss()

In [None]:
# Train/test helper bound to the Seq2seq settings.
# NOTE(review): `col=5` presumably selects a feature column -- confirm in wrapper.ipynb.
wrapper = ModelWrapper(
    model_parameter=parameters_seq2seq,
    col=5,
)

In [None]:
# Train the Seq2seq model; the wrapper returns the real values, the generated
# values and the recorded errors.
(
    real_dataset_seq,
    gen_dataset_seq,
    errors_generator_seq,
) = wrapper.train_model(
    model,
    optimizer,
    loss_function_seq,
    data_train,
    ground_truth_train,
    step_per_epoch,
)

In [None]:
# Plot real vs. generated training values, taking the last index along axis 1
# of each tensor.
# NOTE(review): the numeric arguments presumably are the plotted index range
# and a column index -- confirm in visualize.ipynb. The LSTM and Bi-LSTM
# training plots write to the same '../Results/test1.png' path.
plot_full_dataset(
    real_dataset_seq[:, -1, :].detach().cpu().numpy(),
    gen_dataset_seq[:, -1, :].detach().cpu().numpy(),
    0,
    5000,
    '../Results/test1.png',
    5,
    'Seq2seq',
)

Test

In [None]:
# Keep only as many test samples as fit into whole Seq2seq batches.
# Floor division is used for the end index because slicing with the negated
# remainder (`[: -(n % batch_size)]`) returns an empty result whenever the
# length is already a multiple of the batch size.
data_test = data_test[: (data_test.shape[0] // parameters_seq2seq.batch_size) * parameters_seq2seq.batch_size]
ground_truth_test = ground_truth_test[: (ground_truth_test.shape[0] // parameters_seq2seq.batch_size) * parameters_seq2seq.batch_size]

# Number of whole batches in the test set.
step_per_epoch = len(data_test) // parameters_seq2seq.batch_size

# Earlier test cells already turn both variables into torch tensors, and
# torch.from_numpy rejects tensors. torch.as_tensor accepts NumPy arrays and
# tensors alike, so this cell works on a top-to-bottom run and on re-runs.
data_test = torch.as_tensor(data_test).float().to(device)
ground_truth_test = torch.as_tensor(ground_truth_test).float().to(device)

In [None]:
# Evaluate the Seq2seq model on the test split.
# The generic metric names (smape_loss, mae_loss, ...) overwrite the values
# from the earlier evaluation cells.
(
    real_data_seq,
    predicted_data_seq,
    loss_seq,
    smape_loss,
    mae_loss,
    mse_loss,
    rmse_loss,
    r2_loss,
) = wrapper.test_model(
    model,
    data_test,
    ground_truth_test,
    loss_function_seq,
    step_per_epoch,
)

In [None]:
# Plot real vs. predicted test values, taking the last index along axis 1 of
# each tensor.
# NOTE(review): the numeric arguments presumably are the plotted index range
# and a column index -- confirm in visualize.ipynb.
plot_full_dataset(
    real_data_seq[:, -1, :].detach().cpu().numpy(),
    predicted_data_seq[:, -1, :].detach().cpu().numpy(),
    0,
    5900,
    '../Results/10_pca_seq2seq.png',
    5,
    'Seq2seq',
)

In [None]:
# Compare the three models' test predictions against the real values from the
# Seq2seq evaluation, taking the last index along axis 1 of each tensor.
# NOTE(review): this assumes all three prediction tensors line up with
# real_data_seq sample-for-sample -- verify when the models' batch sizes differ.
compare_predicted_dataset(
    real_data_seq[:, -1, :].detach().cpu().numpy(),
    predicted_data_lstm[:, -1, :].detach().cpu().numpy(),
    predicted_data_bilstm[:, -1, :].detach().cpu().numpy(),
    predicted_data_seq[:, -1, :].detach().cpu().numpy(),
    3200,
    3300,
    '../Results/10_pca_all.png',
    5,
)