## Load data

In [1]:
import pandas as pd

# Energy readings for the "Office_Abigail" building, fetched from the
# RF-LSTM-CEEMDAN repository on GitHub.
url_office = (
    'https://raw.githubusercontent.com/irenekarijadi/RF-LSTM-CEEMDAN/'
    'main/Dataset/data%20of%20Office_Abigail.csv'
)
office = pd.read_csv(url_office)

# Keep only the rows whose timestamp lies strictly between the two bounds.
# The column is compared exactly as read_csv returned it (no date parsing is
# requested here), so the bounds are plain strings.
after_start = office['timestamp'] > '2015-03-01'
before_end = office['timestamp'] < '2015-06-01'
data_office = office.loc[after_start & before_end]

# The experiments below use the 'energy' column in two shapes:
# as a Series (dfs_office) and as a one-column DataFrame (datas_office).
dfs_office = data_office['energy']
datas_office = pd.DataFrame(dfs_office)


## Import libraries

In [2]:
import sys
import warnings

# Silence every warning for the rest of the session, unless the interpreter
# was started with explicit -W options (sys.warnoptions is non-empty then).
# This runs before the heavy third-party imports below so that warnings raised
# while importing them are hidden as well.
# NOTE(review): this also hides deprecation warnings from pandas / Keras.
if len(sys.warnoptions) == 0:
    warnings.simplefilter(action='ignore')

from PyEMD import CEEMDAN
import numpy
import math
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn import metrics

import time
import dataframe_image as dfi


## Import the functions for building the prediction methods from the `Setting` and `myfunctions` modules

In [3]:
import Setting
from myfunctions import lr_model,svr_model,ann_model,rf_model,lstm_model,hybrid_ceemdan_rf,hybrid_ceemdan_lstm,proposed_method

## Import parameter settings

In [4]:
# Copy the shared experiment parameters out of the Setting module into
# notebook-level names so the model calls below can pass them positionally.

# Parameters passed to every model call.
hours, data_partition = Setting.n_hours, Setting.data_partition

# Extra parameter passed to the random-forest and LSTM based calls.
max_features = Setting.max_features

# Training parameters passed only to the LSTM based calls.
epoch, batch_size, neuron = Setting.epoch, Setting.batch_size, Setting.neuron
lr, optimizer = Setting.lr, Setting.optimizer

## Run the experiments
### Running the following cell trains and tests the proposed method and the other benchmark methods on the Office dataset

In [9]:
def _timed_run_office(label, model_fn, *args):
    """Run ``model_fn(*args)`` once and report how long it took.

    Parameters
    ----------
    label : str
        Method name inserted into the printed timing line.
    model_fn : callable
        One of the model functions imported from ``myfunctions``.
    *args
        Positional arguments forwarded unchanged to ``model_fn``.

    Returns
    -------
    tuple
        ``(result, elapsed_seconds)`` where ``result`` is whatever
        ``model_fn`` returned.
    """
    # perf_counter is a monotonic, high-resolution clock intended for measuring
    # intervals; time.time() can jump if the system clock is adjusted mid-run.
    started = time.perf_counter()
    result = model_fn(*args)
    elapsed = time.perf_counter() - started
    print("--- %s seconds - %s- office ---" % (elapsed, label))
    return result, elapsed


# NOTE(review): no random seed is set anywhere in this notebook, so the
# stochastic models may give slightly different numbers on each run --
# confirm whether myfunctions seeds them internally.

# Arguments shared by the LSTM based methods (appended after the data argument).
lstm_args_office = (hours, data_partition, max_features,
                    epoch, batch_size, neuron, lr, optimizer)

# --- Single-model benchmarks: these receive the one-column DataFrame ---------

#Linear Regression
lr_office, lr_time_office = _timed_run_office(
    "Linear Regression", lr_model, datas_office, hours, data_partition)

#Support Vector Regression
svr_office, svr_time_office = _timed_run_office(
    "Support Vector Regression", svr_model, datas_office, hours, data_partition)

#ANN
ann_office, ann_time_office = _timed_run_office(
    "ANN", ann_model, datas_office, hours, data_partition)

#random forest
rf_office, rf_time_office = _timed_run_office(
    "Random Forest", rf_model, datas_office, hours, data_partition, max_features)

#LSTM
lstm_office, lstm_time_office = _timed_run_office(
    "lstm", lstm_model, datas_office, *lstm_args_office)

# --- CEEMDAN based hybrids: these receive the 'energy' Series ----------------

#CEEMDAN RF
ceemdan_rf_office, ceemdan_rf_time_office = _timed_run_office(
    "ceemdan_rf", hybrid_ceemdan_rf, dfs_office, hours, data_partition, max_features)

#CEEMDAN LSTM
ceemdan_lstm_office, ceemdan_lstm_time_office = _timed_run_office(
    "ceemdan_lstm", hybrid_ceemdan_lstm, dfs_office, *lstm_args_office)

#proposed method
proposed_method_office, proposed_method_time_office = _timed_run_office(
    "proposed_method", proposed_method, dfs_office, *lstm_args_office)



--- 0.5634441375732422 seconds - Linear Regression- office ---
--- 0.3931136131286621 seconds - Support Vector Regression- office ---
--- 1.9873061180114746 seconds - ANN- office ---
--- 1.088470220565796 seconds - Random Forest- office ---
--- 10.032578229904175 seconds - lstm- office ---
--- 34.42661666870117 seconds - ceemdan_rf- office ---
--- 89.35560321807861 seconds - ceemdan_lstm- office ---
--- 84.93939208984375 seconds - proposed_method- office ---


## Summary of experimental results with running time
### Running the following cell summarizes the results and generates the output used in Section 4.4 (Table 3) for the Office dataset

In [10]:
# Build the Office results table (Table 3): one column per method, one row per
# error metric, plus a final row with the measured running time.
method_names_office = ['LR','SVR','ANN','RF','LSTM','CEEMDAN RF','CEEMDAN LSTM','Proposed Method']

# Running times: a single row with one column per method.
running_time_office = pd.DataFrame([[lr_time_office, svr_time_office, ann_time_office,
                                     rf_time_office, lstm_time_office, ceemdan_rf_time_office,
                                     ceemdan_lstm_time_office, proposed_method_time_office]],
                                   columns=method_names_office)

# Only the first three entries returned by proposed_method are used as metrics
# here; the benchmark results are used as returned.
proposed_method_office_df = proposed_method_office[0:3]

# One row per method, then transpose so that methods become the columns.
result_office = pd.DataFrame([lr_office, svr_office, ann_office, rf_office, lstm_office,
                              ceemdan_rf_office, ceemdan_lstm_office,
                              proposed_method_office_df]).T
result_office.columns = method_names_office

office_summary = pd.concat([result_office, running_time_office], axis=0)

# Label the rows by assigning the index directly. DataFrame.set_axis returns a
# new frame in current pandas, so calling it without using the result leaves
# the concatenated positional index (0, 1, 2, 0) in place.
office_summary.index = pd.Index(['MAPE(%)', 'RMSE', 'MAE', 'running time (s)'],
                                name="office results")

# Styler.set_caption also returns a new object; keep it and make it the cell's
# displayed value so the caption is actually rendered. office_summary itself
# stays a plain DataFrame for later use.
office_summary_styled = office_summary.style.set_caption("Office Results")
office_summary_styled

Unnamed: 0_level_0,LR,SVR,ANN,RF,LSTM,CEEMDAN RF,CEEMDAN LSTM,Proposed Method
office results,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
MAPE(%),11.169459,9.306992,11.291101,10.251288,9.792064,6.16485,6.353779,5.330695
RMSE,1.217669,1.106114,1.148319,1.083738,1.123522,0.64962,0.668548,0.570283
MAE,0.901621,0.769583,0.876292,0.805058,0.8057,0.491275,0.499195,0.43038
running time (s),0.563444,0.393114,1.987306,1.08847,10.032578,34.426617,89.355603,84.939392


In [11]:
#export table to png
#dfi.export(office_summary,"office_summary_table.png")

## Calculate percentage improvement
### Running the following cell calculates the percentage improvement and generates the output used in Section 4.4 (Table 4) for the Office dataset

In [12]:
def _percentage_improvement(baseline, proposed):
    """Return how much lower ``proposed`` is than ``baseline``, in percent.

    Computed as ``(baseline - proposed) / baseline * 100``; a positive value
    means the proposed method has the smaller error.
    """
    return ((baseline - proposed) / baseline) * 100


# Benchmark results keyed by the column label used in the output table.
# The last label names CEEMDAN LSTM, which is the method whose results it holds.
benchmarks_office = {
    "LR vs. Proposed Method": lr_office,
    "SVR vs. Proposed Method": svr_office,
    "ANN vs. Proposed Method": ann_office,
    "RF vs. Proposed Method": rf_office,
    "LSTM vs. Proposed Method": lstm_office,
    "CEEMDAN RF vs. Proposed Method": ceemdan_rf_office,
    "CEEMDAN LSTM vs. Proposed Method": ceemdan_lstm_office,
}

# Row labels; position i in every result sequence is used for metric i.
metric_labels_office = ['MAPE(%)', 'RMSE', 'MAE']

# One inner list per metric, one entry per benchmark (same layout as before).
df_PI_office = [
    [_percentage_improvement(scores[position], proposed_method_office[position])
     for scores in benchmarks_office.values()]
    for position in range(len(metric_labels_office))
]

# Pass the row labels at construction time. DataFrame.set_axis returns a new
# frame in current pandas, so calling it without using the result has no effect.
PI_office = pd.DataFrame(
    df_PI_office,
    columns=list(benchmarks_office),
    index=pd.Index(metric_labels_office, name="Percentage Improvement office"),
)
PI_office = PI_office.round(decimals=2)

# Styler.set_caption returns a new object; keep it and display it so the
# caption is rendered. The explicit format keeps the two-decimal presentation.
PI_office_styled = (
    PI_office.style
    .format("{:.2f}")
    .set_caption("Percentage Improvement-Office Building")
)
PI_office_styled

Unnamed: 0_level_0,LR vs. Proposed Method,SVR vs. Proposed Method,ANN vs. Proposed Method,RF vs. Proposed Method,LSTM vs. Proposed Method,CEEMDAN RF vs. Proposed Method,CEEMDAN RF LSTM vs. Proposed Method
Percentage Improvement office,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
MAPE(%),52.27,42.72,52.79,48.0,45.56,13.53,16.1
RMSE,53.17,48.44,50.34,47.38,49.24,12.21,14.7
MAE,52.27,44.08,50.89,46.54,46.58,12.4,13.79


In [11]:
#export table to png
#dfi.export(PI_office,"PI_office_table.png")