## Load data

In [1]:
import pandas as pd

# CSV published in the RF-LSTM-CEEMDAN GitHub repository
url_univlab = (
    'https://raw.githubusercontent.com/irenekarijadi/RF-LSTM-CEEMDAN/main/'
    'Dataset/data%20of%20UnivLab_Christy.csv'
)
univlab = pd.read_csv(url_univlab)

# read_csv is not asked to parse dates, so the bounds below are compared
# against the raw 'timestamp' values; both bounds are exclusive.
after_start = univlab['timestamp'] > '2015-03-01'
before_end = univlab['timestamp'] < '2015-06-01'
data_univlab = univlab[after_start & before_end]

# The same 'energy' column is kept in two shapes: dfs_univlab is the Series,
# datas_univlab is the one-column DataFrame built from it.
dfs_univlab = data_univlab['energy']
datas_univlab = pd.DataFrame(dfs_univlab)


## Import libraries

In [2]:
import sys
import warnings

if not sys.warnoptions:
    warnings.simplefilter('ignore')

from PyEMD import CEEMDAN
import numpy
import math
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn import metrics

import time
import dataframe_image as dfi

## Import all functions from another notebook for building prediction methods

In [3]:
import Setting
from myfunctions import lr_model,svr_model,ann_model,rf_model,lstm_model,hybrid_ceemdan_rf,hybrid_ceemdan_lstm,proposed_method

## Import parameter settings

In [4]:
# Copy the experiment configuration out of the Setting module into the
# short names used by the model calls in the experiment cell.

# Passed to every model call
hours, data_partition = Setting.n_hours, Setting.data_partition

# Passed to rf_model, lstm_model and the CEEMDAN-based calls
max_features = Setting.max_features

# Passed to lstm_model, hybrid_ceemdan_lstm and proposed_method
epoch, batch_size, neuron = Setting.epoch, Setting.batch_size, Setting.neuron
lr, optimizer = Setting.lr, Setting.optimizer

## Run the experiments
### Running the following cell trains and tests the proposed method and the other benchmark methods on the University Laboratory dataset

In [5]:
def _timed_run_univlab(label, model_fn, *model_args):
    """Call ``model_fn(*model_args)`` and report how long the call took.

    Parameters
    ----------
    label : str
        Method name inserted into the printed timing line.
    model_fn : callable
        The model function to run (one of those imported from ``myfunctions``).
    *model_args
        Positional arguments forwarded unchanged to ``model_fn``.

    Returns
    -------
    tuple
        ``(result, elapsed_seconds)`` where ``result`` is whatever
        ``model_fn`` returned.
    """
    # perf_counter is a monotonic, high-resolution clock, so the measured
    # interval cannot be distorted by system clock adjustments the way a
    # time.time() difference can.
    started = time.perf_counter()
    result = model_fn(*model_args)
    elapsed = time.perf_counter() - started
    print("--- %s seconds - %s- univlab ---" % (elapsed, label))
    return result, elapsed


# The single-model benchmarks receive the one-column DataFrame (datas_univlab);
# the CEEMDAN-based methods receive the Series (dfs_univlab).

#Linear Regression
lr_univlab, lr_time_univlab = _timed_run_univlab(
    "Linear Regression", lr_model, datas_univlab, hours, data_partition)

#Support Vector Regression
svr_univlab, svr_time_univlab = _timed_run_univlab(
    "Support Vector Regression", svr_model, datas_univlab, hours, data_partition)

#ANN
ann_univlab, ann_time_univlab = _timed_run_univlab(
    "ANN", ann_model, datas_univlab, hours, data_partition)

#random forest
rf_univlab, rf_time_univlab = _timed_run_univlab(
    "Random Forest", rf_model, datas_univlab, hours, data_partition, max_features)

#LSTM
lstm_univlab, lstm_time_univlab = _timed_run_univlab(
    "lstm", lstm_model, datas_univlab, hours, data_partition, max_features,
    epoch, batch_size, neuron, lr, optimizer)

#CEEMDAN RF
ceemdan_rf_univlab, ceemdan_rf_time_univlab = _timed_run_univlab(
    "ceemdan_rf", hybrid_ceemdan_rf, dfs_univlab, hours, data_partition, max_features)

#CEEMDAN LSTM
ceemdan_lstm_univlab, ceemdan_lstm_time_univlab = _timed_run_univlab(
    "ceemdan_lstm", hybrid_ceemdan_lstm, dfs_univlab, hours, data_partition, max_features,
    epoch, batch_size, neuron, lr, optimizer)

#proposed method
proposed_method_univlab, proposed_method_time_univlab = _timed_run_univlab(
    "proposed_method", proposed_method, dfs_univlab, hours, data_partition, max_features,
    epoch, batch_size, neuron, lr, optimizer)



--- 0.23638319969177246 seconds - Linear Regression- univlab ---
--- 0.37601685523986816 seconds - Support Vector Regression- univlab ---
--- 1.5343713760375977 seconds - ANN- univlab ---
--- 1.1199922561645508 seconds - Random Forest- univlab ---
--- 6.68122673034668 seconds - lstm- univlab ---
--- 31.26933741569519 seconds - ceemdan_rf- univlab ---
--- 86.68152022361755 seconds - ceemdan_lstm- univlab ---
--- 75.80700922012329 seconds - proposed_method- univlab ---


## Summary of experimental results with running time
### Running the following cell summarizes the results and generates the output used in Section 4.4 (Table 3) for the University Laboratory dataset

In [6]:
# Column labels shared by the metric rows and the running-time row
method_labels_univlab = ['LR','SVR','ANN','RF','LSTM','CEEMDAN RF','CEEMDAN LSTM','Proposed Method']

# Single row holding the elapsed seconds measured for each method
elapsed_univlab = [
    lr_time_univlab, svr_time_univlab, ann_time_univlab, rf_time_univlab,
    lstm_time_univlab, ceemdan_rf_time_univlab, ceemdan_lstm_time_univlab,
    proposed_method_time_univlab,
]
running_time_univlab = pd.DataFrame(elapsed_univlab).T
running_time_univlab.columns = method_labels_univlab

# Only the first three entries of the proposed method's result are tabulated
# (presumably MAPE, RMSE and MAE, matching the row labels applied afterwards).
proposed_method_univlab_df = proposed_method_univlab[0:3]
scores_univlab = [
    lr_univlab, svr_univlab, ann_univlab, rf_univlab, lstm_univlab,
    ceemdan_rf_univlab, ceemdan_lstm_univlab, proposed_method_univlab_df,
]
# One row per method is transposed into one column per method
result_univlab = pd.DataFrame(scores_univlab).T
result_univlab.columns = method_labels_univlab

# Metric rows first, running-time row last
univlab_summary = pd.concat([result_univlab, running_time_univlab], axis=0)

# Label the rows by assigning the index directly. The previous bare
# `univlab_summary.set_axis(...)` call discarded its return value, which only
# relabels the frame on old pandas releases where set_axis defaulted to
# in-place; on current pandas it returns a new frame and the labels were lost.
univlab_summary.index = pd.Index(
    ['MAPE(%)', 'RMSE', 'MAE', 'running time (s)'],
    name="university laboratory results",
)

# Styler.set_caption returns a Styler rather than modifying the DataFrame, so
# keep it in a variable; display or export this object to render the caption.
univlab_summary_styled = univlab_summary.style.set_caption("University Laboratory Results")

univlab_summary

Unnamed: 0_level_0,LR,SVR,ANN,RF,LSTM,CEEMDAN RF,CEEMDAN LSTM,Proposed Method
university laboratory results,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
MAPE(%),5.858858,6.419207,6.134858,6.27342,6.385431,3.566265,3.509094,3.190548
RMSE,2.579903,2.706899,2.658628,2.715355,2.627445,1.46624,1.42914,1.293451
MAE,1.864822,2.093199,2.004848,2.045536,2.063418,1.131705,1.115774,1.014384
running time (s),0.236383,0.376017,1.534371,1.119992,6.681227,31.269337,86.68152,75.807009


In [7]:
#export table to png
#dfi.export(univlab_summary,"univlab_summary_table.png")

## Calculate percentage improvement
### Running the following cell calculates the percentage improvement and generates the output used in Section 4.4 (Table 4) for the University Laboratory dataset

In [8]:
def _improvement_over_univlab(baseline_scores):
    """Percentage improvement of the proposed method over one baseline.

    For each of the first three entries (used as MAPE, RMSE, MAE in that
    order) computes ``(baseline - proposed) / baseline * 100`` against the
    matching entry of ``proposed_method_univlab``.

    Returns a list ``[pMAPE, pRMSE, pMAE]``.
    """
    return [
        ((baseline_scores[k] - proposed_method_univlab[k]) / baseline_scores[k]) * 100
        for k in range(3)
    ]


(pMAPE_LR_vs_Proposed_univlab,
 pRMSE_LR_vs_Proposed_univlab,
 pMAE_LR_vs_Proposed_univlab) = _improvement_over_univlab(lr_univlab)

(pMAPE_SVR_vs_Proposed_univlab,
 pRMSE_SVR_vs_Proposed_univlab,
 pMAE_SVR_vs_Proposed_univlab) = _improvement_over_univlab(svr_univlab)

(pMAPE_ANN_vs_Proposed_univlab,
 pRMSE_ANN_vs_Proposed_univlab,
 pMAE_ANN_vs_Proposed_univlab) = _improvement_over_univlab(ann_univlab)

(pMAPE_RF_vs_Proposed_univlab,
 pRMSE_RF_vs_Proposed_univlab,
 pMAE_RF_vs_Proposed_univlab) = _improvement_over_univlab(rf_univlab)

(pMAPE_LSTM_vs_Proposed_univlab,
 pRMSE_LSTM_vs_Proposed_univlab,
 pMAE_LSTM_vs_Proposed_univlab) = _improvement_over_univlab(lstm_univlab)

(pMAPE_ceemdan_rf_vs_Proposed_univlab,
 pRMSE_ceemdan_rf_vs_Proposed_univlab,
 pMAE_ceemdan_rf_vs_Proposed_univlab) = _improvement_over_univlab(ceemdan_rf_univlab)

(pMAPE_ceemdan_lstm_vs_Proposed_univlab,
 pRMSE_ceemdan_lstm_vs_Proposed_univlab,
 pMAE_ceemdan_lstm_vs_Proposed_univlab) = _improvement_over_univlab(ceemdan_lstm_univlab)

# One inner list per metric (MAPE, RMSE, MAE); one entry per baseline, in the
# order LR, SVR, ANN, RF, LSTM, CEEMDAN RF, CEEMDAN LSTM.
df_PI_univlab = [
    [pMAPE_LR_vs_Proposed_univlab, pMAPE_SVR_vs_Proposed_univlab,
     pMAPE_ANN_vs_Proposed_univlab, pMAPE_RF_vs_Proposed_univlab,
     pMAPE_LSTM_vs_Proposed_univlab, pMAPE_ceemdan_rf_vs_Proposed_univlab,
     pMAPE_ceemdan_lstm_vs_Proposed_univlab],
    [pRMSE_LR_vs_Proposed_univlab, pRMSE_SVR_vs_Proposed_univlab,
     pRMSE_ANN_vs_Proposed_univlab, pRMSE_RF_vs_Proposed_univlab,
     pRMSE_LSTM_vs_Proposed_univlab, pRMSE_ceemdan_rf_vs_Proposed_univlab,
     pRMSE_ceemdan_lstm_vs_Proposed_univlab],
    [pMAE_LR_vs_Proposed_univlab, pMAE_SVR_vs_Proposed_univlab,
     pMAE_ANN_vs_Proposed_univlab, pMAE_RF_vs_Proposed_univlab,
     pMAE_LSTM_vs_Proposed_univlab, pMAE_ceemdan_rf_vs_Proposed_univlab,
     pMAE_ceemdan_lstm_vs_Proposed_univlab],
]

# Build the percentage-improvement table: one column per baseline, one row per
# metric. The stray leading space in the "vs.ANN" label has been removed so the
# column can be selected by the same naming pattern as its neighbours.
PI_univlab = pd.DataFrame(
    df_PI_univlab,
    columns=["Proposed Method vs.LR", "Proposed Method vs.SVR", "Proposed Method vs.ANN",
             "Proposed Method vs.RF", "Proposed Method vs.LSTM", "Proposed Method vs.CEEMDAN RF",
             "Proposed Method vs. CEEMDAN LSTM"],
)
PI_univlab = PI_univlab.round(decimals=2)

# Label the rows by assigning the index directly. The previous bare
# `PI_univlab.set_axis(...)` call discarded its return value, which only
# relabels the frame on old pandas releases where set_axis defaulted to
# in-place; on current pandas it returns a new frame and the labels were lost.
PI_univlab.index = pd.Index(
    ['MAPE(%)', 'RMSE', 'MAE'],
    name="Percentage Improvement university laboratory",
)

# Styler.set_caption returns a Styler rather than modifying the DataFrame, so
# keep it in a variable; display or export this object to render the caption.
PI_univlab_styled = PI_univlab.style.set_caption("Percentage Improvement-University Laboratory Building")

PI_univlab

Unnamed: 0_level_0,Proposed Method vs.LR,Proposed Method vs.SVR,Proposed Method vs.ANN,Proposed Method vs.RF,Proposed Method vs.LSTM,Proposed Method vs.CEEMDAN RF,Proposed Method vs. CEEMDAN LSTM
Percentage Improvement university laboratory,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
MAPE(%),45.54,50.3,47.99,49.14,50.03,10.54,9.08
RMSE,49.86,52.22,51.35,52.37,50.77,11.78,9.49
MAE,45.6,51.54,49.4,50.41,50.84,10.37,9.09


In [9]:
#export table to png
#dfi.export(PI_univlab,"PI_univlab_table.png")