In [2]:
import sys
sys.path.append("/home/hugo/projetos-doutorado/mimo_emb_fts/src/")
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from embfts.models.MimoNonStationaryFtsPca import MimoNonStationaryFtsPca
from pyFTS.models.nonstationary import nsfts
from pyFTS.benchmarks import Measures
from pyFTS.benchmarks import Measures
import matplotlib.pyplot as plt
from pyFTS.common import Util
import datetime
import statistics
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import r2_score
import math


# Silence NumPy's floating-point warnings for division by zero and invalid
# operations for the rest of the session. np.seterr returns the settings that
# were active before the call, which is the dict echoed as this cell's output.
np.seterr(divide='ignore', invalid='ignore')

{'divide': 'warn', 'over': 'warn', 'under': 'ignore', 'invalid': 'warn'}

In [3]:
def sample_first_prows(data, perc):
    """Return the leading fraction ``perc`` of ``data``.

    The number of rows kept is ``len(data) * perc`` truncated by ``int``;
    the rows themselves are taken with ``data.head``.
    """
    row_count = int(len(data) * perc)
    return data.head(row_count)

In [4]:
def cal_nrmse(rmse, y):
    """Normalise ``rmse`` by the range of ``y`` (its maximum minus its minimum)."""
    highest = max(y)
    lowest = min(y)
    return rmse / (highest - lowest)

In [5]:
def clean_dataset(df):
    """Return a float64 copy of ``df`` without missing or infinite rows.

    Arguments:
        df: pandas DataFrame whose remaining values can be cast to float64.
    Returns:
        A new DataFrame containing only the rows of ``df`` that hold no NaN
        and no +/-inf, cast to ``np.float64``. The input frame is left
        untouched.
    Raises:
        AssertionError: if ``df`` is not a pandas DataFrame.
    """
    assert isinstance(df, pd.DataFrame), "df must be a pandas DataFrame"
    # dropna() without inplace=True so the caller's frame is not mutated.
    without_missing = df.dropna()
    # axis must be passed by keyword: pandas 2.x rejects the positional form.
    has_non_finite = without_missing.isin([np.nan, np.inf, -np.inf]).any(axis=1)
    return without_missing[~has_non_finite].astype(np.float64)

In [6]:
# convert series to supervised learning
def series_to_supervised_mimo(data, n_in, n_out, dropnan=True):
    """
    Frame a time series as a supervised learning dataset.

    Arguments:
        data: Observations as a list, NumPy array or pandas DataFrame
            (anything ``pd.DataFrame`` accepts).
        n_in: Number of lag observations as input (X).
        n_out: Number of observations as output (y).
        dropnan: Boolean whether or not to drop rows with NaN values.
    Returns:
        Pandas DataFrame of series framed for supervised learning. Columns
        are the lagged copies ``name(t-n_in) ... name(t-1)``, followed by
        ``name(t)`` and then the future steps.
    """
    df = pd.DataFrame(data)
    # Column labels are stringified so that integer labels (produced for list
    # or ndarray input) can be combined with the time suffix; the number of
    # variables comes from the frame itself, so 1-D input also works.
    labels = [str(label) for label in df.columns]
    cols, names = [], []
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += ['%s(t-%d)' % (label, i) for label in labels]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += ['%s(t)' % label for label in labels]
        else:
            # NOTE(review): future-step labels embed the column position j
            # (e.g. 'x0(t+1)'), unlike the lag labels. Kept unchanged so
            # existing column lookups keep working.
            names += ['%s%d(t+%d)' % (label, j, i) for j, label in enumerate(labels)]
    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values introduced by the shifts
    if dropnan:
        agg.dropna(inplace=True)
    return agg

In [7]:
# Read the raw CSV, discard the 'date', 'rv1' and 'rv2' columns, and pass the
# remainder through clean_dataset before binding it to `df`.
df = clean_dataset(
    pd.read_csv(
        '/home/hugo/projetos-doutorado/mimo_emb_fts/data/energydata_complete.csv',
        sep=',',
    ).drop(labels=['date', 'rv1', 'rv2'], axis=1)
)

In [8]:
# frame as supervised learning
reframed = series_to_supervised_mimo(df, 1, 1)
# reframed = df
reframed.head()

Unnamed: 0,Appliances(t-1),lights(t-1),T1(t-1),RH_1(t-1),T2(t-1),RH_2(t-1),T3(t-1),RH_3(t-1),T4(t-1),RH_4(t-1),...,T8(t),RH_8(t),T9(t),RH_9(t),T_out(t),Press_mm_hg(t),RH_out(t),Windspeed(t),Visibility(t),Tdewpoint(t)
1,60.0,30.0,19.89,47.596667,19.2,44.79,19.79,44.73,19.0,45.566667,...,18.2,48.863333,17.066667,45.56,6.483333,733.6,92.0,6.666667,59.166667,5.2
2,60.0,30.0,19.89,46.693333,19.2,44.7225,19.79,44.79,19.0,45.9925,...,18.2,48.73,17.0,45.5,6.366667,733.7,92.0,6.333333,55.333333,5.1
3,50.0,30.0,19.89,46.3,19.2,44.626667,19.79,44.933333,18.926667,45.89,...,18.1,48.59,17.0,45.4,6.25,733.8,92.0,6.0,51.5,5.0
4,50.0,40.0,19.89,46.066667,19.2,44.59,19.79,45.0,18.89,45.723333,...,18.1,48.59,17.0,45.4,6.133333,733.9,92.0,5.666667,47.666667,4.9
5,60.0,40.0,19.89,46.333333,19.2,44.53,19.79,45.0,18.89,45.53,...,18.1,48.59,17.0,45.29,6.016667,734.0,92.0,5.333333,43.833333,4.8


In [9]:
# Keep the column labels of the reframed data in `cols` and display them.
cols = reframed.columns
cols

Index(['Appliances(t-1)', 'lights(t-1)', 'T1(t-1)', 'RH_1(t-1)', 'T2(t-1)',
       'RH_2(t-1)', 'T3(t-1)', 'RH_3(t-1)', 'T4(t-1)', 'RH_4(t-1)', 'T5(t-1)',
       'RH_5(t-1)', 'T6(t-1)', 'RH_6(t-1)', 'T7(t-1)', 'RH_7(t-1)', 'T8(t-1)',
       'RH_8(t-1)', 'T9(t-1)', 'RH_9(t-1)', 'T_out(t-1)', 'Press_mm_hg(t-1)',
       'RH_out(t-1)', 'Windspeed(t-1)', 'Visibility(t-1)', 'Tdewpoint(t-1)',
       'Appliances(t)', 'lights(t)', 'T1(t)', 'RH_1(t)', 'T2(t)', 'RH_2(t)',
       'T3(t)', 'RH_3(t)', 'T4(t)', 'RH_4(t)', 'T5(t)', 'RH_5(t)', 'T6(t)',
       'RH_6(t)', 'T7(t)', 'RH_7(t)', 'T8(t)', 'RH_8(t)', 'T9(t)', 'RH_9(t)',
       'T_out(t)', 'Press_mm_hg(t)', 'RH_out(t)', 'Windspeed(t)',
       'Visibility(t)', 'Tdewpoint(t)'],
      dtype='object')

In [10]:
# df_forecast.rename(columns = {'Appliances(t-1)': 'Appliances(t)', 
#                               'lights(t-1)':'lights(t)', 'T1(t-1)':'T1(t)', 
#                               'RH_1(t-1)':'RH_1(t)', 'T2(t-1)':'T2(t)',
#                               'RH_2(t-1)':'RH_2(t)', 'T3(t-1)':'T3(t)', 
#                               'RH_3(t-1)':'RH_3(t)', 'T4(t-1)':'T4(t)', 
#                               'RH_4(t-1)':'RH_4(t)', 'T5(t-1)':'T5(t)',
#                               'RH_5(t-1)':'RH_5(t)', 'T6(t-1)':'T6(t)', 
#                               'RH_6(t-1)':'RH_6(t)', 'T7(t-1)':'T7(t)', 
#                               'RH_7(t-1)':'RH_7(t)', 'T8(t-1)':'T8(t)',
#                               'RH_8(t-1)':'RH_8(t)', 'T9(t-1)':'T9(t)', 
#                               'RH_9(t-1)':'RH_9(t)', 'T_out(t-1)':'T_out(t)', 
#                               'Press_mm_hg(t-1)':'Press_mm_hg(t)',
#                               'RH_out(t-1)':'RH_out(t)', 'Windspeed(t-1)':'Windspeed(t)', 
#                               'Visibility(t-1)':'Visibility(t)','Tdewpoint(t-1)':'Tdewpoint(t)'}, 
#                                inplace = True)

In [13]:
# Labels of the '(t)' target columns, in the same order as they appear in
# `reframed`. The aggregation cells further down iterate over this list.
# NOTE(review): the identifier is misspelled ("forecats_coluns") but it is
# referenced by later cells, so a rename has to be applied everywhere at once.
df_forecats_coluns = ['Appliances(t)', 'lights(t)', 'T1(t)', 'RH_1(t)', 'T2(t)', 'RH_2(t)',
       'T3(t)', 'RH_3(t)', 'T4(t)', 'RH_4(t)', 'T5(t)', 'RH_5(t)', 'T6(t)',
       'RH_6(t)', 'T7(t)', 'RH_7(t)', 'T8(t)', 'RH_8(t)', 'T9(t)', 'RH_9(t)',
       'T_out(t)', 'Press_mm_hg(t)', 'RH_out(t)', 'Windspeed(t)',
       'Visibility(t)', 'Tdewpoint(t)']

In [51]:
def sliding_window_pca(data, n_windows, train_size, steps_ahead, transformation, model=None):
    """Evaluate a MIMO FTS model over sliding windows of ``data``.

    For every window yielded by ``Util.sliding_window`` the model is trained on
    the lagged columns ('Appliances(t-1)' .. 'Tdewpoint(t-1)') of the training
    part and asked to forecast from the lagged columns of the test part. The
    forecasts are then compared, column by column, against the target columns
    ('Appliances(t)' .. 'Tdewpoint(t)') of the test part.

    Arguments:
        data: DataFrame holding both the '(t-1)' input columns and the '(t)'
            target columns (column labels are hard-coded in this function).
        n_windows: Number of windows; the window length passed to
            ``Util.sliding_window`` is ``floor(len(data) / n_windows)``.
        train_size: Forwarded to ``Util.sliding_window`` as its third argument
            (presumably the training fraction of each window -- confirm
            against the pyFTS documentation).
        steps_ahead: Forwarded to ``model.run_test``.
        transformation: Forwarded to ``run_train`` / ``run_test``
            (the notebook uses 'KPCA' and 'PCA').
        model: Object exposing ``run_train`` and ``run_test``. When omitted,
            the module-level ``mimo_kpca_nsfts`` is used, which keeps existing
            calls working.
    Returns:
        DataFrame with one row per (window, variable) and the columns
        'window', 'rmse', 'mape', 'mae', 'r2', 'variable'.
    """
    if model is None:
        # Backward-compatible fallback to the global the notebook defines
        # right before calling this function.
        model = mimo_kpca_nsfts

    result = {
        "window": [],
        "rmse": [],
        "mape": [],
        "mae": [],
        "r2": [],
        "variable": [],
    }

    # Honour the caller's n_windows (it used to be overwritten with 30 here).
    windows_length = math.floor(len(data) / n_windows)

    for ct, ttrain, ttest in Util.sliding_window(data, windows_length, train_size, inc=1):
        if len(ttest) == 0:
            continue

        print('-' * 20)
        print(f'training window {(ct)}')

        # Inputs are the lagged columns; targets are the current-step columns.
        Xtrain = ttrain.loc[:, 'Appliances(t-1)':'Tdewpoint(t-1)']
        Xtest = ttest.loc[:, 'Appliances(t-1)':'Tdewpoint(t-1)']
        ytest = ttest.loc[:, 'Appliances(t)':'Tdewpoint(t)']

        # Both calls return a pair; the second element is not needed here.
        models, _ = model.run_train(Xtrain, transformation)
        forecast, _ = model.run_test(models, Xtest, steps_ahead, transformation)

        columns = list(ytest.columns)
        df_forecast = pd.DataFrame(forecast, columns=columns)

        for col in columns:
            observed = ytest[col].values
            predicted = df_forecast[col].values

            # NOTE(review): the MAPE of columns whose targets contain zeros
            # (e.g. 'lights(t)') comes out as NaN/inf in the result tables,
            # presumably because Measures.mape divides by the targets.
            result["rmse"].append(Measures.rmse(observed, predicted))
            result["mape"].append(Measures.mape(observed, predicted))
            result["mae"].append(mean_absolute_error(observed, predicted))
            result["r2"].append(r2_score(observed, predicted))
            result["window"].append(ct)
            result["variable"].append(col)

    return pd.DataFrame(result)

## Sliding Window: MIMO KPCA-NSFTS

In [74]:
# Settings for the KPCA run. All of them except steps_ahead are passed
# positionally to MimoNonStationaryFtsPca below; their exact meaning is defined
# by that class (not visible in this notebook).
num_components_pca = 2   # presumably the number of components kept by the embedding -- confirm
order_fts_model = 1
npart = 50               # presumably the number of partitions of the FTS model -- confirm
gamma = 0.1
fts_model = nsfts.NonStationaryFTS
memory_window_error = 10
# NOTE: the run cell below passes steps_ahead=1 as a literal and does not
# read this variable.
steps_ahead = 1

# sliding_window_pca picks the model up through the global name
# `mimo_kpca_nsfts`, so this cell must run before the evaluation cell.
mimo_kpca_nsfts = MimoNonStationaryFtsPca(num_components_pca, order_fts_model, npart,
                                            gamma, fts_model, memory_window_error)

In [75]:
# Sliding-window evaluation with transformation='KPCA'; the result holds one
# row of error metrics per (window, variable).
kpca_result =  sliding_window_pca(data=reframed,n_windows=30,train_size=0.75,steps_ahead=1,transformation='KPCA')

--------------------
training window 0
--------------------
training window 657
--------------------
training window 1314
--------------------
training window 1971
--------------------
training window 2628
--------------------
training window 3285
--------------------
training window 3942
--------------------
training window 4599
--------------------
training window 5256
--------------------
training window 5913
--------------------
training window 6570
--------------------
training window 7227
--------------------
training window 7884
--------------------
training window 8541
--------------------
training window 9198
--------------------
training window 9855
--------------------
training window 10512
--------------------
training window 11169


  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100


--------------------
training window 11826
--------------------
training window 12483
--------------------
training window 13140
--------------------
training window 13797
--------------------
training window 14454
--------------------
training window 15111
--------------------
training window 15768
--------------------
training window 16425
--------------------
training window 17082
--------------------
training window 17739
--------------------
training window 18396
--------------------
training window 19053


In [76]:
#columns = list(reframed.columns)
# Average each error metric over all windows, one row per forecast variable.
columns = list(df_forecats_coluns)

measures = kpca_result
var = measures.groupby("variable")

# Order matters: it fixes the column order of the summary table.
metric_names = ("rmse", "mae", "mape", "r2")
final_result = {"variable": [], **{name: [] for name in metric_names}}

for col in columns:
    var_agr = var.get_group(col)
    final_result["variable"].append(col)
    for name in metric_names:
        window_mean = statistics.mean(var_agr.loc[:, name])
        final_result[name].append(round(window_mean, 3))

final_measures_kpca = pd.DataFrame(final_result)



In [77]:
# Print a caption, then display the per-variable averages of the KPCA run.
print("Statistics MIMO KPCA-NSFTS(test): ")
final_measures_kpca

Statistics MIMO KPCA-NSFTS(test): 


Unnamed: 0,variable,rmse,mae,mape,r2
0,Appliances(t),64.859,29.53,25.625,0.408
1,lights(t),5.013,2.456,,0.191
2,T1(t),0.059,0.044,0.203,0.988
3,RH_1(t),0.414,0.194,0.473,0.919
4,T2(t),0.116,0.083,0.401,0.989
5,RH_2(t),0.282,0.188,0.481,0.975
6,T3(t),0.072,0.046,0.201,0.971
7,RH_3(t),0.2,0.114,0.29,0.96
8,T4(t),0.082,0.049,0.238,0.977
9,RH_4(t),0.162,0.108,0.279,0.974


## Sliding Window: MIMO PCA-NSFTS

In [78]:
# Settings for the PCA run: identical to the KPCA run except for
# num_components_pca (10 here, 2 there).
num_components_pca = 10
order_fts_model = 1
npart = 50
gamma = 0.1
fts_model = nsfts.NonStationaryFTS
memory_window_error = 10
# NOTE: the run cell below passes steps_ahead=1 as a literal and does not
# read this variable.
steps_ahead = 1

# The model is rebound to the name `mimo_kpca_nsfts` even though this section
# evaluates transformation='PCA': sliding_window_pca looks the model up under
# that global name.
mimo_kpca_nsfts = MimoNonStationaryFtsPca(num_components_pca, order_fts_model, npart,
                                            gamma, fts_model, memory_window_error)

In [79]:
# Sliding-window evaluation with transformation='PCA'; the result holds one
# row of error metrics per (window, variable).
pca_result =  sliding_window_pca(data=reframed,n_windows=30,train_size=0.75,steps_ahead=1,transformation='PCA')

--------------------
training window 0
--------------------
training window 657
--------------------
training window 1314
--------------------
training window 1971
--------------------
training window 2628
--------------------
training window 3285
--------------------
training window 3942
--------------------
training window 4599
--------------------
training window 5256
--------------------
training window 5913
--------------------
training window 6570
--------------------
training window 7227
--------------------
training window 7884
--------------------
training window 8541
--------------------
training window 9198
--------------------
training window 9855
--------------------
training window 10512
--------------------
training window 11169
--------------------
training window 11826
--------------------
training window 12483
--------------------
training window 13140
--------------------
training window 13797
--------------------
training window 14454
--------------------
training w

In [80]:
# Average each error metric over all windows, one row per forecast variable.
columns = list(df_forecats_coluns)

measures = pca_result
var = measures.groupby("variable")

# Order matters: it fixes the column order of the summary table.
metric_names = ("rmse", "mae", "mape", "r2")
final_result = {"variable": [], **{name: [] for name in metric_names}}

for col in columns:
    var_agr = var.get_group(col)
    final_result["variable"].append(col)
    for name in metric_names:
        window_mean = statistics.mean(var_agr.loc[:, name])
        final_result[name].append(round(window_mean, 3))

final_measures_pca = pd.DataFrame(final_result)

print("Statistics MIMO PCA-NSFTS(test): ")
final_measures_pca

Statistics MIMO PCA-NSFTS(test): 


Unnamed: 0,variable,rmse,mae,mape,r2
0,Appliances(t),62.521,35.387,37.769,0.447
1,lights(t),4.796,3.075,inf,0.207
2,T1(t),0.096,0.074,0.346,0.97
3,RH_1(t),0.549,0.348,0.857,0.865
4,T2(t),0.182,0.139,0.666,0.973
5,RH_2(t),0.383,0.289,0.736,0.952
6,T3(t),0.114,0.088,0.39,0.935
7,RH_3(t),0.287,0.206,0.527,0.917
8,T4(t),0.131,0.099,0.483,0.947
9,RH_4(t),0.247,0.188,0.489,0.941
