In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Import Statsmodels
from statsmodels.tsa.api import VAR
from statsmodels.tsa.stattools import adfuller
from statsmodels.tools.eval_measures import rmse, aic

from pyFTS.benchmarks import Measures
from pyFTS.benchmarks import Measures
from pyFTS.common import Util
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import r2_score
import math
import statistics
from sklearn.preprocessing import StandardScaler

import sys
sys.path.append("/home/hugo/projetos-doutorado/mimo_emb_fts/src/")

from embfts.util.DataSetUtil import DataSetUtil
from embfts.util.StatisticsUtil import StatisticsUtil

In [2]:
data_set_util = DataSetUtil()
statistics_util = StatisticsUtil()

### Dataset

In [3]:
df = pd.read_csv('/home/hugo/projetos-doutorado/mimo_emb_fts/data/energydata_complete.csv', sep=',')
df = df.drop(labels=['date','rv1','rv2'], axis=1)
data = data_set_util.clean_dataset(df)
data = data_set_util.series_to_supervised_mimo(data, 1, 1)
#data = data.diff().dropna()
data.head()

Unnamed: 0,Appliances(t-1),lights(t-1),T1(t-1),RH_1(t-1),T2(t-1),RH_2(t-1),T3(t-1),RH_3(t-1),T4(t-1),RH_4(t-1),...,T8(t),RH_8(t),T9(t),RH_9(t),T_out(t),Press_mm_hg(t),RH_out(t),Windspeed(t),Visibility(t),Tdewpoint(t)
1,60.0,30.0,19.89,47.596667,19.2,44.79,19.79,44.73,19.0,45.566667,...,18.2,48.863333,17.066667,45.56,6.483333,733.6,92.0,6.666667,59.166667,5.2
2,60.0,30.0,19.89,46.693333,19.2,44.7225,19.79,44.79,19.0,45.9925,...,18.2,48.73,17.0,45.5,6.366667,733.7,92.0,6.333333,55.333333,5.1
3,50.0,30.0,19.89,46.3,19.2,44.626667,19.79,44.933333,18.926667,45.89,...,18.1,48.59,17.0,45.4,6.25,733.8,92.0,6.0,51.5,5.0
4,50.0,40.0,19.89,46.066667,19.2,44.59,19.79,45.0,18.89,45.723333,...,18.1,48.59,17.0,45.4,6.133333,733.9,92.0,5.666667,47.666667,4.9
5,60.0,40.0,19.89,46.333333,19.2,44.53,19.79,45.0,18.89,45.53,...,18.1,48.59,17.0,45.29,6.016667,734.0,92.0,5.333333,43.833333,4.8


In [4]:
def cal_nrmse(rmse, y):
    x = max(y)-min(y)
    return (rmse/x)

## VAR 

In [5]:
def lags_v(dados, p):
  T, n = dados.shape
  X = np.zeros((T-p, n*p))
  Y = dados[p:, :]
  for i in range(p, T):
    for j in range(p):
      X[i - p, j*n:(j*n)+n] = dados[i-(p-j), : ]
  return X, Y

def var(dados, parametros):
  T, n = dados.shape
  coef, _ = parametros
  p = int(coef.shape[0]/n)
  X,_ = lags_v(dados, p)
  ret = np.zeros((T-p, n))
  for i in range(T-p):
    ret[i, :] = coef.T @ X[i, :] 
  return ret 

def ajustar_var(dados, p):
  X,Y = lags_v(dados, p)
  
  coef = np.linalg.inv(X.T @ X) @ ( X.T @ Y )

  previsoes = var(dados, [coef, None])

  residuos = dados[p:, :] - previsoes

  Sigma = np.sqrt(np.cov(residuos, rowvar=False))

  return coef, Sigma


In [6]:
def sliding_window(data,n_windows,train_size,p):

    result = {
         "window": [],
         "rmse": [],
         "mape": [],
         "mae": [],
         "r2": [],
         "smape": [],
         "nrmse": [],
         "variable":[]
    }
    
    final_result = {
         "window": [],
         "rmse": [],
         "mape": [],
         "mae": [],
         "r2": [],
         "smape": [],
         "nrmse": [],
         "variable":[]
    }

    tam = len(data)
    n_windows = n_windows
    windows_length = math.floor(tam / n_windows)
    for ct, ttrain, ttest in Util.sliding_window(data, windows_length, train_size, inc=1):
        if len(ttest) > 0:
            
            print('-' * 20)
            print(f'training window {(ct)}')
            
#             Xtrain = ttrain.loc[:,'Appliances(t-1)':'Tdewpoint(t-1)']
#             ytrain = ttrain.loc[:,'Appliances(t)':'Tdewpoint(t)']
#             Xtest = ttest.loc[:,'Appliances(t-1)':'Tdewpoint(t-1)']
#             ytest = ttest.loc[:,'Appliances(t)':'Tdewpoint(t)']

            scaler = StandardScaler()
            Xtrain = scaler.fit_transform(ttrain.loc[:,'Appliances(t-1)':'Tdewpoint(t-1)'])
            ytrain = scaler.fit_transform(ttrain.loc[:,'Appliances(t)':'Tdewpoint(t)'])
            Xtest = scaler.transform(ttest.loc[:,'Appliances(t-1)':'Tdewpoint(t-1)'])
            ytest = scaler.transform(ttest.loc[:,'Appliances(t)':'Tdewpoint(t)'])
                        
            param = ajustar_var(Xtrain, p)
            forecast = var(Xtest, param)
             
            forecast = scaler.inverse_transform(forecast)  
            ytest_metric = ttest.loc[:,'Appliances(t)':'Tdewpoint(t)']
            df_forecast = pd.DataFrame(forecast,columns=ytest_metric.columns)
            df_original = pd.DataFrame(ytest_metric,columns=ytest_metric.columns)
            
            
            for col in ytest_metric.columns:  
                original = df_original[col].values
                forecast = df_forecast[col].values
                original = original[p-1:len(original)-1]
#                 original = original[1:]
#                 forecast = forecast[:-1]

                
#                 fig, ax = plt.subplots(nrows=1, ncols=1, figsize=[15, 3])
#                 ax.plot(original, label='Original')
#                 ax.plot(forecast, label='Forecast')
#                 handles, labels = ax.get_legend_handles_labels()
#                 lgd = ax.legend(handles, labels, loc=2, bbox_to_anchor=(1, 1))
#                 plt.show()
                
                #print("[{0: %H:%M:%S}]".format(datetime.datetime.now()) + f" getting statistics for variable: " + col)
                mae = round(mean_absolute_error(original,forecast),3)
                r2 = round(r2_score(original,forecast),3)
                #rmse = mean_squared_error(original,forecast,squared=False)
                rmse = round(Measures.rmse(original,forecast),3)
                mape = round(Measures.mape(original,forecast),3)
                nrmse = round(cal_nrmse(rmse, original),3)
                smape = round(Measures.smape(original,forecast),3)
                
                #nrmse = cal_nrmse(rmse, original)

                result["rmse"].append(rmse)
                result["nrmse"].append(nrmse)
                result["mape"].append(mape)
                result["mae"].append(mae)
                result["r2"].append(r2)
                result["smape"].append(smape)
                result["window"].append(ct)
                result["variable"].append(col)
                
                
        
    measures = pd.DataFrame(result)
    return measures

In [7]:
p = 1
var_result =  sliding_window(data=data,n_windows=30,train_size=0.75,p=p)

--------------------
training window 0
--------------------
training window 657
--------------------
training window 1314
--------------------
training window 1971
--------------------

  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return (rmse/x)
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))



training window 2628
--------------------
training window 3285
--------------------
training window 3942
--------------------
training window 4599


  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))


--------------------
training window 5256
--------------------
training window 5913
--------------------
training window 6570
--------------------
training window 7227
--------------------
training window 7884


  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  return np.nanme

--------------------
training window 8541
--------------------
training window 9198
--------------------
training window 9855
--------------------
training window 10512
--------------------
training window 11169
--------------------
training window 11826
--------------------
training window 12483
--------------------
training window 13140
--------------------

  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return (rmse/x)
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100



training window 13797
--------------------
training window 14454
--------------------
training window 15111
--------------------
training window 15768
--------------------
training window 16425


  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  return np.nanmean(np.abs(np.divide(np.s

--------------------
training window 17082
--------------------
training window 17739
--------------------
training window 18396
--------------------
training window 19053


  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100


In [8]:
columns = list(data.loc[:,'Appliances(t)':'Tdewpoint(t)'].columns)

final_result = {
    "variable": [],
    "rmse": [],
    "mae": [],
    "mape": [],
    "r2": [],
    "smape": [],
    "nrmse": [],
    "rmse_std": [],
    "mae_std": [],
    "mape_std": [],
    "r2_std": [],
    "smape_std": [],
    "nrmse_std": []
}

var_result.to_csv (r'all_windows_var_uci_appliances.csv', index = False, header=True)

measures = var_result
var = measures.groupby("variable")

for col in columns:
    
    var_agr = var.get_group(col)
           
    rmse = round(statistics.mean(var_agr.loc[:,'rmse']),3)
    mape = round(statistics.mean(var_agr.loc[:,'mape']),3)
    mae = round(statistics.mean(var_agr.loc[:,'mae']),3)
    r2 = round(statistics.mean(var_agr.loc[:,'r2']),3)
    smape = round(statistics.mean(var_agr.loc[:,'smape']),3)
    nrmse = round(statistics.mean(var_agr.loc[:,'nrmse']),3)
    
    rmse_std = round(statistics.stdev(var_agr.loc[:,'rmse']),3)
    mape_std = round(statistics.stdev(var_agr.loc[:,'mape']),3)
    mae_std = round(statistics.stdev(var_agr.loc[:,'mae']),3)
    r2_std = round(statistics.stdev(var_agr.loc[:,'r2']),3)
    smape_std = round(statistics.stdev(var_agr.loc[:,'smape']),3)
    nrmse_std = round(statistics.stdev(var_agr.loc[:,'nrmse']),3)

    final_result["variable"].append(col)
    final_result["rmse"].append(rmse)
    final_result["mape"].append(mape)
    final_result["mae"].append(mae)
    final_result["r2"].append(r2)
    final_result["smape"].append(mae)
    final_result["nrmse"].append(r2)
    
    final_result["rmse_std"].append(rmse_std)
    final_result["mape_std"].append(mape_std)
    final_result["mae_std"].append(mae_std)
    final_result["r2_std"].append(r2_std)
    final_result["smape_std"].append(smape_std)
    final_result["nrmse_std"].append(nrmse_std)
        
    #print(f'Results: {(col,rmse,mae,r2)}')
        
        
final_measures = round(pd.DataFrame(final_result),3) 



In [9]:
final_measures

Unnamed: 0,variable,rmse,mae,mape,r2,smape,nrmse,rmse_std,mae_std,mape_std,r2_std,smape_std,nrmse_std
0,Appliances(t),98.442,73.755,95.59,-1.543,73.755,-1.543,45.66,39.715,51.582,5.361,16.886,0.189
1,lights(t),8.06,6.313,inf,-2.093,6.313,-2.093,2.994,2.619,,5.29,10.907,
2,T1(t),0.076,0.061,0.286,0.974,0.061,0.974,0.048,0.042,0.201,0.036,0.1,0.022
3,RH_1(t),0.783,0.604,1.507,0.554,0.604,0.554,0.293,0.285,0.722,0.619,0.353,0.085
4,T2(t),0.167,0.139,0.667,0.96,0.139,0.96,0.092,0.084,0.384,0.071,0.19,0.029
5,RH_2(t),0.477,0.393,0.975,0.867,0.393,0.867,0.233,0.224,0.521,0.257,0.259,0.048
6,T3(t),0.107,0.083,0.366,0.897,0.083,0.897,0.05,0.043,0.171,0.143,0.086,0.04
7,RH_3(t),0.273,0.205,0.525,0.892,0.205,0.892,0.094,0.072,0.197,0.108,0.099,0.032
8,T4(t),0.129,0.102,0.499,0.908,0.102,0.908,0.078,0.063,0.327,0.179,0.164,0.042
9,RH_4(t),0.304,0.249,0.636,0.87,0.249,0.87,0.175,0.159,0.391,0.179,0.195,0.048


In [10]:
final_measures.to_csv (r'var_uci_appliances_with_std.csv', index = False, header=True)