In [77]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Import Statsmodels
from statsmodels.tsa.api import VAR
from statsmodels.tsa.stattools import adfuller
from statsmodels.tools.eval_measures import rmse, aic

from pyFTS.benchmarks import Measures
from pyFTS.benchmarks import Measures
from pyFTS.common import Util
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import r2_score
import math
import statistics
from sklearn.preprocessing import StandardScaler

import sys
sys.path.append("/home/hugo/projetos-doutorado/mimo_emb_fts/src/")

from embfts.util.DataSetUtil import DataSetUtil
from embfts.util.StatisticsUtil import StatisticsUtil

In [78]:
data_set_util = DataSetUtil()
statistics_util = StatisticsUtil()

### Dataset

In [79]:
df = pd.read_csv('/home/hugo/projetos-doutorado/mimo_emb_fts/data/AirQualityUCI.csv', sep=';', decimal=',')
data = df.drop(labels=['Date','Time','Unnamed: 15', 'Unnamed: 16'], axis=1)
data.dropna(inplace=True)
#data = clean_dataset(data)
data = data_set_util.series_to_supervised_mimo(data, 1, 1)
data.head()

Unnamed: 0,CO(GT)(t-1),PT08.S1(CO)(t-1),NMHC(GT)(t-1),C6H6(GT)(t-1),PT08.S2(NMHC)(t-1),NOx(GT)(t-1),PT08.S3(NOx)(t-1),NO2(GT)(t-1),PT08.S4(NO2)(t-1),PT08.S5(O3)(t-1),...,C6H6(GT)(t),PT08.S2(NMHC)(t),NOx(GT)(t),PT08.S3(NOx)(t),NO2(GT)(t),PT08.S4(NO2)(t),PT08.S5(O3)(t),T(t),RH(t),AH(t)
1,2.6,1360.0,150.0,11.9,1046.0,166.0,1056.0,113.0,1692.0,1268.0,...,9.4,955.0,103.0,1174.0,92.0,1559.0,972.0,13.3,47.7,0.7255
2,2.0,1292.0,112.0,9.4,955.0,103.0,1174.0,92.0,1559.0,972.0,...,9.0,939.0,131.0,1140.0,114.0,1555.0,1074.0,11.9,54.0,0.7502
3,2.2,1402.0,88.0,9.0,939.0,131.0,1140.0,114.0,1555.0,1074.0,...,9.2,948.0,172.0,1092.0,122.0,1584.0,1203.0,11.0,60.0,0.7867
4,2.2,1376.0,80.0,9.2,948.0,172.0,1092.0,122.0,1584.0,1203.0,...,6.5,836.0,131.0,1205.0,116.0,1490.0,1110.0,11.2,59.6,0.7888
5,1.6,1272.0,51.0,6.5,836.0,131.0,1205.0,116.0,1490.0,1110.0,...,4.7,750.0,89.0,1337.0,96.0,1393.0,949.0,11.2,59.2,0.7848


In [80]:
data_train = data.loc[:,'CO(GT)(t-1)':'AH(t-1)']
data_test = data.loc[:,'CO(GT)(t)':'AH(t)']

Xtrain = data_set_util.sample_first_prows(data_train,0.75)
ytrain = data_set_util.sample_first_prows(data_test,0.75)

Xtest = data_train.iloc[max(Xtrain.index):]
ytest = data_test.iloc[max(ytrain.index):]

In [81]:
# Xtest

In [82]:
# ytest

In [83]:
# Xtest.loc[:,'Appliances(t-1)']

In [84]:
# ytest.loc[:,'Appliances(t)']

In [85]:
# fig, ax = plt.subplots(nrows=1, ncols=1, figsize=[15, 3])
# ax.plot(Xtest.loc[:,'Appliances(t-1)'][1:10], label='Xtest')
# ax.plot(ytest.loc[:,'Appliances(t)'][1:10], label='ytest')
# handles, labels = ax.get_legend_handles_labels()
# lgd = ax.legend(handles, labels, loc=2, bbox_to_anchor=(1, 1))
# plt.show()

## VAR 

In [86]:
def lags_v(dados, p):
  T, n = dados.shape
  X = np.zeros((T-p, n*p))
  Y = dados[p:, :]
  for i in range(p, T):
    for j in range(p):
      X[i - p, j*n:(j*n)+n] = dados[i-(p-j), : ]
  return X, Y

def var(dados, parametros):
  T, n = dados.shape
  coef, _ = parametros
  p = int(coef.shape[0]/n)
  X,_ = lags_v(dados, p)
  ret = np.zeros((T-p, n))
  for i in range(T-p):
    ret[i, :] = coef.T @ X[i, :] 
  return ret 

def ajustar_var(dados, p):
  X,Y = lags_v(dados, p)
  
  #coef = np.linalg.inv(X.T @ X) @ ( X.T @ Y )
  coef = np.linalg.pinv(X.T @ X) @ ( X.T @ Y )

  previsoes = var(dados, [coef, None])

  residuos = dados[p:, :] - previsoes

  Sigma = np.sqrt(np.cov(residuos, rowvar=False))

  return coef, Sigma


In [87]:
def sliding_window(data,n_windows,train_size,p):

    result = {
         "window": [],
         "rmse": [],
         "mape": [],
         "mae": [],
         "r2": [],
         "variable":[]
    }
    
    final_result = {
         "window": [],
         "rmse": [],
         "mape": [],
         "mae": [],
         "r2": [],
         "variable":[]
    }

    tam = len(data)
    n_windows = 30
    windows_length = math.floor(tam / n_windows)
    for ct, ttrain, ttest in Util.sliding_window(data, windows_length, train_size, inc=1):
        if len(ttest) > 0:
            
            print('-' * 20)
            print(f'training window {(ct)}')
            
#             Xtrain = ttrain.loc[:,'Appliances(t-1)':'Tdewpoint(t-1)']
#             ytrain = ttrain.loc[:,'Appliances(t)':'Tdewpoint(t)']
#             Xtest = ttest.loc[:,'Appliances(t-1)':'Tdewpoint(t-1)']
#             ytest = ttest.loc[:,'Appliances(t)':'Tdewpoint(t)']

            scaler = StandardScaler()
            Xtrain = scaler.fit_transform(ttrain.loc[:,'CO(GT)(t-1)':'AH(t-1)'])
            ytrain = scaler.fit_transform(ttrain.loc[:,'CO(GT)(t)':'AH(t)'])
            Xtest = scaler.transform(ttest.loc[:,'CO(GT)(t-1)':'AH(t-1)'])
            ytest = scaler.transform(ttest.loc[:,'CO(GT)(t)':'AH(t)'])
                        
            param = ajustar_var(Xtrain, p)
            forecast = var(Xtest, param)
            
            
            forecast = scaler.inverse_transform(forecast)  
            ytest_metric = ttest.loc[:,'CO(GT)(t)':'AH(t)']
            df_forecast = pd.DataFrame(forecast,columns=ytest_metric.columns)
            df_original = pd.DataFrame(ytest_metric,columns=list(ytest_metric.columns))
                        
            for col in ytest_metric.columns:  
                original = df_original[col].values
                forecast = df_forecast[col].values
                original = original[p-1:len(original)-1]

                
#                 fig, ax = plt.subplots(nrows=1, ncols=1, figsize=[15, 3])
#                 ax.plot(original, label='Original')
#                 ax.plot(forecast, label='Forecast')
#                 handles, labels = ax.get_legend_handles_labels()
#                 lgd = ax.legend(handles, labels, loc=2, bbox_to_anchor=(1, 1))
#                 plt.show()
                
                #print("[{0: %H:%M:%S}]".format(datetime.datetime.now()) + f" getting statistics for variable: " + col)
                mae = mean_absolute_error(original,forecast)
                r2 = r2_score(original,forecast)
                #rmse = mean_squared_error(original,forecast,squared=False)
                rmse = Measures.rmse(original,forecast)
                mape = Measures.mape(original,forecast)
                
                #nrmse = cal_nrmse(rmse, original)

                result["rmse"].append(rmse)
                #result["nrmse"].append(nrmse)
                result["mape"].append(mape)
                result["mae"].append(mae)
                result["r2"].append(r2)
                result["window"].append(ct)
                result["variable"].append(col)
                
                
        
    measures = pd.DataFrame(result)
    return measures

In [88]:
p = 1
var_result =  sliding_window(data=data,n_windows=30,train_size=0.75,p=p)

--------------------
training window 0
--------------------
training window 311
--------------------
training window 622
--------------------
training window 933
--------------------
training window 1244
--------------------
training window 1555
--------------------
training window 1866


  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))


--------------------
training window 2177
--------------------
training window 2488
--------------------
training window 2799
--------------------
training window 3110
--------------------
training window 3421
--------------------
training window 3732
--------------------
training window 4043


  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))


--------------------
training window 4354
--------------------
training window 4665
--------------------
training window 4976
--------------------
training window 5287
--------------------
training window 5598
--------------------
training window 5909
--------------------
training window 6220


  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))


--------------------
training window 6531
--------------------
training window 6842
--------------------
training window 7153
--------------------
training window 7464
--------------------
training window 7775
--------------------
training window 8086


  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))


--------------------
training window 8397
--------------------
training window 8708
--------------------
training window 9019


  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))


In [89]:
columns = list(ytest.columns)

final_result = {
    "variable": [],
    "rmse": [],
    "mae": [],
    "mape": [],
    "r2": []
}

measures = var_result
var = measures.groupby("variable")

for col in columns:
    
    var_agr = var.get_group(col)
           
    rmse = round(statistics.mean(var_agr.loc[:,'rmse']),3)
    mape = round(statistics.mean(var_agr.loc[:,'mape']),3)
    mae = round(statistics.mean(var_agr.loc[:,'mae']),3)
    r2 = round(statistics.mean(var_agr.loc[:,'r2']),3)

    final_result["variable"].append(col)
    final_result["rmse"].append(rmse)
    final_result["mape"].append(mape)
    final_result["mae"].append(mae)
    final_result["r2"].append(r2)
        
    #print(f'Results: {(col,rmse,mae,r2)}')
        
        
final_measures = round(pd.DataFrame(final_result),3) 



In [90]:
final_measures

Unnamed: 0,variable,rmse,mae,mape,r2
0,CO(GT)(t),370.824,162.177,2447.171,-297.134
1,PT08.S1(CO)(t),5704.98,2751.418,1256.249,-1600.323
2,NMHC(GT)(t),34.092,26.734,12.092,0.847
3,C6H6(GT)(t),262.402,127.689,295.093,-139.938
4,PT08.S2(NMHC)(t),8308.411,4073.212,1870.53,-2888.616
5,NOx(GT)(t),3328.612,1693.598,2533.151,-11889.018
6,PT08.S3(NOx)(t),5423.522,2725.265,1256.748,-1380.285
7,NO2(GT)(t),1028.81,577.859,751.172,-3175.409
8,PT08.S4(NO2)(t),7702.473,3690.557,1671.405,-1535.772
9,PT08.S5(O3)(t),10016.17,5009.702,2308.089,-4063.763


In [91]:
# p = 1

# param = ajustar_var(Xtrain.values, p)

# sigma = param[-1]

In [92]:
# previsoes = var(Xtest.values, param)

In [93]:
# T = Xtest.shape[0]

# fig, ax = plt.subplots(2, 1, figsize=(15,7))

# ax[0].set_title('Appliances')
# ax[0].plot(ytest.values[p-1:, 0], label='Original')
# ax[0].plot(previsoes[:, 0], label='Previsto')


# ax[1].set_title('Lights')
# ax[1].plot(ytest.values[p-1:, 1], label='Original')
# ax[1].plot(previsoes[:, 1], label='Previsto')

# ax[2].set_title('T1')
# ax[2].plot(ytest.values[p-1:, 2], label='Original')
# ax[2].plot(previsoes[:, 2], label='Previsto')

# original = ytest.values[p-1:, 0]
# forecast = previsoes[:, 0]

# original = original[:len(original)-2]
# forecast = forecast[1:]

# fig, ax = plt.subplots(nrows=1, ncols=1, figsize=[15, 3])
# ax.plot(original[1:50], label='Original')
# ax.plot(forecast[1:50], label='Forecast')
# handles, labels = ax.get_legend_handles_labels()
# lgd = ax.legend(handles, labels, loc=2, bbox_to_anchor=(1, 1))
# plt.show()


In [94]:
# r2 = round(r2_score(original,forecast),3)
# rmse = round(Measures.rmse(original,forecast),3)

In [95]:
# from sklearn.metrics import mean_absolute_error
# from sklearn.metrics import mean_squared_error
# from sklearn.metrics import r2_score
# import matplotlib.pyplot as plt
 
# # original = original[:len(original)-1]
# # forecast = forecast[1:]  

# forecast_df = pd.DataFrame(previsoes,columns=list(Xtest.columns))

# validation_df = Xtest
    
# for index,i in enumerate(validation_df.columns):
#     print(i)
    
#     original = validation_df[i].values
#     forecast = forecast_df[i].values
#     #forecast2 = forecast_e2d2[i].values
#     original = original[:len(original)-1]
#     #forecast = forecast[1:]
# #     forecast2 = forecast2[1:]
    
#     print("RMSE : ",mean_squared_error(original,forecast,squared=False),end=", ")
#     print("R2 : ",r2_score(original,forecast),end=", ")
#     print("MAE : ",mean_absolute_error(original,forecast),end=", ")
    
#     fig, ax = plt.subplots(nrows=1, ncols=1, figsize=[15, 3])
#     ax.plot(original[1:50], label='Original')
#     ax.plot(forecast[1:50], label='Forecast')
#     handles, labels = ax.get_legend_handles_labels()
#     lgd = ax.legend(handles, labels, loc=2, bbox_to_anchor=(1, 1))
#     plt.show()
    
#     print()