In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Import Statsmodels
from statsmodels.tsa.api import VAR
from statsmodels.tsa.stattools import adfuller
from statsmodels.tools.eval_measures import rmse, aic

from pyFTS.benchmarks import Measures
from pyFTS.benchmarks import Measures
from pyFTS.common import Util
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import r2_score
import math
import statistics
from sklearn.preprocessing import StandardScaler

import sys
sys.path.append("/home/hugo/projetos-doutorado/mimo_emb_fts/src/")

from embfts.util.DataSetUtil import DataSetUtil
from embfts.util.StatisticsUtil import StatisticsUtil

In [2]:
data_set_util = DataSetUtil()
statistics_util = StatisticsUtil()

In [3]:
def cal_nrmse(rmse, y):
    x = max(y)-min(y)
    return (rmse/x)

### Dataset

In [4]:
df = pd.read_csv('/home/hugo/projetos-doutorado/mimo_emb_fts/data/air/air_quality_beijing_6_sites.csv', sep=',')
df = df.drop(labels=['No','day','year','month','hour','wd','station'], axis=1)
df = df.drop(labels=['No.1','day.1','year.1','month.1','hour.1','wd.1','station.1'], axis=1)
df = df.drop(labels=['No.2','day.2','year.2','month.2','hour.2','wd.2','station.2'], axis=1)
df = df.drop(labels=['No.3','day.3','year.3','month.3','hour.3','wd.3','station.3'], axis=1)
df = df.drop(labels=['No.4','day.4','year.4','month.4','hour.4','wd.4','station.4'], axis=1)
df = df.drop(labels=['No.5','day.5','year.5','month.5','hour.5','wd.5','station.5'], axis=1)
df.dropna(inplace=True)
data = data_set_util.clean_dataset(df)
data = data_set_util.series_to_supervised_mimo(data, 1, 1)
data.head()

Unnamed: 0,PM2.5(t-1),PM10(t-1),SO2(t-1),NO2(t-1),CO(t-1),O3(t-1),TEMP(t-1),PRES(t-1),DEWP(t-1),RAIN(t-1),...,PM10.5(t),SO2.5(t),NO2.5(t),CO.5(t),O3.5(t),TEMP.5(t),PRES.5(t),DEWP.5(t),RAIN.5(t),WSPM.5(t)
24,24.0,24.0,26.0,54.0,600.0,36.0,-0.2,1030.5,-17.4,0.0,...,18.0,24.0,43.0,1399.0,25.0,-0.3,1028.9,-18.0,0.0,1.4
25,22.0,24.0,24.0,44.0,500.0,44.0,-0.4,1031.0,-17.6,0.0,...,39.0,25.0,28.0,1300.0,37.0,-0.7,1029.2,-17.9,0.0,1.0
29,14.0,17.0,21.0,36.0,400.0,50.0,-1.0,1031.3,-17.3,0.0,...,23.0,26.0,11.0,1200.0,50.0,-1.3,1028.0,-18.4,0.0,1.4
31,9.0,11.0,19.0,70.0,500.0,20.0,-1.8,1030.1,-17.5,0.0,...,6.0,19.0,11.0,1100.0,52.0,-1.3,1028.0,-17.9,0.0,0.6
32,3.0,11.0,34.0,58.0,500.0,27.0,-1.7,1029.8,-17.0,0.0,...,9.0,22.0,15.0,1200.0,51.0,-0.6,1027.7,-17.8,0.0,0.9


In [5]:
# data_train = data.loc[:,'PM2.5(t-1)':'WSPM.5(t-1)']
# data_test = data.loc[:,'PM2.5(t)':'WSPM.5(t)']

# Xtrain = data_set_util.sample_first_prows(data_train,0.75)
# ytrain = data_set_util.sample_first_prows(data_test,0.75)

# Xtest = data_train.iloc[max(Xtrain.index):]
# ytest = data_test.iloc[max(ytrain.index):]

## VAR 

In [6]:
def lags_v(dados, p):
  T, n = dados.shape
  X = np.zeros((T-p, n*p))
  Y = dados[p:, :]
  for i in range(p, T):
    for j in range(p):
      X[i - p, j*n:(j*n)+n] = dados[i-(p-j), : ]
  return X, Y

def var(dados, parametros):
  T, n = dados.shape
  coef, _ = parametros
  p = int(coef.shape[0]/n)
  X,_ = lags_v(dados, p)
  ret = np.zeros((T-p, n))
  for i in range(T-p):
    ret[i, :] = coef.T @ X[i, :] 
  return ret 

def ajustar_var(dados, p):
  X,Y = lags_v(dados, p)
  
  #coef = np.linalg.inv(X.T @ X) @ ( X.T @ Y )
  coef = np.linalg.pinv(X.T @ X) @ ( X.T @ Y )

  previsoes = var(dados, [coef, None])

  residuos = dados[p:, :] - previsoes

  Sigma = np.sqrt(np.cov(residuos, rowvar=False))

  return coef, Sigma


In [7]:
def sliding_window(data,n_windows,train_size,p):

    result = {
         "window": [],
         "rmse": [],
         "mape": [],
         "mae": [],
         "r2": [],
         "smape": [],
         "nrmse": [],
         "variable":[]
    }
    
    final_result = {
         "window": [],
         "rmse": [],
         "mape": [],
         "mae": [],
         "r2": [],
         "smape": [],
         "nrmse": [],
         "variable":[]
    }

    tam = len(data)
    n_windows = n_windows
    windows_length = math.floor(tam / n_windows)
    for ct, ttrain, ttest in Util.sliding_window(data, windows_length, train_size, inc=1):
        if len(ttest) > 0:
            
            print('-' * 20)
            print(f'training window {(ct)}')
            
#             Xtrain = ttrain.loc[:,'Appliances(t-1)':'Tdewpoint(t-1)']
#             ytrain = ttrain.loc[:,'Appliances(t)':'Tdewpoint(t)']
#             Xtest = ttest.loc[:,'Appliances(t-1)':'Tdewpoint(t-1)']
#             ytest = ttest.loc[:,'Appliances(t)':'Tdewpoint(t)']

            scaler = StandardScaler()
            Xtrain = scaler.fit_transform(ttrain.loc[:,'PM2.5(t-1)':'WSPM.5(t-1)'])
            ytrain = scaler.fit_transform(ttrain.loc[:,'PM2.5(t)':'WSPM.5(t)'])
            Xtest = scaler.transform(ttest.loc[:,'PM2.5(t-1)':'WSPM.5(t-1)'])
            ytest = scaler.transform(ttest.loc[:,'PM2.5(t)':'WSPM.5(t)'])
                        
            param = ajustar_var(Xtrain, p)
            forecast = var(Xtest, param)
            
            
            forecast = scaler.inverse_transform(forecast)  
            ytest_metric = ttest.loc[:,'PM2.5(t)':'WSPM.5(t)']
            df_forecast = pd.DataFrame(forecast,columns=ytest_metric.columns)
            df_original = pd.DataFrame(ytest_metric,columns=ytest_metric.columns)
            
            
            for col in ytest_metric.columns:  
                original = df_original[col].values
                forecast = df_forecast[col].values
                original = original[p-1:len(original)-1]
#                 original = original[1:]
#                 forecast = forecast[:-1]

                
#                 fig, ax = plt.subplots(nrows=1, ncols=1, figsize=[15, 3])
#                 ax.plot(original, label='Original')
#                 ax.plot(forecast, label='Forecast')
#                 handles, labels = ax.get_legend_handles_labels()
#                 lgd = ax.legend(handles, labels, loc=2, bbox_to_anchor=(1, 1))
#                 plt.show()
                
                #print("[{0: %H:%M:%S}]".format(datetime.datetime.now()) + f" getting statistics for variable: " + col)
                mae = round(mean_absolute_error(original,forecast),3)
                r2 = round(r2_score(original,forecast),3)
                #rmse = mean_squared_error(original,forecast,squared=False)
                rmse = round(Measures.rmse(original,forecast),3)
                mape = round(Measures.mape(original,forecast),3)
                nrmse = round(cal_nrmse(rmse, original),3)
                smape = round(Measures.smape(original,forecast),3)

                result["rmse"].append(rmse)
                result["nrmse"].append(nrmse)
                result["mape"].append(mape)
                result["mae"].append(mae)
                result["r2"].append(r2)
                result["smape"].append(smape)
                result["window"].append(ct)
                result["variable"].append(col)
                
                
        
    measures = pd.DataFrame(result)
    return measures

In [8]:
p = 1
var_result =  sliding_window(data=data,n_windows=30,train_size=0.75,p=p)

--------------------
training window 0
--------------------
training window 774


  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return (rmse/x)
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, fore

--------------------
training window 1548
--------------------
training window 2322


  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return (rmse/x)
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return (rmse/x)
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return (rmse/x)
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return (rmse/x)
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return

--------------------
training window 3096
--------------------
training window 3870


  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets)))

--------------------
training window 4644
--------------------
training window 5418


  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets)))

--------------------
training window 6192
--------------------
training window 6966


  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return (rmse/x)
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return (rmse/x)
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subt

--------------------
training window 7740
--------------------
training window 8514


  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets)))

--------------------
training window 9288
--------------------
training window 10062


  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets)))

--------------------
training window 10836
--------------------
training window 11610


  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets)))

--------------------
training window 12384
--------------------
training window 13158


  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  return np.nanmean(np.abs(np.

--------------------
training window 13932
--------------------
training window 14706


  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets)))

--------------------
training window 15480
--------------------
training window 16254


  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets)))

--------------------
training window 17028


  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return (rmse/x)
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return (rmse/x)
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return (rmse/x)
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100


--------------------
training window 17802
--------------------
training window 18576


  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.

--------------------
training window 19350


  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets)))

--------------------
training window 20124
--------------------
training window 20898


  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets)))

--------------------
training window 21672
--------------------
training window 22446


  Sigma = np.sqrt(np.cov(residuos, rowvar=False))
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(forecasts - targets) / (np.abs(forecasts) + abs(targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(forecasts - targets) / (np.abs(forecasts) + abs(targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
  return np.nanmean(np.abs(forecasts - targets) / (np.abs(forecasts) + abs(targets))) * 100
  return np.nanmean(np.abs(np.divide(np.subtract(targets, forecas

In [11]:
columns = list(data.loc[:,'PM2.5(t)':'WSPM.5(t)'].columns)

final_result = {
    "variable": [],
    "rmse": [],
    "mae": [],
    "mape": [],
    "r2": [],
    "smape": [],
    "nrmse": []
}

measures = var_result
var = measures.groupby("variable")

for col in columns:
    
    var_agr = var.get_group(col)
           
    rmse = round(statistics.mean(var_agr.loc[:,'rmse']),3)
    mape = round(statistics.mean(var_agr.loc[:,'mape']),3)
    mae = round(statistics.mean(var_agr.loc[:,'mae']),3)
    r2 = round(statistics.mean(var_agr.loc[:,'r2']),3)
    smape = round(statistics.mean(var_agr.loc[:,'smape']),3)
    nrmse = round(statistics.mean(var_agr.loc[:,'nrmse']),3)

    final_result["variable"].append(col)
    final_result["rmse"].append(rmse)
    final_result["mape"].append(mape)
    final_result["mae"].append(mae)
    final_result["r2"].append(r2)
    final_result["smape"].append(smape)
    final_result["nrmse"].append(nrmse)
        
    #print(f'Results: {(col,rmse,mae,r2)}')
        
        
final_measures = round(pd.DataFrame(final_result),3) 



In [13]:
final_measures.to_csv (r'var_uci_air_quality_beijing_6_sites.csv', index = False, header=True)

In [12]:
pd.set_option('display.max_rows', None)
final_measures

Unnamed: 0,variable,rmse,mae,mape,r2,smape,nrmse
0,PM2.5(t),291.261,109.483,387.268,-420.636,17.646,1.301
1,PM10(t),457.036,171.426,299.803,-826.769,18.604,1.825
2,SO2(t),108.978,37.323,502.67,-1947.47,19.288,2.351
3,NO2(t),349.859,134.544,297.863,-3334.027,11.997,2.942
4,CO(t),2562.322,1036.421,139.969,-723.466,13.95,1.672
5,O3(t),910.339,297.0,7278.973,-1494.173,28.621,2.021
6,TEMP(t),344.296,188.366,inf,-9564.942,20.875,5.895
7,PRES(t),592.679,444.72,43.748,-127744.316,3.421,17.632
8,DEWP(t),27.072,10.265,inf,-1358.457,12.815,1.801
9,RAIN(t),19.781,7.762,,-4513.317,,
