In [68]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from statsmodels.tsa.stattools import pacf, acf
import statsmodels.tsa.api as smt
from statsmodels.tsa.stattools import adfuller
from scipy.optimize import minimize
import cvxopt
import pyreadr
import os
import time
from datetime import timedelta
from scipy.stats import norm
import copy
import warnings
import utils
import regression_fit
import pickle
from importlib import reload
reload(utils)
reload(regression_fit)
from utils import *
from regression_fit import *

# Base directory for every data file read below: the current working directory
# at the moment this cell runs.
home = os.getcwd()

## Dataset preprocessing
Train set: from 2014 to 2018. Test set: from 2019 to 2021

In [70]:
# Import dataset of group H.
# The path is assembled with os.path.join so the notebook is not tied to the
# Windows '\\' separator and also runs on Linux/macOS.
GroupH = pyreadr.read_r(os.path.join(home, 'Other_data', 'GroupH.rda'))
# read_r returns a mapping of R object name -> DataFrame; the first object is used
# (presumably the only one stored in the .rda file — TODO confirm).
GroupH_df = pd.DataFrame(list(GroupH.values())[0])

# Add lagged net-load values (columns y0_96, y0_336, diff_96, diff_336)
GroupH_df = add_autoregressive_features(GroupH_df)

# Split in train and test set (row ranges of GroupH_df).
# NOTE(review): the first 720 rows are left out of the train set — presumably because
# the lagged features are not available there; confirm against add_autoregressive_features.
# NOTE(review): row 87440 is assumed to be the first observation of 2019 — confirm.
train_set = GroupH_df[720:87440]
test_set = GroupH_df[87440:]

## Persistence benchmark (7 days)

In [73]:
# Persistence baseline built from the full dataset and evaluated at the test-set
# target times with a 7-day delay.
# NOTE(review): presumably the forecast is the value observed `delay_days` earlier —
# confirm in Persistence_Benchmark.predict.
pb_reg = Persistence_Benchmark(GroupH_df)
y_pers_benchmark = pb_reg.predict(test_set['targetTime'],delay_days=7)

In [74]:
# Positions, within test_set, of the first data point of 2020 and of 2021.
idx_2020 = 17516
idx_2021 = 35104

print("\nPersistence Benchmark (7 days)")

# Score the forecast separately on each test year, one error metric at a time.
observed = np.array(test_set['node'])
yearly_spans = (
    ("2019", slice(None, idx_2020)),
    ("2020", slice(idx_2020, idx_2021)),
    ("2021", slice(idx_2021, None)),
)
for metric_label, metric_fn in (("RMSE", RMSE), ("MAE", MAE), ("MAPE", MAPE)):
    for position, (year, span) in enumerate(yearly_spans):
        # A blank line separates the metric groups in the printed report.
        lead = "\n" if position == 0 else ""
        print(f"{lead}{metric_label} in {year}: ", metric_fn(observed[span], y_pers_benchmark[span]))


Persistence Benchmark (7 days)

RMSE in 2019:  181.479578290863
RMSE in 2020:  194.486372183224
RMSE in 2021:  178.837525773709

MAE in 2019:  123.70670244348022
MAE in 2020:  122.85391136001819
MAE in 2021:  122.86043040644171

MAPE in 2019:  0.07962968817316203
MAPE in 2020:  0.08681911004562255
MAPE in 2021:  0.08199694524332946


## Persistence benchmark (2 days)

In [76]:
# Same persistence baseline as the 7-day variant, evaluated with a 2-day delay.
# NOTE(review): presumably the forecast is the value observed `delay_days` earlier —
# confirm in Persistence_Benchmark.predict.
pb_reg = Persistence_Benchmark(GroupH_df)
y_pers_benchmark_2 = pb_reg.predict(test_set['targetTime'],delay_days=2)

In [77]:
print("\nPersistence Benchmark (2 days)")

# Score the forecast separately on each test year, one error metric at a time.
observed = np.array(test_set['node'])
yearly_spans = (
    ("2019", slice(None, idx_2020)),
    ("2020", slice(idx_2020, idx_2021)),
    ("2021", slice(idx_2021, None)),
)
for metric_label, metric_fn in (("RMSE", RMSE), ("MAE", MAE), ("MAPE", MAPE)):
    for position, (year, span) in enumerate(yearly_spans):
        # A blank line separates the metric groups in the printed report.
        lead = "\n" if position == 0 else ""
        print(f"{lead}{metric_label} in {year}: ", metric_fn(observed[span], y_pers_benchmark_2[span]))


Persistence Benchmark (2 days)

RMSE in 2019:  220.16040231886655
RMSE in 2020:  203.561657112538
RMSE in 2021:  189.04972687716034

MAE in 2019:  151.80675256907972
MAE in 2020:  133.33989623607005
MAE in 2021:  130.96505272239264

MAPE in 2019:  0.09733555424663555
MAPE in 2020:  0.09194374476979111
MAPE in 2021:  0.08767816394319017


## GAM-Point model 
Forecasts obtained in R with the bam function of the mgcv package

Target variable: Net-Load

In [79]:
# Load the GAM-Point forecasts that were produced in R.
# os.path.join keeps the path independent of the Windows '\\' separator.
y_R = pyreadr.read_r(os.path.join(home, 'Other_data', 'Results R', 'GroupH_GAM_R_bam.rda'))
# read_r returns a mapping of R object name -> DataFrame; the first object is used.
y_R = pd.DataFrame(list(y_R.values())[0])
# Flatten to a 1-D array so it can be sliced like the other forecasts.
y_R_test_set = np.array(y_R).flatten()

In [80]:
print("GAM-Point model")

# Score the forecast separately on each test year, one error metric at a time.
observed = np.array(test_set['node'])
yearly_spans = (
    ("2019", slice(None, idx_2020)),
    ("2020", slice(idx_2020, idx_2021)),
    ("2021", slice(idx_2021, None)),
)
for metric_label, metric_fn in (("RMSE", RMSE), ("MAE", MAE), ("MAPE", MAPE)):
    for position, (year, span) in enumerate(yearly_spans):
        # A blank line separates the metric groups in the printed report.
        lead = "\n" if position == 0 else ""
        print(f"{lead}{metric_label} in {year}: ", metric_fn(observed[span], y_R_test_set[span]))

GAM-Point model

RMSE in 2019:  106.61893521380165
RMSE in 2020:  116.0124461556075
RMSE in 2021:  106.01161052295619

MAE in 2019:  81.29382779313119
MAE in 2020:  87.90709778782808
MAE in 2021:  83.7235301622973

MAPE in 2019:  0.053368960895191365
MAPE in 2020:  0.06491147170429751
MAPE in 2021:  0.057117468990631075


## Static Kalman GAM
Results obtained in R

In [82]:
# Load the Static Kalman GAM forecasts that were produced in R.
# os.path.join keeps the path independent of the Windows '\\' separator.
GroupH_KF = pyreadr.read_r(os.path.join(home, 'Other_data', 'KF R', 'GroupH_KF_static_delay.rda'))
# read_r returns a mapping of R object name -> DataFrame; the first object is used.
y_kf_static = pd.DataFrame(list(GroupH_KF.values())[0])
# Flatten to a 1-D array so it can be sliced like the other forecasts.
y_kf_static_delay_R = np.array(y_kf_static).flatten()

In [83]:
print("Static Kalman GAM")

# Score the forecast separately on each test year, one error metric at a time.
observed = np.array(test_set['node'])
yearly_spans = (
    ("2019", slice(None, idx_2020)),
    ("2020", slice(idx_2020, idx_2021)),
    ("2021", slice(idx_2021, None)),
)
for metric_label, metric_fn in (("RMSE", RMSE), ("MAE", MAE), ("MAPE", MAPE)):
    for position, (year, span) in enumerate(yearly_spans):
        # A blank line separates the metric groups in the printed report.
        lead = "\n" if position == 0 else ""
        print(f"{lead}{metric_label} in {year}: ", metric_fn(observed[span], y_kf_static_delay_R[span]))

Static Kalman GAM

RMSE in 2019:  104.4420666339798
RMSE in 2020:  113.6898510293945
RMSE in 2021:  103.4757588891056

MAE in 2019:  78.47536247978918
MAE in 2020:  84.23939294363015
MAE in 2021:  80.68828547978833

MAPE in 2019:  0.0513161144125375
MAPE in 2020:  0.062175320525447685
MAPE in 2021:  0.05458901053826242


## Dynamic Kalman GAM 
Hyperparameters obtained with the Reduced Grid Search (I) method, which uses the Identity Initialization Matrix

In [85]:
# Load the Dynamic Kalman GAM forecasts that were produced in R.
# os.path.join keeps the path independent of the Windows '\\' separator.
GroupH_KF = pyreadr.read_r(os.path.join(home, 'Other_data', 'KF R', 'GroupH_KF_dynamic_delay.rda'))
# read_r returns a mapping of R object name -> DataFrame; the first object is used.
y_kf_dynamic = pd.DataFrame(list(GroupH_KF.values())[0])
# Flatten to a 1-D array so it can be sliced like the other forecasts.
y_kf_dynamic_delay_R = np.array(y_kf_dynamic).flatten()

In [86]:
print("Dynamic Kalman GAM ")

# Score the forecast separately on each test year, one error metric at a time.
observed = np.array(test_set['node'])
yearly_spans = (
    ("2019", slice(None, idx_2020)),
    ("2020", slice(idx_2020, idx_2021)),
    ("2021", slice(idx_2021, None)),
)
for metric_label, metric_fn in (("RMSE", RMSE), ("MAE", MAE), ("MAPE", MAPE)):
    for position, (year, span) in enumerate(yearly_spans):
        # A blank line separates the metric groups in the printed report.
        lead = "\n" if position == 0 else ""
        print(f"{lead}{metric_label} in {year}: ", metric_fn(observed[span], y_kf_dynamic_delay_R[span]))

Dynamic Kalman GAM 

RMSE in 2019:  103.49325808707931
RMSE in 2020:  106.13262901440243
RMSE in 2021:  98.04103325650613

MAE in 2019:  76.16515732083865
MAE in 2020:  78.57499349269618
MAE in 2021:  72.59925237508313

MAPE in 2019:  0.04949959592166312
MAPE in 2020:  0.0564069568637273
MAPE in 2021:  0.04804592418166319


# LM-Point model
Linear Regression model 

Response variable: 7-days differenced net-load

In [88]:
# Design matrix for the training period.
St_train = regression_feature_matrix(train_set)

# Ordinary least squares with the 7-days differenced net-load as response.
model = sm.OLS(train_set["diff_336"], St_train)
results = model.fit()
params = results.params

# Design matrix for the test period.
St_test = regression_feature_matrix(test_set)

# Undo the differencing: the net-load value of 7 days prior is added back
# to the differenced net-load forecast.
net_load_7d_prior = test_set['y0_336'].to_numpy()
y_lin = net_load_7d_prior + St_test @ params

In [89]:
idx_2020 = 17516  # Index of the first data point in 2020
idx_2021 = 35104  # Index of the first data point in 2021

print("LM-Point model")

# Score the forecast separately on each test year, one error metric at a time.
observed = np.array(test_set['node'])
yearly_spans = (
    ("2019", slice(None, idx_2020)),
    ("2020", slice(idx_2020, idx_2021)),
    ("2021", slice(idx_2021, None)),
)
for metric_label, metric_fn in (("RMSE", RMSE), ("MAE", MAE), ("MAPE", MAPE)):
    for position, (year, span) in enumerate(yearly_spans):
        # A blank line separates the metric groups in the printed report.
        lead = "\n" if position == 0 else ""
        print(f"{lead}{metric_label} in {year} test set: ", metric_fn(observed[span], y_lin[span]))

LM-Point model

RMSE in 2019 test set:  96.60328239011804
RMSE in 2020 test set:  104.09345692540458
RMSE in 2021 test set:  97.60986621971585

MAE in 2019 test set:  70.29332605322017
MAE in 2020 test set:  77.1688325335597
MAE in 2021 test set:  71.63022459785049

MAPE in 2019 test set:  0.045065404895781626
MAPE in 2020 test set:  0.05535781565926888
MAPE in 2021 test set:  0.04767382634979983


# Static Kalman LM

In [91]:
# Static Kalman filter on top of the linear model: it is initialised with the OLS
# coefficients (`params`) and fitted on the 7-days differenced net-load.
kf_static_OLS = Kalman_Filter(GAM=False,params=params)
kf_static_OLS.fit(St_train, train_set['diff_336'])
# Standard deviation of each component of the state vector, computed column-wise over
# theta_mat (required for the Reduced Grid Search (II) method).
# NOTE(review): the first 15000 rows of theta_mat are discarded — presumably a burn-in
# period; confirm the choice of this cut-off.
std_static_OLS = np.std(kf_static_OLS.theta_mat[15000:],axis=0)

# Rebuilt here so the cell does not depend on St_test from the LM-Point cell.
St_test = regression_feature_matrix(test_set)
# Only the first returned value (the mean forecast) is kept.
# NOTE(review): the meaning of delay=True is defined in Kalman_Filter.predict — confirm.
y_static_mean, _ = kf_static_OLS.predict(St_test, test_set[["targetTime", "diff_336"]], delay=True) 

# The net-load value of 7 days prior is added back to the differenced net-load forecast 
# NOTE(review): in-place add of a pandas Series; assumes y_static_mean is a NumPy array of
# the same length so the addition is positional — verify the return type of predict.
y_static_mean += test_set["y0_336"]

In [92]:
# This cell evaluates the Static Kalman LM forecast (y_static_mean), so the banner
# names the linear-model variant rather than the GAM one.
print("Static Kalman LM")

# Score the forecast separately on each test year, one error metric at a time.
observed = np.array(test_set['node'])
yearly_spans = (
    ("2019", slice(None, idx_2020)),
    ("2020", slice(idx_2020, idx_2021)),
    ("2021", slice(idx_2021, None)),
)
for metric_label, metric_fn in (("RMSE", RMSE), ("MAE", MAE), ("MAPE", MAPE)):
    for position, (year, span) in enumerate(yearly_spans):
        # A blank line separates the metric groups in the printed report.
        lead = "\n" if position == 0 else ""
        print(f"{lead}{metric_label} in {year}: ", metric_fn(observed[span], y_static_mean[span]))

Static Kalman GAM

RMSE in 2019:  95.28851719866422
RMSE in 2020:  100.74829602630595
RMSE in 2021:  95.77172009342382

MAE in 2019:  68.92433090360008
MAE in 2020:  73.8628658202332
MAE in 2021:  69.61375847701808

MAPE in 2019:  0.044203363776452544
MAPE in 2020:  0.053029564934247896
MAPE in 2021:  0.0461792229979768


# Dynamic Kalman LM

Hyperparameter optimization with the Reduced Grid Search (II) method, which uses the State Variance Optimization method.


In [95]:
# Candidate q values explored by the grid search, from 1e-10 down to 1e-19.
q_list = [
    1e-10, 1e-11, 1e-12, 1e-13, 1e-14,
    1e-15, 1e-16, 1e-17, 1e-18, 1e-19,
]
optim = Q_optimization(
    St_train,
    train_set[["targetTime", "diff_336"]],
    GAM=False,
    params=params,
)
# Reduced Grid Search (II): the state standard deviations of the static filter are supplied.
Q, sigma, P, theta1 = optim.grid_search_reduced_likelihood(
    q_list=q_list,
    std_static=std_static_OLS,
)

1e-10
-542159.7934466202
1e-11
-532596.5543116047
1e-12
-1135876.2049659952
1e-13
-535433.3719251675
1e-14
-525295.1600923741
1e-15
-525114.7228136403
1e-16
-525159.9836922858
1e-17
-525244.3670362837
1e-18
-525236.3715870035
1e-19
-525295.5102590172
Optimal q:  9.41630407301045e-12
Optimal sigma:  97.03764255695029
Maximum likelihood achieved:  -525114.7228136403


In [96]:
# Dynamic Kalman filter on top of the linear model, configured with the hyperparameters
# (Q, sigma, P, theta1) returned by the grid search and with the OLS coefficients.
kf_dynamic_OLS = Kalman_Filter(GAM=False, Q=Q, sigma=sigma, P=P, theta1=theta1, params=params)
kf_dynamic_OLS.fit(St_train, train_set['diff_336'])

# Rebuilt here so the cell does not depend on St_test from an earlier cell.
St_test = regression_feature_matrix(test_set)

# Only the first returned value (the mean forecast) is kept.
# NOTE(review): the meaning of delay=True is defined in Kalman_Filter.predict — confirm.
y_dynamic_mean, _ = kf_dynamic_OLS.predict(St_test, test_set[["targetTime", "diff_336"]], delay=True)

# The net-load value of 7 days prior is added back to the differenced net-load forecast 
# NOTE(review): in-place add of a pandas Series; assumes y_dynamic_mean is a NumPy array of
# the same length so the addition is positional — verify the return type of predict.
y_dynamic_mean += test_set["y0_336"]

In [97]:
print("Dynamic Kalman LM")

# Score the forecast separately on each test year, one error metric at a time.
observed = np.array(test_set['node'])
yearly_spans = (
    ("2019", slice(None, idx_2020)),
    ("2020", slice(idx_2020, idx_2021)),
    ("2021", slice(idx_2021, None)),
)
for metric_label, metric_fn in (("RMSE", RMSE), ("MAE", MAE), ("MAPE", MAPE)):
    for position, (year, span) in enumerate(yearly_spans):
        # A blank line separates the metric groups in the printed report.
        lead = "\n" if position == 0 else ""
        print(f"{lead}{metric_label} in {year}: ", metric_fn(observed[span], y_dynamic_mean[span]))

Dynamic Kalman LM

RMSE in 2019:  94.0424228970837
RMSE in 2020:  96.21955447374991
RMSE in 2021:  94.1786141086088

MAE in 2019:  67.60337927254251
MAE in 2020:  70.1672143965029
MAE in 2021:  68.18018338962975

MAPE in 2019:  0.04347423510950895
MAPE in 2020:  0.04958275088161171
MAPE in 2021:  0.04524038552223662


## Comparison of target variables for the Dynamic Kalman GAM, using the Iterative Grid Search

### Target variable: Normalized net-load

List: [1, 2^-1, 2^-2, 2^-3, 2^-4, 2^-5, 2^-6, 2^-7, 2^-8, 2^-9, 2^-10, 
         2^-11, 2^-12, 2^-13, 2^-14, 2^-15, 2^-16, 2^-17, 2^-18, 2^-19,2^-20, 
         2^-21, 2^-22, 2^-23, 2^-24, 2^-25, 2^-26, 2^-27, 2^-28, 2^-29,2^-30]

In [100]:
# Load the Dynamic Kalman GAM forecasts obtained with the normalized net-load target.
# os.path.join keeps the path independent of the Windows '\\' separator.
GroupH_KF = pyreadr.read_r(os.path.join(home, 'Other_data', 'KF R', 'GroupH_KF_dynamic_delay_node_n.rda'))
# read_r returns a mapping of R object name -> DataFrame; the first object is used.
y_kf_dynamic = pd.DataFrame(list(GroupH_KF.values())[0])
# Flatten to a 1-D array so it can be sliced like the other forecasts.
y_kf_dynamic_delay_R_n = np.array(y_kf_dynamic).flatten()

In [101]:
print("Dynamic Kalman GAM - Normalized net-load")

# Score the forecast separately on each test year, one error metric at a time.
observed = np.array(test_set['node'])
yearly_spans = (
    ("2019", slice(None, idx_2020)),
    ("2020", slice(idx_2020, idx_2021)),
    ("2021", slice(idx_2021, None)),
)
for metric_label, metric_fn in (("RMSE", RMSE), ("MAE", MAE), ("MAPE", MAPE)):
    for position, (year, span) in enumerate(yearly_spans):
        # A blank line separates the metric groups in the printed report.
        lead = "\n" if position == 0 else ""
        print(f"{lead}{metric_label} in {year}: ", metric_fn(observed[span], y_kf_dynamic_delay_R_n[span]))

Dynamic Kalman GAM - Normalized net-load

RMSE in 2019:  105.51009323861815
RMSE in 2020:  113.11692027105288
RMSE in 2021:  101.30868149135145

MAE in 2019:  78.7350628756044
MAE in 2020:  86.69605550065232
MAE in 2021:  75.39267303333975

MAPE in 2019:  0.05210814669965793
MAPE in 2020:  0.06264185223499273
MAPE in 2021:  0.05138719835115927


### Target variable: Net-load

List: [2^-15,2^-20,2^-25,2^-28,2^-30,2^-32,2^-34,2^-36,2^-38,2^-40,2^-45,2^-50]

In [103]:
# Load the Dynamic Kalman GAM forecasts obtained with the net-load target.
# os.path.join keeps the path independent of the Windows '\\' separator.
GroupH_KF = pyreadr.read_r(os.path.join(home, 'Other_data', 'KF R', 'GroupH_KF_dynamic_delay_node.rda'))
# read_r returns a mapping of R object name -> DataFrame; the first object is used.
y_kf_dynamic = pd.DataFrame(list(GroupH_KF.values())[0])
# NOTE: this rebinds y_kf_dynamic_delay_R, which the earlier "Dynamic Kalman GAM" cell
# filled from GroupH_KF_dynamic_delay.rda; from here on the name holds these forecasts.
y_kf_dynamic_delay_R = np.array(y_kf_dynamic).flatten()

In [104]:
print("Dynamic Kalman GAM - Net-load")

# Score the forecast separately on each test year, one error metric at a time.
observed = np.array(test_set['node'])
yearly_spans = (
    ("2019", slice(None, idx_2020)),
    ("2020", slice(idx_2020, idx_2021)),
    ("2021", slice(idx_2021, None)),
)
for metric_label, metric_fn in (("RMSE", RMSE), ("MAE", MAE), ("MAPE", MAPE)):
    for position, (year, span) in enumerate(yearly_spans):
        # A blank line separates the metric groups in the printed report.
        lead = "\n" if position == 0 else ""
        print(f"{lead}{metric_label} in {year} test set: ", metric_fn(observed[span], y_kf_dynamic_delay_R[span]))

Dynamic Kalman GAM - Net-load

RMSE in 2019 test set:  101.8352043361874
RMSE in 2020 test set:  102.87521626879193
RMSE in 2021 test set:  97.0009123257151

MAE in 2019 test set:  74.93466830353752
MAE in 2020 test set:  76.66047253478315
MAE in 2021 test set:  70.66913800071227

MAPE in 2019 test set:  0.04864816815397028
MAPE in 2020 test set:  0.05415622932216368
MAPE in 2021 test set:  0.04711334781275538


# Kalman LM Dynamic - Likelihood comparison 

### Hyperparameter Selection - Reduced Likelihood (I)

In [108]:
# Candidate q values explored by the grid search, from 1e-10 down to 1e-19.
q_list = [
    1e-10, 1e-11, 1e-12, 1e-13, 1e-14,
    1e-15, 1e-16, 1e-17, 1e-18, 1e-19,
]

optim = Q_optimization(
    St_train,
    train_set[["targetTime", "diff_336"]],
    GAM=False,
    params=params,
)
# Reduced Likelihood (I): no std_static is passed, so the method's default is used.
# Note that Q, sigma, P and theta1 are rebound by this call.
Q, sigma, P, theta1 = optim.grid_search_reduced_likelihood(q_list=q_list)

1e-10
-889194.8115960334
1e-11
-889011.0094138125
1e-12
-888139.0463266638
1e-13
-888845.6661388713
1e-14
-912618.5820219181
1e-15
-932729.7302932166
1e-16
-864005.8438412691
1e-17
-827219.8053207162
1e-18
-784422.9227173165
1e-19
-885904.309617551
Optimal q:  1.2473722243376765e-16
Optimal sigma:  11.168581934774336
Maximum likelihood achieved:  -784422.9227173165


### Hyperparameter selection - Reduced Likelihood (II)

In [110]:
# Candidate q values explored by the grid search, from 1e-10 down to 1e-19.
q_list = [
    1e-10, 1e-11, 1e-12, 1e-13, 1e-14,
    1e-15, 1e-16, 1e-17, 1e-18, 1e-19,
]

optim = Q_optimization(
    St_train,
    train_set[["targetTime", "diff_336"]],
    GAM=False,
    params=params,
)
# Reduced Likelihood (II): same call as in the "Dynamic Kalman LM" section, repeated here
# for the side-by-side likelihood comparison. Q, sigma, P and theta1 are rebound again.
Q, sigma, P, theta1 = optim.grid_search_reduced_likelihood(
    q_list=q_list,
    std_static=std_static_OLS,
)

1e-10
-542159.7934466202
1e-11
-532596.5543116047
1e-12
-1135876.2049659952
1e-13
-535433.3719251675
1e-14
-525295.1600923741
1e-15
-525114.7228136403
1e-16
-525159.9836922858
1e-17
-525244.3670362837
1e-18
-525236.3715870035
1e-19
-525295.5102590172
Optimal q:  9.41630407301045e-12
Optimal sigma:  97.03764255695029
Maximum likelihood achieved:  -525114.7228136403
