In [103]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from statsmodels.tsa.stattools import pacf, acf
import statsmodels.tsa.api as smt
from statsmodels.tsa.stattools import adfuller
from scipy.optimize import minimize
import cvxopt
import pyreadr
import os
import time
from datetime import timedelta
from scipy.stats import norm
import copy
import warnings
import utils
import regression_fit
import pickle
from importlib import reload
# Re-execute the project modules so that edits made to utils.py and
# regression_fit.py are picked up without restarting the kernel.
reload(utils)
reload(regression_fit)
# The star-imports must run after the reloads: they rebind the notebook-level
# names to the freshly reloaded definitions.
# NOTE(review): star-imports hide which module provides helpers such as RMSE,
# Kalman_Filter or regression_feature_matrix; explicit imports would be clearer.
from utils import *
from regression_fit import *

# Base directory used to build every data path below: the current working
# directory at the time this cell runs.
home = os.getcwd()

## Dataset preprocessing
Train set: from 2014 to 2018. Test set: from 2019 to 2021

In [105]:
# Import dataset of group P
# The path is assembled with os.path.join so that it resolves on every
# operating system (hard-coded '\\' separators only work on Windows).
GroupP = pyreadr.read_r(os.path.join(home, 'Other_data', 'GroupP.rda'))
# read_r returns a mapping of R object names to data frames; the first
# (presumably only) object stored in the file is used.
GroupP_df = pd.DataFrame(list(GroupP.values())[0])

# Add lagged net-load values (columns y0_96, y0_336, diff_96, diff_336)
GroupP_df = add_autoregressive_features(GroupP_df)

# Split in train and test set using explicit positional row ranges.
# NOTE(review): 720 and 87440 are hard-coded row positions; presumably the
# first 720 rows are dropped because their lagged features are incomplete and
# row 87440 is the first observation of 2019 - confirm against the data.
train_set = GroupP_df.iloc[720:87440]
test_set = GroupP_df.iloc[87440:]

## Persistence benchmark (7 days)

In [108]:
# Persistence forecaster built on the full data set; each test timestamp is
# forecast with a delay of 7 days.
pb_reg = Persistence_Benchmark(GroupP_df)
y_pers_benchmark = pb_reg.predict(
    test_set['targetTime'],
    delay_days=7,
)

In [109]:
# Positions, inside test_set, of the first data point of 2020 and of 2021.
idx_2020 = 17516
idx_2021 = 35104

print("\nPersistence Benchmark (7 days)")

# Each reported year is a positional window of the test set.
year_windows = (
    ("2019", slice(None, idx_2020)),
    ("2020", slice(idx_2020, idx_2021)),
    ("2021", slice(idx_2021, None)),
)

# Print every metric year by year; the first line of each metric group is
# preceded by a blank line.
for metric_name, metric_fn in (("RMSE", RMSE), ("MAE", MAE), ("MAPE", MAPE)):
    for year_label, year_window in year_windows:
        lead = "\n" if year_label == "2019" else ""
        observed = np.array(test_set['node'][year_window])
        print(f"{lead}{metric_name} in {year_label}: ", metric_fn(observed, y_pers_benchmark[year_window]))


Persistence Benchmark (7 days)

RMSE in 2019:  292.55394652390055
RMSE in 2020:  259.1673348688048
RMSE in 2021:  265.2277872863094

MAE in 2019:  231.81921620232933
MAE in 2020:  202.5448496702297
MAE in 2021:  204.00244913087937

MAPE in 2019:  4.407910843840326
MAPE in 2020:  5.862696597558205
MAPE in 2021:  9.351673913750366


## Persistence benchmark (2 days)

In [111]:
# Same persistence forecaster as above, this time with a delay of 2 days.
pb_reg = Persistence_Benchmark(GroupP_df)
y_pers_benchmark_2 = pb_reg.predict(
    test_set['targetTime'],
    delay_days=2,
)

In [112]:
print("\nPersistence Benchmark (2 days)")

# idx_2020 / idx_2021 are defined in an earlier cell; each reported year is a
# positional window of the test set.
year_windows = (
    ("2019", slice(None, idx_2020)),
    ("2020", slice(idx_2020, idx_2021)),
    ("2021", slice(idx_2021, None)),
)

# Print every metric year by year; the first line of each metric group is
# preceded by a blank line.
for metric_name, metric_fn in (("RMSE", RMSE), ("MAE", MAE), ("MAPE", MAPE)):
    for year_label, year_window in year_windows:
        lead = "\n" if year_label == "2019" else ""
        observed = np.array(test_set['node'][year_window])
        print(f"{lead}{metric_name} in {year_label}: ", metric_fn(observed, y_pers_benchmark_2[year_window]))


Persistence Benchmark (2 days)

RMSE in 2019:  262.32441266686504
RMSE in 2020:  257.881499528715
RMSE in 2021:  223.74165478365265

MAE in 2019:  201.99560259191594
MAE in 2020:  199.102959631567
MAE in 2021:  169.10488190184049

MAPE in 2019:  3.8606272937062394
MAPE in 2020:  5.02569956312503
MAPE in 2021:  9.31237564793908


## GAM-Point model 
Forecasts obtained in R with the bam function of the mgcv package

Target variable: Net-Load

In [114]:
# Load the GAM-Point forecasts exported from R.
# os.path.join keeps the path valid on every operating system (hard-coded
# '\\' separators only resolve on Windows).
y_R = pyreadr.read_r(os.path.join(home, 'Other_data', 'Results R', 'GroupP_GAM_R_bam.rda'))
# read_r returns a mapping of R object names to data frames; take the first one.
y_R = pd.DataFrame(list(y_R.values())[0])
# Flatten to a 1-D array so it can be sliced by position like the test set.
y_R_test_set = np.array(y_R).flatten()

In [115]:
print("GAM-Point model")

# idx_2020 / idx_2021 are defined in an earlier cell; each reported year is a
# positional window of the test set.
year_windows = (
    ("2019", slice(None, idx_2020)),
    ("2020", slice(idx_2020, idx_2021)),
    ("2021", slice(idx_2021, None)),
)

# Print every metric year by year; the first line of each metric group is
# preceded by a blank line.
for metric_name, metric_fn in (("RMSE", RMSE), ("MAE", MAE), ("MAPE", MAPE)):
    for year_label, year_window in year_windows:
        lead = "\n" if year_label == "2019" else ""
        observed = np.array(test_set['node'][year_window])
        print(f"{lead}{metric_name} in {year_label}: ", metric_fn(observed, y_R_test_set[year_window]))

GAM-Point model

RMSE in 2019:  80.00135281297064
RMSE in 2020:  82.32824973099942
RMSE in 2021:  80.44287751753939

MAE in 2019:  61.713971832099666
MAE in 2020:  63.304719196287586
MAE in 2021:  65.29617055765894

MAPE in 2019:  1.5242563857138358
MAPE in 2020:  1.6796243694633595
MAPE in 2021:  2.7453055043591332


## Static Kalman GAM
Results obtained in R

In [117]:
# Load the static Kalman GAM forecasts exported from R.
# os.path.join keeps the path valid on every operating system (hard-coded
# '\\' separators only resolve on Windows).
GroupP_KF = pyreadr.read_r(os.path.join(home, 'Other_data', 'KF R', 'GroupP_KF_static_delay.rda'))
# read_r returns a mapping of R object names to data frames; take the first one.
y_kf_static = pd.DataFrame(list(GroupP_KF.values())[0])
# Flatten to a 1-D array so it can be sliced by position like the test set.
y_kf_static_delay_R = np.array(y_kf_static).flatten()

In [118]:
print("Static Kalman GAM")

# idx_2020 / idx_2021 are defined in an earlier cell; each reported year is a
# positional window of the test set.
year_windows = (
    ("2019", slice(None, idx_2020)),
    ("2020", slice(idx_2020, idx_2021)),
    ("2021", slice(idx_2021, None)),
)

# Print every metric year by year; the first line of each metric group is
# preceded by a blank line.
for metric_name, metric_fn in (("RMSE", RMSE), ("MAE", MAE), ("MAPE", MAPE)):
    for year_label, year_window in year_windows:
        lead = "\n" if year_label == "2019" else ""
        observed = np.array(test_set['node'][year_window])
        print(f"{lead}{metric_name} in {year_label}: ", metric_fn(observed, y_kf_static_delay_R[year_window]))

Static Kalman GAM

RMSE in 2019:  80.30028047354412
RMSE in 2020:  81.90940119773698
RMSE in 2021:  77.18460197357831

MAE in 2019:  62.08315748184486
MAE in 2020:  63.47175552025848
MAE in 2021:  61.24493518687378

MAPE in 2019:  1.5389645183685394
MAPE in 2020:  1.8005520654871139
MAPE in 2021:  2.5262437575897554


## Dynamic Kalman GAM 
Hyperparameters obtained with the Reduced Grid Search (I) method, which uses the Identity Initialization Matrix

In [120]:
# Load the dynamic Kalman GAM forecasts exported from R.
# os.path.join keeps the path valid on every operating system (hard-coded
# '\\' separators only resolve on Windows).
GroupP_KF = pyreadr.read_r(os.path.join(home, 'Other_data', 'KF R', 'GroupP_KF_dynamic_delay.rda'))
# read_r returns a mapping of R object names to data frames; take the first one.
y_kf_dynamic = pd.DataFrame(list(GroupP_KF.values())[0])
# Flatten to a 1-D array so it can be sliced by position like the test set.
y_kf_dynamic_delay_R = np.array(y_kf_dynamic).flatten()

In [121]:
print("Dynamic Kalman GAM ")

# idx_2020 / idx_2021 are defined in an earlier cell; each reported year is a
# positional window of the test set.
year_windows = (
    ("2019", slice(None, idx_2020)),
    ("2020", slice(idx_2020, idx_2021)),
    ("2021", slice(idx_2021, None)),
)

# Print every metric year by year; the first line of each metric group is
# preceded by a blank line.
for metric_name, metric_fn in (("RMSE", RMSE), ("MAE", MAE), ("MAPE", MAPE)):
    for year_label, year_window in year_windows:
        lead = "\n" if year_label == "2019" else ""
        observed = np.array(test_set['node'][year_window])
        print(f"{lead}{metric_name} in {year_label}: ", metric_fn(observed, y_kf_dynamic_delay_R[year_window]))

Dynamic Kalman GAM 

RMSE in 2019:  79.41636783328305
RMSE in 2020:  77.92443077497875
RMSE in 2021:  74.1238492177661

MAE in 2019:  61.180437472207146
MAE in 2020:  60.11536402904546
MAE in 2021:  56.16068739562044

MAPE in 2019:  1.5247846511908258
MAPE in 2020:  1.6776333552220644
MAPE in 2021:  2.3146284228930627


# LM-Point model
Linear Regression model 

Response variable: 7-days differenced net-load

In [123]:
# Regression design matrix built from the training window.
St_train = regression_feature_matrix(train_set)

# Ordinary least squares with the 7-days differenced net-load as response.
model = sm.OLS(train_set["diff_336"], St_train)
results = model.fit()
params = results.params

# Same feature construction applied to the test window.
St_test = regression_feature_matrix(test_set)

# The model forecasts the differenced series, so the net-load value of
# 7 days prior is added back to obtain a net-load forecast.
lagged_net_load = np.array(test_set['y0_336'])
differenced_forecast = St_test@params
y_lin = lagged_net_load + differenced_forecast

In [124]:
idx_2020 = 17516  # Index of the first data point in 2020
idx_2021 = 35104  # Index of the first data point in 2021

print("LM-Point model")

# Each reported year is a positional window of the test set.
year_windows = (
    ("2019", slice(None, idx_2020)),
    ("2020", slice(idx_2020, idx_2021)),
    ("2021", slice(idx_2021, None)),
)

# Print every metric year by year; the first line of each metric group is
# preceded by a blank line.
for metric_name, metric_fn in (("RMSE", RMSE), ("MAE", MAE), ("MAPE", MAPE)):
    for year_label, year_window in year_windows:
        lead = "\n" if year_label == "2019" else ""
        observed = np.array(test_set['node'][year_window])
        print(f"{lead}{metric_name} in {year_label} test set: ", metric_fn(observed, y_lin[year_window]))

LM-Point model

RMSE in 2019 test set:  80.09231986504119
RMSE in 2020 test set:  82.97615112523845
RMSE in 2021 test set:  75.49898950714623

MAE in 2019 test set:  61.89297145521798
MAE in 2020 test set:  63.72738997694685
MAE in 2021 test set:  58.028492314588036

MAPE in 2019 test set:  1.47477360177377
MAPE in 2020 test set:  1.497802805167625
MAPE in 2021 test set:  2.379081095550772


# Static Kalman LM

In [126]:
# Kalman filter with default hyperparameters, initialised from the OLS
# coefficients and fitted on the differenced training target.
kf_static_OLS = Kalman_Filter(GAM=False, params=params)
kf_static_OLS.fit(St_train, train_set['diff_336'])

# Standard deviation of each state-vector component, required by the
# Reduced Grid Search (II) method. The first 1000 rows of theta_mat are
# skipped (presumably a burn-in period - confirm).
state_history = kf_static_OLS.theta_mat[1000:]
std_static_OLS = np.std(state_history, axis=0)

# Forecast the differenced target on the test window; the second value
# returned by predict is not used.
St_test = regression_feature_matrix(test_set)
static_test_inputs = test_set[["targetTime", "diff_336"]]
y_static_mean, _ = kf_static_OLS.predict(St_test, static_test_inputs, delay=True)

# Add back the net-load value of 7 days prior (in place) to turn the
# differenced forecast into a net-load forecast.
y_static_mean += test_set["y0_336"]

In [127]:
# This cell evaluates y_static_mean, the forecast of the static Kalman filter
# built on the linear model, so the header must read "LM" (it previously
# printed "Static Kalman GAM", mislabelling the results).
print("Static Kalman LM")

# idx_2020 / idx_2021 are defined in an earlier cell; each reported year is a
# positional window of the test set.
year_windows = (
    ("2019", slice(None, idx_2020)),
    ("2020", slice(idx_2020, idx_2021)),
    ("2021", slice(idx_2021, None)),
)

# Print every metric year by year; the first line of each metric group is
# preceded by a blank line.
for metric_name, metric_fn in (("RMSE", RMSE), ("MAE", MAE), ("MAPE", MAPE)):
    for year_label, year_window in year_windows:
        lead = "\n" if year_label == "2019" else ""
        observed = np.array(test_set['node'][year_window])
        print(f"{lead}{metric_name} in {year_label}: ", metric_fn(observed, y_static_mean[year_window]))

Static Kalman GAM

RMSE in 2019:  79.38248488518587
RMSE in 2020:  81.7540359652131
RMSE in 2021:  75.77715099577722

MAE in 2019:  61.204017014302316
MAE in 2020:  63.020080870085955
MAE in 2021:  58.40989492809905

MAPE in 2019:  1.4703606091216066
MAPE in 2020:  1.5531166440001754
MAPE in 2021:  2.3541306327072657


# Dynamic Kalman LM

Hyperparameter optimization with the Reduced Grid Search (II) method, which uses the State Variance Optimization method


In [130]:
# Candidate values explored by the grid search, from 1e-10 down to 1e-19.
q_list = [
    1e-10, 1e-11, 1e-12, 1e-13, 1e-14,
    1e-15, 1e-16, 1e-17, 1e-18, 1e-19,
]

# Passing std_static selects the variant that uses the state standard
# deviations of the static filter (Reduced Grid Search (II)).
optim = Q_optimization(
    St_train,
    train_set[["targetTime", "diff_336"]],
    GAM=False,
    params=params,
)
Q, sigma, P, theta1 = optim.grid_search_reduced_likelihood(
    q_list=q_list,
    std_static=std_static_OLS,
)

1e-10
-677839.3291876058
1e-11
-1198532.2196018833
1e-12
-686188.4993597809
1e-13
-553302.2452294303
1e-14
-517469.17891004635
1e-15
-553565.7927980763
1e-16
-505393.2736914998
1e-17
-1445832.3660070014
1e-18
-1445622.9031761996
1e-19
-1445542.0915700044
Optimal q:  5.945813747741978e-13
Optimal sigma:  77.10910288508082
Maximum likelihood achieved:  -505393.2736914998


In [131]:
# Kalman filter configured with the hyperparameters returned by the grid
# search (Q, sigma, P, theta1) and fitted on the differenced training target.
kf_dynamic_OLS = Kalman_Filter(
    GAM=False,
    Q=Q,
    sigma=sigma,
    P=P,
    theta1=theta1,
    params=params,
)
kf_dynamic_OLS.fit(St_train, train_set['diff_336'])

# Forecast the differenced target on the test window; the second value
# returned by predict is not used.
St_test = regression_feature_matrix(test_set)
dynamic_test_inputs = test_set[["targetTime", "diff_336"]]
y_dynamic_mean, _ = kf_dynamic_OLS.predict(St_test, dynamic_test_inputs, delay=True)

# Add back the net-load value of 7 days prior (in place) to turn the
# differenced forecast into a net-load forecast.
y_dynamic_mean += test_set["y0_336"]

In [132]:
print("Dynamic Kalman LM")

# idx_2020 / idx_2021 are defined in an earlier cell; each reported year is a
# positional window of the test set.
year_windows = (
    ("2019", slice(None, idx_2020)),
    ("2020", slice(idx_2020, idx_2021)),
    ("2021", slice(idx_2021, None)),
)

# Print every metric year by year; the first line of each metric group is
# preceded by a blank line.
for metric_name, metric_fn in (("RMSE", RMSE), ("MAE", MAE), ("MAPE", MAPE)):
    for year_label, year_window in year_windows:
        lead = "\n" if year_label == "2019" else ""
        observed = np.array(test_set['node'][year_window])
        print(f"{lead}{metric_name} in {year_label}: ", metric_fn(observed, y_dynamic_mean[year_window]))

Dynamic Kalman LM

RMSE in 2019:  77.611503954898
RMSE in 2020:  79.33621414420367
RMSE in 2021:  74.42184481163841

MAE in 2019:  59.786100234258825
MAE in 2020:  60.52927144328827
MAE in 2021:  54.737758883090294

MAPE in 2019:  1.405238161765809
MAPE in 2020:  1.6608013646228688
MAPE in 2021:  2.1327754618646733


## Comparison among target variables for Dynamic Kalman GAM by using Iterative Grid Search

### Target variable: Normalized net-load

List: [1, 2^-1, 2^-2, 2^-3, 2^-4, 2^-5, 2^-6, 2^-7, 2^-8, 2^-9, 2^-10, 
         2^-11, 2^-12, 2^-13, 2^-14, 2^-15, 2^-16, 2^-17, 2^-18, 2^-19,2^-20, 
         2^-21, 2^-22, 2^-23, 2^-24, 2^-25, 2^-26, 2^-27, 2^-28, 2^-29,2^-30]

In [135]:
# Load the dynamic Kalman GAM forecasts (normalized net-load target) exported from R.
# os.path.join keeps the path valid on every operating system (hard-coded
# '\\' separators only resolve on Windows).
GroupP_KF = pyreadr.read_r(os.path.join(home, 'Other_data', 'KF R', 'GroupP_KF_dynamic_delay_node_n.rda'))
# read_r returns a mapping of R object names to data frames; take the first one.
y_kf_dynamic = pd.DataFrame(list(GroupP_KF.values())[0])
# Flatten to a 1-D array so it can be sliced by position like the test set.
y_kf_dynamic_delay_R_n = np.array(y_kf_dynamic).flatten()

In [136]:
print("Dynamic Kalman GAM - Normalized net-load")

# idx_2020 / idx_2021 are defined in an earlier cell; each reported year is a
# positional window of the test set.
year_windows = (
    ("2019", slice(None, idx_2020)),
    ("2020", slice(idx_2020, idx_2021)),
    ("2021", slice(idx_2021, None)),
)

# Print every metric year by year; the first line of each metric group is
# preceded by a blank line.
for metric_name, metric_fn in (("RMSE", RMSE), ("MAE", MAE), ("MAPE", MAPE)):
    for year_label, year_window in year_windows:
        lead = "\n" if year_label == "2019" else ""
        observed = np.array(test_set['node'][year_window])
        print(f"{lead}{metric_name} in {year_label}: ", metric_fn(observed, y_kf_dynamic_delay_R_n[year_window]))

Dynamic Kalman GAM - Normalized net-load

RMSE in 2019:  88.70609551080786
RMSE in 2020:  89.19539806476557
RMSE in 2021:  83.42526588480932

MAE in 2019:  68.35596956773071
MAE in 2020:  68.46388723329187
MAE in 2021:  61.3536277624885

MAPE in 2019:  1.6359001791524739
MAPE in 2020:  1.623263096337977
MAPE in 2021:  1.9418762647767533


### Target variable: Net-load

List: [2^-15,2^-20,2^-25,2^-28,2^-30,2^-32,2^-34,2^-36,2^-38,2^-40,2^-45,2^-50]

In [138]:
# Load the dynamic Kalman GAM forecasts (net-load target) exported from R.
# os.path.join keeps the path valid on every operating system (hard-coded
# '\\' separators only resolve on Windows).
GroupP_KF = pyreadr.read_r(os.path.join(home, 'Other_data', 'KF R', 'GroupP_KF_dynamic_delay_node.rda'))
# read_r returns a mapping of R object names to data frames; take the first one.
y_kf_dynamic = pd.DataFrame(list(GroupP_KF.values())[0])
# NOTE(review): this rebinds y_kf_dynamic_delay_R, which an earlier cell filled
# from GroupP_KF_dynamic_delay.rda; cells run after this one see these values.
y_kf_dynamic_delay_R = np.array(y_kf_dynamic).flatten()

In [139]:
print("Dynamic Kalman GAM - Net-load")

# idx_2020 / idx_2021 are defined in an earlier cell; each reported year is a
# positional window of the test set.
year_windows = (
    ("2019", slice(None, idx_2020)),
    ("2020", slice(idx_2020, idx_2021)),
    ("2021", slice(idx_2021, None)),
)

# Print every metric year by year; the first line of each metric group is
# preceded by a blank line.
for metric_name, metric_fn in (("RMSE", RMSE), ("MAE", MAE), ("MAPE", MAPE)):
    for year_label, year_window in year_windows:
        lead = "\n" if year_label == "2019" else ""
        observed = np.array(test_set['node'][year_window])
        print(f"{lead}{metric_name} in {year_label} test set: ", metric_fn(observed, y_kf_dynamic_delay_R[year_window]))

Dynamic Kalman GAM - Net-load

RMSE in 2019 test set:  79.51694950410396
RMSE in 2020 test set:  78.28246975355825
RMSE in 2021 test set:  74.16464944425509

MAE in 2019 test set:  61.2832267608096
MAE in 2020 test set:  60.055578667167204
MAE in 2021 test set:  56.25918402952177

MAPE in 2019 test set:  1.525506699853394
MAPE in 2020 test set:  1.6554444325986326
MAPE in 2021 test set:  2.332470784719724


# Kalman LM Dynamic - Likelihood comparison 

### Hyperparameter Selection - Reduced Likelihood (I)

In [143]:
# Candidate values explored by the grid search, from 1e-10 down to 1e-19.
q_list = [
    1e-10, 1e-11, 1e-12, 1e-13, 1e-14,
    1e-15, 1e-16, 1e-17, 1e-18, 1e-19,
]

# No std_static argument is passed here (Reduced Likelihood (I) variant).
# NOTE(review): this rebinds Q, sigma, P and theta1 set by an earlier cell.
optim = Q_optimization(
    St_train,
    train_set[["targetTime", "diff_336"]],
    GAM=False,
    params=params,
)
Q, sigma, P, theta1 = optim.grid_search_reduced_likelihood(
    q_list=q_list,
)

1e-10
-984232.9429336358
1e-11
-992795.8081650371
1e-12
-1093308.6127985553
1e-13
-1003460.6589296539
1e-14
-1123611.6700317971
1e-15
-994150.1703309866
1e-16
-965308.4518182022
1e-17
-1158496.923523732
1e-18
-927613.1330812259
1e-19
-796365.5855575106
Optimal q:  1.4213570948267482e-16
Optimal sigma:  37.700889841312076
Maximum likelihood achieved:  -796365.5855575106


### Hyperparameter selection - Reduced Likelihood (II)

In [145]:
# Candidate values explored by the grid search, from 1e-10 down to 1e-19.
q_list = [
    1e-10, 1e-11, 1e-12, 1e-13, 1e-14,
    1e-15, 1e-16, 1e-17, 1e-18, 1e-19,
]

# Passing std_static selects the variant that uses the state standard
# deviations of the static filter (Reduced Likelihood (II)).
# NOTE(review): this rebinds Q, sigma, P and theta1 set by an earlier cell.
optim = Q_optimization(
    St_train,
    train_set[["targetTime", "diff_336"]],
    GAM=False,
    params=params,
)
Q, sigma, P, theta1 = optim.grid_search_reduced_likelihood(
    q_list=q_list,
    std_static=std_static_OLS,
)

1e-10
-677839.3291876058
1e-11
-1198532.2196018833
1e-12
-686188.4993597809
1e-13
-553302.2452294303
1e-14
-517469.17891004635
1e-15
-553565.7927980763
1e-16
-505393.2736914998
1e-17
-1445832.3660070014
1e-18
-1445622.9031761996
1e-19
-1445542.0915700044
Optimal q:  5.945813747741978e-13
Optimal sigma:  77.10910288508082
Maximum likelihood achieved:  -505393.2736914998
