In [1]:
import numpy as np
import pandas as pd
import yaml
import time
import sys
import os
import os.path as osp
import joblib
import pickle
from itertools import product
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
# Local modules
from src.utils import read_yml, Dict, time_range, time_intp, plot_styles

In [2]:
# Directory containing the pickled test-set results read below.
base_dir = "outputs/transfer_baseline_static"

# Project configuration; later cells read `conf.f_start` and `conf.f_end` from it.
conf = Dict(read_yml("etc/thesis_config.yaml"))

# Locations of the per-model result files inside `base_dir`.
xgb_results_path = osp.join(base_dir, "results_xgb_testset.pkl")
lm_results_path = osp.join(base_dir, "results_lm_testset.pkl")

# NOTE: `pd.read_pickle` unpickles arbitrary Python objects, so these files
# must come from a trusted source (here: this project's own outputs folder).
# Later cells index both objects as results[<fuel class>][<subset>][<metric>].
xgb = pd.read_pickle(xgb_results_path)
lm = pd.read_pickle(lm_results_path)

In [3]:
# document-safe plotting defaults
# Shared styling constants. None of them is referenced in the cells visible
# here, so they are presumably consumed by plotting cells further down the
# notebook — TODO confirm.
FIGSIZE = (10, 6)  # presumably (width, height) for matplotlib's `figsize` — confirm
DPI = 300  # presumably the resolution used when rendering/saving figures — confirm
LABEL_SIZE = 14  # presumably axis-label font size — confirm
TICK_SIZE = 12  # presumably tick-label font size — confirm
CBAR_LABEL_SIZE = 13  # presumably colorbar-label font size — confirm

## Summarize Results

### FM1

In [4]:
# FM1 test-set metrics over all observations ("base" subset), XGBoost vs. LM.
fm1_base_xgb = xgb["FM1"]["base"]
fm1_base_lm = lm["FM1"]["base"]

# Keys pulled from each results entry; order must match the "Metric" labels below.
metric_keys = ["r2", "bias", "rmse"]

print("FM1 Results - All Observations")
print(f"Test Set Start: {conf.f_start}")
# Fixed label: this line reports `conf.f_end` but was printed as "Test Set Start".
print(f"Test Set End: {conf.f_end}")
print(f"N. Test Observations: {fm1_base_xgb['n']}")

# One row per metric, one column per model, rounded to 2 decimals for display.
tab1 = pd.DataFrame({
    "Metric": [r"$R^2$", r"Bias ($\%$)", r"RMSE ($\%$)"],
    "XGBoost": np.array([fm1_base_xgb[key] for key in metric_keys]).round(2),
    "LM": np.array([fm1_base_lm[key] for key in metric_keys]).round(2),
})

tab1

FM1 Results - All Observations
Test Set Start: 1997-08-13T12:00:00
Test Set Start: 1997-12-30T22:00:00
N. Test Observations: 271


Unnamed: 0,Metric,XGBoost,LM
0,$R^2$,0.53,0.59
1,Bias ($\%$),1.99,-0.2
2,RMSE ($\%$),9.15,8.57


In [5]:
# FM1 test-set metrics restricted to the "lt30" subset, XGBoost vs. LM.
# (The previous comment, "Metrics for All FMC", was copied from the cell above.)
fm1_lt30_xgb = xgb["FM1"]["lt30"]
fm1_lt30_lm = lm["FM1"]["lt30"]

# Keys pulled from each results entry; order must match the "Metric" labels below.
metric_keys = ["r2", "bias", "rmse"]

print(r"FM1 Results - FM1 <= 30 ")
print(f"Test Set Start: {conf.f_start}")
# Fixed label: this line reports `conf.f_end` but was printed as "Test Set Start".
print(f"Test Set End: {conf.f_end}")
# The count below is read from the unfiltered "base" entry, so the label now
# says so instead of implying it is the size of the FM1 <= 30 subset.
# NOTE(review): if the "lt30" entry also stores an "n" key, print
# xgb['FM1']['lt30']['n'] here instead — confirm against the code that
# builds the results pickle.
print(f"N. Test Observations (all FM1, unfiltered): {xgb['FM1']['base']['n']}")

# NOTE: this intentionally keeps the name `tab1`, overwriting the table from the
# all-observations cell, so any later cell that reads `tab1` behaves as before.
tab1 = pd.DataFrame({
    "Metric": [r"$R^2$", r"Bias ($\%$)", r"RMSE ($\%$)"],
    "XGBoost": np.array([fm1_lt30_xgb[key] for key in metric_keys]).round(2),
    "LM": np.array([fm1_lt30_lm[key] for key in metric_keys]).round(2),
})

tab1

FM1 Results - FM1 <= 30 
Test Set Start: 1997-08-13T12:00:00
Test Set Start: 1997-12-30T22:00:00
N. Test Observations: 271


Unnamed: 0,Metric,XGBoost,LM
0,$R^2$,0.18,0.48
1,Bias ($\%$),0.5,-2.3
2,RMSE ($\%$),5.63,4.49


### FM100

In [6]:
# FM100 test-set metrics over all observations ("base" subset), XGBoost vs. LM.
fm100_base_xgb = xgb["FM100"]["base"]
fm100_base_lm = lm["FM100"]["base"]

# Keys pulled from each results entry; order must match the "Metric" labels below.
metric_keys = ["r2", "bias", "rmse"]

print("FM100 Results - All Observations")
print(f"Test Set Start: {conf.f_start}")
# Fixed label: this line reports `conf.f_end` but was printed as "Test Set Start".
print(f"Test Set End: {conf.f_end}")
print(f"N. Test Observations: {fm100_base_xgb['n']}")

# One row per metric, one column per model, rounded to 2 decimals for display.
tab100 = pd.DataFrame({
    "Metric": [r"$R^2$", r"Bias ($\%$)", r"RMSE ($\%$)"],
    "XGBoost": np.array([fm100_base_xgb[key] for key in metric_keys]).round(2),
    "LM": np.array([fm100_base_lm[key] for key in metric_keys]).round(2),
})

tab100

FM100 Results - All Observations
Test Set Start: 1997-08-13T12:00:00
Test Set Start: 1997-12-30T22:00:00
N. Test Observations: 206


Unnamed: 0,Metric,XGBoost,LM
0,$R^2$,0.53,0.52
1,Bias ($\%$),-0.38,-0.57
2,RMSE ($\%$),3.0,3.03


### FM1000

In [7]:
# FM1000 test-set metrics over all observations ("base" subset), XGBoost vs. LM.
fm1000_base_xgb = xgb["FM1000"]["base"]
fm1000_base_lm = lm["FM1000"]["base"]

# Keys pulled from each results entry; order must match the "Metric" labels below.
metric_keys = ["r2", "bias", "rmse"]

print("FM1000 Results - All Observations")
print(f"Test Set Start: {conf.f_start}")
# Fixed label: this line reports `conf.f_end` but was printed as "Test Set Start".
print(f"Test Set End: {conf.f_end}")
print(f"N. Test Observations: {fm1000_base_xgb['n']}")

# One row per metric, one column per model, rounded to 2 decimals for display.
tab1000 = pd.DataFrame({
    "Metric": [r"$R^2$", r"Bias ($\%$)", r"RMSE ($\%$)"],
    "XGBoost": np.array([fm1000_base_xgb[key] for key in metric_keys]).round(2),
    "LM": np.array([fm1000_base_lm[key] for key in metric_keys]).round(2),
})

tab1000

FM1000 Results - All Observations
Test Set Start: 1997-08-13T12:00:00
Test Set Start: 1997-12-30T22:00:00
N. Test Observations: 209


Unnamed: 0,Metric,XGBoost,LM
0,$R^2$,0.33,0.42
1,Bias ($\%$),-0.66,-0.69
2,RMSE ($\%$),2.71,2.53
