In [2]:
import os
import pickle
import itertools
from datetime import datetime
from pprint import pprint

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.metrics import mean_pinball_loss

os.chdir("C:/2023_11-PTSFC")
import model_train as model_train
import data_prepro as data_prepro
import model_eval as model_eval

In [13]:
import importlib
importlib.reload(model_eval)

<module 'model_eval' from 'C:\\2023_11-PTSFC\\model_eval.py'>

### Import

In [3]:
# os.environ["LOKY_MAX_CPU_COUNT"] = "1"  # Replace "4" with the desired number of cores

quantiles = [0.025, 0.25, 0.5, 0.75, 0.975]
fcast_hor = [36, 40, 44, 60, 64, 68] # in hours

# = = = = = = = = = = = = = 
# get data
# df_energy = data_prepro.get_energy_data_today(to_date=t_wednesday.strftime('%Y%m%d'))

# Read data from file with specified data types
df_energy = pd.read_csv("data/2015-01-01_2024-02-21_energy.csv", index_col=0, parse_dates=[0])
df_energy['timestamp_CET'] = pd.to_datetime(df_energy['timestamp_CET'], utc=True).dt.tz_convert('CET')
print(df_energy.info())

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 80136 entries, 2014-12-31 23:00:00+00:00 to 2024-02-21 22:00:00+00:00
Data columns (total 2 columns):
 #   Column         Non-Null Count  Dtype              
---  ------         --------------  -----              
 0   timestamp_CET  80136 non-null  datetime64[ns, CET]
 1   gesamt         80136 non-null  float64            
dtypes: datetime64[ns, CET](1), float64(1)
memory usage: 1.8 MB
None


In [4]:
# Read Pickle File of Scores
with open("2024-03-28_01-32-39 lightgbm, xgboost 2020/eval.pickle", "rb") as f:
    scores_dict_2020 = pickle.load(f)

# Read Pickle File of Scores
with open("2024-03-28_01-11-41 lightgbm, xgboost 2022/eval.pickle", "rb") as f:
    scores_dict_2022 = pickle.load(f)

# Read Pickle File of Fcasts
with open("2024-03-28_01-32-39 lightgbm, xgboost 2020/fcasts.pickle", "rb") as f:
    fcasts_dict_2020 = pickle.load(f)

# Read Pickle File of Fcasts
with open("2024-03-28_01-11-41 lightgbm, xgboost 2022/fcasts.pickle", "rb") as f:
    fcasts_dict_2022 = pickle.load(f)

In [5]:
def subset_dict(scores_dict, model_name):
    scores_dict_out = scores_dict.copy()
    for key in scores_dict.keys():
        scores_dict_out[key] = {k: v for k, v in scores_dict[key].items() if model_name in k}
    return scores_dict_out

scores_dict_2020_lgbm = subset_dict(scores_dict_2020, "lightgbm")
scores_dict_2022_lgbm = subset_dict(scores_dict_2022, "lightgbm")

scores_dict_2020_xgb = subset_dict(scores_dict_2020, "xgboost")
scores_dict_2022_xgb = subset_dict(scores_dict_2022, "xgboost")

In [6]:
# combine all sub dictionaries with same key in 2 dictionaries
def combine_dicts(dict1, dict2):
    dict_out = dict1.copy()
    for key in dict1.keys():
        dict_out[key] = {k: v for k, v in dict1[key].items()}
        dict_out[key].update({k: v for k, v in dict2[key].items()})
    return dict_out 

scores_dict_lgbm = combine_dicts(scores_dict_2020_lgbm, scores_dict_2022_lgbm)
scores_dict_xgb = combine_dicts(scores_dict_2020_xgb, scores_dict_2022_xgb)

### All Models Evaluation

In [7]:
scores_dict_all = combine_dicts(scores_dict_2020, scores_dict_2022)

# Remove all keys in subdicts except for the specified patterns
for key in scores_dict_all.keys():
    scores_dict_all[key] = {k: v for k, v in scores_dict_all[key].items() 
                            if ("xgboost_dummy_2020_1" in k) 
                            or ("xgboost_dummy_2" in k and "2020" not in k) 
                            or ("lightgbm_dummy_2020_5" in k) 
                            or ("lightgbm_dummy_21" in k) 
                            or ("xgboost" not in k and "lightgbm" not in k)}

input_dict = scores_dict_all

weekly_scores_df_out = pd.DataFrame(columns=input_dict.keys(),
                                    index=list(input_dict[list(input_dict.keys())[0]].keys()))

for week_key, weekly_scores in input_dict.items():

    for key, model_scores in weekly_scores.items():
        
        # print(model_scores)

        # entire mean
        model_qscore_mean = model_scores.values.mean()

        # print(f"> {key}: {model_qscore_mean}")
        weekly_scores_df_out.loc[key, week_key] = model_qscore_mean

ranking_df = weekly_scores_df_out.rank(axis=0, method='min')
ranking_sum = ranking_df.sum(axis=1)
ranking_sum = ranking_sum.sort_values(ascending=True)
print("> ranking sum")
pprint(ranking_sum.head(5))

mean_qscores_df = weekly_scores_df_out.mean(axis=1).sort_values(ascending=True)
print("> qscore sum")
pprint(mean_qscores_df.head(10))

> ranking sum
xgboost_dummy_2020_1      77.0
xgboost_dummy_2           83.0
mstl_0.5                  84.0
grad_boost_2018_dummy    102.0
bench_pm_1month          104.0
dtype: float64
> qscore sum
mstl_0.5                 0.752816
mstl_1                   0.800466
xgboost_dummy_2020_1     0.843151
xgboost_dummy_2          0.874116
bench_pm_1month          1.044384
grad_boost_2018_dummy    1.053459
grad_boost_2018_fturs    1.070209
bench_pm_2weeks          1.076728
quant_reg_2018_sk        1.109495
grad_boost_2015_dummy    1.145172
dtype: object


### Ensemble Combinations ...

In [8]:
fcasts_dict_all = combine_dicts(fcasts_dict_2020, fcasts_dict_2020)

# Remove all keys in subdicts except for the specified patterns
for key in fcasts_dict_all.keys():
    fcasts_dict_all[key] = {k: v for k, v in fcasts_dict_all[key].items() 
                            if ("xgboost_dummy_2020_1" in k) 
                            or ("xgboost_dummy_2" in k and "2020" not in k) 
                            or ("lightgbm_dummy_2020_5" in k) 
                            or ("lightgbm_dummy_21" in k) 
                            or ("xgboost" not in k and "lightgbm" not in k)}

fcasts_dict_all['2023-11-15'].keys()

dict_keys(['bench_pm_2weeks', 'bench_same_month', 'bench_pm_1month', 'mstl_1', 'mstl_0.5', 'quant_reg_2015_sk', 'quant_reg_2018_sk', 'quant_reg_2015_sm', 'quant_reg_2018_sm', 'grad_boost_2015_fturs', 'grad_boost_2018_fturs', 'grad_boost_2015_dummy', 'grad_boost_2018_dummy', 'lightgbm_dummy_2020_5', 'xgboost_dummy_2020_1'])

In [38]:
all_models = list(fcasts_dict_all['2023-11-15'].keys())
# Generate all possible combinations
ens_size = 3
combis = list(itertools.combinations(all_models, ens_size))
# filter out all ensembles with repeating models
combis = [combi for combi in combis if len(set(combi)) == ens_size]
print(len(combis))

# Define the start and end dates
start_date = pd.Timestamp('2023-11-15')
end_date = pd.Timestamp('2024-02-14')

# Generate a list of weekly dates in UTC
fcast_dates_cet = pd.date_range(start=start_date, end=end_date, freq='W-WED').tz_localize('CET').strftime('%Y-%m-%d').tolist()

all_combi_names = [f"ensemble | {', '.join(combi)}" for combi in combis]
print(all_combi_names[0])
res_df = pd.DataFrame(index=all_combi_names, columns=fcast_dates_cet)

# Iterate over the forecast dates
for fcast_idx, fcast_date in enumerate(fcast_dates_cet):

    print('= '*30)
    print(f"Forecasting for week starting from {fcast_date} ...")

    # = = = = = = = = = = = = = 
    # generate prediction timestamps based on t0 = following thursday 00:00
    # = = = = = = = = = = = = = 

    # Calculate the Thursday and Wednesday of the week
    t_wednesday = pd.Timestamp(fcast_date).replace(hour=0, minute=0, second=0, microsecond=0).tz_localize('CET')
    t_thursday = t_wednesday + pd.Timedelta(days=1)

    # Generate required submission timestamps
    subm_timestamps = [(t_thursday + pd.Timedelta(hours=fcast)) for fcast in fcast_hor]
    print(f"Submission timestamps = {subm_timestamps[0]} to {subm_timestamps[-1]}")

    weekly_fcasts = fcasts_dict_all[fcast_date]

    # = = = = = = = = = = = = = 
    # Evaluation based on submission timestamps
    # = = = = = = = = = = = = = 

    # get actual values at every submission timestamp
    df_energy_eval = df_energy.loc[df_energy['timestamp_CET'].isin(subm_timestamps)].copy()

    for combi_idx, combi in enumerate(combis):

        # calculate ensemble between the preds of the models in the current combination
        pred_list = [weekly_fcasts[model] for model in combi]
        new_name = f"ensemble | {', '.join(combi)}"

        # Ignore timestamp_CET column and take the average of the quantiles
        pred_vals_list = [pred.iloc[:, 1:].copy() for pred in pred_list]

        # Take the average of the quantiles across all models in the ensemble
        ens_pred_df = pred_list[0].copy()
        ens_pred_df.iloc[:, 1:] = sum(pred_vals_list) / len(pred_vals_list)
        
        df_scores = model_eval.eval_fcast_qscore(ens_pred_df, df_energy_eval, subm_timestamps, quantiles)

        # Save the last row of the scores dataframe to final output
        res_df.loc[new_name, fcast_date] = df_scores.values.mean()


455
ensemble | bench_pm_2weeks, bench_same_month, bench_pm_1month
= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
Forecasting for week starting from 2023-11-15 ...
Submission timestamps = 2023-11-17 12:00:00+01:00 to 2023-11-18 20:00:00+01:00
= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
Forecasting for week starting from 2023-11-22 ...
Submission timestamps = 2023-11-24 12:00:00+01:00 to 2023-11-25 20:00:00+01:00
= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
Forecasting for week starting from 2023-11-29 ...
Submission timestamps = 2023-12-01 12:00:00+01:00 to 2023-12-02 20:00:00+01:00
= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
Forecasting for week starting from 2023-12-06 ...
Submission timestamps = 2023-12-08 12:00:00+01:00 to 2023-12-09 20:00:00+01:00
= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
Forecasting for week starting from 2023-12-13 ...
Submission timestamps = 2023-12-15 12:00:00+01:00 to 2023-12-16

In [39]:
res_df.mean(axis=1).sort_values(ascending=True).head(20)

ensemble | mstl_1, mstl_0.5, lightgbm_dummy_2020_5                    1.12704
ensemble | mstl_0.5, lightgbm_dummy_2020_5, xgboost_dummy_2020_1     1.177848
ensemble | mstl_1, lightgbm_dummy_2020_5, xgboost_dummy_2020_1       1.181781
ensemble | mstl_1, grad_boost_2018_fturs, lightgbm_dummy_2020_5      1.193096
ensemble | mstl_1, grad_boost_2018_dummy, lightgbm_dummy_2020_5      1.196894
ensemble | mstl_1, grad_boost_2015_fturs, lightgbm_dummy_2020_5      1.199461
ensemble | mstl_0.5, grad_boost_2018_fturs, lightgbm_dummy_2020_5    1.200798
ensemble | mstl_0.5, grad_boost_2018_dummy, lightgbm_dummy_2020_5    1.201326
ensemble | mstl_1, grad_boost_2015_dummy, lightgbm_dummy_2020_5      1.203797
ensemble | mstl_0.5, grad_boost_2015_dummy, lightgbm_dummy_2020_5    1.210538
ensemble | mstl_0.5, grad_boost_2015_fturs, lightgbm_dummy_2020_5    1.212675
ensemble | bench_pm_1month, mstl_1, lightgbm_dummy_2020_5            1.234264
ensemble | bench_pm_1month, mstl_0.5, lightgbm_dummy_2020_5     

In [45]:
# Create Own Ensembles
model_list = ['mstl_0.5', 'lightgbm_dummy_2020_5', 'bench_pm_1month', 'xgboost_dummy_2020_1']

model_scores = np.zeros(len(fcast_dates_cet))                
# Iterate over the forecast dates
for fcast_idx, fcast_date in enumerate(fcast_dates_cet):

    # Calculate the Thursday and Wednesday of the week
    t_wednesday = pd.Timestamp(fcast_date).replace(hour=0, minute=0, second=0, microsecond=0).tz_localize('CET')
    subm_timestamps = [(t_wednesday + pd.Timedelta(days=1, hours=fcast)) for fcast in fcast_hor]
    print(f"Submission timestamps = {subm_timestamps[0]} to {subm_timestamps[-1]}")

    # get actual values at every submission timestamp
    df_energy_eval = df_energy.loc[df_energy['timestamp_CET'].isin(subm_timestamps)].copy()
    # get weekly fcast
    weekly_fcasts = fcasts_dict_all[fcast_date]

    # calculate ensemble between the preds of the models in the current combination
    pred_list = [weekly_fcasts[model] for model in model_list]
    new_name = f"ensemble | {', '.join(combi)}"

    # Ignore timestamp_CET column and take the average of the quantiles
    pred_vals_list = [pred.iloc[:, 1:].copy() for pred in pred_list]

    # Take the average of the quantiles across all models in the ensemble
    ens_pred_df = pred_list[0].copy()
    ens_pred_df.iloc[:, 1:] = sum(pred_vals_list) / len(pred_vals_list)

    df_scores = model_eval.eval_fcast_qscore(ens_pred_df, df_energy_eval, subm_timestamps, quantiles)
    model_scores[fcast_idx] = df_scores.values.mean()

Submission timestamps = 2023-11-17 12:00:00+01:00 to 2023-11-18 20:00:00+01:00
Submission timestamps = 2023-11-24 12:00:00+01:00 to 2023-11-25 20:00:00+01:00
Submission timestamps = 2023-12-01 12:00:00+01:00 to 2023-12-02 20:00:00+01:00
Submission timestamps = 2023-12-08 12:00:00+01:00 to 2023-12-09 20:00:00+01:00
Submission timestamps = 2023-12-15 12:00:00+01:00 to 2023-12-16 20:00:00+01:00
Submission timestamps = 2023-12-22 12:00:00+01:00 to 2023-12-23 20:00:00+01:00
Submission timestamps = 2023-12-29 12:00:00+01:00 to 2023-12-30 20:00:00+01:00
Submission timestamps = 2024-01-05 12:00:00+01:00 to 2024-01-06 20:00:00+01:00
Submission timestamps = 2024-01-12 12:00:00+01:00 to 2024-01-13 20:00:00+01:00
Submission timestamps = 2024-01-19 12:00:00+01:00 to 2024-01-20 20:00:00+01:00
Submission timestamps = 2024-01-26 12:00:00+01:00 to 2024-01-27 20:00:00+01:00
Submission timestamps = 2024-02-02 12:00:00+01:00 to 2024-02-03 20:00:00+01:00
Submission timestamps = 2024-02-09 12:00:00+01:00 to

In [47]:
model_scores.mean()

1.2579703451958013

### LGBM & XGBoost Model Evaluation

In [10]:
# input_dict = scores_dict_xgb
input_dict = scores_dict_lgbm

weekly_scores_df_out = pd.DataFrame(columns=input_dict.keys(),
                                    index=list(input_dict[list(input_dict.keys())[0]].keys()))

for week_key, weekly_scores in input_dict.items():

    for key, model_scores in weekly_scores.items():
        
        # print(model_scores)

        # entire mean
        model_qscore_mean = model_scores.values.mean()
        # quantile mean
        # model_qscore_mean = model_scores.mean(axis=0).values[quantile]

        # print(f"> {key}: {model_qscore_mean}")
        weekly_scores_df_out.loc[key, week_key] = model_qscore_mean

    # output keys of weekly top 5 models with lowest mean score
    # top5 = weekly_scores_df_out.loc[:, week_key].sort_values().head(5)
    # print(f"{week_key}: {top5.index.to_list()}")

# take dataframe weekly_scores_dict_out
# assign rankings to each model (index) for each week (columns)
# sum the rankings for each model and sort by the sum

ranking_df = weekly_scores_df_out.rank(axis=0, method='min')
ranking_sum = ranking_df.sum(axis=1)
ranking_sum = ranking_sum.sort_values(ascending=True)
print("> ranking sum")
pprint(ranking_sum.head(5))

# # plot ranking_sum values as points and colour point red if 2020 is in index
# plt.figure(figsize=(6,3))
# plt.plot(ranking_sum.values, 'o', ms=4)
# for i, model in enumerate(ranking_sum.index):
#     if '2020' in model:
#         plt.plot(i, ranking_sum[model], 'ro', ms=4)
# plt.show()

# instead of taking sum of rankings, take lowest qscore
mean_qscores_df = weekly_scores_df_out.mean(axis=1).sort_values(ascending=True)
print("> qscore sum")
pprint(mean_qscores_df.head(10))

# # plot mean of qscores for each model
# plt.figure(figsize=(6,3))
# plt.plot(mean_qscores_df.values, 'o', ms=4)
# # plt.bar(mean_qscores_df.index, mean_qscores_df.values)
# for i, model in enumerate(mean_qscores_df.index):
#     if '2020' in model:
#         plt.plot(i, mean_qscores_df[model], 'ro', ms=4)
# plt.show()

> ranking sum
lightgbm_dummy_2020_9    273.0
lightgbm_dummy_2020_5    273.0
lightgbm_dummy_21        276.0
lightgbm_dummy_2020_4    285.0
lightgbm_dummy_2020_8    285.0
dtype: float64
> qscore sum
lightgbm_dummy_2020_5     1.328925
lightgbm_dummy_2020_9     1.328925
lightgbm_dummy_2020_23    1.347638
lightgbm_dummy_2020_22    1.348782
lightgbm_dummy_21         1.379352
lightgbm_dummy_2020_6     1.384033
lightgbm_dummy_2020_10    1.384033
lightgbm_dummy_2020_7     1.395336
lightgbm_dummy_2020_11    1.395336
lightgbm_dummy_20         1.397269
dtype: object


### Best Parameters

In [11]:
def generate_param_grids(params):
    
        param_values = list(itertools.product(*params.values()))
        param_names = list(params.keys())

        param_grids = []

        for values in param_values:
            param_dict = dict(zip(param_names, values))
            param_grids.append(param_dict)

        return param_grids

lgbm_params = {
    'max_depth': [4, 10],
    'num_leaves': [5, 15, 20],
    'learning_rate': [0.1, 0.3],
    'n_estimators': [100, 200],
    'boosting_type': ['gbdt'],
    'verbose': [-1]
}

xgb_params = {
    'objective': ['reg:quantileerror'],
    'eval_metric': ['quantile'],
    'booster': ['gbtree'],
    'max_depth': [4, 10],
    'learning_rate': [0.2],
    'n_estimators': [100, 200],
}

all_lgbm_params = generate_param_grids(lgbm_params)
all_xgb_params = generate_param_grids(xgb_params)

# pretty print 1, 0, 2 from all_xgb_params
xgb_idx_list = [0, 1, 2, 3]
lgbm_idx_list = [4, 5, 23]

for idx in xgb_idx_list:
    pprint(all_xgb_params[idx])

print('= '*20)
print('= '*20)

for idx in lgbm_idx_list:
    pprint(all_lgbm_params[idx])

{'booster': 'gbtree',
 'eval_metric': 'quantile',
 'learning_rate': 0.2,
 'max_depth': 4,
 'n_estimators': 100,
 'objective': 'reg:quantileerror'}
{'booster': 'gbtree',
 'eval_metric': 'quantile',
 'learning_rate': 0.2,
 'max_depth': 4,
 'n_estimators': 200,
 'objective': 'reg:quantileerror'}
{'booster': 'gbtree',
 'eval_metric': 'quantile',
 'learning_rate': 0.2,
 'max_depth': 10,
 'n_estimators': 100,
 'objective': 'reg:quantileerror'}
{'booster': 'gbtree',
 'eval_metric': 'quantile',
 'learning_rate': 0.2,
 'max_depth': 10,
 'n_estimators': 200,
 'objective': 'reg:quantileerror'}
= = = = = = = = = = = = = = = = = = = = 
= = = = = = = = = = = = = = = = = = = = 
{'boosting_type': 'gbdt',
 'learning_rate': 0.1,
 'max_depth': 4,
 'n_estimators': 100,
 'num_leaves': 15,
 'verbose': -1}
{'boosting_type': 'gbdt',
 'learning_rate': 0.1,
 'max_depth': 4,
 'n_estimators': 200,
 'num_leaves': 15,
 'verbose': -1}
{'boosting_type': 'gbdt',
 'learning_rate': 0.3,
 'max_depth': 10,
 'n_estimators'