In [1]:
import xarray
import os
import pandas as pd
import numpy as np
import dask.dataframe as dd
import math

from Preprocessing import *

prep = Preprocessing()

merged_hornsea = prep.perform_preprocessing_pipeline(geo_data_dict = {"dwd_icon_eu_hornsea":"nc_files", "ncep_gfs_hornsea":"nc_files"},
          deployment = False, energy_data_dict = {"Energy_data":"csv_files"})

merged_pes = prep.perform_preprocessing_pipeline(geo_data_dict = {"dwd_icon_eu_pes":"nc_files", "ncep_gfs_pes":"nc_files"},
          deployment = False, energy_data_dict = {"Energy_data":"csv_files"})

merged_demand = prep.perform_preprocessing_pipeline(geo_data_dict = {"dwd_icon_eu_demand":"nc_files", "ncep_gfs_demand":"nc_files"},
          deployment = False, energy_data_dict = {"Energy_data":"csv_files"})

In [2]:
merged_hornsea.to_parquet("preprocessed_hornsea_with_energy.parquet")
merged_pes.to_parquet("preprocessed_pes_with_energy.parquet")
merged_demand.to_parquet("preprocessed_demand_with_energy.parquet")

In [1]:
from Preprocessing import *
merged_hornsea = pd.read_parquet("preprocessed_hornsea_with_energy.parquet")
merged_pes = pd.read_parquet("preprocessed_pes_with_energy.parquet")
merged_demand = pd.read_parquet("preprocessed_demand_with_energy.parquet")

In [2]:
import importlib

feature_engineerer_wind = FeatureEngineerer(merged_hornsea, label = 'Wind_MWh_credit')
feature_engineerer_solar = FeatureEngineerer(merged_pes, label = 'Solar_MWh_credit')

merged_pes_simple = merged_pes[['solar_down_rad', 'Solar_MWh_credit', 'Wind_MWh_credit']]
feature_engineerer_solar_baseline = FeatureEngineerer(merged_pes_simple, label = 'Solar_MWh_credit')

merged_hornsea_simple = merged_hornsea[['wind_speed_100', 'Solar_MWh_credit', 'Wind_MWh_credit']]
feature_engineerer_wind_baseline = FeatureEngineerer(merged_hornsea_simple, label = 'Wind_MWh_credit')

### __Wind Energy Forecast__

__baseline__

In [6]:
import model_utils
import importlib
importlib.reload(model_utils)

quantiles = np.arange(0.1, 1.0, 0.1)

# Specify model save directory
model_save_dir_qr = "qr_model_wind"

qr_model = model_utils.QuantileRegressorModel(feature_engineerer_wind_baseline, quantiles, model_save_dir=model_save_dir_qr, load_pretrained=False)
qr_model.train_and_predict()  # This will skip training for already loaded models
print(f"Quantile Regressor Pinball Score: {qr_model.pinball_score()}")

Saved Quantile Regressor model for quantile 0.1 to qr_model_wind\qr_model_quantile_0.1.pkl
Saved Quantile Regressor model for quantile 0.2 to qr_model_wind\qr_model_quantile_0.2.pkl
Saved Quantile Regressor model for quantile 0.30000000000000004 to qr_model_wind\qr_model_quantile_0.30000000000000004.pkl
Saved Quantile Regressor model for quantile 0.4 to qr_model_wind\qr_model_quantile_0.4.pkl
Saved Quantile Regressor model for quantile 0.5 to qr_model_wind\qr_model_quantile_0.5.pkl
Saved Quantile Regressor model for quantile 0.6 to qr_model_wind\qr_model_quantile_0.6.pkl
Saved Quantile Regressor model for quantile 0.7000000000000001 to qr_model_wind\qr_model_quantile_0.7000000000000001.pkl
Saved Quantile Regressor model for quantile 0.8 to qr_model_wind\qr_model_quantile_0.8.pkl
Saved Quantile Regressor model for quantile 0.9 to qr_model_wind\qr_model_quantile_0.9.pkl
Quantile Regressor Pinball Score: 53.08909151412246


__xgboost__

In [22]:
merged_hornsea.columns

Index(['rel_hum', 'temp', 'wind_dir', 'wind_dir_100', 'wind_speed',
       'wind_speed_100', 'forecast_horizon', 'temp_mean', 'temp_std',
       'temp_min', 'temp_max', 'wind_speed_mean', 'wind_speed_std',
       'wind_speed_min', 'wind_speed_max', 'wind_speed_100_mean',
       'wind_speed_100_std', 'wind_speed_100_min', 'wind_speed_100_max',
       'wind_speed_range', 'wind_speed_100_range', 'wind_speed_altitude_diff',
       'temp_range', 'sin_month', 'cos_month', 'sin_day', 'cos_day',
       'sin_dayofweek', 'cos_dayofweek', 'sin_hour', 'cos_hour',
       'rel_hum_diff', 'temp_diff', 'wind_speed_diff', 'Wind_MWh_credit',
       'Solar_MWh_credit'],
      dtype='object')

In [3]:
import model_utils

feature_engineerer_wind = FeatureEngineerer(merged_hornsea.drop(["sin_dayofweek", "cos_dayofweek"], axis = 1), label = 'Wind_MWh_credit')

quantiles = np.arange(0.1, 1.0, 0.1)

model_save_dir_xgboost = "xgboost_model_wind"

hyperparams = {"objective": "reg:quantileerror",
            "tree_method": "hist",
            "quantile_alpha": quantiles,
            "learning_rate": 0.005,
            "max_depth": 8}

xgboost_model_wind = model_utils.XGBoostModel(feature_engineerer_wind, quantiles=quantiles, model_save_dir=model_save_dir_xgboost, load_pretrained=False, 
                                              #hyperparams = hyperparams
                                              )
xgboost_model_wind.train_and_predict()  # This will skip training if the model is already loaded
print(f"XGBoost Pinball Score: {xgboost_model_wind.pinball_score()}")

[0]	Train-quantile:94.90643	Val-quantile:87.66962
[1]	Train-quantile:90.85881	Val-quantile:84.59971
[2]	Train-quantile:86.99507	Val-quantile:81.67438
[3]	Train-quantile:83.30926	Val-quantile:78.90810
[4]	Train-quantile:79.79981	Val-quantile:76.30057
[5]	Train-quantile:76.44105	Val-quantile:73.83583
[6]	Train-quantile:73.24606	Val-quantile:71.48407
[7]	Train-quantile:70.19986	Val-quantile:69.23573
[8]	Train-quantile:67.29435	Val-quantile:67.09918
[9]	Train-quantile:64.55494	Val-quantile:65.06881
[10]	Train-quantile:61.95088	Val-quantile:63.15929
[11]	Train-quantile:59.46669	Val-quantile:61.37005
[12]	Train-quantile:57.10618	Val-quantile:59.66145
[13]	Train-quantile:54.85920	Val-quantile:58.04787
[14]	Train-quantile:52.72404	Val-quantile:56.53057
[15]	Train-quantile:50.67193	Val-quantile:55.07907
[16]	Train-quantile:48.71840	Val-quantile:53.72495
[17]	Train-quantile:46.86247	Val-quantile:52.42748
[18]	Train-quantile:45.09735	Val-quantile:51.18923
[19]	Train-quantile:43.41956	Val-quantile

### __Solar Energy Forecast__

__baseline modell__

In [8]:
quantiles = np.arange(0.1, 1.0, 0.1)

# Specify model save directory
model_save_dir_qr = "qr_model_solar"

qr_model_solar = model_utils.QuantileRegressorModel(feature_engineerer_solar_baseline, quantiles, model_save_dir=model_save_dir_qr, load_pretrained=False)
qr_model_solar.train_and_predict()  # This will skip training for already loaded models
print(f"Quantile Regressor Pinball Score: {qr_model_solar.pinball_score()}")

Saved Quantile Regressor model for quantile 0.1 to qr_model_solar\qr_model_quantile_0.1.pkl
Saved Quantile Regressor model for quantile 0.2 to qr_model_solar\qr_model_quantile_0.2.pkl
Saved Quantile Regressor model for quantile 0.30000000000000004 to qr_model_solar\qr_model_quantile_0.30000000000000004.pkl
Saved Quantile Regressor model for quantile 0.4 to qr_model_solar\qr_model_quantile_0.4.pkl
Saved Quantile Regressor model for quantile 0.5 to qr_model_solar\qr_model_quantile_0.5.pkl
Saved Quantile Regressor model for quantile 0.6 to qr_model_solar\qr_model_quantile_0.6.pkl
Saved Quantile Regressor model for quantile 0.7000000000000001 to qr_model_solar\qr_model_quantile_0.7000000000000001.pkl
Saved Quantile Regressor model for quantile 0.8 to qr_model_solar\qr_model_quantile_0.8.pkl
Saved Quantile Regressor model for quantile 0.9 to qr_model_solar\qr_model_quantile_0.9.pkl
Quantile Regressor Pinball Score: 13.779483166533769


__xgboost__

In [9]:
quantiles = np.arange(0.1, 1.0, 0.1)

model_save_dir_xgboost = "xgboost_model_solar"

xgboost_model_solar = model_utils.XGBoostModel(feature_engineerer_solar, quantiles=quantiles, model_save_dir=model_save_dir_xgboost, load_pretrained=False)
xgboost_model_solar.train_and_predict()  # This will skip training if the model is already loaded
print(f"XGBoost Pinball Score: {xgboost_model_solar.pinball_score()}")

[0]	Train-quantile:44.12999	Val-quantile:92.45223
[1]	Train-quantile:42.25440	Val-quantile:88.23172
[2]	Train-quantile:40.50419	Val-quantile:84.27149
[3]	Train-quantile:38.84159	Val-quantile:80.41121
[4]	Train-quantile:37.26078	Val-quantile:76.75385
[5]	Train-quantile:35.75178	Val-quantile:73.24482
[6]	Train-quantile:34.30906	Val-quantile:69.88570
[7]	Train-quantile:32.93070	Val-quantile:66.71439
[8]	Train-quantile:31.62027	Val-quantile:63.69509
[9]	Train-quantile:30.35903	Val-quantile:60.82798
[10]	Train-quantile:29.16290	Val-quantile:58.17260
[11]	Train-quantile:28.01222	Val-quantile:55.60625
[12]	Train-quantile:26.92059	Val-quantile:53.15848
[13]	Train-quantile:25.90057	Val-quantile:50.80891
[14]	Train-quantile:24.92071	Val-quantile:48.59624
[15]	Train-quantile:23.98265	Val-quantile:46.52111
[16]	Train-quantile:23.09908	Val-quantile:44.51724
[17]	Train-quantile:22.24975	Val-quantile:42.63751
[18]	Train-quantile:21.43395	Val-quantile:40.85613
[19]	Train-quantile:20.65773	Val-quantile

### __Lightgbm implementation__

In [134]:
import lightgbm as lgb
importlib.reload(model_utils)

quantiles = [x for x in np.arange(0.1, 1.0, 0.1)]
qr_solar_lightgbm = {}
qr_solar_lightgbm["true"] = feature_engineerer_solar.y_test.values
#out_bounds_predictions = np.zeros_like(y_true_mean, dtype=np.bool_)
for quantile in quantiles:
    qr_lightgbm = lgb.LGBMRegressor(objective='quantile', alpha=quantile)
    qr_lightgbm.fit(feature_engineerer_solar.X_train, feature_engineerer_solar.y_train)
    qr_solar_lightgbm[str(quantile)] = qr_lightgbm.predict(feature_engineerer_solar.X_test)

qr_solar_lightgbm_df = pd.DataFrame(qr_solar_lightgbm)
model_utils.pinball_score(qr_solar_lightgbm_df, quantiles=quantiles)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003075 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5040
[LightGBM] [Info] Number of data points in the train set: 48168, number of used features: 27
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000772 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 5040
[LightGBM] [Info] Number of data points in the train set: 48168, number of used features: 27
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002927 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5040
[LightGBM] [Info] Number of data points in the train set: 48168, number of used features: 27
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhea

7.138515816261509

In [135]:
quantiles = [x for x in np.arange(0.1, 1.0, 0.1)]
qr_wind_lightgbm = {}
qr_wind_lightgbm["true"] = feature_engineerer_wind.y_test.values
#out_bounds_predictions = np.zeros_like(y_true_mean, dtype=np.bool_)
for quantile in quantiles:
    qr_lightgbm_wind = lgb.LGBMRegressor(objective='quantile', alpha=quantile)
    qr_lightgbm_wind.fit(feature_engineerer_wind.X_train, feature_engineerer_wind.y_train)
    qr_wind_lightgbm[str(quantile)] = qr_lightgbm_wind.predict(feature_engineerer_wind.X_test)

qr_wind_lightgbm_df = pd.DataFrame(qr_wind_lightgbm)
model_utils.pinball_score(qr_wind_lightgbm_df, quantiles=quantiles)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005574 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6825
[LightGBM] [Info] Number of data points in the train set: 48168, number of used features: 34
[LightGBM] [Info] Start training from score 9.724700
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004132 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6825
[LightGBM] [Info] Number of data points in the train set: 48168, number of used features: 34
[LightGBM] [Info] Start training from score 45.381004
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004482 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6825
[LightGBM] [Info] Number of data points in the train set: 48168, number of used features: 34
[LightGBM] [Info] Start tr

135.05236152705046