In [1]:
import xarray
import os
import pandas as pd
import Preprocessing_test

extractor = Preprocessing_test.FileExtractor()

df_dwd_hornsea = extractor.combine_files("data", "dwd_icon_eu_hornsea")
df_dwd_pes = extractor.combine_files("data", "dwd_icon_eu_pes10")
df_dwd_demand = extractor.combine_files("data", "dwd_icon_eu_demand")

ncep_gfs_hornsea = extractor.combine_files("data", "ncep_gfs_hornsea")
ncep_gfs_pes = extractor.combine_files("data", "ncep_gfs_pes10")
ncep_gfs_demand = extractor.combine_files("data", "ncep_gfs_demand")

import numpy as np
import dask.dataframe as dd
import math

preprocesser = Preprocessing_test.Preprocessing()

df_dwd_hornsea = preprocesser.preprocess_geo_data(df_dwd_hornsea)
ncep_gfs_hornsea = preprocesser.preprocess_geo_data(ncep_gfs_hornsea)
df_dwd_pes = preprocesser.preprocess_geo_data(df_dwd_pes)
ncep_gfs_pes = preprocesser.preprocess_geo_data(ncep_gfs_pes)
df_dwd_demand = preprocesser.preprocess_geo_data(df_dwd_demand)
ncep_gfs_demand = preprocesser.preprocess_geo_data(ncep_gfs_demand)

hornsea = preprocesser.merge_weather_stations_data(df_dwd_hornsea, ncep_gfs_hornsea)
demand = preprocesser.merge_weather_stations_data(df_dwd_demand, ncep_gfs_demand)
pes = preprocesser.merge_weather_stations_data(df_dwd_pes, ncep_gfs_pes)

df_energy = extractor.combine_files("data", "Energy_data", ".csv")
df_energy = preprocesser.preprocess_energy_data(df_energy)

merged_hornsea = preprocesser.merge_geo_energy_outage_data(hornsea, df_energy)
merged_pes = preprocesser.merge_geo_energy_outage_data(pes, df_energy)
merged_demand = preprocesser.merge_geo_energy_outage_data(demand, df_energy)

merged_hornsea = preprocesser.add_difference_features(merged_hornsea)
merged_pes = preprocesser.add_difference_features(merged_pes)
merged_demand = preprocesser.add_difference_features(merged_demand)

In [3]:
import importlib
import Preprocessing_test
importlib.reload(Preprocessing_test)


feature_engineerer_wind = Preprocessing_test.FeatureEngineerer(merged_hornsea, label = 'Wind_MWh_credit')
feature_engineerer_solar = Preprocessing_test.FeatureEngineerer(merged_pes, label = 'Solar_MWh_credit')

In [35]:
from sklearn.utils.fixes import parse_version, sp_version

# This is line is to avoid incompatibility if older SciPy version.
# You should use `solver="highs"` with recent version of SciPy.
solver = "highs" if sp_version >= parse_version("1.6.0") else "interior-point"

from sklearn.linear_model import QuantileRegressor

quantiles = [x for x in np.arange(0.1, 1.0, 0.4)]
q = {}
q["true"] = feature_engineerer_wind.y_test.values
#out_bounds_predictions = np.zeros_like(y_true_mean, dtype=np.bool_)
for quantile in quantiles:
    qr = QuantileRegressor(quantile=quantile, alpha=0, solver=solver)
    y_pred = qr.fit(feature_engineerer_wind.X_train, feature_engineerer_wind.y_train)
    q[str(quantile)] = qr.predict(feature_engineerer_wind.X_test)
    #predictions[quantile] = y_pred


In [58]:
y_pred = pd.DataFrame()
y_pred["true"] = feature_engineerer_wind.y_test

In [71]:
q

{'true': array([373.688, 360.038, 361.818, ...,  85.653,  81.582,  89.046]),
 '0.05': array([156.15321675, 151.57032446, 148.19237557, ..., 151.8650951 ,
        148.02486512, 177.05073289]),
 '0.5': array([236.0797815 , 232.89523571, 234.99558743, ..., 186.10758139,
        180.89298282, 192.81268814]),
 '0.95': array([399.0153393 , 397.44056674, 402.77643325, ..., 381.4866221 ,
        375.02930648, 367.02407186])}

In [5]:
def pinball(y, q, alpha):
    return (y - q) * alpha * (y >= q) + (q - y) * (1 - alpha) * (y < q)

def pinball_score(df, quantiles):
    score = list()
    for qu in quantiles:
        # Berechne den Pinball Loss für jedes Quantil
        score.append(pinball(y=df["true"],
                             q=df[f"{qu}"],
                             alpha=qu/100).mean())
    return sum(score)/len(score)  # Durchschnittlicher Pinball Score

In [72]:
q_df = pd.DataFrame(q)

In [80]:
q_df

Unnamed: 0,true,0.05,0.5,0.95
0,373.688,156.153217,236.079781,399.015339
1,360.038,151.570324,232.895236,397.440567
2,361.818,148.192376,234.995587,402.776433
3,350.358,151.519207,231.803544,396.017240
4,342.218,147.404771,228.277675,395.516948
...,...,...,...,...
6418,113.759,152.266421,192.167889,389.206420
6419,98.545,151.446493,187.281273,383.433174
6420,85.653,151.865095,186.107581,381.486622
6421,81.582,148.024865,180.892983,375.029306


In [83]:
pinball_score(q_df)

140.61483473774035

In [100]:
from sklearn.utils.fixes import parse_version, sp_version

# This is line is to avoid incompatibility if older SciPy version.
# You should use `solver="highs"` with recent version of SciPy.
solver = "highs" if sp_version >= parse_version("1.6.0") else "interior-point"

from sklearn.linear_model import QuantileRegressor

quantiles = [x for x in np.arange(0.1, 1.0, 0.4)]
q_solar = {}
q_solar["true"] = feature_engineerer_solar.y_test.values
#out_bounds_predictions = np.zeros_like(y_true_mean, dtype=np.bool_)
for quantile in quantiles:
    qr_solar = QuantileRegressor(quantile=quantile, alpha=0, solver=solver)
    qr_solar.fit(feature_engineerer_solar.X_train, feature_engineerer_solar.y_train)
    q_solar[str(quantile)] = qr_solar.predict(feature_engineerer_solar.X_test)
    #predictions[quantile] = y_pred

In [102]:
qsolar_df = pd.DataFrame(q_solar)

pinball_score(qsolar_df, quantiles)

50.06823329099725

In [103]:
qsolar_df.tail(20)

Unnamed: 0,true,0.1,0.5,0.9
6403,807.979685,913.332103,913.332103,913.332103
6404,738.90563,846.103965,846.103965,846.103965
6405,641.360705,781.061683,781.061683,781.061683
6406,540.135685,696.062668,696.062668,696.062668
6407,428.443677,613.175265,613.175265,613.175265
6408,301.218273,518.52493,518.52493,518.52493
6409,185.929933,426.080883,426.080883,426.080883
6410,104.851963,333.176427,333.176427,333.176427
6411,66.54314,242.781477,242.781477,242.781477
6412,43.347884,167.65238,167.65238,167.65238


In [9]:
merged_pes.columns

Index(['cloud_cover', 'solar_down_rad', 'temp', 'forecast_horizon',
       'temp_mean', 'temp_std', 'temp_min', 'temp_max', 'solar_down_rad_mean',
       'solar_down_rad_std', 'solar_down_rad_min', 'solar_down_rad_max',
       'cloud_cover_mean', 'cloud_cover_std', 'cloud_cover_min',
       'cloud_cover_max', 'temp_range', 'sin_month', 'cos_month', 'sin_day',
       'cos_day', 'sin_dayofweek', 'cos_dayofweek', 'sin_hour', 'cos_hour',
       'Wind_MWh_credit', 'Solar_MWh_credit', 'temp_diff', 'cloud_cover_diff'],
      dtype='object')

In [11]:
merged_pes_simple = merged_pes[['solar_down_rad', 'solar_down_rad_mean', 'solar_down_rad_std', 'sin_month', 'cos_month', 'sin_day','cos_day', 'sin_dayofweek', 'cos_dayofweek', 'sin_hour', 'cos_hour', 'Solar_MWh_credit', 'Wind_MWh_credit']]

feature_engineerer_solar_simple = Preprocessing_test.FeatureEngineerer(merged_pes_simple, label = 'Solar_MWh_credit')

from sklearn.utils.fixes import parse_version, sp_version

# This is line is to avoid incompatibility if older SciPy version.
# You should use `solver="highs"` with recent version of SciPy.
solver = "highs" if sp_version >= parse_version("1.6.0") else "interior-point"

from sklearn.linear_model import QuantileRegressor

quantiles = [x for x in np.arange(0.1, 1.0, 0.4)]
q_solar_simple = {}
q_solar_simple["true"] = feature_engineerer_solar_simple.y_test.values
#out_bounds_predictions = np.zeros_like(y_true_mean, dtype=np.bool_)
for quantile in quantiles:
    qr_solar_simple = QuantileRegressor(quantile=quantile, alpha=0, solver=solver)
    qr_solar_simple.fit(feature_engineerer_solar_simple.X_train, feature_engineerer_solar_simple.y_train)
    q_solar_simple[str(quantile)] = qr_solar_simple.predict(feature_engineerer_solar_simple.X_test)
    #predictions[quantile] = y_pred

qsolar_simple_df = pd.DataFrame(q_solar_simple)

In [12]:
pinball_score(qsolar_simple_df, quantiles)

12.033811268236333