# Capacity Factors Forecast with Regression

In [35]:
import numpy as np
import pandas as pd
import xarray as xr
import geopandas as gpd
from enum import Enum

from ngboost.scores import LogScore
from shapely.geometry import Point
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_theme(style="darkgrid")

from ngboost import NGBRegressor
from sklearn.model_selection import train_test_split
from ngboost.distns import Exponential, Normal, LogNormal

from sklearn.metrics import mean_pinball_loss
from sklearn.metrics import mean_squared_error

from scipy.stats import norm
import ephem
from datetime import datetime
import pickle
from pathlib import Path

paths = {"era5_eu_2013": "resources/europe-2013-era5.nc",
         "era5_tutorial": "resources/europe-2013-era5-tutorial.nc",
         "offshore_shape": "resources/regions_offshore_elec_s_37.geojson",
         "onshore_shape": "resources/regions_onshore_elec_s_37.geojson",
         "capfacs": "resources/capfacs_37.csv",
         "era5_regions": "resources/europe-2013-era5-regions.nc"}

In [29]:
ds = xr.open_dataset(filename_or_obj=paths["era5_regions"], engine="netcdf4")
ds

In [30]:
capfacts = pd.read_csv(paths["capfacs"])
capfacts

Unnamed: 0,snapshot,AL0 0 offwind-ac,AL0 0 onwind,AL0 0 solar,AT0 0 onwind,AT0 0 ror,AT0 0 solar,BA0 0 onwind,BA0 0 solar,BE0 0 offwind-ac,...,SE4 0 onwind,SE4 0 ror,SE4 0 solar,SI0 0 offwind-ac,SI0 0 onwind,SI0 0 ror,SI0 0 solar,SK0 0 onwind,SK0 0 ror,SK0 0 solar
0,2013-01-01 00:00:00,0.003291,0.001469,0.0,0.163262,0.224456,0.0,0.007340,0.0,1.000000,...,0.459609,0.626955,0.0,0.000000,0.055146,0.344668,0.0,0.361009,0.106197,0.0
1,2013-01-01 01:00:00,0.002103,0.000000,0.0,0.171340,0.224369,0.0,0.007939,0.0,0.999998,...,0.463265,0.625502,0.0,0.000000,0.052605,0.344657,0.0,0.368912,0.106012,0.0
2,2013-01-01 02:00:00,0.000000,0.000000,0.0,0.171035,0.224300,0.0,0.007829,0.0,0.993941,...,0.463777,0.624810,0.0,0.000000,0.052222,0.344593,0.0,0.382949,0.105968,0.0
3,2013-01-01 03:00:00,0.000000,0.000000,0.0,0.169685,0.224249,0.0,0.005766,0.0,0.916094,...,0.463041,0.623794,0.0,0.000000,0.050762,0.344626,0.0,0.388344,0.106215,0.0
4,2013-01-01 04:00:00,0.000000,0.000000,0.0,0.159757,0.224213,0.0,0.004262,0.0,0.704786,...,0.457253,0.623085,0.0,0.000000,0.047285,0.344607,0.0,0.409303,0.106364,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8755,2013-12-31 19:00:00,0.029235,0.024321,0.0,0.109127,0.275353,0.0,0.001076,0.0,0.998922,...,0.358191,0.469586,0.0,0.027877,0.001708,0.696975,0.0,0.125735,0.157729,0.0
8756,2013-12-31 20:00:00,0.034024,0.031357,0.0,0.120485,0.275159,0.0,0.001090,0.0,0.965757,...,0.324709,0.469395,0.0,0.023567,0.000000,0.696262,0.0,0.142431,0.157817,0.0
8757,2013-12-31 21:00:00,0.037104,0.034522,0.0,0.128186,0.274975,0.0,0.001062,0.0,0.852484,...,0.288415,0.469208,0.0,0.000000,0.004891,0.695524,0.0,0.153930,0.157899,0.0
8758,2013-12-31 22:00:00,0.029730,0.027419,0.0,0.124740,0.274784,0.0,0.001351,0.0,0.776442,...,0.268695,0.469028,0.0,0.000000,0.009091,0.694665,0.0,0.163442,0.157945,0.0


In [31]:
class EnergyType(Enum):
    """
    Represents the different type of renewable energy sources in pypsa-eur
    """
    OFFWIND_AC = "offwind-ac"
    OFFWIND_DC = "offwind-dc"
    ONWIND = "onwind-dc"
    SOLAR = "solar"
    ROR = "ror"
    NOT_DEFINED = "not_defined"

class Feature(Enum):
    """
    Represents the features, that can be extracted from the era-5 weather data set
    """
    HEIGHT = "height"
    WND100M = "wnd100m"
    ROUGHNESS = "roughness"
    INFLUX_TOA = "influx_toa"
    INFLUX_DIRECT = "influx_direct"
    INFLUX_DIFFUSE = "influx_diffuse"
    ALBEDO = "albedo"
    TEMPERATURE = "temperature"
    SOIL_TEMPERATURE = "soil_temperature"
    RUNOFF = "runoff"

"""
Determines which features are selected to calculate the capacity factor of a certain energy type.
"""
feature_set = {
    EnergyType.OFFWIND_AC: [Feature.HEIGHT, Feature.WND100M, Feature.ROUGHNESS],
    EnergyType.OFFWIND_DC: [Feature.HEIGHT, Feature.WND100M, Feature.ROUGHNESS],
    EnergyType.ONWIND: [Feature.HEIGHT, Feature.WND100M, Feature.ROUGHNESS],
    EnergyType.SOLAR: [Feature.INFLUX_TOA, Feature.INFLUX_DIRECT, Feature.INFLUX_DIFFUSE, Feature.TEMPERATURE],
    EnergyType.ROR: []
}

def find_countries_in_capfacts(country_name="") -> list:
    """
    Returns the full region names and energy types of the given name abbreviation that can be found in the .csv file with capacity factors.
    :param country_name: Two character abbreviation of the searched country
    :return: list of all regions and energy types to the given country name
    """
    countries = []
    for column in capfacts:
        if column.find(country_name) >= 0:
            countries.append(column)
    return countries


def get_energy_type(name: str) -> EnergyType:
    """
    Returns the energy type for a given string
    :param name: energy type as string
    :return: energy type for the given string
    """
    match name:
        case "offwind-ac":
            return EnergyType.OFFWIND_AC
        case "offwind-dc":
            return EnergyType.OFFWIND_DC
        case "onwind":
            return EnergyType.ONWIND
        case "solar":
            return EnergyType.SOLAR
        case "ror":
            return EnergyType.ROR
        case _:
            return EnergyType.NOT_DEFINED

def get_ds_region_name(region_name: str, energy_type: EnergyType) -> str:
    """
    Returns the name or string that addresses the given region and energy type which can be used to address the data in the feature data set
    :param region_name: name of the region
    :param energy_type: the uses energy type in that region
    :return: string that can be used to fetch data from the feature data set
    """
    ds_region_name = region_name + " 0"
    if energy_type == EnergyType.ONWIND or energy_type == EnergyType.SOLAR or energy_type == EnergyType.ROR:
        ds_region_name += " on"
    elif energy_type == EnergyType.OFFWIND_AC or energy_type == EnergyType.OFFWIND_DC:
        ds_region_name += " off"
    else:
        ds_region_name += ""
    return ds_region_name

def parse_capfac_col(column_name: str) -> (str, EnergyType):
    """
    Returns a tuple of the region name and energy type for a given column name of the capfacts .csv file
    :param column_name: column name of the capfacts .csv file
    :return: Tuple of a region name and energy type, None if no region is found
    """
    col_args = column_name.split(" ")
    if len(col_args) == 3:
        region_name = col_args[0]
        energy_type = get_energy_type(col_args[2])
        return region_name, energy_type
    return None, None

def create_training_data_for_col(column_name: str) -> (np.ndarray, dict):
    """
    Creates and returns the the training data set with the relevant data for a given column name from the capfacts .csv file.
    The training data set is a tuple of a numpy array of capacity factors (target values) and a dictionary of the era5 data (feature data),
    :param column_name: column name of the capfacts .csv file
    :return: Tuple of capacity factor (Y) and trainings data (X)
            X                       : DataFrame object or List or numpy array of predictors (n x p) in Numeric format
            Y                       : DataFrame object or List or numpy array of outcomes (n) in Numeric format.
    """
    region_name, energy_type = parse_capfac_col(column_name)
    # print(region_name)
    # print(energy_type)
    ds_region_name = get_ds_region_name(region_name, energy_type)
    # print(ds_region_name)
    features = feature_set.get(energy_type)
    # print(features)

    Y_capfac = capfacts[column_name].values
    X = {}
    for feature in features:
        X[feature] = ds.sel(region=ds_region_name)[feature.value].values

    return Y_capfac, X


def shape_multi_feature_data(training_data: dict):
    """
    Reshapes the trainingsdata in an array of shape (n_samples, n_features)
    (8760, 2) ---> [[x_f1_1, x_f2_1], [x_f1_2, x_f2_2], ... , [x_f1_8760, x_f2_8760]]
    :param training_data as a dictinary of multiple 1-d arrays:
    :return: trainingsdata in array of shape (n_samples, n_features)
    """
    # tup = tuple(list(training_data.values()))
    # multi_feature_train_data = np.column_stack(tup)
    arrays = list(training_data.values())
    return np.stack(arrays, axis=-1)

In [32]:
def get_date_time_obj(date_time_str: str):
    # 2013-01-01 21:00:00
    return datetime.strptime(date_time_str, "%Y-%m-%d %H:%M:%S")

In [33]:
capfacts_columns = capfacts.columns.values


# capfacts_pred_q40 = pd.DataFrame(columns=capfacts_columns)
capfacts_pred_q40 = pd.DataFrame()
capfacts_pred_q40["snapshot"] = capfacts["snapshot"]

# capfacts_pred_q60 = pd.DataFrame(columns=capfacts_columns)
capfacts_pred_q60 = pd.DataFrame()
capfacts_pred_q60["snapshot"] = capfacts["snapshot"]

capfacts_pred_q40

Unnamed: 0,snapshot
0,2013-01-01 00:00:00
1,2013-01-01 01:00:00
2,2013-01-01 02:00:00
3,2013-01-01 03:00:00
4,2013-01-01 04:00:00
...,...
8755,2013-12-31 19:00:00
8756,2013-12-31 20:00:00
8757,2013-12-31 21:00:00
8758,2013-12-31 22:00:00


In [34]:
col_names = capfacts_columns[1:]

i = 1
for col in col_names:
    print("Processing \"", col, "\" (", i, "/", len(col_names), ")")
    i += 1
    region_name, energy_type = parse_capfac_col(col)

    if(energy_type == EnergyType.NOT_DEFINED):
        print("Skipped column: ", col)
        print("-------------------------------------------------------------------")
    elif(energy_type == EnergyType.ROR):
        print("Skipped column: ", col)
        print("-------------------------------------------------------------------")
    else:
        print("Create Trainings data for region: ", region_name, " with energy type: ", energy_type)

        Y, X = create_training_data_for_col(col)
        X_pred = shape_multi_feature_data(X)
        X_train, X_test, Y_train, Y_test = train_test_split(shape_multi_feature_data(X), Y, test_size=0.25)

        print("Fit Regression Model for region ", region_name)
        ngb = NGBRegressor(verbose=False).fit(X_train, Y_train)

        print("Predict capacity factors for region ", region_name)
        # Y_preds = ngb.predict(X_pred)
        Y_dists = ngb.pred_dist(X_pred)

        Y_preds_q40 = Y_dists.ppf(0.4)
        capfacts_pred_q40[col] = Y_preds_q40

        Y_preds_q60 = Y_dists.ppf(0.6)
        capfacts_pred_q60[col] = Y_preds_q60

        # print(capfacts_pred.head())
        print(col, " with q = 0.4")
        print("Smallest value: ", capfacts_pred_q40[col].min())
        print("Biggest value: ", capfacts_pred_q40[col].max())
        print("#Negative Values: ", (capfacts_pred_q40[col] < 0).sum())
        print("#Values > 1: ", (capfacts_pred_q40[col] > 1).sum())

        print("")
        print(col, " with q = 0.6")
        print("Smallest value: ", capfacts_pred_q40[col].min())
        print("Biggest value: ", capfacts_pred_q40[col].max())
        print("#Negative Values: ", (capfacts_pred_q40[col] < 0).sum())
        print("#Values > 1: ", (capfacts_pred_q40[col] > 1).sum())

        print("-------------------------------------------------------------------")

        print("")


Processing " AL0 0 offwind-ac " ( 1 / 150 )
Create Trainings data for region:  AL0  with energy type:  EnergyType.OFFWIND_AC
Fit Regression Model for region  AL0
Predict capacity factors for region  AL0
AL0 0 offwind-ac  with q = 0.4
Smallest value:  -0.009436281505676975
Biggest value:  0.9695911576489085
#Negative Values:  36
#Values > 1:  0

AL0 0 offwind-ac  with q = 0.6
Smallest value:  -0.009436281505676975
Biggest value:  0.9695911576489085
#Negative Values:  36
#Values > 1:  0
-------------------------------------------------------------------

Processing " AL0 0 onwind " ( 2 / 150 )
Create Trainings data for region:  AL0  with energy type:  EnergyType.ONWIND
Fit Regression Model for region  AL0
Predict capacity factors for region  AL0
AL0 0 onwind  with q = 0.4
Smallest value:  -0.0038522684827349265
Biggest value:  0.6239638832141546
#Negative Values:  167
#Values > 1:  0

AL0 0 onwind  with q = 0.6
Smallest value:  -0.0038522684827349265
Biggest value:  0.6239638832141546
#N

  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


NO4 0 offwind-ac  with q = 0.4
Smallest value:  0.04446164880795418
Biggest value:  0.9126740465708317
#Negative Values:  0
#Values > 1:  0

NO4 0 offwind-ac  with q = 0.6
Smallest value:  0.04446164880795418
Biggest value:  0.9126740465708317
#Negative Values:  0
#Values > 1:  0
-------------------------------------------------------------------

Processing " NO4 0 offwind-dc " ( 101 / 150 )
Create Trainings data for region:  NO4  with energy type:  EnergyType.OFFWIND_DC
Fit Regression Model for region  NO4
Predict capacity factors for region  NO4


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


NO4 0 offwind-dc  with q = 0.4
Smallest value:  0.07149659851481542
Biggest value:  0.966986280142144
#Negative Values:  0
#Values > 1:  0

NO4 0 offwind-dc  with q = 0.6
Smallest value:  0.07149659851481542
Biggest value:  0.966986280142144
#Negative Values:  0
#Values > 1:  0
-------------------------------------------------------------------

Processing " NO4 0 onwind " ( 102 / 150 )
Create Trainings data for region:  NO4  with energy type:  EnergyType.ONWIND
Fit Regression Model for region  NO4
Predict capacity factors for region  NO4


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


NO4 0 onwind  with q = 0.4
Smallest value:  0.008019821518132813
Biggest value:  0.8036248558264131
#Negative Values:  0
#Values > 1:  0

NO4 0 onwind  with q = 0.6
Smallest value:  0.008019821518132813
Biggest value:  0.8036248558264131
#Negative Values:  0
#Values > 1:  0
-------------------------------------------------------------------

Processing " NO4 0 solar " ( 103 / 150 )
Create Trainings data for region:  NO4  with energy type:  EnergyType.SOLAR
Fit Regression Model for region  NO4
Predict capacity factors for region  NO4


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


NO4 0 solar  with q = 0.4
Smallest value:  -0.013940648968150483
Biggest value:  0.5603457574329882
#Negative Values:  809
#Values > 1:  0

NO4 0 solar  with q = 0.6
Smallest value:  -0.013940648968150483
Biggest value:  0.5603457574329882
#Negative Values:  809
#Values > 1:  0
-------------------------------------------------------------------

Processing " PL0 0 offwind-ac " ( 104 / 150 )
Create Trainings data for region:  PL0  with energy type:  EnergyType.OFFWIND_AC
Fit Regression Model for region  PL0
Predict capacity factors for region  PL0


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


PL0 0 offwind-ac  with q = 0.4
Smallest value:  0.0005491848403440941
Biggest value:  1.0027064662219096
#Negative Values:  0
#Values > 1:  1

PL0 0 offwind-ac  with q = 0.6
Smallest value:  0.0005491848403440941
Biggest value:  1.0027064662219096
#Negative Values:  0
#Values > 1:  1
-------------------------------------------------------------------

Processing " PL0 0 offwind-dc " ( 105 / 150 )
Create Trainings data for region:  PL0  with energy type:  EnergyType.OFFWIND_DC
Fit Regression Model for region  PL0
Predict capacity factors for region  PL0


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


PL0 0 offwind-dc  with q = 0.4
Smallest value:  -0.002105570320106799
Biggest value:  1.0084745808923714
#Negative Values:  27
#Values > 1:  25

PL0 0 offwind-dc  with q = 0.6
Smallest value:  -0.002105570320106799
Biggest value:  1.0084745808923714
#Negative Values:  27
#Values > 1:  25
-------------------------------------------------------------------

Processing " PL0 0 onwind " ( 106 / 150 )
Create Trainings data for region:  PL0  with energy type:  EnergyType.ONWIND
Fit Regression Model for region  PL0
Predict capacity factors for region  PL0


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


PL0 0 onwind  with q = 0.4
Smallest value:  0.0012761102643033034
Biggest value:  0.9904122581962196
#Negative Values:  0
#Values > 1:  0

PL0 0 onwind  with q = 0.6
Smallest value:  0.0012761102643033034
Biggest value:  0.9904122581962196
#Negative Values:  0
#Values > 1:  0
-------------------------------------------------------------------

Processing " PL0 0 ror " ( 107 / 150 )
Skipped column:  PL0 0 ror
-------------------------------------------------------------------
Processing " PL0 0 solar " ( 107 / 150 )
Create Trainings data for region:  PL0  with energy type:  EnergyType.SOLAR
Fit Regression Model for region  PL0
Predict capacity factors for region  PL0


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


PL0 0 solar  with q = 0.4
Smallest value:  -0.0399040819144094
Biggest value:  0.6303860634785307
#Negative Values:  983
#Values > 1:  0

PL0 0 solar  with q = 0.6
Smallest value:  -0.0399040819144094
Biggest value:  0.6303860634785307
#Negative Values:  983
#Values > 1:  0
-------------------------------------------------------------------

Processing " PT0 0 offwind-ac " ( 108 / 150 )
Create Trainings data for region:  PT0  with energy type:  EnergyType.OFFWIND_AC
Fit Regression Model for region  PT0
Predict capacity factors for region  PT0


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


PT0 0 offwind-ac  with q = 0.4
Smallest value:  0.005652285144151923
Biggest value:  0.9855684789718535
#Negative Values:  0
#Values > 1:  0

PT0 0 offwind-ac  with q = 0.6
Smallest value:  0.005652285144151923
Biggest value:  0.9855684789718535
#Negative Values:  0
#Values > 1:  0
-------------------------------------------------------------------

Processing " PT0 0 onwind " ( 109 / 150 )
Create Trainings data for region:  PT0  with energy type:  EnergyType.ONWIND
Fit Regression Model for region  PT0
Predict capacity factors for region  PT0


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


PT0 0 onwind  with q = 0.4
Smallest value:  -0.0019744335005926436
Biggest value:  0.969468849751864
#Negative Values:  26
#Values > 1:  0

PT0 0 onwind  with q = 0.6
Smallest value:  -0.0019744335005926436
Biggest value:  0.969468849751864
#Negative Values:  26
#Values > 1:  0
-------------------------------------------------------------------

Processing " PT0 0 ror " ( 110 / 150 )
Skipped column:  PT0 0 ror
-------------------------------------------------------------------
Processing " PT0 0 solar " ( 110 / 150 )
Create Trainings data for region:  PT0  with energy type:  EnergyType.SOLAR
Fit Regression Model for region  PT0
Predict capacity factors for region  PT0


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


PT0 0 solar  with q = 0.4
Smallest value:  -0.058733515351489365
Biggest value:  0.6576600123596984
#Negative Values:  2315
#Values > 1:  0

PT0 0 solar  with q = 0.6
Smallest value:  -0.058733515351489365
Biggest value:  0.6576600123596984
#Negative Values:  2315
#Values > 1:  0
-------------------------------------------------------------------

Processing " RO0 0 offwind-ac " ( 111 / 150 )
Create Trainings data for region:  RO0  with energy type:  EnergyType.OFFWIND_AC
Fit Regression Model for region  RO0
Predict capacity factors for region  RO0


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


RO0 0 offwind-ac  with q = 0.4
Smallest value:  -0.002452844154462853
Biggest value:  1.002904864981459
#Negative Values:  5
#Values > 1:  4

RO0 0 offwind-ac  with q = 0.6
Smallest value:  -0.002452844154462853
Biggest value:  1.002904864981459
#Negative Values:  5
#Values > 1:  4
-------------------------------------------------------------------

Processing " RO0 0 offwind-dc " ( 112 / 150 )
Create Trainings data for region:  RO0  with energy type:  EnergyType.OFFWIND_DC
Fit Regression Model for region  RO0
Predict capacity factors for region  RO0


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


RO0 0 offwind-dc  with q = 0.4
Smallest value:  -0.0077786345669563495
Biggest value:  1.0034144425744131
#Negative Values:  9
#Values > 1:  6

RO0 0 offwind-dc  with q = 0.6
Smallest value:  -0.0077786345669563495
Biggest value:  1.0034144425744131
#Negative Values:  9
#Values > 1:  6
-------------------------------------------------------------------

Processing " RO0 0 onwind " ( 113 / 150 )
Create Trainings data for region:  RO0  with energy type:  EnergyType.ONWIND
Fit Regression Model for region  RO0
Predict capacity factors for region  RO0


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


RO0 0 onwind  with q = 0.4
Smallest value:  0.000690445833526353
Biggest value:  0.6972670914578674
#Negative Values:  0
#Values > 1:  0

RO0 0 onwind  with q = 0.6
Smallest value:  0.000690445833526353
Biggest value:  0.6972670914578674
#Negative Values:  0
#Values > 1:  0
-------------------------------------------------------------------

Processing " RO0 0 ror " ( 114 / 150 )
Skipped column:  RO0 0 ror
-------------------------------------------------------------------
Processing " RO0 0 solar " ( 114 / 150 )
Create Trainings data for region:  RO0  with energy type:  EnergyType.SOLAR
Fit Regression Model for region  RO0
Predict capacity factors for region  RO0


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


RO0 0 solar  with q = 0.4
Smallest value:  -0.025169244483454707
Biggest value:  0.6425662737482467
#Negative Values:  1532
#Values > 1:  0

RO0 0 solar  with q = 0.6
Smallest value:  -0.025169244483454707
Biggest value:  0.6425662737482467
#Negative Values:  1532
#Values > 1:  0
-------------------------------------------------------------------

Processing " RS0 0 onwind " ( 115 / 150 )
Create Trainings data for region:  RS0  with energy type:  EnergyType.ONWIND
Fit Regression Model for region  RS0
Predict capacity factors for region  RS0


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


RS0 0 onwind  with q = 0.4
Smallest value:  -0.003535587227386045
Biggest value:  0.9460375516819918
#Negative Values:  55
#Values > 1:  0

RS0 0 onwind  with q = 0.6
Smallest value:  -0.003535587227386045
Biggest value:  0.9460375516819918
#Negative Values:  55
#Values > 1:  0
-------------------------------------------------------------------

Processing " RS0 0 solar " ( 116 / 150 )
Create Trainings data for region:  RS0  with energy type:  EnergyType.SOLAR
Fit Regression Model for region  RS0
Predict capacity factors for region  RS0


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


RS0 0 solar  with q = 0.4
Smallest value:  -0.026658306293587487
Biggest value:  0.6058307461472289
#Negative Values:  1266
#Values > 1:  0

RS0 0 solar  with q = 0.6
Smallest value:  -0.026658306293587487
Biggest value:  0.6058307461472289
#Negative Values:  1266
#Values > 1:  0
-------------------------------------------------------------------

Processing " SE4 0 offwind-ac " ( 117 / 150 )
Create Trainings data for region:  SE4  with energy type:  EnergyType.OFFWIND_AC
Fit Regression Model for region  SE4
Predict capacity factors for region  SE4


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


SE4 0 offwind-ac  with q = 0.4
Smallest value:  0.01634051339539101
Biggest value:  0.9873943219997767
#Negative Values:  0
#Values > 1:  0

SE4 0 offwind-ac  with q = 0.6
Smallest value:  0.01634051339539101
Biggest value:  0.9873943219997767
#Negative Values:  0
#Values > 1:  0
-------------------------------------------------------------------

Processing " SE4 0 offwind-dc " ( 118 / 150 )
Create Trainings data for region:  SE4  with energy type:  EnergyType.OFFWIND_DC
Fit Regression Model for region  SE4
Predict capacity factors for region  SE4


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


SE4 0 offwind-dc  with q = 0.4
Smallest value:  -0.014557334624010649
Biggest value:  0.9724419654302331
#Negative Values:  1
#Values > 1:  0

SE4 0 offwind-dc  with q = 0.6
Smallest value:  -0.014557334624010649
Biggest value:  0.9724419654302331
#Negative Values:  1
#Values > 1:  0
-------------------------------------------------------------------

Processing " SE4 0 onwind " ( 119 / 150 )
Create Trainings data for region:  SE4  with energy type:  EnergyType.ONWIND
Fit Regression Model for region  SE4
Predict capacity factors for region  SE4


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


SE4 0 onwind  with q = 0.4
Smallest value:  0.00538933362267466
Biggest value:  0.9004403279794397
#Negative Values:  0
#Values > 1:  0

SE4 0 onwind  with q = 0.6
Smallest value:  0.00538933362267466
Biggest value:  0.9004403279794397
#Negative Values:  0
#Values > 1:  0
-------------------------------------------------------------------

Processing " SE4 0 ror " ( 120 / 150 )
Skipped column:  SE4 0 ror
-------------------------------------------------------------------
Processing " SE4 0 solar " ( 120 / 150 )
Create Trainings data for region:  SE4  with energy type:  EnergyType.SOLAR
Fit Regression Model for region  SE4
Predict capacity factors for region  SE4


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


SE4 0 solar  with q = 0.4
Smallest value:  -0.028288252472106194
Biggest value:  0.5485939500723878
#Negative Values:  968
#Values > 1:  0

SE4 0 solar  with q = 0.6
Smallest value:  -0.028288252472106194
Biggest value:  0.5485939500723878
#Negative Values:  968
#Values > 1:  0
-------------------------------------------------------------------

Processing " SI0 0 offwind-ac " ( 121 / 150 )
Create Trainings data for region:  SI0  with energy type:  EnergyType.OFFWIND_AC
Fit Regression Model for region  SI0
Predict capacity factors for region  SI0


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


SI0 0 offwind-ac  with q = 0.4
Smallest value:  -0.00508658469416725
Biggest value:  1.0035704679817417
#Negative Values:  2934
#Values > 1:  3

SI0 0 offwind-ac  with q = 0.6
Smallest value:  -0.00508658469416725
Biggest value:  1.0035704679817417
#Negative Values:  2934
#Values > 1:  3
-------------------------------------------------------------------

Processing " SI0 0 onwind " ( 122 / 150 )
Create Trainings data for region:  SI0  with energy type:  EnergyType.ONWIND
Fit Regression Model for region  SI0
Predict capacity factors for region  SI0


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


SI0 0 onwind  with q = 0.4
Smallest value:  -0.003411050892618146
Biggest value:  0.905871507148373
#Negative Values:  374
#Values > 1:  0

SI0 0 onwind  with q = 0.6
Smallest value:  -0.003411050892618146
Biggest value:  0.905871507148373
#Negative Values:  374
#Values > 1:  0
-------------------------------------------------------------------

Processing " SI0 0 ror " ( 123 / 150 )
Skipped column:  SI0 0 ror
-------------------------------------------------------------------
Processing " SI0 0 solar " ( 123 / 150 )
Create Trainings data for region:  SI0  with energy type:  EnergyType.SOLAR
Fit Regression Model for region  SI0
Predict capacity factors for region  SI0


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


SI0 0 solar  with q = 0.4
Smallest value:  -0.02130675753469376
Biggest value:  0.6092207783131321
#Negative Values:  1318
#Values > 1:  0

SI0 0 solar  with q = 0.6
Smallest value:  -0.02130675753469376
Biggest value:  0.6092207783131321
#Negative Values:  1318
#Values > 1:  0
-------------------------------------------------------------------

Processing " SK0 0 onwind " ( 124 / 150 )
Create Trainings data for region:  SK0  with energy type:  EnergyType.ONWIND
Fit Regression Model for region  SK0
Predict capacity factors for region  SK0


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


SK0 0 onwind  with q = 0.4
Smallest value:  -0.011613402261556792
Biggest value:  0.941117532206984
#Negative Values:  102
#Values > 1:  0

SK0 0 onwind  with q = 0.6
Smallest value:  -0.011613402261556792
Biggest value:  0.941117532206984
#Negative Values:  102
#Values > 1:  0
-------------------------------------------------------------------

Processing " SK0 0 ror " ( 125 / 150 )
Skipped column:  SK0 0 ror
-------------------------------------------------------------------
Processing " SK0 0 solar " ( 125 / 150 )
Create Trainings data for region:  SK0  with energy type:  EnergyType.SOLAR
Fit Regression Model for region  SK0
Predict capacity factors for region  SK0
SK0 0 solar  with q = 0.4
Smallest value:  -0.03127151363372832
Biggest value:  0.5972583409925452
#Negative Values:  653
#Values > 1:  0

SK0 0 solar  with q = 0.6
Smallest value:  -0.03127151363372832
Biggest value:  0.5972583409925452
#Negative Values:  653
#Values > 1:  0
----------------------------------------------

  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


FileNotFoundError: [Errno 2] No such file or directory: 'results/capfacts_pred_q40.csv'

In [41]:
capfacts_pred_q40.to_csv('results/capfacts_pred_q40.csv')
capfacts_pred_q60.to_csv('results/capfacts_pred_q60.csv')
capfacts_pred_q40

Unnamed: 0,snapshot,AL0 0 offwind-ac,AL0 0 onwind,AL0 0 solar,AT0 0 onwind,AT0 0 solar,BA0 0 onwind,BA0 0 solar,BE0 0 offwind-ac,BE0 0 offwind-dc,...,RS0 0 solar,SE4 0 offwind-ac,SE4 0 offwind-dc,SE4 0 onwind,SE4 0 solar,SI0 0 offwind-ac,SI0 0 onwind,SI0 0 solar,SK0 0 onwind,SK0 0 solar
0,2013-01-01 00:00:00,0.001001,0.016754,0.002671,0.176184,0.001804,0.008026,0.001486,0.983357,0.993057,...,0.001067,0.893059,0.940247,0.384582,0.000936,0.000000,0.015465,0.001514,0.245974,0.001028
1,2013-01-01 01:00:00,0.001001,0.010650,0.002671,0.176319,0.001804,0.009683,0.001314,0.962701,0.993057,...,0.001067,0.867999,0.940557,0.384582,0.000936,0.000000,0.017966,0.001514,0.246151,0.001028
2,2013-01-01 02:00:00,0.001001,0.003739,0.002671,0.152621,0.002198,0.009683,0.001314,0.936809,0.993062,...,0.001067,0.867999,0.940557,0.390470,0.000936,0.000000,0.024509,0.001514,0.260346,0.001028
3,2013-01-01 03:00:00,0.001001,0.002168,0.002296,0.132709,0.002412,0.009683,0.001472,0.827292,0.960281,...,0.001067,0.867999,0.940557,0.390470,0.000936,0.000000,0.035605,0.001514,0.259987,0.001028
4,2013-01-01 04:00:00,0.001001,0.000000,0.002296,0.132709,0.002412,0.010944,0.001472,0.649142,0.762976,...,0.001067,0.867999,0.940557,0.384582,0.000936,0.000000,0.041852,0.001514,0.259504,0.001028
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8755,2013-12-31 19:00:00,0.026058,0.054861,0.000000,0.114615,0.001643,0.001638,0.000839,0.909782,0.984229,...,0.001128,0.784948,0.868244,0.302674,0.000936,0.025875,0.004002,0.000730,0.075937,0.001028
8756,2013-12-31 20:00:00,0.026058,0.066099,0.000000,0.132333,0.001643,0.001638,0.000839,0.787526,0.878584,...,0.002672,0.744354,0.873603,0.275342,0.000936,0.021788,0.005200,0.000730,0.085197,0.001028
8757,2013-12-31 21:00:00,0.027812,0.071902,0.000733,0.132333,0.001590,0.001638,0.000839,0.688164,0.841269,...,0.003404,0.687318,0.790730,0.243239,0.000936,0.000161,0.005206,0.000730,0.087333,0.001028
8758,2013-12-31 22:00:00,0.031195,0.054667,0.000733,0.157984,0.001590,0.001638,0.000836,0.780206,0.916484,...,0.001208,0.663815,0.758920,0.214875,0.000936,0.000000,0.008251,0.000730,0.089404,0.001028


In [42]:
cols_num = capfacts_pred_q40.select_dtypes(np.number).columns
print(cols_num)

capfacts_pred_q40[cols_num] = capfacts_pred_q40[cols_num].clip(lower=0, upper=1.02)
capfacts_pred_q60[cols_num] = capfacts_pred_q60[cols_num].clip(lower=0, upper=1.02)

# capfacts_pred_q40.iloc[:, 1:].clip(lower=0, upper=1.02)
# capfacts_pred_q60.iloc[:, 1:].clip(lower=0, upper=1.02)

capfacts_pred_q40.to_csv('results/capfacts_pred_q40_clipped.csv')
capfacts_pred_q60.to_csv('results/capfacts_pred_q60_clipped.csv')

# Alles klar, danke euch. Dann werde ich das auf [0, 1.02] beschränken
capfacts_pred_q40

Index(['AL0 0 offwind-ac', 'AL0 0 onwind', 'AL0 0 solar', 'AT0 0 onwind',
       'AT0 0 solar', 'BA0 0 onwind', 'BA0 0 solar', 'BE0 0 offwind-ac',
       'BE0 0 offwind-dc', 'BE0 0 onwind',
       ...
       'RS0 0 solar', 'SE4 0 offwind-ac', 'SE4 0 offwind-dc', 'SE4 0 onwind',
       'SE4 0 solar', 'SI0 0 offwind-ac', 'SI0 0 onwind', 'SI0 0 solar',
       'SK0 0 onwind', 'SK0 0 solar'],
      dtype='object', length=125)


Unnamed: 0,snapshot,AL0 0 offwind-ac,AL0 0 onwind,AL0 0 solar,AT0 0 onwind,AT0 0 solar,BA0 0 onwind,BA0 0 solar,BE0 0 offwind-ac,BE0 0 offwind-dc,...,RS0 0 solar,SE4 0 offwind-ac,SE4 0 offwind-dc,SE4 0 onwind,SE4 0 solar,SI0 0 offwind-ac,SI0 0 onwind,SI0 0 solar,SK0 0 onwind,SK0 0 solar
0,2013-01-01 00:00:00,0.001001,0.016754,0.002671,0.176184,0.001804,0.008026,0.001486,0.983357,0.993057,...,0.001067,0.893059,0.940247,0.384582,0.000936,0.000000,0.015465,0.001514,0.245974,0.001028
1,2013-01-01 01:00:00,0.001001,0.010650,0.002671,0.176319,0.001804,0.009683,0.001314,0.962701,0.993057,...,0.001067,0.867999,0.940557,0.384582,0.000936,0.000000,0.017966,0.001514,0.246151,0.001028
2,2013-01-01 02:00:00,0.001001,0.003739,0.002671,0.152621,0.002198,0.009683,0.001314,0.936809,0.993062,...,0.001067,0.867999,0.940557,0.390470,0.000936,0.000000,0.024509,0.001514,0.260346,0.001028
3,2013-01-01 03:00:00,0.001001,0.002168,0.002296,0.132709,0.002412,0.009683,0.001472,0.827292,0.960281,...,0.001067,0.867999,0.940557,0.390470,0.000936,0.000000,0.035605,0.001514,0.259987,0.001028
4,2013-01-01 04:00:00,0.001001,0.000000,0.002296,0.132709,0.002412,0.010944,0.001472,0.649142,0.762976,...,0.001067,0.867999,0.940557,0.384582,0.000936,0.000000,0.041852,0.001514,0.259504,0.001028
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8755,2013-12-31 19:00:00,0.026058,0.054861,0.000000,0.114615,0.001643,0.001638,0.000839,0.909782,0.984229,...,0.001128,0.784948,0.868244,0.302674,0.000936,0.025875,0.004002,0.000730,0.075937,0.001028
8756,2013-12-31 20:00:00,0.026058,0.066099,0.000000,0.132333,0.001643,0.001638,0.000839,0.787526,0.878584,...,0.002672,0.744354,0.873603,0.275342,0.000936,0.021788,0.005200,0.000730,0.085197,0.001028
8757,2013-12-31 21:00:00,0.027812,0.071902,0.000733,0.132333,0.001590,0.001638,0.000839,0.688164,0.841269,...,0.003404,0.687318,0.790730,0.243239,0.000936,0.000161,0.005206,0.000730,0.087333,0.001028
8758,2013-12-31 22:00:00,0.031195,0.054667,0.000733,0.157984,0.001590,0.001638,0.000836,0.780206,0.916484,...,0.001208,0.663815,0.758920,0.214875,0.000936,0.000000,0.008251,0.000730,0.089404,0.001028


In [44]:
capfacts_pred_q40.dtypes

snapshot             object
AL0 0 offwind-ac    float64
AL0 0 onwind        float64
AL0 0 solar         float64
AT0 0 onwind        float64
                     ...   
SI0 0 offwind-ac    float64
SI0 0 onwind        float64
SI0 0 solar         float64
SK0 0 onwind        float64
SK0 0 solar         float64
Length: 126, dtype: object