# Capacity Factors Forecast with Regression

In [1]:
import numpy as np
import pandas as pd
import xarray as xr
import geopandas as gpd
from enum import Enum

from ngboost.scores import LogScore
from shapely.geometry import Point
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_theme(style="darkgrid")

from ngboost import NGBRegressor
from sklearn.model_selection import train_test_split
from ngboost.distns import Exponential, Normal, LogNormal

from sklearn.metrics import mean_pinball_loss
from sklearn.metrics import mean_squared_error

from scipy.stats import norm
import ephem
from datetime import datetime
import pickle
from pathlib import Path

paths = {"era5_eu_2013": "resources/europe-2013-era5.nc",
         "era5_tutorial": "resources/europe-2013-era5-tutorial.nc",
         "offshore_shape": "resources/regions_offshore_elec_s_37.geojson",
         "onshore_shape": "resources/regions_onshore_elec_s_37.geojson",
         "capfacs": "resources/capfacs_37.csv",
         "era5_regions": "resources/europe-2013-era5-regions.nc"}

In [2]:
ds = xr.open_dataset(filename_or_obj=paths["era5_regions"], engine="netcdf4")
ds

In [3]:
capfacts = pd.read_csv(paths["capfacs"])
capfacts

Unnamed: 0,snapshot,AL0 0 offwind-ac,AL0 0 onwind,AL0 0 solar,AT0 0 onwind,AT0 0 ror,AT0 0 solar,BA0 0 onwind,BA0 0 solar,BE0 0 offwind-ac,...,SE4 0 onwind,SE4 0 ror,SE4 0 solar,SI0 0 offwind-ac,SI0 0 onwind,SI0 0 ror,SI0 0 solar,SK0 0 onwind,SK0 0 ror,SK0 0 solar
0,2013-01-01 00:00:00,0.003291,0.001469,0.0,0.163262,0.224456,0.0,0.007340,0.0,1.000000,...,0.459609,0.626955,0.0,0.000000,0.055146,0.344668,0.0,0.361009,0.106197,0.0
1,2013-01-01 01:00:00,0.002103,0.000000,0.0,0.171340,0.224369,0.0,0.007939,0.0,0.999998,...,0.463265,0.625502,0.0,0.000000,0.052605,0.344657,0.0,0.368912,0.106012,0.0
2,2013-01-01 02:00:00,0.000000,0.000000,0.0,0.171035,0.224300,0.0,0.007829,0.0,0.993941,...,0.463777,0.624810,0.0,0.000000,0.052222,0.344593,0.0,0.382949,0.105968,0.0
3,2013-01-01 03:00:00,0.000000,0.000000,0.0,0.169685,0.224249,0.0,0.005766,0.0,0.916094,...,0.463041,0.623794,0.0,0.000000,0.050762,0.344626,0.0,0.388344,0.106215,0.0
4,2013-01-01 04:00:00,0.000000,0.000000,0.0,0.159757,0.224213,0.0,0.004262,0.0,0.704786,...,0.457253,0.623085,0.0,0.000000,0.047285,0.344607,0.0,0.409303,0.106364,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8755,2013-12-31 19:00:00,0.029235,0.024321,0.0,0.109127,0.275353,0.0,0.001076,0.0,0.998922,...,0.358191,0.469586,0.0,0.027877,0.001708,0.696975,0.0,0.125735,0.157729,0.0
8756,2013-12-31 20:00:00,0.034024,0.031357,0.0,0.120485,0.275159,0.0,0.001090,0.0,0.965757,...,0.324709,0.469395,0.0,0.023567,0.000000,0.696262,0.0,0.142431,0.157817,0.0
8757,2013-12-31 21:00:00,0.037104,0.034522,0.0,0.128186,0.274975,0.0,0.001062,0.0,0.852484,...,0.288415,0.469208,0.0,0.000000,0.004891,0.695524,0.0,0.153930,0.157899,0.0
8758,2013-12-31 22:00:00,0.029730,0.027419,0.0,0.124740,0.274784,0.0,0.001351,0.0,0.776442,...,0.268695,0.469028,0.0,0.000000,0.009091,0.694665,0.0,0.163442,0.157945,0.0


In [4]:
class EnergyType(Enum):
    """
    Represents the different type of renewable energy sources in pypsa-eur
    """
    OFFWIND_AC = "offwind-ac"
    OFFWIND_DC = "offwind-dc"
    ONWIND = "onwind-dc"
    SOLAR = "solar"
    ROR = "ror"
    NOT_DEFINED = "not_defined"

class Feature(Enum):
    """
    Represents the features, that can be extracted from the era-5 weather data set
    """
    HEIGHT = "height"
    WND100M = "wnd100m"
    ROUGHNESS = "roughness"
    INFLUX_TOA = "influx_toa"
    INFLUX_DIRECT = "influx_direct"
    INFLUX_DIFFUSE = "influx_diffuse"
    ALBEDO = "albedo"
    TEMPERATURE = "temperature"
    SOIL_TEMPERATURE = "soil_temperature"
    RUNOFF = "runoff"

"""
Determines which features are selected to calculate the capacity factor of a certain energy type.
"""
feature_set = {
    EnergyType.OFFWIND_AC: [Feature.HEIGHT, Feature.WND100M, Feature.ROUGHNESS],
    EnergyType.OFFWIND_DC: [Feature.HEIGHT, Feature.WND100M, Feature.ROUGHNESS],
    EnergyType.ONWIND: [Feature.HEIGHT, Feature.WND100M, Feature.ROUGHNESS],
    EnergyType.SOLAR: [Feature.INFLUX_TOA, Feature.INFLUX_DIRECT, Feature.INFLUX_DIFFUSE, Feature.TEMPERATURE],
    EnergyType.ROR: []
}

def find_countries_in_capfacts(country_name="") -> list:
    """
    Returns the full region names and energy types of the given name abbreviation that can be found in the .csv file with capacity factors.
    :param country_name: Two character abbreviation of the searched country
    :return: list of all regions and energy types to the given country name
    """
    countries = []
    for column in capfacts:
        if column.find(country_name) >= 0:
            countries.append(column)
    return countries


def get_energy_type(name: str) -> EnergyType:
    """
    Returns the energy type for a given string
    :param name: energy type as string
    :return: energy type for the given string
    """
    match name:
        case "offwind-ac":
            return EnergyType.OFFWIND_AC
        case "offwind-dc":
            return EnergyType.OFFWIND_DC
        case "onwind":
            return EnergyType.ONWIND
        case "solar":
            return EnergyType.SOLAR
        case "ror":
            return EnergyType.ROR
        case _:
            return EnergyType.NOT_DEFINED

def get_ds_region_name(region_name: str, energy_type: EnergyType) -> str:
    """
    Returns the name or string that addresses the given region and energy type which can be used to address the data in the feature data set
    :param region_name: name of the region
    :param energy_type: the uses energy type in that region
    :return: string that can be used to fetch data from the feature data set
    """
    ds_region_name = region_name + " 0"
    if energy_type == EnergyType.ONWIND or energy_type == EnergyType.SOLAR or energy_type == EnergyType.ROR:
        ds_region_name += " on"
    elif energy_type == EnergyType.OFFWIND_AC or energy_type == EnergyType.OFFWIND_DC:
        ds_region_name += " off"
    else:
        ds_region_name += ""
    return ds_region_name

def parse_capfac_col(column_name: str) -> (str, EnergyType):
    """
    Returns a tuple of the region name and energy type for a given column name of the capfacts .csv file
    :param column_name: column name of the capfacts .csv file
    :return: Tuple of a region name and energy type, None if no region is found
    """
    col_args = column_name.split(" ")
    if len(col_args) == 3:
        region_name = col_args[0]
        energy_type = get_energy_type(col_args[2])
        return region_name, energy_type
    return None, None

def create_training_data_for_col(column_name: str) -> (np.ndarray, dict):
    """
    Creates and returns the the training data set with the relevant data for a given column name from the capfacts .csv file.
    The training data set is a tuple of a numpy array of capacity factors (target values) and a dictionary of the era5 data (feature data),
    :param column_name: column name of the capfacts .csv file
    :return: Tuple of capacity factor (Y) and trainings data (X)
            X                       : DataFrame object or List or numpy array of predictors (n x p) in Numeric format
            Y                       : DataFrame object or List or numpy array of outcomes (n) in Numeric format.
    """
    region_name, energy_type = parse_capfac_col(column_name)
    # print(region_name)
    # print(energy_type)
    ds_region_name = get_ds_region_name(region_name, energy_type)
    # print(ds_region_name)
    features = feature_set.get(energy_type)
    # print(features)

    Y_capfac = capfacts[column_name].values
    X = {}
    for feature in features:
        X[feature] = ds.sel(region=ds_region_name)[feature.value].values

    return Y_capfac, X


def shape_multi_feature_data(training_data: dict):
    """
    Reshapes the trainingsdata in an array of shape (n_samples, n_features)
    (8760, 2) ---> [[x_f1_1, x_f2_1], [x_f1_2, x_f2_2], ... , [x_f1_8760, x_f2_8760]]
    :param training_data as a dictinary of multiple 1-d arrays:
    :return: trainingsdata in array of shape (n_samples, n_features)
    """
    # tup = tuple(list(training_data.values()))
    # multi_feature_train_data = np.column_stack(tup)
    arrays = list(training_data.values())
    return np.stack(arrays, axis=-1)

In [5]:
def get_date_time_obj(date_time_str: str):
    # 2013-01-01 21:00:00
    return datetime.strptime(date_time_str, "%Y-%m-%d %H:%M:%S")

In [6]:
capfacts_columns = capfacts.columns.values


# capfacts_pred_q40 = pd.DataFrame(columns=capfacts_columns)
capfacts_pred_q40 = pd.DataFrame()
capfacts_pred_q40["snapshot"] = capfacts["snapshot"]

# capfacts_pred_q60 = pd.DataFrame(columns=capfacts_columns)
capfacts_pred_q60 = pd.DataFrame()
capfacts_pred_q60["snapshot"] = capfacts["snapshot"]

capfacts_pred_q40

Unnamed: 0,snapshot
0,2013-01-01 00:00:00
1,2013-01-01 01:00:00
2,2013-01-01 02:00:00
3,2013-01-01 03:00:00
4,2013-01-01 04:00:00
...,...
8755,2013-12-31 19:00:00
8756,2013-12-31 20:00:00
8757,2013-12-31 21:00:00
8758,2013-12-31 22:00:00


In [7]:
col_names = capfacts_columns[1:]

# new_columns_q40 = {}
# new_columns_q60 = {}

i = 1
for col in col_names:
    print("Processing \"", col, "\" (", i, "/", len(col_names), ")")
    i += 1
    region_name, energy_type = parse_capfac_col(col)

    if(energy_type == EnergyType.NOT_DEFINED):
        print("Skipped column: ", col)
        print("-------------------------------------------------------------------")
    elif(energy_type == EnergyType.ROR):
        print("Skipped column: ", col)
        print("-------------------------------------------------------------------")
    else:
        print("Create Trainings data for region: ", region_name, " with energy type: ", energy_type)

        Y, X = create_training_data_for_col(col)
        X_pred = shape_multi_feature_data(X)
        X_train, X_test, Y_train, Y_test = train_test_split(shape_multi_feature_data(X), Y, test_size=0.25)

        print("Fit Regression Model for region ", region_name)
        ngb = NGBRegressor(verbose=False).fit(X_train, Y_train)

        print("Predict capacity factors for region ", region_name)
        # Y_preds = ngb.predict(X_pred)
        Y_dists = ngb.pred_dist(X_pred)

        Y_preds_q40 = Y_dists.ppf(0.4)
        capfacts_pred_q40[col] = Y_preds_q40

        Y_preds_q60 = Y_dists.ppf(0.6)
        capfacts_pred_q60[col] = Y_preds_q60

        # print(capfacts_pred.head())
        print(col, " with q = 0.4")
        print("Smallest value: ", capfacts_pred_q40[col].min())
        print("Biggest value: ", capfacts_pred_q40[col].max())
        print("#Negative Values: ", (capfacts_pred_q40[col] < 0).sum())
        print("#Values > 1: ", (capfacts_pred_q40[col] > 1).sum())

        print("")
        print(col, " with q = 0.6")
        print("Smallest value: ", capfacts_pred_q40[col].min())
        print("Biggest value: ", capfacts_pred_q40[col].max())
        print("#Negative Values: ", (capfacts_pred_q40[col] < 0).sum())
        print("#Values > 1: ", (capfacts_pred_q40[col] > 1).sum())

        print("-------------------------------------------------------------------")

        print("")


Processing " AL0 0 offwind-ac " ( 1 / 150 )
Create Trainings data for region:  AL0  with energy type:  EnergyType.OFFWIND_AC
Fit Regression Model for region  AL0
Predict capacity factors for region  AL0
AL0 0 offwind-ac  with q = 0.4
Smallest value:  -0.013950169289750076
Biggest value:  0.9762193788012983
#Negative Values:  7
#Values > 1:  0

AL0 0 offwind-ac  with q = 0.6
Smallest value:  -0.013950169289750076
Biggest value:  0.9762193788012983
#Negative Values:  7
#Values > 1:  0
-------------------------------------------------------------------

Processing " AL0 0 onwind " ( 2 / 150 )
Create Trainings data for region:  AL0  with energy type:  EnergyType.ONWIND
Fit Regression Model for region  AL0
Predict capacity factors for region  AL0
AL0 0 onwind  with q = 0.4
Smallest value:  -0.0037642643020512796
Biggest value:  0.6216024341611046
#Negative Values:  110
#Values > 1:  0

AL0 0 onwind  with q = 0.6
Smallest value:  -0.0037642643020512796
Biggest value:  0.6216024341611046
#Neg

  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


NO4 0 offwind-ac  with q = 0.4
Smallest value:  0.04800540833438678
Biggest value:  0.9099138092023531
#Negative Values:  0
#Values > 1:  0

NO4 0 offwind-ac  with q = 0.6
Smallest value:  0.04800540833438678
Biggest value:  0.9099138092023531
#Negative Values:  0
#Values > 1:  0
-------------------------------------------------------------------

Processing " NO4 0 offwind-dc " ( 120 / 150 )
Create Trainings data for region:  NO4  with energy type:  EnergyType.OFFWIND_DC
Fit Regression Model for region  NO4
Predict capacity factors for region  NO4


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


NO4 0 offwind-dc  with q = 0.4
Smallest value:  0.07081896904765013
Biggest value:  0.9702823471484876
#Negative Values:  0
#Values > 1:  0

NO4 0 offwind-dc  with q = 0.6
Smallest value:  0.07081896904765013
Biggest value:  0.9702823471484876
#Negative Values:  0
#Values > 1:  0
-------------------------------------------------------------------

Processing " NO4 0 onwind " ( 121 / 150 )
Create Trainings data for region:  NO4  with energy type:  EnergyType.ONWIND
Fit Regression Model for region  NO4
Predict capacity factors for region  NO4


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


NO4 0 onwind  with q = 0.4
Smallest value:  0.008795275100127908
Biggest value:  0.8035745525538682
#Negative Values:  0
#Values > 1:  0

NO4 0 onwind  with q = 0.6
Smallest value:  0.008795275100127908
Biggest value:  0.8035745525538682
#Negative Values:  0
#Values > 1:  0
-------------------------------------------------------------------

Processing " NO4 0 solar " ( 122 / 150 )
Create Trainings data for region:  NO4  with energy type:  EnergyType.SOLAR
Fit Regression Model for region  NO4
Predict capacity factors for region  NO4


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


NO4 0 solar  with q = 0.4
Smallest value:  -0.014618836395023411
Biggest value:  0.5804809988125864
#Negative Values:  722
#Values > 1:  0

NO4 0 solar  with q = 0.6
Smallest value:  -0.014618836395023411
Biggest value:  0.5804809988125864
#Negative Values:  722
#Values > 1:  0
-------------------------------------------------------------------

Processing " PL0 0 offwind-ac " ( 123 / 150 )
Create Trainings data for region:  PL0  with energy type:  EnergyType.OFFWIND_AC
Fit Regression Model for region  PL0
Predict capacity factors for region  PL0


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


PL0 0 offwind-ac  with q = 0.4
Smallest value:  -0.007512730318620409
Biggest value:  1.003034163432096
#Negative Values:  6
#Values > 1:  2

PL0 0 offwind-ac  with q = 0.6
Smallest value:  -0.007512730318620409
Biggest value:  1.003034163432096
#Negative Values:  6
#Values > 1:  2
-------------------------------------------------------------------

Processing " PL0 0 offwind-dc " ( 124 / 150 )
Create Trainings data for region:  PL0  with energy type:  EnergyType.OFFWIND_DC
Fit Regression Model for region  PL0
Predict capacity factors for region  PL0


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


PL0 0 offwind-dc  with q = 0.4
Smallest value:  -0.007542106204131944
Biggest value:  1.0049835643484688
#Negative Values:  37
#Values > 1:  34

PL0 0 offwind-dc  with q = 0.6
Smallest value:  -0.007542106204131944
Biggest value:  1.0049835643484688
#Negative Values:  37
#Values > 1:  34
-------------------------------------------------------------------

Processing " PL0 0 onwind " ( 125 / 150 )
Create Trainings data for region:  PL0  with energy type:  EnergyType.ONWIND
Fit Regression Model for region  PL0
Predict capacity factors for region  PL0


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


PL0 0 onwind  with q = 0.4
Smallest value:  0.0008483621757227227
Biggest value:  0.994535520515736
#Negative Values:  0
#Values > 1:  0

PL0 0 onwind  with q = 0.6
Smallest value:  0.0008483621757227227
Biggest value:  0.994535520515736
#Negative Values:  0
#Values > 1:  0
-------------------------------------------------------------------

Processing " PL0 0 ror " ( 126 / 150 )
Skipped column:  PL0 0 ror
-------------------------------------------------------------------
Processing " PL0 0 solar " ( 127 / 150 )
Create Trainings data for region:  PL0  with energy type:  EnergyType.SOLAR
Fit Regression Model for region  PL0
Predict capacity factors for region  PL0


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


PL0 0 solar  with q = 0.4
Smallest value:  -0.03167119584477808
Biggest value:  0.6277096581721879
#Negative Values:  1552
#Values > 1:  0

PL0 0 solar  with q = 0.6
Smallest value:  -0.03167119584477808
Biggest value:  0.6277096581721879
#Negative Values:  1552
#Values > 1:  0
-------------------------------------------------------------------

Processing " PT0 0 offwind-ac " ( 128 / 150 )
Create Trainings data for region:  PT0  with energy type:  EnergyType.OFFWIND_AC
Fit Regression Model for region  PT0
Predict capacity factors for region  PT0


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


PT0 0 offwind-ac  with q = 0.4
Smallest value:  0.0069693193103584445
Biggest value:  0.9936273860743188
#Negative Values:  0
#Values > 1:  0

PT0 0 offwind-ac  with q = 0.6
Smallest value:  0.0069693193103584445
Biggest value:  0.9936273860743188
#Negative Values:  0
#Values > 1:  0
-------------------------------------------------------------------

Processing " PT0 0 onwind " ( 129 / 150 )
Create Trainings data for region:  PT0  with energy type:  EnergyType.ONWIND
Fit Regression Model for region  PT0
Predict capacity factors for region  PT0


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


PT0 0 onwind  with q = 0.4
Smallest value:  -0.004240881050304814
Biggest value:  0.9702171083594796
#Negative Values:  22
#Values > 1:  0

PT0 0 onwind  with q = 0.6
Smallest value:  -0.004240881050304814
Biggest value:  0.9702171083594796
#Negative Values:  22
#Values > 1:  0
-------------------------------------------------------------------

Processing " PT0 0 ror " ( 130 / 150 )
Skipped column:  PT0 0 ror
-------------------------------------------------------------------
Processing " PT0 0 solar " ( 131 / 150 )
Create Trainings data for region:  PT0  with energy type:  EnergyType.SOLAR
Fit Regression Model for region  PT0
Predict capacity factors for region  PT0


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


PT0 0 solar  with q = 0.4
Smallest value:  -0.031586774504020754
Biggest value:  0.6463360228620973
#Negative Values:  1942
#Values > 1:  0

PT0 0 solar  with q = 0.6
Smallest value:  -0.031586774504020754
Biggest value:  0.6463360228620973
#Negative Values:  1942
#Values > 1:  0
-------------------------------------------------------------------

Processing " RO0 0 offwind-ac " ( 132 / 150 )
Create Trainings data for region:  RO0  with energy type:  EnergyType.OFFWIND_AC
Fit Regression Model for region  RO0
Predict capacity factors for region  RO0


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


RO0 0 offwind-ac  with q = 0.4
Smallest value:  -0.012230478823944325
Biggest value:  1.0116576821903824
#Negative Values:  3
#Values > 1:  4

RO0 0 offwind-ac  with q = 0.6
Smallest value:  -0.012230478823944325
Biggest value:  1.0116576821903824
#Negative Values:  3
#Values > 1:  4
-------------------------------------------------------------------

Processing " RO0 0 offwind-dc " ( 133 / 150 )
Create Trainings data for region:  RO0  with energy type:  EnergyType.OFFWIND_DC
Fit Regression Model for region  RO0
Predict capacity factors for region  RO0


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


RO0 0 offwind-dc  with q = 0.4
Smallest value:  -0.006713187718888954
Biggest value:  1.0089956327056075
#Negative Values:  25
#Values > 1:  18

RO0 0 offwind-dc  with q = 0.6
Smallest value:  -0.006713187718888954
Biggest value:  1.0089956327056075
#Negative Values:  25
#Values > 1:  18
-------------------------------------------------------------------

Processing " RO0 0 onwind " ( 134 / 150 )
Create Trainings data for region:  RO0  with energy type:  EnergyType.ONWIND
Fit Regression Model for region  RO0
Predict capacity factors for region  RO0


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


RO0 0 onwind  with q = 0.4
Smallest value:  7.330381704698783e-05
Biggest value:  0.7000603009786297
#Negative Values:  0
#Values > 1:  0

RO0 0 onwind  with q = 0.6
Smallest value:  7.330381704698783e-05
Biggest value:  0.7000603009786297
#Negative Values:  0
#Values > 1:  0
-------------------------------------------------------------------

Processing " RO0 0 ror " ( 135 / 150 )
Skipped column:  RO0 0 ror
-------------------------------------------------------------------
Processing " RO0 0 solar " ( 136 / 150 )
Create Trainings data for region:  RO0  with energy type:  EnergyType.SOLAR
Fit Regression Model for region  RO0
Predict capacity factors for region  RO0


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


RO0 0 solar  with q = 0.4
Smallest value:  -0.025798678308852085
Biggest value:  0.6221005366715231
#Negative Values:  1486
#Values > 1:  0

RO0 0 solar  with q = 0.6
Smallest value:  -0.025798678308852085
Biggest value:  0.6221005366715231
#Negative Values:  1486
#Values > 1:  0
-------------------------------------------------------------------

Processing " RS0 0 onwind " ( 137 / 150 )
Create Trainings data for region:  RS0  with energy type:  EnergyType.ONWIND
Fit Regression Model for region  RS0
Predict capacity factors for region  RS0


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


RS0 0 onwind  with q = 0.4
Smallest value:  -0.0047512868921700535
Biggest value:  0.9447532186773316
#Negative Values:  42
#Values > 1:  0

RS0 0 onwind  with q = 0.6
Smallest value:  -0.0047512868921700535
Biggest value:  0.9447532186773316
#Negative Values:  42
#Values > 1:  0
-------------------------------------------------------------------

Processing " RS0 0 solar " ( 138 / 150 )
Create Trainings data for region:  RS0  with energy type:  EnergyType.SOLAR
Fit Regression Model for region  RS0
Predict capacity factors for region  RS0


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


RS0 0 solar  with q = 0.4
Smallest value:  -0.024445164467173906
Biggest value:  0.6094581986403652
#Negative Values:  1680
#Values > 1:  0

RS0 0 solar  with q = 0.6
Smallest value:  -0.024445164467173906
Biggest value:  0.6094581986403652
#Negative Values:  1680
#Values > 1:  0
-------------------------------------------------------------------

Processing " SE4 0 offwind-ac " ( 139 / 150 )
Create Trainings data for region:  SE4  with energy type:  EnergyType.OFFWIND_AC
Fit Regression Model for region  SE4
Predict capacity factors for region  SE4


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


SE4 0 offwind-ac  with q = 0.4
Smallest value:  0.0111456742602285
Biggest value:  0.9713816980602595
#Negative Values:  0
#Values > 1:  0

SE4 0 offwind-ac  with q = 0.6
Smallest value:  0.0111456742602285
Biggest value:  0.9713816980602595
#Negative Values:  0
#Values > 1:  0
-------------------------------------------------------------------

Processing " SE4 0 offwind-dc " ( 140 / 150 )
Create Trainings data for region:  SE4  with energy type:  EnergyType.OFFWIND_DC
Fit Regression Model for region  SE4
Predict capacity factors for region  SE4


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


SE4 0 offwind-dc  with q = 0.4
Smallest value:  -0.0015167099575458757
Biggest value:  0.9815866881923403
#Negative Values:  1
#Values > 1:  0

SE4 0 offwind-dc  with q = 0.6
Smallest value:  -0.0015167099575458757
Biggest value:  0.9815866881923403
#Negative Values:  1
#Values > 1:  0
-------------------------------------------------------------------

Processing " SE4 0 onwind " ( 141 / 150 )
Create Trainings data for region:  SE4  with energy type:  EnergyType.ONWIND
Fit Regression Model for region  SE4
Predict capacity factors for region  SE4


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


SE4 0 onwind  with q = 0.4
Smallest value:  0.0060728157588164945
Biggest value:  0.8894709095534107
#Negative Values:  0
#Values > 1:  0

SE4 0 onwind  with q = 0.6
Smallest value:  0.0060728157588164945
Biggest value:  0.8894709095534107
#Negative Values:  0
#Values > 1:  0
-------------------------------------------------------------------

Processing " SE4 0 ror " ( 142 / 150 )
Skipped column:  SE4 0 ror
-------------------------------------------------------------------
Processing " SE4 0 solar " ( 143 / 150 )
Create Trainings data for region:  SE4  with energy type:  EnergyType.SOLAR
Fit Regression Model for region  SE4
Predict capacity factors for region  SE4


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


SE4 0 solar  with q = 0.4
Smallest value:  -0.02445743083902684
Biggest value:  0.5528645106803167
#Negative Values:  919
#Values > 1:  0

SE4 0 solar  with q = 0.6
Smallest value:  -0.02445743083902684
Biggest value:  0.5528645106803167
#Negative Values:  919
#Values > 1:  0
-------------------------------------------------------------------

Processing " SI0 0 offwind-ac " ( 144 / 150 )
Create Trainings data for region:  SI0  with energy type:  EnergyType.OFFWIND_AC
Fit Regression Model for region  SI0
Predict capacity factors for region  SI0


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


SI0 0 offwind-ac  with q = 0.4
Smallest value:  -0.01082783388566262
Biggest value:  1.0009837412600473
#Negative Values:  2074
#Values > 1:  2

SI0 0 offwind-ac  with q = 0.6
Smallest value:  -0.01082783388566262
Biggest value:  1.0009837412600473
#Negative Values:  2074
#Values > 1:  2
-------------------------------------------------------------------

Processing " SI0 0 onwind " ( 145 / 150 )
Create Trainings data for region:  SI0  with energy type:  EnergyType.ONWIND
Fit Regression Model for region  SI0
Predict capacity factors for region  SI0


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


SI0 0 onwind  with q = 0.4
Smallest value:  -0.0015057697319850005
Biggest value:  0.9085354247592886
#Negative Values:  339
#Values > 1:  0

SI0 0 onwind  with q = 0.6
Smallest value:  -0.0015057697319850005
Biggest value:  0.9085354247592886
#Negative Values:  339
#Values > 1:  0
-------------------------------------------------------------------

Processing " SI0 0 ror " ( 146 / 150 )
Skipped column:  SI0 0 ror
-------------------------------------------------------------------
Processing " SI0 0 solar " ( 147 / 150 )
Create Trainings data for region:  SI0  with energy type:  EnergyType.SOLAR
Fit Regression Model for region  SI0
Predict capacity factors for region  SI0


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


SI0 0 solar  with q = 0.4
Smallest value:  -0.0232153581517395
Biggest value:  0.615626792224792
#Negative Values:  989
#Values > 1:  0

SI0 0 solar  with q = 0.6
Smallest value:  -0.0232153581517395
Biggest value:  0.615626792224792
#Negative Values:  989
#Values > 1:  0
-------------------------------------------------------------------

Processing " SK0 0 onwind " ( 148 / 150 )
Create Trainings data for region:  SK0  with energy type:  EnergyType.ONWIND
Fit Regression Model for region  SK0
Predict capacity factors for region  SK0


  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


SK0 0 onwind  with q = 0.4
Smallest value:  -0.010333331448648415
Biggest value:  0.9330064016677606
#Negative Values:  83
#Values > 1:  0

SK0 0 onwind  with q = 0.6
Smallest value:  -0.010333331448648415
Biggest value:  0.9330064016677606
#Negative Values:  83
#Values > 1:  0
-------------------------------------------------------------------

Processing " SK0 0 ror " ( 149 / 150 )
Skipped column:  SK0 0 ror
-------------------------------------------------------------------
Processing " SK0 0 solar " ( 150 / 150 )
Create Trainings data for region:  SK0  with energy type:  EnergyType.SOLAR
Fit Regression Model for region  SK0
Predict capacity factors for region  SK0
SK0 0 solar  with q = 0.4
Smallest value:  -0.029604993873426563
Biggest value:  0.6380258666536386
#Negative Values:  960
#Values > 1:  0

SK0 0 solar  with q = 0.6
Smallest value:  -0.029604993873426563
Biggest value:  0.6380258666536386
#Negative Values:  960
#Values > 1:  0
--------------------------------------------

  capfacts_pred_q40[col] = Y_preds_q40
  capfacts_pred_q60[col] = Y_preds_q60


In [8]:
capfacts_pred_q40 = capfacts_pred_q40.copy()
capfacts_pred_q40.to_csv('results/capfacts_pred_q40.csv')
capfacts_pred_q60.to_csv('results/capfacts_pred_q60.csv')
capfacts_pred_q40

Unnamed: 0,snapshot,AL0 0 offwind-ac,AL0 0 onwind,AL0 0 solar,AT0 0 onwind,AT0 0 solar,BA0 0 onwind,BA0 0 solar,BE0 0 offwind-ac,BE0 0 offwind-dc,...,RS0 0 solar,SE4 0 offwind-ac,SE4 0 offwind-dc,SE4 0 onwind,SE4 0 solar,SI0 0 offwind-ac,SI0 0 onwind,SI0 0 solar,SK0 0 onwind,SK0 0 solar
0,2013-01-01 00:00:00,0.000557,0.021196,0.001133,0.175853,0.001539,0.007715,0.002115,0.983076,0.994047,...,0.000664,0.885203,0.947417,0.383720,0.000822,0.000013,0.015176,0.002273,0.246939,0.001099
1,2013-01-01 01:00:00,0.000557,0.011273,0.001133,0.175853,0.001539,0.010271,0.001863,0.959990,0.994047,...,0.000664,0.866432,0.937913,0.387787,0.000822,0.000013,0.020111,0.002273,0.246939,0.001099
2,2013-01-01 02:00:00,0.000557,0.003918,0.001133,0.151878,0.001462,0.010271,0.001863,0.934882,0.992861,...,0.000664,0.866432,0.938748,0.393843,0.000822,-0.000017,0.024133,0.002273,0.262308,0.001099
3,2013-01-01 03:00:00,0.000557,0.002196,0.001223,0.133736,0.001462,0.010271,0.001863,0.834084,0.927521,...,0.000664,0.866432,0.938748,0.393843,0.000822,-0.000016,0.034097,0.002273,0.262308,0.001099
4,2013-01-01 04:00:00,0.000230,-0.001919,0.001223,0.134358,0.001462,0.010271,0.001863,0.641281,0.742088,...,0.000664,0.866432,0.938109,0.382584,0.000822,-0.000016,0.040402,0.002273,0.262308,0.001099
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8755,2013-12-31 19:00:00,0.028722,0.052580,0.000591,0.115194,0.001555,0.001481,0.001227,0.903035,0.982712,...,0.001162,0.783856,0.867427,0.310559,0.000822,0.026362,0.004147,0.000846,0.071569,0.001099
8756,2013-12-31 20:00:00,0.028722,0.065004,0.000591,0.130153,0.001555,0.001477,0.001227,0.771291,0.891949,...,0.001162,0.756514,0.864705,0.280206,0.000822,0.021999,0.005361,0.000846,0.081011,0.001099
8757,2013-12-31 21:00:00,0.028722,0.073034,0.001024,0.133361,0.001555,0.001700,0.001227,0.690141,0.835503,...,0.001162,0.697051,0.801973,0.243447,0.000822,0.000047,0.005236,0.000846,0.081428,0.001099
8758,2013-12-31 22:00:00,0.033329,0.052580,0.001024,0.152847,0.001555,0.001733,0.001227,0.765522,0.912825,...,0.001162,0.673013,0.771472,0.208780,0.000822,-0.000201,0.007687,0.000846,0.091286,0.001099


In [9]:
cols_num = capfacts_pred_q40.select_dtypes(np.number).columns
print(cols_num)

capfacts_pred_q40[cols_num] = capfacts_pred_q40[cols_num].clip(lower=0, upper=1.02)
capfacts_pred_q60[cols_num] = capfacts_pred_q60[cols_num].clip(lower=0, upper=1.02)

# capfacts_pred_q40.iloc[:, 1:].clip(lower=0, upper=1.02)
# capfacts_pred_q60.iloc[:, 1:].clip(lower=0, upper=1.02)

capfacts_pred_q40.to_csv('results/capfacts_pred_q40_clipped.csv')
capfacts_pred_q60.to_csv('results/capfacts_pred_q60_clipped.csv')

# Alles klar, danke euch. Dann werde ich das auf [0, 1.02] beschränken
capfacts_pred_q40

Index(['AL0 0 offwind-ac', 'AL0 0 onwind', 'AL0 0 solar', 'AT0 0 onwind',
       'AT0 0 solar', 'BA0 0 onwind', 'BA0 0 solar', 'BE0 0 offwind-ac',
       'BE0 0 offwind-dc', 'BE0 0 onwind',
       ...
       'RS0 0 solar', 'SE4 0 offwind-ac', 'SE4 0 offwind-dc', 'SE4 0 onwind',
       'SE4 0 solar', 'SI0 0 offwind-ac', 'SI0 0 onwind', 'SI0 0 solar',
       'SK0 0 onwind', 'SK0 0 solar'],
      dtype='object', length=125)


Unnamed: 0,snapshot,AL0 0 offwind-ac,AL0 0 onwind,AL0 0 solar,AT0 0 onwind,AT0 0 solar,BA0 0 onwind,BA0 0 solar,BE0 0 offwind-ac,BE0 0 offwind-dc,...,RS0 0 solar,SE4 0 offwind-ac,SE4 0 offwind-dc,SE4 0 onwind,SE4 0 solar,SI0 0 offwind-ac,SI0 0 onwind,SI0 0 solar,SK0 0 onwind,SK0 0 solar
0,2013-01-01 00:00:00,0.000557,0.021196,0.001133,0.175853,0.001539,0.007715,0.002115,0.983076,0.994047,...,0.000664,0.885203,0.947417,0.383720,0.000822,0.000013,0.015176,0.002273,0.246939,0.001099
1,2013-01-01 01:00:00,0.000557,0.011273,0.001133,0.175853,0.001539,0.010271,0.001863,0.959990,0.994047,...,0.000664,0.866432,0.937913,0.387787,0.000822,0.000013,0.020111,0.002273,0.246939,0.001099
2,2013-01-01 02:00:00,0.000557,0.003918,0.001133,0.151878,0.001462,0.010271,0.001863,0.934882,0.992861,...,0.000664,0.866432,0.938748,0.393843,0.000822,0.000000,0.024133,0.002273,0.262308,0.001099
3,2013-01-01 03:00:00,0.000557,0.002196,0.001223,0.133736,0.001462,0.010271,0.001863,0.834084,0.927521,...,0.000664,0.866432,0.938748,0.393843,0.000822,0.000000,0.034097,0.002273,0.262308,0.001099
4,2013-01-01 04:00:00,0.000230,0.000000,0.001223,0.134358,0.001462,0.010271,0.001863,0.641281,0.742088,...,0.000664,0.866432,0.938109,0.382584,0.000822,0.000000,0.040402,0.002273,0.262308,0.001099
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8755,2013-12-31 19:00:00,0.028722,0.052580,0.000591,0.115194,0.001555,0.001481,0.001227,0.903035,0.982712,...,0.001162,0.783856,0.867427,0.310559,0.000822,0.026362,0.004147,0.000846,0.071569,0.001099
8756,2013-12-31 20:00:00,0.028722,0.065004,0.000591,0.130153,0.001555,0.001477,0.001227,0.771291,0.891949,...,0.001162,0.756514,0.864705,0.280206,0.000822,0.021999,0.005361,0.000846,0.081011,0.001099
8757,2013-12-31 21:00:00,0.028722,0.073034,0.001024,0.133361,0.001555,0.001700,0.001227,0.690141,0.835503,...,0.001162,0.697051,0.801973,0.243447,0.000822,0.000047,0.005236,0.000846,0.081428,0.001099
8758,2013-12-31 22:00:00,0.033329,0.052580,0.001024,0.152847,0.001555,0.001733,0.001227,0.765522,0.912825,...,0.001162,0.673013,0.771472,0.208780,0.000822,0.000000,0.007687,0.000846,0.091286,0.001099


In [10]:
capfacts_pred_q40.dtypes

snapshot             object
AL0 0 offwind-ac    float64
AL0 0 onwind        float64
AL0 0 solar         float64
AT0 0 onwind        float64
                     ...   
SI0 0 offwind-ac    float64
SI0 0 onwind        float64
SI0 0 solar         float64
SK0 0 onwind        float64
SK0 0 solar         float64
Length: 126, dtype: object