In [1]:
import os
import pandas as pd
import numpy as np

# Set the working directory so that the relative dataset/result paths below resolve.
# NOTE(review): this is an absolute, machine-specific path; the notebook only runs
# unchanged on a machine that has this exact directory layout.
os.chdir("/home/bjaha/Documents/phd_delight/courses/Renewables/assignment2/renewables-assignment2/")

print("Step 1: Prepare Data")
dataset_dir = "datasets/"

# Create the results directory; exist_ok makes re-running the cell a no-op
# and avoids the check-then-create race of a separate os.path.exists() test.
plot_dir = "results/Part1/"
os.makedirs(plot_dir, exist_ok=True)



Step 1: Prepare Data


In [2]:
def get_wind_forecast(dataset_dir, file_name = "WindForecast_20250201-20250420.csv", n_scenarios = 20, seed = None) -> np.ndarray:
    """Draw `n_scenarios` random days of hourly day-ahead wind forecasts.

    The CSV is read (skipping its first 5 lines), the 'Day-ahead forecast [MW]'
    column is averaged into hourly bins, and `n_scenarios` distinct days are
    drawn without replacement. Only days that have a value for every one of
    their 24 hours are eligible, so partial first/last days or days with
    missing hours can neither break the reshape nor inject NaNs.

    Args:
        dataset_dir: Directory prefix; it is concatenated directly with
            `file_name`, so it must end with a path separator.
        file_name: Name of the CSV file inside `dataset_dir`.
        n_scenarios: Number of distinct days to draw.
        seed: Optional seed for a dedicated random generator. When None
            (default) NumPy's global random state is used, so an external
            `np.random.seed(...)` call still controls the draw.

    Returns:
        Array of shape (n_scenarios, 24); rows are the selected days in
        chronological order, columns are the hourly mean forecasts.

    Raises:
        ValueError: If fewer than `n_scenarios` complete days are available.
    """
    target_col = 'Day-ahead forecast [MW]'
    hours_per_day = 24

    # Load the data
    data = pd.read_csv(dataset_dir + file_name, skiprows=5)
    data['DateTime'] = pd.to_datetime(data['DateTime'], dayfirst=True)

    # Downsample only the target column to 1 hour; other (possibly non-numeric)
    # columns are irrelevant here and must not take part in the mean.
    hourly = data.set_index('DateTime')[target_col].resample('h').mean()

    # Keep only days with a non-missing value for each of their 24 hours
    day_of_row = hourly.index.normalize()
    valid_hours = hourly.groupby(day_of_row).count()
    complete_days = valid_hours.index[(valid_hours == hours_per_day).to_numpy()]
    if len(complete_days) < n_scenarios:
        raise ValueError(
            f"Requested {n_scenarios} days but only {len(complete_days)} "
            f"complete days are available in {file_name}"
        )

    # Randomly select n_scenarios days (by position, then sort chronologically)
    rng = np.random if seed is None else np.random.default_rng(seed)
    picked = np.sort(rng.choice(len(complete_days), size=n_scenarios, replace=False))
    selected_days = complete_days[picked]

    # Select the rows of the chosen days and convert to a numpy array
    selected = hourly[day_of_row.isin(selected_days)]
    wind_forecast = selected.to_numpy().reshape(n_scenarios, hours_per_day) # (n_scenarios, 24)

    return wind_forecast


## Day-ahead price forecasts

In [3]:
import glob


def get_price_forecasts(dataset_dir, n_scenarios = 20):
    """Load the first `n_scenarios` day-ahead price files into one array.

    Files matching `<dataset_dir>day_ahead_prices/*.csv` are sorted by name and
    the first `n_scenarios` are read as header-less, tab-separated
    (hour, price) tables. Prices may use a decimal comma; they are converted
    to float.

    Args:
        dataset_dir: Directory prefix; concatenated directly with
            "day_ahead_prices/*.csv", so it must end with a path separator.
        n_scenarios: Number of price files (rows of the result) to load.

    Returns:
        Float array of shape (n_scenarios, 24), one row per file.

    Raises:
        AssertionError: If fewer than `n_scenarios` files are found.
        ValueError: If a file does not contain exactly 24 price rows.
    """
    hours_per_day = 24

    files = sorted(glob.glob(dataset_dir + "day_ahead_prices/*.csv"))

    assert len(files) >= n_scenarios, (
        f"need {n_scenarios} price files but found only {len(files)}"
    )

    # Load the price forecasts into a numpy array
    da_prices = np.zeros((n_scenarios, hours_per_day))
    for i, file in enumerate(files[:n_scenarios]):
        data = pd.read_csv(file, delimiter="\t", header=None, names=["hour", "price"])
        # A file without any decimal comma is parsed as numeric by pandas, and
        # `.str` would then raise; casting to str first handles both cases.
        prices = data["price"].astype(str).str.replace(",", ".", regex=False).astype(float)
        da_prices[i, :] = prices.to_numpy()

    return da_prices


## Sample system conditions: 24 Bernoulli-distributed 0/1 values per scenario (probability 0.5 is used below)

In [4]:
def get_system_condition(p: float, n_scenarios: int=4) -> np.ndarray:
    """Draw a (n_scenarios, 24) matrix of 0/1 system-condition indicators.

    Every entry is an independent Bernoulli(p) draw (a binomial with a single
    trial) taken from NumPy's global random state.

    Args:
        p: Probability that an entry equals 1.
        n_scenarios: Number of rows (scenarios) to sample.

    Returns:
        Integer array of shape (n_scenarios, 24) containing only 0s and 1s.
    """
    hours_per_day = 24
    total_draws = hours_per_day * n_scenarios
    flat_draws = np.random.binomial(1, p, total_draws)
    return flat_draws.reshape(n_scenarios, hours_per_day)


In [14]:
# Seed NumPy's global random state so the sampled wind days and system
# conditions are identical on every "Restart & Run All".
np.random.seed(42)

# Get the wind forecast
n_wind_forecasts = 20
wind_forecast = get_wind_forecast(dataset_dir, n_scenarios=n_wind_forecasts)
print(wind_forecast)

# Get the price forecasts
n_price_forecasts = 20
price_forecast = get_price_forecasts(dataset_dir, n_scenarios=n_price_forecasts)
print(price_forecast)

# Get the system condition
n_system_conditions = 4
system_condition = get_system_condition(p=0.5, n_scenarios=n_system_conditions)
print(system_condition)

# Sanity check: print the shape of each sampled input array
for tensor in [wind_forecast, price_forecast, system_condition]:
    print(tensor.shape)

[[ 256.6    225.475  196.15   166.5    146.075  135.2    134.35   142.5
   152.35   166.775  176.375  123.75    64.35    57.15    60.875   76.7
   122.875  223.55   374.825  510.55   590.95   618.     594.8    554.975]
 [ 596.025  540.9    453.625  345.8    251.95   187.325  158.475  139.7
   127.025  119.55    97.425   77.675   56.1     38.775   23.3     17.825
    15.025   16.95    21.075   23.825   26.25    35.825   52.475   74.55 ]
 [ 117.775  136.6    149.35   164.275  182.375  205.425  222.95   246.
   263.325  288.225  302.475  231.5    184.625  177.     168.4    172.225
   203.625  288.575  403.175  528.825  689.275  871.325 1005.375 1100.225]
 [1041.4   1063.95  1055.65  1036.575 1041.925 1090.825 1164.35  1237.75
  1300.8   1327.875 1337.85  1369.575 1439.1   1536.25  1635.925 1732.8
  1859.35  2098.775 2376.675 2582.75  2711.85  2768.875 2792.25  2767.025]
 [3210.5   3183.675 3196.65  3277.05  3401.525 3476.75  3490.4   3505.025
  3551.1   3552.575 3580.65  3781.7   3890.05 

In [15]:
# Display the sampled system-condition matrix (one row per scenario, 24 values per row).
system_condition

array([[1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0,
        0, 1],
       [1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
        0, 1],
       [0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
        1, 0],
       [1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0,
        1, 1]])

In [16]:
# Build every combination of one wind day, one price day and one system-condition
# day (20 x 20 x 4 = 1600 scenarios for the inputs sampled above). Scenarios are
# keyed by a running integer; the system condition varies fastest and the wind
# day slowest.
scenarios = {
    scenario_id: {
        "wind_forecast": wind_day,
        "price_forecast": price_day,
        "system_condition": system_day,
    }
    for scenario_id, (wind_day, price_day, system_day) in enumerate(
        (wind_row, price_row, system_row)
        for wind_row in wind_forecast
        for price_row in price_forecast
        for system_row in system_condition
    )
}

In [18]:
n_is = 200
# Every scenario that is not drawn in-sample is out-of-sample; deriving the count
# keeps it correct if the number of scenarios changes (1400 for 1600 scenarios).
n_oos = len(scenarios) - n_is

# select n_is scenario keys for the in-sample set at random (without replacement)
in_sample_scenarios = np.random.choice(list(scenarios.keys()), size=n_is, replace=False)

# the remaining scenarios are used for the out-of-sample set; a set gives
# constant-time membership tests instead of scanning the NumPy array per key
in_sample_lookup = set(in_sample_scenarios.tolist())
out_of_sample_scenarios = [scenario for scenario in scenarios if scenario not in in_sample_lookup]

assert len(out_of_sample_scenarios) == n_oos, "in-sample and out-of-sample sets must partition the scenarios"



