In [6]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans

def generate_pv_scenarios(csv_file, num_scenarios=3, num_periods=4):
    # Load the PV production data
    df = pd.read_csv(csv_file, sep=';')
    df.dropna(axis='columns', inplace=True)
    
    # Ensure proper datetime parsing
    df["time"] = pd.to_datetime(df["time"])
    df.set_index("time", inplace=True)
    
    # Aggregate data into daily segments
    df["date"] = df.index.date
    daily_pv = df.groupby("date")["electricity"].sum()
    
    # Normalize by daily maximum to create patterns
    daily_pv_normalized = daily_pv / daily_pv.max()
    daily_pv_normalized = daily_pv_normalized.fillna(0)
    
    # Reshape data into daily profiles
    df["hour"] = df.index.hour
    daily_profiles = df.pivot_table(index="date", columns="hour", values="electricity", aggfunc=np.mean)
    daily_profiles = daily_profiles.fillna(0)
    
    # Cluster days into `num_scenarios` using KMeans
    kmeans = KMeans(n_clusters=num_scenarios, random_state=42, n_init=10)
    daily_profiles["scenario"] = kmeans.fit_predict(daily_profiles)
    
    # Get representative days for each scenario
    scenario_representatives = daily_profiles.groupby("scenario").mean()
    
    # Break each representative day into `num_periods` by averaging over periods
    period_length = 24 // num_periods
    scenario_periods = {}
    
    for scenario, row in scenario_representatives.iterrows():
        hourly_values = np.mean(row.values[:24].reshape(num_periods, period_length), axis=1)
        scenario_periods[scenario + 1] = { (scenario + 1, t + 1): round(v, 1) for t, v in enumerate(hourly_values) }
    
    return scenario_periods

# Example usage:
scenarios = generate_pv_scenarios("/Users/chris/Downloads/pv_hourly2019_cologne.csv", num_scenarios=3, num_periods=4)

# Format the output to match Julia dictionary style
print("PV_Scenarios =", scenarios)


PV_Scenarios = {1: {(1, 1): 2.6, (1, 2): 290.1, (1, 3): 204.0, (1, 4): 0.4}, 2: {(2, 1): 11.9, (2, 2): 521.1, (2, 3): 393.8, (2, 4): 2.4}, 3: {(3, 1): 0.2, (3, 2): 76.4, (3, 3): 64.4, (3, 4): 0.1}}


  daily_profiles = df.pivot_table(index="date", columns="hour", values="electricity", aggfunc=np.mean)


In [7]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans

def generate_scenarios(csv_file, num_scenarios=3, num_periods=4):
    # Load the production data
    df = pd.read_csv(csv_file, sep=';')
    df.dropna(axis='columns', inplace=True)
    
    # Ensure proper datetime parsing
    df["time"] = pd.to_datetime(df["time"])
    df.set_index("time", inplace=True)
    
    # Aggregate data into daily segments
    df["date"] = df.index.date
    daily_prod = df.groupby("date")["electricity"].sum()
    
    # Normalize by daily maximum to create patterns
    daily_prod_normalized = daily_prod / daily_prod.max()
    daily_prod_normalized = daily_prod_normalized.fillna(0)
    
    # Reshape data into daily profiles
    df["hour"] = df.index.hour
    daily_profiles = df.pivot_table(index="date", columns="hour", values="electricity", aggfunc=np.mean)
    daily_profiles = daily_profiles.fillna(0)
    
    # Cluster days into `num_scenarios` using KMeans
    kmeans = KMeans(n_clusters=num_scenarios, random_state=42, n_init=10)
    daily_profiles["scenario"] = kmeans.fit_predict(daily_profiles)
    
    # Get representative days for each scenario
    scenario_representatives = daily_profiles.groupby("scenario").mean()
    
    # Break each representative day into `num_periods` by averaging over periods
    period_length = 24 // num_periods
    scenario_periods = {}
    
    for scenario, row in scenario_representatives.iterrows():
        hourly_values = np.mean(row.values[:24].reshape(num_periods, period_length), axis=1)
        scenario_periods[scenario + 1] = { (scenario + 1, t + 1): round(float(v), 1) for t, v in enumerate(hourly_values) }
    
    return scenario_periods

# Generate PV and wind scenarios
pv_scenarios = generate_scenarios("/Users/chris/Downloads/pv_hourly2019_cologne.csv", num_scenarios=3, num_periods=4)
wind_scenarios = generate_scenarios("/Users/chris/Downloads/wind_hourly_2019_cgn.csv", num_scenarios=3, num_periods=4)

  daily_profiles = df.pivot_table(index="date", columns="hour", values="electricity", aggfunc=np.mean)
  daily_profiles = df.pivot_table(index="date", columns="hour", values="electricity", aggfunc=np.mean)


In [9]:
print(wind_scenarios)

{1: {(1, 1): 320.1, (1, 2): 299.6, (1, 3): 333.5, (1, 4): 343.3}, 2: {(2, 1): 144.1, (2, 2): 102.2, (2, 3): 120.8, (2, 4): 162.1}, 3: {(3, 1): 539.6, (3, 2): 629.1, (3, 3): 659.3, (3, 4): 579.8}}


In [10]:
print(pv_scenarios)

{1: {(1, 1): 2.6, (1, 2): 290.1, (1, 3): 204.0, (1, 4): 0.4}, 2: {(2, 1): 11.9, (2, 2): 521.1, (2, 3): 393.8, (2, 4): 2.4}, 3: {(3, 1): 0.2, (3, 2): 76.4, (3, 3): 64.4, (3, 4): 0.1}}
