In [9]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans

def generate_scenarios(csv_file, num_scenarios=3, num_periods=4):
    """Cluster daily production profiles into representative scenarios.

    The CSV is read with ``;`` as separator and must provide a ``time`` column
    (parseable by ``pd.to_datetime``) and an ``electricity`` column. Every day
    becomes a 24-value hourly profile (mean of ``electricity`` per hour of
    day); the days are clustered with KMeans and each cluster's mean profile
    is averaged down to ``num_periods`` equally long periods.

    Parameters
    ----------
    csv_file : str, path-like or file-like
        Anything accepted by ``pd.read_csv``.
    num_scenarios : int, default 3
        Number of KMeans clusters, i.e. rows of the result.
    num_periods : int, default 4
        Number of periods per day, i.e. columns of the result. Must be a
        positive divisor of 24.

    Returns
    -------
    pandas.DataFrame
        One row per scenario (index = cluster label + 1) and one column per
        period (labelled ``1 .. num_periods``). Each value is the period mean
        rounded to one decimal and then divided by 10. The scenario numbering
        follows the KMeans cluster labels and carries no ordering.

    Raises
    ------
    ValueError
        If ``num_periods`` is not a positive divisor of 24.
    """
    hours_per_day = 24

    # Validate before any I/O so a bad argument fails fast with a clear
    # message instead of an opaque reshape error (or ZeroDivisionError) later.
    if num_periods <= 0 or hours_per_day % num_periods != 0:
        raise ValueError(
            f"num_periods must be a positive divisor of {hours_per_day}, got {num_periods!r}"
        )
    period_length = hours_per_day // num_periods

    # Load the production data. Columns containing any NaN are dropped.
    # NOTE(review): if "electricity" itself contains a NaN it is dropped too
    # and the pivot below raises KeyError - confirm the inputs are gap-free.
    df = (
        pd.read_csv(csv_file, sep=';')
        .dropna(axis='columns')
        .assign(time=lambda frame: pd.to_datetime(frame["time"]))
        .set_index("time")
    )
    df = df.assign(date=df.index.date, hour=df.index.hour)

    # One row per day, one column per hour of day. The string "mean" avoids
    # the warning pandas emits when a NumPy callable is passed as aggfunc.
    daily_profiles = df.pivot_table(index="date", columns="hour", values="electricity", aggfunc="mean")

    # The pivot only contains hours that occur in the data; force all 24
    # columns (missing ones filled with 0) so the reshape below always works.
    daily_profiles = daily_profiles.reindex(columns=range(hours_per_day)).fillna(0)

    # Cluster days into `num_scenarios` using KMeans (fixed seed for
    # reproducible labels).
    kmeans = KMeans(n_clusters=num_scenarios, random_state=42, n_init=10)
    labels = kmeans.fit_predict(daily_profiles)

    # Representative day of each scenario = mean hourly profile of its cluster.
    scenario_representatives = daily_profiles.assign(scenario=labels).groupby("scenario").mean()

    # Break each representative day into `num_periods` by averaging
    # consecutive blocks of `period_length` hours.
    scenario_periods = {}
    for scenario, row in scenario_representatives.iterrows():
        period_means = (
            row.to_numpy()[:hours_per_day]
            .reshape(num_periods, period_length)
            .mean(axis=1)
        )
        # NOTE(review): values are rounded to one decimal and then scaled by
        # 1/10; the reason for this rescaling is not evident here - confirm
        # the intended unit with the consumer of the result.
        scenario_periods[scenario + 1] = [round(float(v), 1) / 10 for v in period_means]

    # Convert to DataFrame: rows are scenarios, columns are 1-based periods.
    df_result = pd.DataFrame.from_dict(
        scenario_periods, orient='index', columns=list(range(1, num_periods + 1))
    )

    return df_result

# Directory holding the hourly production CSV files; change this single value
# to run the notebook against data stored elsewhere.
DATA_DIR = "/Users/chris/Downloads"

# Build 3 scenarios x 8 periods (3 hours each) from the PV and wind CSV files.
pv_scenarios = generate_scenarios(f"{DATA_DIR}/pv_hourly2019_cologne.csv", num_scenarios=3, num_periods=8)
wind_scenarios = generate_scenarios(f"{DATA_DIR}/wind_hourly_2019_cgn.csv", num_scenarios=3, num_periods=8)

  daily_profiles = df.pivot_table(index="date", columns="hour", values="electricity", aggfunc=np.mean)
  daily_profiles = df.pivot_table(index="date", columns="hour", values="electricity", aggfunc=np.mean)


In [10]:
# Show the wind scenario table: one row per scenario, one column per period.
print(wind_scenarios)


       1      2      3      4      5      6      7      8
1  32.63  31.40  28.52  31.40  33.26  33.43  34.59  34.06
2  15.79  13.04  10.11  10.33  11.47  12.69  15.69  16.73
3  53.79  54.13  59.43  66.39  67.63  64.22  60.45  55.51


In [11]:
# Show the PV scenario table: one row per scenario, one column per period.
print(pv_scenarios)


     1     2      3      4      5      6     7    8
1  0.0  0.51  14.71  43.31  34.40   6.39  0.09  0.0
2  0.0  2.39  35.49  68.73  59.69  19.08  0.49  0.0
3  0.0  0.04   2.66  12.62  11.59   1.30  0.01  0.0


In [12]:
# Persist both scenario tables as CSV in the current working directory
# (the scenario number is written as the unnamed first column).
wind_scenarios.to_csv('wind_scenarios.csv')
pv_scenarios.to_csv('pv_scenarios.csv')