# Scenarios generations
TODO:
- Here I am using geo to have the generators for which to generate a scenario. How to generalise this? Should I have input a pypsa network instead and genarate the scenarios for the generators inside of that?
- put everything into functions as good practice

In [23]:

import powerplantmatching as pm
import math
import os
from pathlib import Path 
import pandas as pd
import matplotlib.pyplot as plt
from six.moves import cPickle as pickle
import numpy as np
import datetime
from scipy.stats import expon
from scipy.optimize import curve_fit
from astropy.visualization import hist
#for fitting:
from scipy.stats import expon, rv_discrete
from scipy.optimize import curve_fit
from sklearn.neighbors import KernelDensity
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
#for plotting Markov Chain graph
import networkx as nx
import matplotlib.pyplot as plt


#helper functions

def get_week(date):
    """
    input: date in date_time format
    output: what week of the year the date corresponds to
    """
    return date.week


def truncate(number, digits) -> float:
    # Improve accuracy with floating point operations, to avoid truncate(16.4, 2) = 16.39 or truncate(-1.13, 2) = -1.12
    nbDecimals = len(str(number).split('.')[1]) 
    if nbDecimals <= digits:
        return number
    stepper = 10.0 ** digits
    return math.trunc(stepper * number) / stepper

def markov_graph(transitions, seed = 42, digits = 4, title = ""):
    """
    input: transitions, a dictionary having as 
    keys: touples with 2 elements being the from state and from state
    values: the transition probability
    output: markov chain graph
    """
    G = nx.MultiDiGraph()

    for transition, probability in transitions.items():
        state_from, state_to = transition
        if probability != 0: 
        #if probability state_from to state_to is not 0 we add an edge to the graph
            G.add_edge(state_from, state_to, weight=truncate(probability, digits))

    #create positions of nodes: dictionary with coordinates
    pos = nx.spring_layout(G, seed) 

    # Increase the scale to avoid overlap
    pos = {k: [v[0] * 2, v[1] * 2] for k, v in pos.items()}

    labels = nx.get_edge_attributes(G, 'weight')
    nx.draw(G, pos, with_labels=True, node_size=700, node_color='skyblue', font_size=8, font_color='black',
            connectionstyle='arc3,rad=0.1')

    # Annotate edges manually with adjusted positions to avoid overlap
    for edge, weight in labels.items():
        (x, y) = pos[edge[0]]
        text_x = 3/4*x + 1/4*pos[edge[1]][0]
        text_y = 3/4*y + 1/4*pos[edge[1]][1]
        #shift text to avoid overlap
        text_y += 0.2 if edge[0] == edge[1] else 0


        plt.text(text_x, text_y, f"{weight}", fontsize=8, color='blue', verticalalignment='center',
                 horizontalalignment='center')
    plt.title(title)
    plt.show()
    

def weighted_values(values, probabilities, size):
    bins = np.add.accumulate(probabilities)
    return values[np.digitize(np.random.random_sample(size), bins)]

def next_state_markov(markov, current_state):
    possible_states = []
    transition_probs = []
    for key, prob in markov.items():
        if key[0] == current_state and prob != 0:
            possible_states.append(key[1])
            transition_probs.append(prob)
    return weighted_values(np.array(possible_states), np.array(transition_probs),1)[0]

def get_gen_type(geo_df):
    geo_to_entsoe_gen_d = {
        "Hard Coal": "Fossil Hard coal ", 
        'Lignite' : 'Fossil Brown coal/Lignite ',
        'Oil': "Fossil Oil ",
        'Waste': "Waste ",
        'Natural Gas': "Fossil Gas ",#o ci andrebbe qualcos altro
        #'Hydro',
        'Nuclear': "Nuclear ", 
        'Other' : "Other ", 
        'Solar':"Solar ",
        'Wind':"Wind Onshore ",
        'Geothermal':"Geothermal "
        }
    #given a unit row of geo dataframe give gen type in ENTSOE format
    #in geo there is just one windpower type (not onshore or offshore) or maybe you can see from the dataframe
    fuel_type = geo_df["Fueltype"]
    gen_type = geo_df["Technology"]
    if fuel_type == "Hydro":
        if gen_type == "Reservoir":
            return 'Hydro Water Reservoir '
        elif gen_type == "Run-Of-River":
            return 'Hydro Run-of-river and poundage '
        elif gen_type == "Pumped Storage":
            return 'Hydro Pumped Storage '
    elif fuel_type in geo_to_entsoe_gen_d.keys():
        return geo_to_entsoe_gen_d[fuel_type]
    else:
        print(f"Generetor {gen_type},{fuel_type} not found classfied as Other")
        return "Other "



In [24]:
#Set parameters
#import generators
geo = pm.data.GEO()
geo = geo.groupby("projectID").head(1)
#set generator types in the imported dataset to generate scenarios for, example (removed wind and PV):
scenario_types = ['Hard Coal',
             'Lignite',
             'Oil',
             'Waste',
             'Natural Gas',
             'Hydro',
             'Nuclear',
             'Other',
             'Geothermal']

#set start time and end time for which to generate scenarios
start_time = np.datetime64("2023-01-01T00:00:00")
end_time = np.datetime64("2023-06-01T00:00:00")

#set number of scenarios to generate
n_scenarios = 5

#set seed for random generation for replicable scenario generation
seed = 2193 #set 

#import distribution parameters
statetime_df = pd.read_csv("exponential_statetime_df.csv")
capacity_df = pd.read_csv("kernel_capacity_df.csv")
markov_d = np.load("markov_state_change_d.npy", allow_pickle = True).item()

## Dataset descripiton

    - projectID - Immutable identifier of the power plant
        
    - Power plant name - claim of each database

    - Fueltype - {Bioenergy, Geothermal, Hard Coal, Hydro, Lignite, Nuclear, Natural Gas, Oil, Solar, Wind, Other}

    - Technology - {CCGT, OCGT, Steam Turbine, Combustion Engine, Run-Of-River, Pumped Storage, Reservoir}

    - Set - {Power Plant (PP), Combined Heat and Power (CHP), Storages (Stores)}

    - Capacity - [MW]

    - Duration - Maximum state of charge capacity in terms of hours at full output capacity

    - Dam Information - Dam volume [Mm^3] and Dam Height [m]

    - Geo-position - Latitude, Longitude

    - Country - EU-27 + CH + NO (+ UK) minus Cyprus and Malta

    - YearCommissioned - Commmisioning year of the powerplant

    - RetroFit - Year of last retrofit




In [25]:
# Recreate Capacity density kernels

grouped_df = capacity_df.groupby(["ProductionType", "Type"])
capacity_d = {}
#idea: calculate prob for capacity = 0, remove and the use non parametric fit.
for production_type, prod_df in grouped_df:
    #calculate per unit available capacity
    PU = prod_df["p.u."]
    p_0 = np.sum(PU == 0) / len(PU)
    PUplus = np.sort(PU[PU != 0]) #non zero capacities
    if len(PUplus) != 0:
        colors = ["r"]
        kernels = ["gaussian"]
        lw = 2   
        for color, kernel in zip(colors, kernels):
            #fit with KDE
            kde = KernelDensity(kernel=kernel, bandwidth=0.05).fit(PUplus[:, np.newaxis])
            capacity_d[production_type] = (p_0, kde)

In [26]:
# Generating Scenarios

#Starting state --> we roll random running time for every generator the end
# todo: make the markov chaing run for a while to make the various generators get further in states
#remove generators not in the scenario_types
geo = geo[geo.Fueltype.apply(lambda x: x in scenario_types)]
np.random.seed(seed)
gen_names = list(geo["projectID"])
gen_types = []
for index, geo_row in geo.iterrows():
    gen_type = get_gen_type(geo_row)
    if gen_type is None:
        tech = geo_row["Technology"]
        fuel = geo_row["Fueltype"]
        print("Weird thing:", tech, fuel)
        gen_types.append("Other ")
    else:
        gen_types.append(gen_type)

n_gens = len(gen_names)
pre_run_hours = 6*30*24 #time the chain is run for before producing data

states = ["Running", "Forced", "Planned"]
state_df = pd.DataFrame({"UnitName": gen_names, "ProductionType": gen_types, "State":["Running"]*n_gens, "Counter":[0]*n_gens, "Capacity":[1]*n_gens})
perc = np.ceil(pre_run_hours / 100)

for h in np.arange(pre_run_hours):
    
    if h % (perc) == 0:
        print(f"{h/pre_run_hours *100} %")
        print(state_df.head(5))
    
    for index, gen_row in state_df[state_df["Counter"] == 0].iterrows():
        gen_name = gen_row["UnitName"] #remove?
        gen_type = gen_row["ProductionType"]
        current_state = gen_row["State"]
        markov = markov_d[gen_type] #get associate markov chain
        
        new_state = next_state_markov(markov, current_state) #get new state of the generator
        
        scale = statetime_df.loc[statetime_df["ProductionType"] == gen_type, new_state + "Time"]
        new_counter = np.ceil(np.random.exponential(scale, 1))[0] #get number of hours spent in new_state
        
        #get the capacity of the generator in the currnent state
        if new_state == "Running":
            new_capacity = 1
        elif (gen_type, new_state) in capacity_d.keys():
            p_zero, pu_pdf = capacity_d[(gen_type, new_state)]
            if np.random.random_sample(1)[0] <= p_zero:
                new_capacity = 0
            else:
                new_capacity = pu_pdf.sample(1)[0][0]
                if new_capacity < 0:
                    new_capacity = 0
                elif new_capacity > 1:
                    new_capacity = 1
        else:
            #todo this should not happen
            #print(f"no capacity distribution for {gen_type}")
            new_capacity = 1
        
        state_df.loc[index, ["State", "Counter", "Capacity"]] = [new_state, new_counter, new_capacity]
        
    state_df.loc[state_df["Counter"] != 0, "Counter"] -= 1
    

Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing

In [27]:
# Generating Scenarios

np.random.seed(seed)
tot_hours = (end_time - start_time) / np.timedelta64(1, "h")
for n_scenario in np.arange(n_scenarios):
    data_list = []
    current_time = start_time
    #state_df = pd.DataFrame({"UnitName": gen_names, "ProductionType": gen_types, "State":["Running"]*n_gens, "Counter":[0]*n_gens, "Capacity":[1]*n_gens})
    
    perc = np.ceil(tot_hours / 100)
    columns = ["TimeStamp"] + gen_names
    
    for h in np.arange(tot_hours):
        
        if h % (10*perc) == 0:
            print(f"{(n_scenario*tot_hours + h)/(tot_hours * (n_scenarios)) *100} %")
            print(state_df.head(5))
        
        for index, gen_row in state_df[state_df["Counter"] == 0].iterrows():
            gen_name = gen_row["UnitName"] #remove?
            gen_type = gen_row["ProductionType"]
            current_state = gen_row["State"]
            markov = markov_d[gen_type] #get associate markov chain
            
            new_state = next_state_markov(markov, current_state) #get new state of the generator
            
            scale = statetime_df.loc[statetime_df["ProductionType"] == gen_type, new_state + "Time"]
            new_counter = np.ceil(np.random.exponential(scale, 1))[0] #get number of hours spent in new_state
            
            #get the capacity of the generator in the currnent state
            if new_state == "Running":
                new_capacity = 1
            elif (gen_type, new_state) in capacity_d.keys():
                p_zero, pu_pdf = capacity_d[(gen_type, new_state)]
                if np.random.random_sample(1)[0] <= p_zero:
                    new_capacity = 0
                else:
                    new_capacity = pu_pdf.sample(1,random_state = seed)[0][0]
                    if new_capacity < 0:
                        new_capacity = 0
                    elif new_capacity > 1:
                        new_capacity = 1
            else:
                #todo this should not happen
                #print(f"no capacity distribution for {gen_type}")
                new_capacity = 1
            
            state_df.loc[index, ["State", "Counter", "Capacity"]] = [new_state, new_counter, new_capacity]
            
        state_df.loc[state_df["Counter"] != 0, "Counter"] -= 1
        current_time = current_time + np.timedelta64(1, "h") #move forward one hour
        new_row = [current_time] + list(state_df["Capacity"])
        row_d = dict(zip(columns, new_row))
        data_list.append(row_d)
    
    scenario = pd.DataFrame(data_list)

    # Check if there is a folder called "scenarios"
    folder_path = 'scenarios'
    if not os.path.exists(folder_path):
        # If the folder does not exist, create it
        os.makedirs(folder_path)
        print("Folder 'scenarios' created.")
        num_files = 0
    else:
        # If the folder exists, count the number of files inside
        num_files = len(os.listdir(folder_path))
        print(f"Folder 'scenarios' already exists with {num_files} files inside.")


    scenario.to_csv(f"scenarios/scenario_{num_files + 1}")
       

0.0 %
    UnitName     ProductionType    State  Counter  Capacity
0  GEO-45151  Fossil Hard coal   Running      241       1.0
1  GEO-45150  Fossil Hard coal   Running     1042       1.0
2  GEO-45719  Fossil Hard coal   Running      158       1.0
3  GEO-41956  Fossil Hard coal   Running      509       1.0
4  GEO-41974  Fossil Hard coal   Running      124       1.0
2.041942604856512 %
    UnitName     ProductionType    State  Counter  Capacity
0  GEO-45151  Fossil Hard coal   Running       28       1.0
1  GEO-45150  Fossil Hard coal   Running      672       1.0
2  GEO-45719  Fossil Hard coal   Running       61       1.0
3  GEO-41956  Fossil Hard coal   Running      139       1.0
4  GEO-41974  Fossil Hard coal   Running      162       1.0
4.083885209713024 %
    UnitName     ProductionType    State  Counter  Capacity
0  GEO-45151  Fossil Hard coal   Running       65       1.0
1  GEO-45150  Fossil Hard coal   Running      302       1.0
2  GEO-45719  Fossil Hard coal   Planned       90     