In [154]:
import powerplantmatching as pm
import math
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime
from scipy.stats import expon
from scipy.optimize import curve_fit
from astropy.visualization import hist
#for fitting:
from scipy.stats import expon, rv_discrete
from scipy.optimize import curve_fit
from sklearn.neighbors import KernelDensity
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
#for plotting Markov Chain graph
import networkx as nx
import matplotlib.pyplot as plt


#helper functions

def import_df(path, cols = ['StartTS', 'EndTS', 'TimeZone', 'Status', 'Type', 'AreaCode',
       'AreaTypeCode', 'AreaName', 'MapCode', 'PowerResourceEIC', 'UnitName',
       'ProductionType', 'InstalledCapacity', 'AvailableCapacity',
       'Reason']):
    """
    imports and preprocess data_frame
    path: string containing path of csv file containing table
    cols: list of column names to select in df
    returns: non redundat dataframe with only failures
    """
    
    df = pd.read_csv(path, sep = "\t", parse_dates = [0,1])
    df = df[cols] #selects only column
    df = df.drop_duplicates(subset = ["UnitName","StartTS"]) #deletes redundant rows
    #df = df[(df["Reason"] == "Failure")] # WHERE | (df["Reason"] == 'Foreseen Maintenance')
    #maybe can do df[df["Reason"] in reasons]?
    return df

def get_week(date):
    """
    input: date in date_time format
    output: what week of the year the date corresponds to
    """
    return date.week


def truncate(number, digits) -> float:
    # Improve accuracy with floating point operations, to avoid truncate(16.4, 2) = 16.39 or truncate(-1.13, 2) = -1.12
    nbDecimals = len(str(number).split('.')[1]) 
    if nbDecimals <= digits:
        return number
    stepper = 10.0 ** digits
    return math.trunc(stepper * number) / stepper

def markov_graph(transitions, seed = 42, digits = 4, title = ""):
    """
    input: transitions, a dictionary having as 
    keys: touples with 2 elements being the from state and from state
    values: the transition probability
    output: markov chain graph
    """
    G = nx.MultiDiGraph()

    for transition, probability in transitions.items():
        state_from, state_to = transition
        if probability != 0: 
        #if probability state_from to state_to is not 0 we add an edge to the graph
            G.add_edge(state_from, state_to, weight=truncate(probability, digits))

    #create positions of nodes: dictionary with coordinates
    pos = nx.spring_layout(G, seed) 

    # Increase the scale to avoid overlap
    pos = {k: [v[0] * 2, v[1] * 2] for k, v in pos.items()}

    labels = nx.get_edge_attributes(G, 'weight')
    nx.draw(G, pos, with_labels=True, node_size=700, node_color='skyblue', font_size=8, font_color='black',
            connectionstyle='arc3,rad=0.1')

    # Annotate edges manually with adjusted positions to avoid overlap
    for edge, weight in labels.items():
        (x, y) = pos[edge[0]]
        text_x = 3/4*x + 1/4*pos[edge[1]][0]
        text_y = 3/4*y + 1/4*pos[edge[1]][1]
        #shift text to avoid overlap
        text_y += 0.2 if edge[0] == edge[1] else 0


        plt.text(text_x, text_y, f"{weight}", fontsize=8, color='blue', verticalalignment='center',
                 horizontalalignment='center')
    plt.title(title)
    plt.show()
    
    
def combine_overlaps(df):
    """
    this functions combines any time overlaps present in the dataframe for each generator
    so that for every time t there is at most one row describing the generator at time t.
    df: dataframe containing UnitName, StartTS, EndTS
    """

    # Step 1: Sort the DataFrame
    df.sort_values(by=["UnitName", "StartTS"], inplace=True)

    # Step 2 and 3: Combine overlapping intervals
    result = []
    current_interval = None
    n_rows = df.shape[0]
    perc = n_rows // 100

    for k, row in df.iterrows():
        if k % perc == 0:
            print(f"percentage of rows parsed = {k / n_rows *100:.2f}%")
        if current_interval is None:
             current_interval = row.copy()
        elif row["StartTS"] >= current_interval["EndTS"] or row["UnitName"] != current_interval["UnitName"]:
            # No overlap or new UnitID
            result.append(current_interval)
            current_interval = row.copy()
        else:
            # Overlapping intervals, update the EndTS
            current_interval["EndTS"] = row["StartTS"]

    result_df = pd.DataFrame(result)

    return result_df


def get_markov_probs(df, states_column):
    """
    input:
    df: dataframe having as columns: states_column, "ProductionType", "StartTS", "UpTime"
    states_column: string with name of column where the state of the generator is saved
    output: dictionary having as keys tuples with two states and the associated probability transition
    """
    states = list(df[states_column].unique())
    states.append("Running")
    transitions = []
    for x in states:
        for y in states:
            transitions.append((x,y))
            
    transitions_counter = dict(zip(transitions, [0]*len(transitions)))
    GenGroups = df.groupby("UnitName")
    previous_state = "Running"
    current_state = "Running"
    for unit_name, unit_df in GenGroups:
        unit_df = unit_df.sort_values(["StartTS"])
        #count transition occurante for unit
        for index, row in unit_df.iterrows():
            uptime = row["RunningTime"]
            #get current state from row
            current_state = row[states_column]

            if pd.isna(uptime):
                #if uptime == "Nan" then it was the first recorded instance of the generator in the dataframe so before it was running.
                previous_state = "Running"
            elif uptime > 10 / (60 * 24): # and previous_state != "Running"
                #if the generator had some time between the previous row than the previous state was running
                #and we must add 1 to previousprevious state and running
                transitions_counter[(previous_state, "Running")] += 1
                previous_state = "Running"    

            transitions_counter[(previous_state, current_state)] += 1
            #the current state becomes the previous_state
            previous_state = current_state

    #get the transtions probabilities
    transitions_probs = transitions_counter
    counter_dict = dict(zip(states, [0]*len(states)))
    for state in states:
        for transition, counter in transitions_probs.items():
            if transition[0] == state:
                counter_dict[state] += counter 

    for transition, counter in transitions_probs.items():
        if counter_dict[transition[0]] != 0:
            #if transition[0] occurs at least one time
            transitions_probs[transition] = transitions_probs[transition] / counter_dict[transition[0]]
    return transitions_probs

def weighted_values(values, probabilities, size):
    bins = np.add.accumulate(probabilities)
    return values[np.digitize(np.random.random_sample(size), bins)]

def next_state_markov(markov, current_state):
    possible_states = []
    transition_probs = []
    for key, prob in markov.items():
        if key[0] == current_state and prob != 0:
            possible_states.append(key[1])
            transition_probs.append(prob)
    #if len(possible_states) == 0:
    #    return current_state
    #else:
    return weighted_values(np.array(possible_states), np.array(transition_probs),1)[0]

def get_gen_type(geo_df):
    geo_to_entsoe_gen_d = {
        "Hard Coal": "Fossil Hard coal ", 
        'Lignite' : 'Fossil Brown coal/Lignite ',
        'Oil': "Fossil Oil ",
        'Waste': "Waste ",
        'Natural Gas': "Fossil Gas ",#o ci andrebbe qualcos altro
        #'Hydro',
        'Nuclear': "Nuclear ", 
        'Other' : "Other ", 
        'Solar':"Solar ",
        'Wind':"Wind Onshore ",
        'Geothermal':"Geothermal "
        }
    #given a unit row of geo dataframe give gen type in ENTSOE format
    #in geo there is just one windpower type (not onshore or offshore) or maybe you can see from the dataframe
    fuel_type = geo_df["Fueltype"]
    gen_type = geo_df["Technology"]
    if fuel_type == "Hydro":
        if gen_type == "Reservoir":
            return 'Hydro Water Reservoir '
        elif gen_type == "Run-Of-River":
            return 'Hydro Run-of-river and poundage '
        elif gen_type == "Pumped Storage":
            return 'Hydro Pumped Storage '
    elif fuel_type in geo_to_entsoe_gen_d.keys():
        return geo_to_entsoe_gen_d[fuel_type]
    else:
        print(f"Generetor {gen_type},{fuel_type} not found classfied as Other")
        return "Other "



# Questions:
1. There is no onshore / offshore windpower
2. What is Hydro without further specification
3. For what timeframe should I generate data?

# TODO:
- remove redundant gens in geo

In [13]:
#Generators for which we want to generate scenarios
geo = pm.data.GEO()

## Dataset descripiton

    - projectID - Immutable identifier of the power plant
        
    - Power plant name - claim of each database

    - Fueltype - {Bioenergy, Geothermal, Hard Coal, Hydro, Lignite, Nuclear, Natural Gas, Oil, Solar, Wind, Other}

    - Technology - {CCGT, OCGT, Steam Turbine, Combustion Engine, Run-Of-River, Pumped Storage, Reservoir}

    - Set - {Power Plant (PP), Combined Heat and Power (CHP), Storages (Stores)}

    - Capacity - [MW]

    - Duration - Maximum state of charge capacity in terms of hours at full output capacity

    - Dam Information - Dam volume [Mm^3] and Dam Height [m]

    - Geo-position - Latitude, Longitude

    - Country - EU-27 + CH + NO (+ UK) minus Cyprus and Malta

    - YearCommissioned - Commmisioning year of the powerplant

    - RetroFit - Year of last retrofit




In [16]:
#Import Outages Dataframe to construct model ENTSO-E format
data_path = "../outagesmodelingdata/"
df = pd.read_csv(data_path+"deltaWithEverything_df.csv", parse_dates = [0,1])
df["StartTS"] = pd.to_datetime(df["StartTS"])
df["EndTS"] = pd.to_datetime(df["EndTS"])
df = df.sort_values(["UnitName","StartTS"])

#Combine overlaps fo that every generator is only at one state at the time
df = combine_overlaps(df)
#combine countries
df["MapCode"] = df["MapCode"].apply(lambda x: x[0:2])


percentage of rows parsed = 0.00%
percentage of rows parsed = 1.00%
percentage of rows parsed = 2.00%
percentage of rows parsed = 3.00%
percentage of rows parsed = 4.00%
percentage of rows parsed = 5.00%
percentage of rows parsed = 6.00%
percentage of rows parsed = 7.00%
percentage of rows parsed = 8.00%
percentage of rows parsed = 9.00%
percentage of rows parsed = 10.00%
percentage of rows parsed = 11.00%
percentage of rows parsed = 12.00%
percentage of rows parsed = 13.00%
percentage of rows parsed = 14.00%
percentage of rows parsed = 15.00%
percentage of rows parsed = 16.00%
percentage of rows parsed = 17.00%
percentage of rows parsed = 18.00%
percentage of rows parsed = 19.00%
percentage of rows parsed = 20.00%
percentage of rows parsed = 21.00%
percentage of rows parsed = 22.00%
percentage of rows parsed = 23.00%
percentage of rows parsed = 24.00%
percentage of rows parsed = 25.00%
percentage of rows parsed = 26.00%
percentage of rows parsed = 27.00%
percentage of rows parsed = 28

In [31]:
#Create model
#statetime_df: dataframe with the parametres of the exponantialdistribution of statetime
#markov_d: dictionary of markov chains of the various generator types
#capacity_d: dictionary of distributions of capacity in p.u.
#STATE TIME MODELING

#In this section we model the time a generator spends in a certain state. We model the random variable time in each state with a exponential distribution using the maximum likelyhood estimator (MLE).

#Change state_column to look ad different state distributions
state_column = "Type"

#def state_time_distribution_fitting(df, state_column):

states = list(df[state_column].unique())
delta_df = df
delta_df = delta_df.sort_values(by = ["UnitName", "StartTS"])

for state in states:
    #calculate time spend in each state
    delta_df[state+"Time"] = [np.datetime64("NaT")]*df.shape[0]
    state_df = delta_df[delta_df[state_column] == state]
    delta_df.loc[delta_df[state_column] == state, state+"Time"] = state_df["EndTS"] - state_df["StartTS"]
    delta_df[state+"Time"] = (delta_df[state+"Time"] /  np.timedelta64(1, 'h'))
    
for unit, unit_df in delta_df.groupby("UnitName"):                             
    unit_df["EndTS"] = pd.to_datetime(unit_df["EndTS"])
    shifted_endts = pd.to_datetime(unit_df["EndTS"].shift())
    start_ts = delta_df.loc[delta_df["UnitName"] == unit, "StartTS"]
    delta_df.loc[delta_df["UnitName"] == unit, "RunningTime"] = start_ts - shifted_endts

delta_df["RunningTime"] = (delta_df["RunningTime"] /  np.timedelta64(1, 'h'))
delta_df.loc[delta_df["RunningTime"] == 0, "RunningTime"] = np.nan

states = states + ["Running"]


#fit outages distributions
statetime_df = pd.DataFrame() #create empty parameter table
grouped_delta = delta_df.groupby(["ProductionType"])
statetime_df["ProductionType"] = grouped_delta.first().reset_index()["ProductionType"]
for state in states:
    statetime_df[state + "Time"] = [np.nan]*len(list(statetime_df["ProductionType"]))


def exponential_fit(x, scale):
    return expon.pdf(x, scale=scale)


for group_name, group_df in grouped_delta:
    
    #We drop NaN valued rows
    #group_df = group_df.dropna(subset = ["UpTime","OffTime"])
    
    for state in states:
        if not pd.isna(group_df[state+"Time"].mean()):
            # Fit the data to the exponential function
            mean = group_df[state+"Time"].mean()
            state_scale = mean
            statetime_df.loc[statetime_df["ProductionType"] == group_name[0], state+"Time"] = state_scale
            #plt.figure() #uncomment to plot
            #plt.xlim(0, 3*mean)  # Adjust the values as needed
            #plt.ylim(0, 1)  # Adjust the values as needed
            #plt.hist(group_df[state + "Time"], bins=5000, edgecolor='black', density=True)
            # Plot the fitted exponential distribution
            #x = np.sort(group_df[state + "Time"])
            #plt.plot(x, exponential_fit(x, scale = state_scale), 'r-', label='Exponential Fit')
            #plt.xlabel(f'Duration {state + "Time"}')
            #plt.ylabel('Frequency')
            #plt.title(f'{state}Time {group_name}')
            #plt.legend()
            #plt.show()


            
#MARKOV CHAIN MODELING
print("Starting Markov chain modeling")
df = delta_df[delta_df["Reason"] != "Shutdown" ] #remove shutdowns 

markov_d= {}
#Create a list containing tuples rapresenting all possible state changes (x,y) := x --> y
#data is a dataframe containing the correct "UpTime" between the states considered

GenTypeGroup_df = df.groupby("ProductionType")

#We can use different states
#print markov chain for each type of generator
for production_type, data in GenTypeGroup_df:
    transitions_probs = get_markov_probs(data, "Type")
    markov_d[production_type] = transitions_probs
    #uncomment to plot:
    #markov_graph(transitions_probs, title = f"{production_type} Markov Chain")

#t_probs = dict(transitions_probs)
#for key, value in transitions_probs.items():
#    if value < 0.05:
#        del t_probs[key]
#print graph


#CAPACITY MODELING
print("Starting Capacity Modeling")
#With Kernel density estimation
grouped_df = df.groupby(["ProductionType", "Type"])
capacity_d = {}
#idea: calculate prob for capacity = 0, remove and the use non parametric fit.
for production_type, prod_df in grouped_df:
    #calculate per unit available capacity
    PU = prod_df["AvailableCapacity"].copy() / prod_df["InstalledCapacity"]
    df.loc[(df["ProductionType"] == production_type[0]) & (df["Type"] == production_type[1]), "p.u."] = PU
    PU = PU[~np.isnan(PU)]
    p_0 = np.sum(PU == 0) / len(PU)
    PUplus = np.sort(PU[PU != 0]) #non zero capacities
    if len(PUplus) != 0:
        
        colors = ["r"]
        kernels = ["gaussian"]
        lw = 2
        #Uncomment to plot
        
        #plt.figure()
        for color, kernel in zip(colors, kernels):
            kde = KernelDensity(kernel=kernel, bandwidth=0.05).fit(PUplus[:, np.newaxis])
            capacity_d[production_type] = (p_0, kde)
            log_dens = kde.score_samples(PUplus[:, np.newaxis]) * len(PUplus) / len(PU) #scale to be prob on non zero values
            Y = np.exp(log_dens)
            #plt.plot(PUplus, Y, color = color, label = kernel)
            
        """
        # Plot histogram

        plt.xlim(0, 1)
        
        plt.hist(PU, bins=min(500, int(np.ceil(len(PU) ** (1. / 3))*3/2)) + 3 , edgecolor='black', alpha=0.7, density = True)

        # Customize the plot (add labels, title, etc.)
        plt.xlabel('p.u. Available Capacity')
        plt.ylabel('Frequency')
        plt.title(f'{production_type}')
        plt.legend()
        plt.show()
        """


In [49]:
geo_gen_types = geo["Technology"].unique()
geo_fuel_types = geo["Fueltype"].unique()
grouped  = df.groupby(["UnitName"]).first().reset_index()
entsoe_gen_types = list(grouped["ProductionType"].unique())

In [155]:
# Generating Scenarios

#Starting state --> we roll random running time for every generator the end
# todo: make the markov chaing run for a while to make the various generators get further in states
#np.warnings.filterwarnings('ignore', category=np.VisibleDeprecationWarning) 
grouped  = df.groupby(["UnitName"]).first().reset_index()
gen_names = list(geo["projectID"])
gen_types = []
for index, geo_row in geo.iterrows():
    gen_type = get_gen_type(geo_row)
    if gen_type is None:
        tech = geo_row["Technology"]
        fuel = geo_row["Fueltype"]
        print("Weird thing:", tech, fuel)
        gen_types.append("Other ")
    else:
        gen_types.append(gen_type)

n_gens = len(gen_names)
pre_run_hours = 6*30*24 #time the chain is run for before producing data

states = ["Running", "Forced", "Planned"]
state_df = pd.DataFrame({"UnitName": gen_names, "ProductionType": gen_types, "State":["Running"]*n_gens, "Counter":[0]*n_gens, "Capacity":[1]*n_gens})
perc = np.ceil(pre_run_hours / 100)

for h in np.arange(pre_run_hours):
    
    if h % (perc) == 0:
        print(f"{h/pre_run_hours *100} %")
        print(state_df.head(5))
    
    for index, gen_row in state_df[state_df["Counter"] == 0].iterrows():
        gen_name = gen_row["UnitName"] #remove?
        gen_type = gen_row["ProductionType"]
        current_state = gen_row["State"]
        markov = markov_d[gen_type] #get associate markov chain
        
        new_state = next_state_markov(markov, current_state) #get new state of the generator
        
        scale = statetime_df.loc[statetime_df["ProductionType"] == gen_type, new_state + "Time"]
        new_counter = np.ceil(np.random.exponential(scale, 1))[0] #get number of hours spent in new_state
        
        #get the capacity of the generator in the currnent state
        if new_state == "Running":
            new_capacity = 1
        elif (gen_type, new_state) in capacity_d.keys():
            p_zero, pu_pdf = capacity_d[(gen_type, new_state)]
            if np.random.random_sample(1)[0] <= p_zero:
                new_capacity = 0
            else:
                new_capacity = pu_pdf.sample(1)[0][0]
                if new_capacity < 0:
                    new_capacity = 0
                elif new_capacity > 1:
                    new_capacity = 1
        else:
            #todo this should not happen
            #print(f"no capacity distribution for {gen_type}")
            new_capacity = 1
        
        state_df.loc[index, ["State", "Counter", "Capacity"]] = [new_state, new_counter, new_capacity]
        
    state_df.loc[state_df["Counter"] != 0, "Counter"] -= 1
    
#idea: 
# while t < 10 years roll dices take final state and final counter

Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing: nan Hydro
Weird thing

10.185185185185185 %
    UnitName     ProductionType    State  Counter  Capacity
0  GEO-45151  Fossil Hard coal   Running       97  1.000000
1  GEO-45151  Fossil Hard coal   Running       29  1.000000
2  GEO-45150  Fossil Hard coal   Running      144  1.000000
3  GEO-45719  Fossil Hard coal   Running      114  1.000000
4  GEO-45719  Fossil Hard coal   Planned      128  0.389838
11.203703703703702 %
    UnitName     ProductionType    State  Counter  Capacity
0  GEO-45151  Fossil Hard coal   Running       53  1.000000
1  GEO-45151  Fossil Hard coal    Forced       27  0.000000
2  GEO-45150  Fossil Hard coal   Running      100  1.000000
3  GEO-45719  Fossil Hard coal   Running       70  1.000000
4  GEO-45719  Fossil Hard coal   Planned       84  0.389838
12.222222222222221 %
    UnitName     ProductionType    State  Counter  Capacity
0  GEO-45151  Fossil Hard coal   Running        9  1.000000
1  GEO-45151  Fossil Hard coal   Running      612  1.000000
2  GEO-45150  Fossil Hard coal   Runn

32.592592592592595 %
    UnitName     ProductionType    State  Counter  Capacity
0  GEO-45151  Fossil Hard coal   Running       64       1.0
1  GEO-45151  Fossil Hard coal   Running      328       1.0
2  GEO-45150  Fossil Hard coal   Running       79       1.0
3  GEO-45719  Fossil Hard coal   Running      391       1.0
4  GEO-45719  Fossil Hard coal   Running      292       1.0
33.611111111111114 %
    UnitName     ProductionType    State  Counter  Capacity
0  GEO-45151  Fossil Hard coal   Running       20       1.0
1  GEO-45151  Fossil Hard coal   Running      284       1.0
2  GEO-45150  Fossil Hard coal   Running       35       1.0
3  GEO-45719  Fossil Hard coal   Running      347       1.0
4  GEO-45719  Fossil Hard coal   Running      248       1.0
34.629629629629626 %
    UnitName     ProductionType    State  Counter  Capacity
0  GEO-45151  Fossil Hard coal   Running       68       1.0
1  GEO-45151  Fossil Hard coal   Running      240       1.0
2  GEO-45150  Fossil Hard coal   Plan

55.00000000000001 %
    UnitName     ProductionType    State  Counter  Capacity
0  GEO-45151  Fossil Hard coal   Running      690  1.000000
1  GEO-45151  Fossil Hard coal   Planned        0  0.000000
2  GEO-45150  Fossil Hard coal   Running      130  1.000000
3  GEO-45719  Fossil Hard coal   Planned       77  0.711468
4  GEO-45719  Fossil Hard coal   Planned       64  0.876957
56.018518518518526 %
    UnitName     ProductionType    State  Counter  Capacity
0  GEO-45151  Fossil Hard coal   Running      646  1.000000
1  GEO-45151  Fossil Hard coal   Running       65  1.000000
2  GEO-45150  Fossil Hard coal   Running       86  1.000000
3  GEO-45719  Fossil Hard coal   Planned       33  0.711468
4  GEO-45719  Fossil Hard coal   Planned       20  0.876957
57.03703703703704 %
    UnitName     ProductionType    State  Counter  Capacity
0  GEO-45151  Fossil Hard coal   Running      602       1.0
1  GEO-45151  Fossil Hard coal   Running       21       1.0
2  GEO-45150  Fossil Hard coal   Runnin

77.4074074074074 %
    UnitName     ProductionType    State  Counter  Capacity
0  GEO-45151  Fossil Hard coal   Running       16  1.000000
1  GEO-45151  Fossil Hard coal   Planned       34  0.578432
2  GEO-45150  Fossil Hard coal   Running      243  1.000000
3  GEO-45719  Fossil Hard coal   Running       98  1.000000
4  GEO-45719  Fossil Hard coal   Running      367  1.000000
78.42592592592592 %
    UnitName     ProductionType    State  Counter  Capacity
0  GEO-45151  Fossil Hard coal   Running       50       1.0
1  GEO-45151  Fossil Hard coal   Running      123       1.0
2  GEO-45150  Fossil Hard coal   Running      199       1.0
3  GEO-45719  Fossil Hard coal   Running       54       1.0
4  GEO-45719  Fossil Hard coal   Running      323       1.0
79.44444444444444 %
    UnitName     ProductionType    State  Counter  Capacity
0  GEO-45151  Fossil Hard coal   Running        6       1.0
1  GEO-45151  Fossil Hard coal   Running       79       1.0
2  GEO-45150  Fossil Hard coal   Running 

99.81481481481481 %
    UnitName     ProductionType    State  Counter  Capacity
0  GEO-45151  Fossil Hard coal   Running      312       1.0
1  GEO-45151  Fossil Hard coal   Running      105       1.0
2  GEO-45150  Fossil Hard coal   Running      207       1.0
3  GEO-45719  Fossil Hard coal   Running       58       1.0
4  GEO-45719  Fossil Hard coal   Running      215       1.0


In [156]:
# Generating Scenarios
# given a dataframe having UnitName and UnitType create scenarios for each UnitName
# 

start_time = np.datetime64("2023-01-01T00:00:00")
end_time = np.datetime64("2023-06-01T00:00:00")

tot_hours = (end_time - start_time) / np.timedelta64(1, "h")
data_list = []
current_time = start_time
#state_df = pd.DataFrame({"UnitName": gen_names, "ProductionType": gen_types, "State":["Running"]*n_gens, "Counter":[0]*n_gens, "Capacity":[1]*n_gens})

perc = np.ceil(tot_hours / 100)
columns = ["TimeStamp"] + gen_names

for h in np.arange(tot_hours):
    
    if h % (10*perc) == 0:
        print(f"{h/tot_hours *100} %")
        print(state_df.head(5))
    
    for index, gen_row in state_df[state_df["Counter"] == 0].iterrows():
        gen_name = gen_row["UnitName"] #remove?
        gen_type = gen_row["ProductionType"]
        current_state = gen_row["State"]
        markov = markov_d[gen_type] #get associate markov chain
        
        new_state = next_state_markov(markov, current_state) #get new state of the generator
        
        scale = statetime_df.loc[statetime_df["ProductionType"] == gen_type, new_state + "Time"]
        new_counter = np.ceil(np.random.exponential(scale, 1))[0] #get number of hours spent in new_state
        
        #get the capacity of the generator in the currnent state
        if new_state == "Running":
            new_capacity = 1
        elif (gen_type, new_state) in capacity_d.keys():
            p_zero, pu_pdf = capacity_d[(gen_type, new_state)]
            if np.random.random_sample(1)[0] <= p_zero:
                new_capacity = 0
            else:
                new_capacity = pu_pdf.sample(1)[0][0]
                if new_capacity < 0:
                    new_capacity = 0
                elif new_capacity > 1:
                    new_capacity = 1
        else:
            #todo this should not happen
            #print(f"no capacity distribution for {gen_type}")
            new_capacity = 1
        
        state_df.loc[index, ["State", "Counter", "Capacity"]] = [new_state, new_counter, new_capacity]
        
    state_df.loc[state_df["Counter"] != 0, "Counter"] -= 1
    current_time = current_time + np.timedelta64(1, "h") #move forward one hour
    new_row = [current_time] + list(state_df["Capacity"])
    row_d = dict(zip(columns, new_row))
    data_list.append(row_d)

scenario = pd.DataFrame(data_list)
   

0.0 %
    UnitName     ProductionType    State  Counter  Capacity
0  GEO-45151  Fossil Hard coal   Running      304       1.0
1  GEO-45151  Fossil Hard coal   Running       97       1.0
2  GEO-45150  Fossil Hard coal   Running      199       1.0
3  GEO-45719  Fossil Hard coal   Running       50       1.0
4  GEO-45719  Fossil Hard coal   Running      207       1.0
10.20971302428256 %
    UnitName     ProductionType    State  Counter  Capacity
0  GEO-45151  Fossil Hard coal   Running      406  1.000000
1  GEO-45151  Fossil Hard coal   Running      233  1.000000
2  GEO-45150  Fossil Hard coal   Planned       90  0.711902
3  GEO-45719  Fossil Hard coal   Running      536  1.000000
4  GEO-45719  Fossil Hard coal   Planned       19  0.777525
20.41942604856512 %
    UnitName     ProductionType    State  Counter  Capacity
0  GEO-45151  Fossil Hard coal   Running       36  1.000000
1  GEO-45151  Fossil Hard coal   Running      705  1.000000
2  GEO-45150  Fossil Hard coal   Running      742  1.0

In [160]:
scenario.shape

(3624, 1601)

In [161]:
geo.shape

(4019, 18)

In [162]:
len(gen_names)

4019

In [163]:
n_gens

4019

In [164]:
len(columns)

4020

In [165]:
len(new_row)

4020

In [173]:
len(data_list[2].keys())

1601

In [170]:
state_df.shape

(4019, 5)

In [171]:
len(new_row)

4020

In [174]:
len(row_d.keys())

1601

In [175]:
len(dict(zip(columns, new_row)).keys())

1601

In [176]:
len(columns)

4020

In [177]:
len(new_row)

4020

In [180]:
len(list(zip(columns,new_row)))

4020

In [182]:
len(dict(zip(columns, new_row)))

1601

In [186]:
len(set(columns))

1601

In [191]:
geo.groupby("projectID").head(3)

GEO,Name,Fueltype,Technology,Set,Country,Capacity,Efficiency,DateIn,DateRetrofit,DateOut,lat,lon,Duration,Volume_Mm3,DamHeight_m,StorageCapacity_MWh,EIC,projectID
0,Duernrohr Chp,Hard Coal,CCGT,CHP,Austria,373.384467,,1985.0,,,48.3264,15.9246,,,,,,GEO-45151
1,Duernrohr Chp,Hard Coal,CCGT,CHP,Austria,324.521809,,1985.0,,,48.3264,15.9246,,,,,,GEO-45151
2,Mellach Chp,Hard Coal,Steam Turbine,CHP,Austria,226.796491,,1986.0,,,46.9115,15.4884,,,,,,GEO-45150
3,Lenzing,Hard Coal,,PP,Austria,11.063243,,1955.0,,,47.9767,13.6201,,,,,,GEO-45719
4,Lenzing,Hard Coal,,PP,Austria,19.360676,,1972.0,,,47.9767,13.6201,,,,,,GEO-45719
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4014,Vidraru Romania,Hydro,Reservoir,PP,Romania,217.197452,,1966.0,,,45.3667,24.6307,,,,,,GEO-40980
4015,Villarodin,Hydro,,PP,France,320.668722,,1968.0,,,45.2130,6.7210,,,,,,GEO-39817
4016,Villerest,Hydro,Reservoir,PP,France,57.261146,,1984.0,,,45.9870,4.0480,,,,,,GEO-39797
4017,Vinon,Hydro,,PP,France,28.949260,,1967.0,,,43.7250,5.8270,,,,,,GEO-39822


In [192]:
geo

GEO,Name,Fueltype,Technology,Set,Country,Capacity,Efficiency,DateIn,DateRetrofit,DateOut,lat,lon,Duration,Volume_Mm3,DamHeight_m,StorageCapacity_MWh,EIC,projectID
0,Duernrohr Chp,Hard Coal,CCGT,CHP,Austria,373.384467,,1985.0,,,48.3264,15.9246,,,,,,GEO-45151
1,Duernrohr Chp,Hard Coal,CCGT,CHP,Austria,324.521809,,1985.0,,,48.3264,15.9246,,,,,,GEO-45151
2,Mellach Chp,Hard Coal,Steam Turbine,CHP,Austria,226.796491,,1986.0,,,46.9115,15.4884,,,,,,GEO-45150
3,Lenzing,Hard Coal,,PP,Austria,11.063243,,1955.0,,,47.9767,13.6201,,,,,,GEO-45719
4,Lenzing,Hard Coal,,PP,Austria,19.360676,,1972.0,,,47.9767,13.6201,,,,,,GEO-45719
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4014,Vidraru Romania,Hydro,Reservoir,PP,Romania,217.197452,,1966.0,,,45.3667,24.6307,,,,,,GEO-40980
4015,Villarodin,Hydro,,PP,France,320.668722,,1968.0,,,45.2130,6.7210,,,,,,GEO-39817
4016,Villerest,Hydro,Reservoir,PP,France,57.261146,,1984.0,,,45.9870,4.0480,,,,,,GEO-39797
4017,Vinon,Hydro,,PP,France,28.949260,,1967.0,,,43.7250,5.8270,,,,,,GEO-39822
