In [9]:
import pandas as pd 
import plotly.graph_objects as go
import sys, os
fn_data = "../data/renewables_with_load.parquet"
from data import get_generation, normalize_generation, get_profiles
from graphs import plot_profile

In [12]:
def normalize_generation(
    df: pd.DataFrame,
    shares: dict[str, float],
    total_demand: float = 0,
) -> pd.DataFrame:
    """Normalize data to a given value of annual demand. Generation of
       renewable generation is scaled to meet the given demand share on an
       annual basis. In addition, a baseload technology is added with an
       constant annual profile

    Args:
        df: Dataframe with observed demand and generation data
        shares: shares of each technology in annual demand. keys have to match
            with columns. Exception is "Baseload" that is used to create the
            baseload technology with constant profile
        total_demand: Total demand over the whole time horizon to normalize demand
            If zero, no demand scaling
    """
    if total_demand == 0:
        total_demand = df["Demand"].sum()
    # normalize data
    df_ = (df / df.sum()).assign(Baseload=1 / len(df))
    shares.update({"Demand": 1})
    for tech, fac in shares.items():
        df_[tech] = df_[tech] * total_demand * fac
    return df_[list(shares.keys())]

def get_generation(fn: str, country: str, year: int) -> pd.DataFrame:
    """Get renewable generation and demand by country and year

    Args:
        fn: name of parquet file
        country: 2-letter country code
        year: year for data
    """
    df = (
        pd.read_parquet(
            fn, 
            filters=[
                ("country", "==", country),
                ("dateTime", ">=", pd.to_datetime(f"{year}/01/01 00:00")),
                ("dateTime", "<=", pd.to_datetime(f"{year}/12/31 23:00")),            ])
        .set_index("dateTime")
        .drop("country", axis=1)
    )
    df.columns = [c[:1].capitalize() + c[1:] for c in df.columns]
    df["Wind"] = df["WindOffshore"].fillna(0) + df["WindOnshore"].fillna(0)
    return df

In [17]:
def resample_generation(df: pd.DataFrame, days: int = 1) -> pd.DataFrame:
    """Resample generation data to daily basis using sums
    
    Args:
        df: dataframe to be resampled
        days: number of days to aggregate
    """
    return df.resample(f"{days}D").sum()
resample_generation(df_gen, 4)

Unnamed: 0_level_0,Solar,WindOffshore,WindOnshore,Demand,Wind
dateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-01-01,137046.8050,422151.1400,2.437464e+06,4.934200e+06,2.859615e+06
2023-01-05,123007.8725,487348.3725,2.042660e+06,4.883847e+06,2.530008e+06
2023-01-09,94382.8425,421744.4000,2.614856e+06,5.783068e+06,3.036600e+06
2023-01-13,143246.4075,433522.8650,3.266431e+06,5.394583e+06,3.699954e+06
2023-01-17,155211.6025,355618.5750,1.165248e+06,5.884822e+06,1.520866e+06
...,...,...,...,...,...
2023-12-15,181422.7275,411122.4675,2.030749e+06,5.141704e+06,2.441872e+06
2023-12-19,93560.8600,436520.5950,3.427738e+06,5.451933e+06,3.864258e+06
2023-12-23,82290.3825,431917.9250,3.114949e+06,4.439456e+06,3.546867e+06
2023-12-27,167560.8000,488966.9500,2.860188e+06,4.589512e+06,3.349155e+06


In [14]:
df_gen = get_generation(fn_data, country="DE", year=2023)
df_norm = normalize_generation(df_gen, shares={"Wind": 0.5, "Solar": 0.2, "Baseload": 0.4}, total_demand=0)
profiles = get_profiles(df_norm)
profiles["Hourly: Year"]

Unnamed: 0_level_0,Wind,Solar,Baseload,Demand
Hour,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,27130.770867,0.0,20854.452969,41775.432164
1,26812.641116,0.0,20854.452969,41421.215432
2,26532.765564,0.989173,20854.452969,41852.840212
3,26447.014592,113.19439,20854.452969,43542.830685
4,26537.470437,1062.96411,20854.452969,47406.574452
5,26091.946928,3931.576557,20854.452969,51757.933479
6,25143.304706,9348.831946,20854.452969,55245.846712
7,24439.121869,16610.326137,20854.452969,57249.540377
8,24113.018068,24031.232347,20854.452969,58427.05826
9,24287.274853,29628.505401,20854.452969,59359.867616


In [5]:
def plot_profile(
        df_p: pd.DataFrame, 
        title: str = "", 
        tech_order: list[str] | None = None, 
        colors: dict[str, str] | None = None
    ):
    """Plot profiles
    
    Args:
        df_p: dataframe with profiles in columns and x-axis as index
        title: title for the plot
        tech_order: order of technologies for stapling. If none, technologies are
            not stapled
        colors: color setting for profiles
    """
    # set default arguments
    tech_order = ["Baseload", "Wind", "Solar"] if tech_order is None else tech_order
    colors = {
        "Demand": "Red",
        "Wind": "Green",
        "Solar": "Orange",
        "Baseload": "Black"
    } if colors is None else colors

    # staple the profiles
    if tech_order is not None:
        base = pd.Series(0, index=df_p.index)
        for t in tech_order:
            if t in df_p.columns:
                base += df_p[t]
                df_p[t] = base
    # create figure
    fig = go.Figure()
    for c in df_p.columns:
        fig.add_trace(
            go.Scatter(x=df_p.index, y=df_p[c], mode="lines", line=dict(color=colors[c]), name=c)
        )
        fig.update_layout(
            legend=dict(
                yanchor="bottom",
                y=-0.3,
                xanchor="left",
                x=0.3,
                orientation="h"),
            yaxis=dict(title="Energy"),
            xaxis=dict(title=df_p.index.name),
            title=dict(text=title, xanchor="center", yanchor="top", x=0.4)

    )
    return fig
p = "Monthly"
fig = plot_profile(profiles[p], title=p)
fig.show()


In [6]:
df_gen.

SyntaxError: invalid syntax (613620620.py, line 1)