In [None]:
import re
from pathlib import Path

import hvplot.pandas
import hvplot.xarray
import pandas as pd
import pypsa
import seaborn
import xarray as xr

# Select seaborn's "whitegrid" plotting style.
# NOTE(review): the figures in this script are created through hvplot;
# confirm that this seaborn setting actually affects them.
seaborn.set_style("whitegrid")


def remove_numbers(s):
    """Remove a trailing number and its surrounding whitespace from a string.

    Only digits at the very end of the string (optionally wrapped in
    whitespace) are removed; numbers elsewhere are left untouched.

    e.g. 'csp-tower TES 34' -> 'csp-tower TES'
    """

    # Raw string literal: in a plain literal "\s" and "\d" are invalid escape
    # sequences, which recent Python versions report with a SyntaxWarning.
    return re.sub(r"(\s*\d+\s*)$", "", s)


def remove_brakets(s):
    """Strip a trailing bracketed specification from a technology name.

    A single ' (...)' group at the very end of the string is dropped when its
    content consists only of word characters, whitespace and commas. The
    remaining name is returned with surrounding whitespace removed. Useful for
    building keys for pd.groupby(...), e.g.
    HVDC inverter pair (exp) -> HVDC inverter pair
    battery inverter (discharging, imp) -> battery inverter
    """

    # Group 1: the name (lazy); optional group 2: the trailing ' (...)' part.
    pattern = r"(.*?)(\s\([\w\s,]+\))?$"
    base_name = re.match(pattern, s).group(1)

    return base_name.strip()


# Prefix -> component family. A component whose name starts with one of the
# keys is mapped to the corresponding value.
_COMPONENT_GROUPS = {
    "csp-tower TES": "csp tower TES",
    "csp-tower power block": "csp tower power block",
    "csp-tower": "csp tower",
    "offwind": "wind offshore",
    "onwind": "wind onshore",
    "solar-utility": "pv utility",
    "HVDC": "HVDC",
    "battery": "battery",
    "Buffer:": "Import buffer",
    "seawater desalination": "water desalination",
    "clean water tank storage": "water storage",
    "H2 (g) submarine pipeline": "H2 pipeline",
    "H2 (g) pipeline": "H2 pipeline",
    "H2 (g) fill compressor station": "H2 pipeline",
    "H2 (g) pipeline decompressor": "H2 pipeline",
    "electrolysis": "electrolysis",
    "H2 storage": "H2 storage",
    "hydrogen storage": "H2 storage",
    "CCGT": "CCGT",
}


def group_components(s):
    """Map a component name to its component family by longest-prefix match.

    Parameters
    ----------
    s : str
        Component name, e.g. 'csp-tower TES' or 'H2 (g) pipeline decompressor'.

    Returns
    -------
    str
        The family name belonging to the longest lookup key that ``s``
        starts with.

    Raises
    ------
    AssertionError
        If ``s`` does not start with any known key.
    """

    # all potential matches for s
    candidates = [k for k in _COMPONENT_GROUPS if s.startswith(k)]

    # Raised explicitly (rather than via an `assert` statement) so the check
    # is not stripped when Python runs with -O; exception type and message
    # are the same as an assert would produce.
    if not candidates:
        raise AssertionError(f"No matching entry for {s} found.")

    # The longest matching prefix is the most specific one. All candidates
    # are prefixes of the same string, so two distinct candidates can never
    # have the same length and the choice is unambiguous.
    return _COMPONENT_GROUPS[max(candidates, key=len)]


# Load the semicolon-separated results table.
# NOTE(review): `snakemake` is not defined in this file; presumably it is
# injected by Snakemake when this is run as a workflow script.
df = pd.read_csv(snakemake.input["results"], sep=";")


## +++ Cost composition of ESC by component family
# Explicit copy: the "subcategory" column is overwritten below, and assigning
# into the result of a query would otherwise raise SettingWithCopyWarning.
cost = df.query("category == 'cost'").copy()

# Aggregate across copies of components (numbered components) and related
# technologies: strip trailing numbers, strip bracketed specifications, then
# map each name to its component family.
cost["subcategory"] = (
    cost["subcategory"]
    .apply(remove_numbers)
    .apply(remove_brakets)
    .apply(group_components)
)

cost = cost.groupby("subcategory")["value"].sum()

# Need multiindex in order to stack with hvplot
cost = cost.to_frame().reset_index()
# .item() raises unless the results contain exactly one distinct ESC
cost["esc"] = df["esc"].unique().item()
cost = cost.set_index(["esc", "subcategory"])

# .item() raises unless there is exactly one 'Total demand' row
demand = df.query("subcategory == 'Total demand'")["value"].item()  # of ESC in MWh

cost /= demand  # cost per MWh delivered

fig_cost = cost.hvplot.barh(
    stacked=True, ylabel="LCoE [EUR/MWh]", title="Cost composition for LCoE"
).options(yrotation=90)

## +++ Installed capacities by components
# Explicit copy for the same reason as for `cost` above.
capacities = df.query("category == 'installed capacity'").copy()

# Aggregate across copies of components (numbered components)
capacities["subcategory"] = capacities["subcategory"].apply(remove_numbers)

capacities = capacities.groupby("subcategory")["value"].sum()

fig_capacities = capacities.hvplot.bar(
    ylabel="[MW, MWh, m3, t]",
    title="Installed technology capacities",
    rot=60,
    shared_axes=False,
)


## +++ Combine figures
# Stack both plots vertically in a single column
fig = (fig_cost + fig_capacities).cols(1)

## +++ Save figures
# Write the combined figure to every requested output path
for fp in snakemake.output["fig"]:
    hvplot.save(fig, fp)