In [None]:
import pypsa
import numpy as np
import pandas as pd
import re
import pickle
from pathlib import Path
from _helpers import calculate_annual_investment, calculate_annuity
from _helpers import extract_technology, get_bus_unit

from _helpers import configure_logging
import logging
logger = logging.getLogger(__name__)

def _do_units_match(unit1, unit2):
    """Roughly check whether two unit strings share the same base unit.

    Catches mismatches between e.g. t, m^3 and W.
    Does not catch order of magnitude mismatches by SI prefixes.

    Parameters
    ----------
    unit1, unit2 : str
        Unit strings such as 'MWh', 'kW', 't/h' or 'MWh_th'.

    Returns
    -------
    bool
        True if both units reduce to the same base unit, else False.
    """

    def _stripper(u):
        # Only the part in front of the first '_' or '-' is compared
        # (e.g. 'MWh_th' -> 'MWh').
        base = re.split(r'_|-', u)[0]
        # Remove an optional SI prefix ('M', 'k') and a trailing
        # per-hour / hour indicator ('/h', 'h').
        # Raw string: avoids the invalid escape sequence warning a plain
        # string literal containing '\/' triggers.
        return re.match(r'[Mk]?(.+?)/?h?$', base).group(1)

    return _stripper(unit1) == _stripper(unit2)

def create_network():
    """Create the pypsa network scaffolding for the ESC.

    Reads the overridden component attributes and the network csv folder
    from the snakemake inputs and sets hourly snapshots for the year given
    in the config (``scenario.year``). The 29th of February is dropped in
    leap years such that every year has the same number of snapshots.

    Returns
    -------
    network : pypsa.Network
        Network with imported components and snapshots set.
    """

    # Modify PyPSA 'Link' component to allow for 2 output busses by overwriting component_attrs
    # c.f. https://www.pypsa.org/examples/chp-fixed-heat-power-ratio.html

    # Load additional components
    # NOTE(review): pickle.load executes arbitrary code contained in the file;
    # only use it on files produced by this workflow.
    with open(snakemake.input["additional_components"], "rb") as f:
        override_component_attrs = pickle.load(f)

    # Create network with modified link-component
    network = pypsa.Network(override_component_attrs=override_component_attrs)

    # Load network components from csv files
    network.import_from_csv_folder(snakemake.input["network"])

    # Equally weighted snapshots, year defined via config.
    # The range is built with explicit inclusive end points and a Timedelta
    # frequency: the 'closed' keyword was removed in pandas 2.0 and the 'H'
    # alias is deprecated in recent pandas versions.
    year = snakemake.config["scenario"]["year"]
    hours = pd.date_range(start=f"{year}-01-01 00:00",
                          end=f"{year}-12-31 23:00",
                          freq=pd.Timedelta(hours=1))

    # Handle leap year by dropping 29th of February
    is_leap_day = (hours.month == 2) & (hours.day == 29)
    snapshots = list(hours[~is_leap_day].strftime('%Y-%m-%d %H:%M:%S'))
    network.set_snapshots(snapshots=snapshots)

    return network

def attach_efficiencies(network):
    """Attach dedicated efficiencies from file.

    The efficiencies are read from an additional csv file and written to the
    links in the pypsa network.
    Format for efficiencies.csv file:
    * "from" and "to" must be substrings of the bus names
    * "process" must be a substring of the name of the link

    Return
    ------
    network : pypsa.network
        network with external efficiencies attached to all links.

    Raises
    ------
    ValueError
        If the units registered for an efficiency do not match the units
        of the connected buses.
    """
    table = pd.read_csv(snakemake.input["efficiencies"])

    def get_efficiency(tech, src_bus, tar_bus):
        """Look up one efficiency; returns nan if the table has no entry."""
        process = extract_technology(tech)
        src = extract_technology(src_bus)
        tar = extract_technology(tar_bus)

        selected = table[(table['process'] == process) &
                         (table['to'] == tar) &
                         (table['from'] == src)]

        if selected.empty:
            return np.nan

        src_unit = get_bus_unit(src_bus, network)
        tar_unit = get_bus_unit(tar_bus, network)

        def describe_unit_mismatch():
            """Return a description of the first unit problem found, else None."""
            per_unit = selected[['from_unit','to_unit']] == 'p.u.'

            if per_unit.any(axis=None):
                # Efficiency given in p.u.: both sides must be p.u. and the
                # two buses must share the same unit.
                if not per_unit.all(axis=None):
                    return 'One efficiency in [p.u.], but the other one not.'
                if not _do_units_match(src_unit, tar_unit):
                    return f'Unit of bus {src_bus} [{src_unit}] does not match {tar_bus} [{tar_unit}].'
                return None

            # Efficiency given in explicit units: compare each bus to its side.
            if not _do_units_match(src_unit, selected['from_unit'].item()):
                return (f'Source bus unit {src_bus} [{src_unit}] does not match unit '
                        f'in registered efficiencies.csv [{selected["from_unit"].item()}].')
            if not _do_units_match(tar_unit, selected['to_unit'].item()):
                return (f'Target bus unit {tar_bus} [{tar_unit}] does not match unit '
                        f'in registered efficiencies.csv [{selected["to_unit"].item()}].')
            return None

        problem = describe_unit_mismatch()
        if problem:
            raise ValueError(f'Mismatching units for {process}: {problem}.')

        return selected['efficiency'].item()

    links = network.links

    for idx, row in links.iterrows():

        link_tech = extract_technology(row.name)

        lead_efficiency = get_efficiency(link_tech, row['bus0'], row['bus1'])
        links.loc[idx, 'efficiency'] = lead_efficiency

        connected = {c for c in links.columns if c.startswith('bus') and row[c] != ""}
        additional_buses = connected - {'bus0','bus1'}

        for b in additional_buses:

            # By design decision every bus busn (n>1, e.g. bus2, bus3, ...) either
            #   case 1: contributes to the output to bus1 (e.g. bus2 feeds into bus1), or
            #   case 2: is fed by bus0.
            # Both efficiencies are looked up; exactly one of them is expected
            # to be nan and the other one is used. If both are nan or both are
            # not nan an error is logged and no efficiency is set.

            # Case 1:
            # Efficiency is provided for the conversion from busn to bus1 and is
            # thus weighted by the primary efficiency of bus1. It has to become
            # negative to correctly account for the direction of the flow.
            follow_efficiency = (-1) * lead_efficiency / get_efficiency(link_tech, row[b], row['bus1'])

            # Case 2:
            # Efficiency is provided for the conversion from bus0 to busn.
            # This type of efficiency does not need to be adjusted.
            regular_efficiency = get_efficiency(link_tech, row['bus0'], row[b])

            candidates = np.array([regular_efficiency, follow_efficiency])
            missing = np.isnan(candidates)

            if missing.all():
                logger.error(f"No efficiency found for link {row.name} between "
                             f"{row[b]} <-> ({row['bus0']} or {row['bus1']}).")
            elif not missing.any():
                logger.error(f"Two efficiencies found for link {row.name} between "
                             f"{row[b]} <-> ({row['bus0']} and {row['bus1']}). "
                             f"Efficiency must by unambigious.")
            else:
                target_column = 'efficiency'+b.replace('bus','')
                links.loc[idx, target_column] = candidates[~missing][0]

    return network

def override_costs_for_special_cases(n):
    """Correct the costs of components which need special treatment.

    Modifies the component tables of ``n`` in place:

    * links with 'battery inverter' in their name: capital cost halved
    * generators with 'LOHC chemical' in their name: capital cost moved to
      marginal cost
    * if 'LOHC' is part of the network name: cost of the LOHC chemical added
      to the capital cost of the LOHC stores and the ship cargo stores

    Parameters
    ----------
    n : pypsa.Network
        Network with costs already attached.

    Returns
    -------
    n : pypsa.Network
        The same network object with corrected costs.
    """

    # battery inverter represented by two links (charging and discharging),
    # while costs in cost data are for bidirectional inverter --> correction here
    links = n.links
    idx = links.filter(like='battery inverter', axis=0).index
    links.loc[idx, 'capital_cost'] /= 2.

    # LOHC chemical can enter the model through generators starting with "LOHC chemical"
    # to substitute for lost LOHC (~0.01% per cycle).
    # The cost for these generators is not determined by their production capacity,
    # but by their production per t of LOHC chemical.
    # Correct this here, assumed name of affected generators
    # 'LOHC chemical ...'
    generators = n.generators
    idx = generators.filter(like='LOHC chemical', axis=0).index
    generators.loc[idx, 'marginal_cost'] = generators.loc[idx, 'capital_cost']
    generators.loc[idx, 'capital_cost'] = 0.

    # By default LOHC implicitly enters the models through stores (due to the e_cyclic property)
    # The investment costs of stores must also account for the cost of chemicals, not only the storage
    # Use the network passed in as argument, not a module-level global.
    if 'LOHC' in n.name:
        unloaded_storage_name = 'LOHC unloaded DBT storage'
        loaded_storage_name = 'LOHC loaded DBT storage'

        # Cost per 1 t LOHC (unloaded)
        lohc_cost = n.generators.loc['LOHC chemical (exp)', 'marginal_cost']

        # Raw costs per 1t of unloaded LOHC for unloaded storage
        idx = n.stores.filter(like=unloaded_storage_name, axis=0).index
        n.stores.loc[idx, 'capital_cost'] += lohc_cost

        # Modified costs per t of loaded LOHC: 1t loaded contains 5.6 wt-% H2 (H18-DBT effective discharge ratio)
        # Do not account for the H2 costs - this is covered by the model, as the store
        # has to be recharged eventually (e_cyclic attribute)
        loaded_unloaded_ratio = pd.read_csv(snakemake.input['efficiencies'],
                                            index_col=['process','from','to']).loc['LOHC hydrogenation',
                                                                                   'LOHC (unloaded)',
                                                                                   'LOHC (loaded)']['efficiency']
        loaded_lohc_cost = lohc_cost/loaded_unloaded_ratio

        idx = n.stores.filter(like=loaded_storage_name, axis=0).index
        n.stores.loc[idx, 'capital_cost'] += loaded_lohc_cost

        # LOHC can also enter through ships cargo store
        # Outbound and inbound journeys identical, e_nom_max is determined by loaded state
        # Raw string: '\d' is an invalid escape sequence in a plain string literal.
        idx = n.stores.filter(regex=r'\d+ cargo', axis=0).index
        n.stores.loc[idx, 'capital_cost'] += loaded_lohc_cost

    return n

def attach_costs(network):
    """
    Attach annualised capital costs to links, stores and generators.

    Costs are calculated from investment costs and FOM using EAC method
    and wacc as specified via config/snakemake.input files.
    Components name need to follow the scheme '<name> (<exp|imp>)'
    where '<name>' must correspond to the component in the costs.csv file.

    Requires efficiencies of the network to be already attached.

    Raises
    ------
    NotImplementedError
        If a link requests cost scaling based on a bus other than bus0/bus1.
    ValueError
        If the unit of the cost data does not fit the unit of the bus.
    """
    wacc_table = pd.read_csv(snakemake.input['wacc'], comment='#', index_col="region")
    wacc = wacc_table.loc[snakemake.wildcards["from"], snakemake.config["scenario"]["wacc"]]

    costs = pd.read_csv(snakemake.input['costs'], index_col=['technology','parameter'])

    def attach_component_costs(network, component):
        """Set 'capital_cost' for every entry of one component table."""

        components = getattr(network, component)
        for idx, row in components.iterrows():

            try:
                tech = costs.loc[extract_technology(idx)]
            except KeyError:
                logger.info(f"No cost assumptions found for {idx}.")
                continue

            # Unit the investment refers to, e.g. 'EUR/(kW)' -> 'kW'
            investment_unit = tech.loc['investment']['unit'].replace("EUR/","").replace("(","").replace(")","")

            # Factor applied to the investment to convert it to the bus unit
            investment_factor = 1.

            # Determine unit of the bus - depends on component type
            #   stores, generators -> attached to one bus, situation clear
            #   links -> attached to >=2 buses, an additional column in the
            #            .csv names the bus to scale to
            bus_unit = None
            if component in ('stores', 'generators'):
                bus_unit = network.buses.loc[row['bus']]['unit']
            elif component == 'links':
                reference_bus = row['scale_costs_based_on']

                if reference_bus not in ['bus0','bus1']:
                    raise NotImplementedError(f'Scaling of costs for link {idx} to others than bus0 or bus1 not implemented.')

                bus_unit = network.buses.loc[row[reference_bus]]['unit']
                if reference_bus == "bus1":
                    # bus1 is output by convention; cost in output units, i.e. scale to input units
                    investment_factor *= row['efficiency']

            # Consistency check: Correct units (ignore first letter for prefixed values)
            # Warning: Does not catch case were bus unit is only a single letter
            if not _do_units_match(investment_unit, bus_unit):
                raise ValueError(f'Could not find matching cost data for {component} "{idx}": '
                                 f'Expected {bus_unit} based on network, but found {investment_unit} in cost data.')

            # Scale between SI prefixes; only k -> M is supported
            bus_prefix = bus_unit[0]
            cost_prefix = investment_unit[0]

            if bus_prefix != cost_prefix:
                if bus_prefix == "M" and cost_prefix == "k":
                    investment_factor *= 1.e3
                else:
                    raise ValueError(f"Cannot scale between {bus_prefix} and {cost_prefix} "
                                     f"for {idx} costs.")

            # Some technologies are without FOM values
            # (e.g. battery capacity where FOM is attributed to the link/inverter/charger capacities)
            try:
                fom = tech.loc['FOM','value']
            except KeyError:
                logger.info(f"No FOM for {idx}, assuming 0%.")
                fom = 0.

            investment = tech.loc['investment','value']*investment_factor
            lifetime = tech.loc['lifetime','value']
            components.loc[idx, 'capital_cost'] = calculate_annuity(investment, fom, lifetime, wacc)

        return network

    for component in ('links', 'stores', 'generators'):
        network = attach_component_costs(network, component)

    return network

def scale_transportation_with_distance(n, link_types=('HVDC overhead', 'pipeline')):
    """Scale transportation of chemical energy carriers by distance.

    Changes costs and efficiencies for links related to the link_types based on the distance
    between exporter and importer. Does not touch the efficiencies or costs of shipping
    (they are treated separately).

    Parameters
    ----------
    n : pypsa.Network
        Network with costs and efficiencies already attached to the links.
    link_types : iterable of str
        Substrings identifying the links to scale; each entry must be a key
        of the internal mapping ('HVDC overhead', 'pipeline').

    Returns
    -------
    n : pypsa.Network
        The same network object with 'length', 'capital_cost' and all
        'efficiency*' columns of the affected links updated.
    """

    links = n.links
    distances = pd.read_csv(snakemake.input['distances'], comment='#', quotechar='"')

    mapping = {
        'HVDC overhead':
            {
                'distance_type': 'as-the-crow-flies',
                'detour_factor_key': 'transmission_line'
            },
        'pipeline':
            {
                'distance_type': 'as-the-crow-flies',
                'detour_factor_key': 'pipeline'
            }
    }

    distances = distances[(distances['region_a']==snakemake.wildcards['from']) &
                          (distances['region_b']==snakemake.wildcards['to'])].set_index('type')

    efs = [c for c in links.columns if c.startswith('efficiency')]

    for link_type in link_types:

        m = mapping[link_type]
        # NOTE(review): the detour factor is read from the config but never
        # applied to the distance below - confirm whether this is intended
        # (e.g. because the distances already include detours).
        detour_factor = snakemake.config['detour_factors'][m['detour_factor_key']]
        distance = distances.loc[m['distance_type'], 'value']

        for idx, row in links.query('name.str.contains(@link_type)', engine='python').iterrows():
            links.loc[idx, 'length'] = distance
            links.loc[idx, 'capital_cost'] *= distance # capital cost in EUR/km

            # Efficiencies in p.u./1000km.
            # Sign and magnitude are handled separately to avoid imaginary
            # numbers for negative efficiencies as base (>=2 input buses for links).
            # Builtin float is used: the alias np.float was removed from NumPy.
            base = row[efs].astype(float)
            links.loc[idx, efs] = np.sign(base)*(np.abs(base)**(distance/1.e3))

    return n

def add_shipping(n):
    """Add optional shipping routes to the network.

    Checks whether file "<ESC>/ships.csv" exists and - if it does - constructs
    a shipping route with multiple convoys (as optimisation options) for this route
    using standard PyPSA components (one bus, one store and two links per convoy).

    Limitation: ONLY SUPPORTS ONE SHIPPING CONNECTION PER NETWORK AT THE MOMENT!

    Parameters
    ----------
    n : pypsa.Network
        Network with snapshots set and the buses referenced in ships.csv.

    Returns
    -------
    n : pypsa.Network
        The same network object; unchanged if no ships.csv exists.

    Raises
    ------
    ValueError
        If ships.csv does not define exactly one shipping lane, if the unit
        of the cost data does not match the bus unit or if the capital cost
        can not be calculated.
    """

    fn = Path(snakemake.input['network'])/"ships.csv"

    # Network without shipping routes
    if not fn.exists():
        return n

    ships = pd.read_csv(fn, comment='#', index_col='name')

    props = pd.read_csv(snakemake.input['shipping_properties'], comment='#', index_col=['name','variable'])

    distances = pd.read_csv(snakemake.input['distances'], comment='#', quotechar='"')

    if len(ships.index) != 1:
        raise ValueError("Number of shipping lanes defined in ships.csv must be exactly one.")

    ship = ships.iloc[0]

    props = props.loc[ship.name]

    # Builtin int is used throughout: the alias np.int was removed from NumPy.
    loading_time = int(np.floor(props.loc['(un-) loading time', 'value']))
    unloading_time = loading_time
    loading_rate_pu = 1./loading_time
    unloading_rate_pu = loading_rate_pu

    distance = distances.query(f"""region_a == '{snakemake.wildcards['from']}' and """
                    f"""region_b == '{snakemake.wildcards['to']}' and """
                    f"""type == 'sea route'""", engine='python')['value'].item()

    travel_time = int(np.ceil(distance / props.loc['average speed','value']))

    # Round trip time for a convoy (loading, travel, unloading, return trip)
    round_trip_time = loading_time + travel_time + unloading_time + travel_time

    # Number of full journeys (round-trip-journey) possible for convoy along sea route
    journeys = int(np.floor(n.snapshots.shape[0]/round_trip_time))

    # By constructing the tightest shipping schedule starting at the beginning of the year
    # we have this amount of hours where the importing harbour is not served...
    annual_shipping_gap = n.snapshots.shape[0]%round_trip_time

    # ... as we later construct additional shipping convoys by simply shifting the schedule,
    # this will create a nasty gap in the supply chain, resulting in weird results in the optimisation.
    # We avoid this by smoothing the supply: the shipping duration is artificially prolonged to reduce this gap
    # Can be thought of as something like a buffer, which is near identically distributed across all journeys
    additional_forward_travel_time = int(np.floor(annual_shipping_gap / journeys / 2))
    # return trip can take a bit longer (max 1 additional snapshot)
    additional_return_travel_time = int(np.floor(annual_shipping_gap / journeys - additional_forward_travel_time))

    forward_travel_time = travel_time+additional_forward_travel_time
    return_travel_time = travel_time+additional_return_travel_time

    updated_round_trip_time = loading_time + forward_travel_time + unloading_time + return_travel_time
    logger.info(f"Increasing the round-trip travel time from {round_trip_time}h to "
                f"{updated_round_trip_time}h (+{(updated_round_trip_time/round_trip_time-1)*100:.2f}%) "
                f"to achieve more levelled supply by ship.")

    # One round-trip loading schedule for earliest convoy in year
    loading_schedule = np.concatenate(([loading_rate_pu]*loading_time,
                                        [0]*forward_travel_time,
                                        [0]*unloading_time,
                                        [0]*return_travel_time))

    # One round-trip unloading schedule for earliest convoy in year
    unloading_schedule = np.concatenate(([0]*loading_time,
                                         [0]*forward_travel_time,
                                         [unloading_rate_pu]*unloading_time,
                                         [0]*return_travel_time
                                        ))

    # Numbers of convoys (base convoy + convoys which can be loaded without competing for the
    #  loading infrastructure while the base convoy is on its way)
    # if loading_time == unloading_time this approach results in no clashes for the unloading infrastructure.
    convoy_number = 1 + int(np.floor((forward_travel_time+return_travel_time+unloading_time)/loading_time))

    # Create full year schedule for loading:
    # Left-over days at end of year (which can not be used for a full round-trip journey)
    # are filled with 0s (=no journey/anchored)
    loading_schedule = np.concatenate([loading_schedule]*journeys)
    tmp = np.zeros(n.snapshots.shape[0])
    tmp[:loading_schedule.shape[0]] = loading_schedule
    loading_schedule = tmp

    # Create full year schedule for unloading:
    # (Basically the same as for loading, could use np.roll here as rates and durations for loading
    #  and unloading are identical in the current model version)
    # Left-over days at end of year (which can not be used for a full round-trip journey)
    # are filled with 0s (=no journey/anchored)
    unloading_schedule = np.concatenate([unloading_schedule]*journeys)
    tmp = np.zeros(n.snapshots.shape[0])
    tmp[:unloading_schedule.shape[0]] = unloading_schedule
    unloading_schedule = tmp

    # Calculate energy transport efficiency for the trip
    # losses due to boil-off
    boil_off = (1-props.loc['boil-off','value']/100)**forward_travel_time
    # losses from ship propulsion (outward and return journey)
    energy_demand = (1-2*distance*props.loc['energy demand','value']/props.loc['capacity','value'])
    # take whatever requires more energy (boil-off can be used by propulsion or propulsion uses cargo)
    shipping_efficiency = np.min([energy_demand, boil_off])

    # Additional energy losses from (un-) loading the cargo
    loading_efficiency = (1-props.loc['(un-) loading losses','value']/100)
    unloading_efficiency = loading_efficiency

    # Calculate investment costs per gross MWh capacity
    wacc = pd.read_csv(snakemake.input['wacc'], comment='#', index_col="region")
    wacc = wacc.loc[snakemake.wildcards["from"], snakemake.config["scenario"]["wacc"]]

    costs = pd.read_csv(snakemake.input['costs'], index_col=['technology','parameter'])
    costs = costs.loc[ship.name]

    # Consistency check: whether units match
    # (bus data is taken from the network passed in as argument, not from a module-level global)
    unit_costs = costs.loc['capacity']['unit']
    unit_bus = n.buses.loc[ship['bus0']]['unit']
    if not unit_costs.startswith(unit_bus):
        raise ValueError(f"Unit mismatch for shipping capacity between network ({unit_bus}) "
                         f"and cost database ({unit_costs}).")

    try:
        capital_cost = calculate_annuity(costs.loc['investment','value'],
                                         costs.loc['FOM','value'],
                                         costs.loc['lifetime','value'],
                                         wacc)
        capital_cost /= costs.loc['capacity','value']
    except Exception as err:
        # 'Exception' instead of a bare except: do not intercept
        # KeyboardInterrupt/SystemExit and keep the original cause attached.
        raise ValueError(f"Exception calculating capital cost for {ship.name}."
                         f"Missing cost or shipping property entries") from err

    if distance == 0:
        # Treat the special case, where distance between exporter and importer is zero.
        # (e.g. same exporter as importer region)

        logger.info(f"No distance between exporter and importer. "
                    f"Adding a direct pseudo connection without shipping schedule.")

        convoy_number = 1

        loading_schedule = np.ones(n.snapshots.shape[0])
        unloading_schedule = loading_schedule

    else:
        logger.info(f"Adding {convoy_number} shipping convoys to shipping route.")

    for i in range(convoy_number):

        ship_bus = f"{ship.name} convoy {i+1}"

        n.add("Bus",
              name=ship_bus,
              carrier=n.buses.loc[ship.loc['bus0'],'carrier'],
              unit=n.buses.loc[ship.loc['bus0'],'unit'],
             )

        n.add("Store",
              name=f"{ship_bus} cargo",
              bus=ship_bus,
              e_nom_extendable=True,
              e_cyclic=True, # Ships may start at the end of the year and deliver at the beginning of the year
              capital_cost=capital_cost,
             )

        n.add("Link",
              name=f"{ship_bus} loading",
              bus0=ship.loc['bus0'],
              bus1=ship_bus,
              efficiency=loading_efficiency,
              # Capacity expansion at point of export/departure
              # ship capacity taken into account as gross capacity before transport losses/demand
              p_nom_extendable=True,
              p_nom=0.,
              capital_cost=0.,
              # Loading extracts energy from bus and happens at max rate and at fixed times
              # Rolling the schedules ensures there is no overlap between convoys
              p_min_pu=np.zeros_like(loading_schedule),
              p_max_pu=np.roll(loading_schedule,i*loading_time),
             )

        n.add("Link",
              name=f"{ship_bus} unloading",
              bus0=ship_bus,
              bus1=ship.loc['bus1'],
              efficiency=shipping_efficiency*unloading_efficiency,
              p_nom_extendable=True,
              p_nom=0.,
              capital_cost=0.,
              # Unloading at max rate and at fixed times
              # Rolling the schedules ensures there is no overlap between convoys
              p_min_pu=np.zeros_like(unloading_schedule),
              p_max_pu=np.roll(unloading_schedule,i*loading_time),
             )

    return n

In [None]:
if __name__ == "__main__":

    configure_logging(snakemake)

    # Processing steps applied to the freshly created network.
    # DO NOT CHANGE THIS ORDER; dependencies between methods not explicit
    build_steps = (
        attach_efficiencies,
        attach_costs,
        scale_transportation_with_distance,
        add_shipping,
        override_costs_for_special_cases,
    )

    network = create_network()
    for step in build_steps:
        # Every step receives the network and hands it back
        network = step(network)

    network.export_to_netcdf(snakemake.output['network'])