In [None]:
# Snakemake inputs: the `year` wildcard and the `date_range` param.
# Each date_range entry is a string that gets prefixed with "<year>-"
# (presumably "MM-DD"-style suffixes — confirm against the Snakefile).
year = snakemake.wildcards.year

date_range = ["-".join((year, date)) for date in snakemake.params.date_range]

In [None]:
import logging

import atlite

logging.basicConfig(level=logging.INFO)


import geopandas as gpd
import pandas as pd
import xarray as xr
from shapely.geometry import Polygon

In [None]:
# Display the atlite version in use for this run.
atlite.__version__

In [None]:
# Display the xarray version in use for this run.
xr.__version__

In [None]:
# Development aid: print the source of atlite's convert_and_aggregate.
# NOTE(review): IPython magic; not needed for the outputs written below.
%psource atlite.Cutout.convert_and_aggregate

# Desired regions


In [None]:
# Regions to keep; compared against `country_code` when the plant table is
# filtered further down. `sorted()` returns a new list, so the list owned by
# `snakemake.params` is not mutated and non-list containers also work.
desired_regions = sorted(snakemake.params.aggregated_regions)
desired_regions

## Shape level

To get the desired data at shape level, the first input is the shapefile itself.


In [None]:
# Shapes from the euroshape input, indexed by their CNTR_CODE column.
europe = gpd.read_file(snakemake.input.euroshape).set_index(["CNTR_CODE"])

In [None]:
# Inspect the shapes loaded from the euroshape input.
europe

In [None]:
# NOTE(review): `year` is already read from the same wildcard in the first
# cell; this re-assignment is redundant.
year = snakemake.wildcards.year

In [None]:
# Quick visual check of the shapes (trailing `;` suppresses the repr).
europe.plot(figsize=(15, 15));

In [None]:
# Open the weather cutout; a chunk size of 2000 along `time` is passed to atlite.
cutout = atlite.Cutout(path=snakemake.input.weatherdata, chunks={"time": 2000})

In [None]:
# Run atlite's preparation step for the cutout.
cutout.prepare()

In [None]:
# Order the shapes by their index (CNTR_CODE).
europe = europe.sort_index()

# Hydro


In [None]:
fn = snakemake.input.eiahydrogen

In [None]:
# in billion KWh/a = TWh/a
eia_hydro_gen = (
    pd.read_csv(fn, skiprows=4, index_col=1, na_values=[" ", "--"])
    .drop(["Unnamed: 0", "Unnamed: 2"], axis=1)
    .dropna(how="all")
)
eia_hydro_gen

In [None]:
import pycountry

In [None]:
# Translate the EIA country names into ISO 3166 alpha-2 codes.
# Names pycountry cannot resolve are kept unchanged so they can be renamed
# by hand afterwards.
countries_iso2c = []
for country in eia_hydro_gen.index.values:
    try:
        countries_iso2c.append(pycountry.countries.get(name=country).alpha_2)
    except (AttributeError, LookupError):
        # AttributeError: the lookup returned None (no `.alpha_2`).
        # LookupError (incl. KeyError): some pycountry releases raise on a
        # failed lookup instead of returning None.
        countries_iso2c.append(country)

In [None]:
eia_hydro_gen.index = pd.Index(countries_iso2c, name="countries")
eia_hydro_gen

In [None]:
eia_hydro_gen.rename(
    index={"Kosovo": "KV", "GB": "UK", "Czech Republic": "CZ", "Macedonia": "MK"},
    inplace=True,
)
eia_hydro_gen

In [None]:
# Transpose so that countries become the columns and the original CSV columns
# (presumably years — confirm) become the rows.
# NOTE(review): not idempotent — re-running this cell transposes back.
eia_hydro_gen = eia_hydro_gen.T
eia_hydro_gen

In [None]:
# Unit conversion TWh/a -> MWh/a (factor 1e6).
# NOTE(review): not idempotent — re-running this cell scales again.
eia_hydro_gen = eia_hydro_gen * 1e6  # in MWh/a
eia_hydro_gen

In [None]:
# Country codes present in the shapefile (may contain repeats).
europe.index

In [None]:
# Restrict / reorder the EIA statistics to the unique shapefile country codes.
# Codes absent from the EIA table yield all-NaN columns.
eia_stats = eia_hydro_gen.reindex(columns=europe.index.drop_duplicates())
eia_stats

In [None]:
eia_stats.columns

In [None]:
# Merge all geometries sharing a country code into one shape per country.
europe2 = europe.dissolve(by=europe.index)

In [None]:
europe2

In [None]:
# Name the index "countries"; later cells refer to a "countries" dimension on
# the runoff result (presumably taken from this index name — confirm).
europe2.index.name = "countries"

In [None]:
europe2

In [None]:
# Runoff aggregated to the country shapes and normalised with the yearly EIA
# statistics, then cut down to the requested date window.
runoff_by_country = cutout.runoff(
    shapes=europe2,
    smooth=True,
    lower_threshold_quantile=True,
    normalize_using_yearly=eia_stats,
)
inflow = runoff_by_country.sel(time=slice(date_range[0], date_range[1]))

In [None]:
# scale the inflow by installed capacities
# df_installed_cap = pd.read_csv(
#     snakemake.input.hydroinstalledcap,
#     sep="\t",
#     usecols=["Year", "AreaName", "MapCode", "ProductionType", "highRES_Europe"],
# )
# df_installed_cap = df_installed_cap.loc[
#     df_installed_cap["ProductionType"] != "Hydro Pumped Storage"
# ]

# Plant-level hydro capacities from the JRC hydro power plant database.
# NOTE(review): the path is hard-coded instead of coming from snakemake.input.
df_installed_cap = (
    pd.read_csv("resources/jrc-hydro-power-plant-database.csv")
    # "EL" -> "GR"; applies to every cell of the frame, not just country_code.
    .replace({"EL": "GR"})
    # Drop HPHS plants and countries not in desired_regions.
    .loc[
        lambda plants: (plants["type"] != "HPHS")
        & plants["country_code"].isin(desired_regions)
    ]
    .sort_values("country_code")
    .rename(columns={"installed_capacity_MW": "cap_mw", "country_code": "iso2"})
    [["iso2", "type", "cap_mw", "lat", "lon"]]
)

In [None]:
# df_installed_cap.MapCode.unique()

In [None]:
# df_cap_shares = (
#     df_installed_cap.loc[:, ["MapCode", "ProductionType", "highRES_Europe"]]
#     .set_index(["ProductionType", "MapCode"])
#     .unstack("ProductionType")
#     .loc[:, "highRES_Europe"]
#     .assign(
#         Hydro=lambda x: x["Hydro Run-of-river and poundage"]
#         + x["Hydro Water Reservoir"],
#         share_ror=lambda x: x["Hydro Run-of-river and poundage"] / x["Hydro"],
#         share_res=lambda x: x["Hydro Water Reservoir"] / x["Hydro"],
#     )
#     .loc[:, ["share_ror", "share_res"]]
# )

# Per-country split of hydro capacity between run-of-river (HROR) and
# reservoir (HDAM) plants, expressed as shares of their sum.
df_cap_shares = (
    df_installed_cap.groupby(["type", "iso2"])["cap_mw"]
    .sum()
    # One column per plant type; countries lacking a type get 0 capacity.
    .unstack("type")
    .fillna(0)
    .assign(
        Hydro=lambda caps: caps["HROR"] + caps["HDAM"],
        share_ror=lambda caps: caps["HROR"] / caps["Hydro"],
        share_res=lambda caps: caps["HDAM"] / caps["Hydro"],
    )
    .loc[:, ["share_ror", "share_res"]]
    .rename_axis(index={"iso2": "countries"})
)

df_cap_shares

In [None]:
inflow

In [None]:
inflow_res = (
    inflow.assign_coords(time=range(0, len(inflow.time)))
    .assign_coords(technology="HydroRes")
    .expand_dims("technology")
    .stack(final=["time", "countries", "technology"])
    .round(2)
    .to_pandas()
    .reset_index()
)

inflow_res.columns = inflow_res.columns.map(str)
inflow_res = inflow_res.rename(columns={"0": "value"})

In [None]:
df_cap_shares.loc[:, ["share_res"]]

In [None]:
inflow_res

In [None]:
inflow_res.groupby("countries").sum()

In [None]:
# Installed reservoir (HDAM) capacity per country; its index is the list of
# countries that have reservoir hydro.
df_installed_res = (
    df_installed_cap.loc[df_installed_cap["type"] == "HDAM", ["iso2", "cap_mw"]]
    .groupby("iso2")
    .sum()
)

# Reservoir inflow = country inflow x reservoir share of hydro capacity.
hydro_res_inflow = (
    inflow_res.set_index(["countries", "time", "technology"])
    .mul(
        df_cap_shares.rename(columns={"share_res": "value"}),
        axis="index",
        level="countries",
    )
    # Column alignment in the product adds a share_ror column; discard it.
    .drop(columns=["share_ror"])
    .reset_index()
    .set_index("time")
    .round(0)
    # Keep only countries with reservoir capacity. Without this, countries
    # with no hydro in the dataset would carry NA inflows and countries with
    # 0 installed reservoir capacity would carry 0 inflows.
    .query("countries in @df_installed_res.index.values")
)
hydro_res_inflow.to_csv(snakemake.output["hydroresinfl"])

## Run-of-river (ROR) capacity factors


In [None]:
# Run-of-river share of hydro capacity per country.
df_cap_shares.loc[:, ["share_ror"]]

In [None]:
# "nuts2" resolution: split each country's run-of-river inflow across the
# euroshape regions in proportion to the HROR capacity located in each region,
# then divide by that regional capacity to obtain capacity factors.
# NOTE(review): this and the "region" cell below are independent `if`s; any
# other `spatial` wildcard value leaves `ror_capfac` undefined.
if snakemake.wildcards.spatial == "nuts2":

    # Country-level inflow attributed to run-of-river (inflow * share_ror),
    # as a one-column ("value") frame.
    ror_inflow = (
        inflow.to_pandas()
        .T.stack()
        .to_frame()
        .rename(columns={0: "value"})
        .mul(
            (
            df_cap_shares.rename(columns={"share_ror": "value"})
                .loc[:, ["value"]])
            ))

    # Run-of-river plants only.
    df_installed_ror=df_installed_cap.query("type == 'HROR'")

    # Turn plant coordinates into point geometries (lon/lat, EPSG:4326).
    df_installed_ror=gpd.GeoDataFrame(
                df_installed_ror,
                geometry=gpd.points_from_xy(df_installed_ror.lon, df_installed_ror.lat),
                             crs="epsg:4326")

    # NOTE(review): the shapefile is read unfiltered here; the commented-out
    # variant below restricted it to LEVL_CODE == 2 and indexed it by NUTS_ID.
    euro_nuts=gpd.read_file(snakemake.input.euroshape)
    
    # euro_nuts= (
    #         gpd.read_file(snakemake.input.euroshape)
    #         .replace({"GB": "UK", "EL": "GR"})
    #         .query("LEVL_CODE == 2 & CNTR_CODE in @desired_regions ")
    #         .rename(columns={"NUTS_ID": "index"})
    #         .loc[:,["index","CNTR_CODE","geometry"]]
    #         .set_index(["index"])
    #     )

    # Issue spotted below that using either iso2 from euro shapefile
    # or hydro dataset after join can lead to different country assignments
    # for a given plant. This may go away when we use a higher res nuts
    # shapefile. For now, take nuts shapefile assignment.

    # Assign each plant to the shape containing it; `index` is the euro_nuts
    # index exposed by reset_index().
    # NOTE(review): `op=` was deprecated in geopandas 0.10 in favour of
    # `predicate=` and removed in 1.0 — update once the pinned version allows.
    # NOTE(review): with the plain read_file above, `index` is presumably the
    # default integer index rather than a NUTS id — confirm, since the areas
    # cell concatenates it with strings.
    df_installed_ror = (gpd.sjoin(euro_nuts, df_installed_ror, op='contains')
                    .reset_index()
                    .loc[:,["index","CNTR_CODE","cap_mw"]]
                    .rename(columns={"CNTR_CODE":"iso2"}))
    
    # Regional share of the country's HROR capacity. The country-level sum
    # presumably also carries an aggregated `index` column, which is dropped
    # after the division.
    ror_share =(df_installed_ror.groupby(["iso2","index"]).sum()
                        .div(df_installed_ror.groupby(["iso2"]).sum())
                        .drop(columns="index")
                        .rename_axis(index={"iso2":"countries"}))

    # Regional inflow = regional capacity share * country run-of-river inflow.
    ror_inflow=(ror_share.reset_index()
                    .merge(ror_inflow.reset_index(),on="countries")
                    .rename(columns={"cap_mw":"ror_share_reg","value":"ror_inflow"})
                    .assign(ror_inflow_reg=lambda x: x["ror_share_reg"]*x["ror_inflow"])
                    .set_index(["index","time"])
                    .loc[:,["ror_inflow_reg"]])

    # Capacity factor = regional inflow / regional installed HROR capacity,
    # returned as a Series indexed by (index, time).
    ror_capfac=(ror_inflow.rename(columns={"ror_inflow_reg":"value"})
            .div(
                df_installed_ror.groupby("index").sum()
                .rename(columns={"cap_mw":"value"}),
                axis="index", level="index")
            .loc[:,"value"])

In [None]:
# "region" resolution: one capacity-factor series per country.
# NOTE(review): independent of the "nuts2" cell above; if `spatial` matches
# neither value, `ror_capfac` and the reshaped `df_installed_ror` are never set.
if snakemake.wildcards.spatial == "region":

    # As we use jrc dataset currently there is no need to 
    # select for > 0 installed capacity as we select for
    # HROR only. May need this in future.

    # Installed run-of-river (HROR) capacity per country, as a one-column
    # ("value") frame indexed by "countries".
    df_installed_ror = (
    df_installed_cap.loc[
        df_installed_cap["type"] == "HROR"
    ]
    .set_index("iso2")
    .loc[:, "cap_mw"]
    .groupby("iso2").sum()
    .rename_axis(index="countries")
    .to_frame()
    .rename(columns={"cap_mw":"value"})
    )
    
    # Capacity factor = inflow * share_ror / installed HROR capacity, returned
    # as a Series indexed by (index, time) where `index` holds the country code.
    ror_capfac = (
        inflow.to_pandas()
        .T.stack()
        .to_frame()
        .rename(columns={0: "value"})
        .mul(
            (
                df_cap_shares.reset_index()
                .rename(columns={"share_ror": "value"})
                .set_index("countries")
                .loc[:, ["value"]]
            )
        )
        .div(df_installed_ror, axis="index", level="countries")
        .rename_axis(index={"countries":"index"})
        .reset_index()
        # Query seems to be a bit tricky with multiindex so reset
        # and apply to columns then set_index.
        # Keeps only countries that have HROR capacity.
        .query("index in @df_installed_ror.index.values")
        .set_index(["index","time"])
        .loc[:, "value"]
        )

    # Give df_installed_ror the `iso2` and `index` columns that the areas cell
    # groups on; at this resolution the region id equals the country code.
    df_installed_ror=(df_installed_ror.reset_index()
                    .assign(index=lambda x: x["countries"])
                    .rename(columns={"countries":"iso2"})
                     )

In [None]:
# One line per (country, region) pair that has run-of-river capacity, written
# as "HydroRoR.<iso2>.<index> inf" without header or index column.
ror_areas = df_installed_ror.groupby(["iso2", "index"]).sum().reset_index()
area_lines = ("HydroRoR." + ror_areas["iso2"] + "." + ror_areas["index"] + " inf").rename("out")
area_lines.to_csv(snakemake.output["areashydro"], header=False, index=False)

In [None]:
ror_capfac

In [None]:
ror_capfac = ror_capfac.where(ror_capfac <= 1, 1)

In [None]:
#ror_capfac.unstack().T.plot(subplots=True, figsize=(10, 50), ylim=(0, 1.1))

In [None]:
# Convert the (index, time)-indexed Series into an xarray DataArray.
capacity_factor_hydro_ror = ror_capfac.to_xarray() #.rename({"countries": "index"})

In [None]:
capacity_factor_hydro_ror

In [None]:
# The array name is reused as the technology label in the export cell.
capacity_factor_hydro_ror.name = "HydroRoR"

In [None]:
# Number of time steps.
capacity_factor_hydro_ror.time.size

In [None]:
# Export run-of-river capacity factors in long format: time is replaced by a
# 0-based counter, the array name becomes the technology label, and values
# are rounded to 2 dp. Written without a header row.
(
    capacity_factor_hydro_ror.assign_coords(
        technology=capacity_factor_hydro_ror.name,
        time=range(capacity_factor_hydro_ror.time.size),
    )
    .expand_dims("technology")
    .stack(final=["time", "technology", "index"])
    .round(2)
    .to_pandas()
    .to_csv(snakemake.output["hydrororcapfac"], header=False)
)