In [None]:
import logging

import atlite
import shapely.validation as shpval
import fiona
import geopandas as gpd
import pandas as pd
import pycountry

logger = logging.getLogger(__name__)

from _helpers import configure_logging

if __name__ == "__main__":
    # Build the onshore and offshore masks of a region.
    #
    # Reads (via the `snakemake` object):
    #   input["gadm"]              GADM file with one layer per admin level
    #   input["eez"]               EEZ shapes with an "ISO_TER1" column
    #   params.region_members      names to look up in the GADM "NAME_#" columns
    #   params.offshore_proximity  buffer distance around the onshore boundary
    #   wildcards.region           region name (used for log messages only)
    # Writes:
    #   output["gpkg"]             GeoPackage with rows "onshore" and "offshore"
    #
    # Raises ValueError if none of the region members is found in GADM.

    configure_logging(snakemake)

    gadm_path = snakemake.input["gadm"]

    # Only allow layers 0 to 2 (no GADM 3 and 4) for better performance
    # by selectively loading. Layer number is always last character in layer
    # name; layers not ending in a digit cannot be mapped to a "NAME_#" column
    # and are skipped instead of crashing the int() conversion.
    # sorted(): ascending, layer 0, ..., layer 2; defines search order.
    layers = sorted(
        layer_name
        for layer_name in fiona.listlayers(gadm_path)
        if layer_name[-1].isdigit() and int(layer_name[-1]) < 3
    )

    # Consume members from set if found. Search from coarse (GADM0) to fine (GADM1, 2)
    region_members = set(snakemake.params.region_members)

    # Holds all found geometries for region members from different layers
    member_geometries = []

    for layer in layers:
        # Load GADM layer and select all shapes associated with the region
        gadm = gpd.read_file(gadm_path, layer=layer)

        # Column to search in; column name "NAME_#" with "#"=layer number
        name_column = f"NAME_{int(layer[-1])}"

        # Keep only the rows of this layer which are (still searched) region members
        gadm = gadm[gadm[name_column].isin(region_members)]

        if not gadm.empty:
            # Homogeneous column names across all layers.
            # Columns are assigned positionally on purpose: for layer 0 the
            # selection contains "NAME_0" twice, which a name-based rename
            # could not map to two different target names.
            gadm = gadm[["NAME_0", name_column, "geometry"]]
            gadm.columns = ["country", "member_name", "geometry"]

            member_geometries.append(gadm)

            # Remove found regions from search list
            region_members = region_members - set(gadm["member_name"].values)

        # Stop searching early if all region members have been found
        if not region_members:
            break

    if not member_geometries:
        # Nothing to build a mask from: fail here with a descriptive message
        # instead of letting pd.concat fail on an empty list further down.
        msg = (
            f"No matching entries on GADM found for region '{snakemake.wildcards['region']}'. "
            f"Requested region members: {region_members}. "
            f"Check correct and matching spelling with https://gadm.org ."
        )
        logger.error(msg)
        raise ValueError(msg)

    if region_members:
        # Some (but not all) members were matched; the masks are built from
        # the matched ones only, so make the omission visible.
        logger.warning(
            f"Region members not found on GADM for region "
            f"'{snakemake.wildcards['region']}': {region_members}. "
            f"Check correct and matching spelling with https://gadm.org ."
        )

    # Combine found onland GADM geometries
    member_geometries = gpd.GeoDataFrame(
        pd.concat(member_geometries, ignore_index=True).reset_index(drop=True),
        crs=member_geometries[0].crs,
    )

    # Remember the CRS of the input data for converting back before saving.
    # Taken from the combined frame rather than from the loop variable `gadm`.
    crs_org = member_geometries.crs

    # Read EEZs for potential offshore locations and
    # add EEZs for all involved countries
    # (neglecting proximity to specified members for now)
    eez = gpd.read_file(snakemake.input["eez"])

    # Drop entries without ISO_TER1 entry
    # These are mostly small island states + Hawaii
    eez = eez.dropna(axis=0, how="any", subset=["ISO_TER1"])

    # Determine associated country names for EEZs.
    # A code for which pycountry returns no entry maps to None (instead of
    # raising AttributeError) and is dropped by the country filter below.
    eez["country"] = eez["ISO_TER1"].map(
        lambda code: getattr(pycountry.countries.get(alpha_3=code), "name", None)
    )

    # Drop unnecessary columns
    eez = eez[["country", "geometry"]]

    # Keep only EEZs of countries which contain at least one region member
    eez = eez[eez["country"].isin(member_geometries["country"].unique())]

    # .buffer(...) operation used later is significantly faster and more efficient
    # on exploded shape compared to MultiPolygon.
    # -> explode -> buffer -> union
    member_geometries = member_geometries.explode(ignore_index=True)

    ## Switch to CRS with m[etres] as unit

    # Use Mollweide CRS for estimating offshore distance and adjacency in m[etre]
    # Mollweide is not very accurate at high latitudes, but sufficient for
    # this initial step and large offshore_proximity values.
    # See: https://epsg.io/54009
    # and Usery and Seong (2001), doi:10.1559/152304001782153053
    crs_m = "ESRI:54009"

    member_geometries = member_geometries.to_crs(crs_m)
    eez = eez.to_crs(crs_m)

    # For determining offshore regions we only need to buffer the boundary
    # of the onshore region + simplification doesn't hurt/might improve niceness of shapes
    mgb = member_geometries.copy(deep=True)
    mgb["geometry"] = mgb.boundary
    mgb["geometry"] = mgb.simplify(100)

    # First only consider offshore regions which are adjacent to any onshore region
    # Use buffer to prevent small gaps to overeagerly exclude an offshore region
    eez = eez[eez.geometry.intersects(mgb.buffer(100).unary_union)]

    logger.info(
        f"{len(eez)} offshore region(s) found for region '{snakemake.wildcards.region}'."
    )

    # Determine offshore areas adjacent to onshore region and within a proximity radius
    if len(eez) > 0:
        # Select only offshore locations within <offshore_proximity> m[eters] of
        # an onshore location which is part of the region
        # (=offshore locations accessible from the region under consideration)
        offshore = eez.intersection(
            mgb.buffer(snakemake.params.offshore_proximity).unary_union
        )

        # Combine offshore region into a single geometry and clean it up
        offshore = offshore.unary_union
        offshore = offshore.simplify(0)
        offshore = shpval.make_valid(offshore)
    else:
        # No adjacent EEZ: the "offshore" row is saved with an empty geometry
        offshore = None

    # Merge onshore regions into one geometry and clean it up
    onshore = member_geometries.unary_union
    onshore = onshore.simplify(0)
    onshore = shpval.make_valid(onshore)

    ## Combine resulting region masks and convert back to original CRS for saving
    region_masks = gpd.GeoDataFrame(
        gpd.GeoSeries(
            [onshore, offshore],
            index=["onshore", "offshore"],
            name="geometry",
            crs=crs_m,
        )
    )
    region_masks = region_masks.to_crs(crs_org)

    # Save all region geometries into single file
    region_masks.to_file(snakemake.output["gpkg"], driver="GPKG")