In [None]:
# TODO: add the full GADM-EEZ pipeline management here: download, join and merge both datasets.
import os
import subprocess
from pathlib import Path

import geopandas as gpd
import pandas as pd
from shapely import speedups
from shapely.geometry import MultiPolygon, Polygon, box

# Turn on shapely's optional speedups before any geometry work is done.
# NOTE(review): `shapely.speedups` is deprecated in Shapely 2.x — confirm the
# installed version still provides it.
speedups.enable()

In [None]:
# FIXME: WORK_DIR depends on where the notebook kernel is running, so every
# path below changes with the kernel's working directory.
WORK_DIR = Path(os.getcwd())
# `datasets` folder located three directory levels above the working directory.
BASE_DIR = f'{WORK_DIR.parents[2]}/datasets'

# TODO: Read the expected data directory from an environment variable.
# Fail early when the notebook is started from an unexpected location.
assert BASE_DIR == '/home/jovyan/work/datasets', f'{BASE_DIR} is not the correct directory'

# Folders for the raw inputs and the processed outputs.
IN_FOLDER = Path(f'{BASE_DIR}/raw')
OUT_FOLDER = Path(f'{BASE_DIR}/processed')

# Vector file whose geometries define the extent used to filter the layers.
extent_mask_path = Path(f'{BASE_DIR}/raw/extent-layer-creation/test_extent_2000_simp_convex.shp')

In [None]:
def mask_gdf(df, extent_mask_path):
    """
    Keep only the rows of `df` whose geometry intersects the extent mask.

    Parameters
    ----------
    df : geopandas.GeoDataFrame
        Layer to filter. It is not modified.
    extent_mask_path : str or pathlib.Path
        Vector file read with `gpd.read_file`; all of its geometries are
        merged into a single mask geometry.

    Returns
    -------
    geopandas.GeoDataFrame
        The intersecting rows of `df`, still in the CRS of `df`, without
        null geometries and with a fresh 0..n-1 index.
    """
    extent = gpd.read_file(extent_mask_path)
    # Reproject only for the intersection test so the returned rows keep the
    # CRS of `df`.
    mask = df.to_crs(extent.crs).intersects(extent.unary_union)
    # `reset_index(..., inplace=True)` returns None, which made this function
    # return None; return the re-indexed copy instead.
    return df.loc[mask].dropna(subset=['geometry']).reset_index(drop=True)

In [None]:
def interior_to_polygon(geom):
  """
  Return the interior ring(s) (holes) of a polygon as polygon geometry.

  Parameters
  ----------
  geom : shapely Polygon or geopandas GeoSeries
    Geometry to inspect. For a GeoSeries only the interiors of its first
    geometry are used.

  Returns
  -------
  A Polygon when there is exactly one interior ring, a MultiPolygon when there
  are several, or the string "NA" (after printing a message) when the input
  is not a polygon or has no interior ring.
  """
  kinds = geom.geom_type
  # A single shapely geometry reports its type as a plain string, a GeoSeries
  # as a Series of strings.
  is_single = isinstance(kinds, str)
  types = [kinds] if is_single else kinds.tolist()

  if 'Polygon' not in types:
    print("Requires a Polygon")
    return "NA"

  # `is_ring` is deliberately not consulted: it describes closed linestrings
  # and is False for a shapely Polygon, so it cannot tell whether holes exist.
  rings = geom.interiors if is_single else geom.interiors.to_list()[0]

  # `rings` is None when the first GeoSeries entry is not a polygon; len() is
  # used instead of truthiness so lists and ring sequences are treated alike.
  if rings is None or len(rings) == 0:
    print("No interior ring")
    return "NA"

  # Turn every shapely LinearRing into a filled Polygon.
  rings_list = [Polygon(ring) for ring in rings]
  return MultiPolygon(rings_list) if len(rings_list) > 1 else rings_list[0]

def exterior_to_polygon(geom):
  """
  Return the exterior ring of a polygon as a (hole-free) Polygon.

  Parameters
  ----------
  geom : shapely Polygon or geopandas GeoSeries
    Geometry to inspect. For a GeoSeries only the exterior of its first
    geometry is used.

  Returns
  -------
  A Polygon built from the exterior ring, or the string "NA" (after printing
  a message) when the input is not a polygon or has no exterior ring.
  """
  kinds = geom.geom_type
  # A single shapely geometry reports its type as a plain string, a GeoSeries
  # as a Series of strings.
  is_single = isinstance(kinds, str)
  types = [kinds] if is_single else kinds.tolist()

  if 'Polygon' not in types:
    print("Requires a Polygon")
    return "NA"

  # `is_ring` is deliberately not consulted: it describes closed linestrings
  # and is False for a shapely Polygon.
  shell = geom.exterior if is_single else geom.exterior.tolist()[0]

  # `shell` is None when the first GeoSeries entry is not a polygon, and empty
  # for an empty polygon.
  if shell is None or shell.is_empty:
    print("No exterior ring")
    return "NA"

  # Rebuild a polygon from the shapely LinearRing only, dropping any holes.
  return Polygon(shell)

In [None]:
def execute_command(command):
  """
  Run `command` through the shell and return what it wrote to stdout.

  Parameters
  ----------
  command : str
    Full shell command line. It is handed to the shell unchanged
    (`shell=True`), so it must only be built from trusted values.

  Returns
  -------
  bytes
    The captured stdout. When the command exits with a non-zero status the
    failure is printed and the stdout captured so far is returned.
  """
  try:
    return subprocess.check_output(command, shell=True)
  except subprocess.CalledProcessError as e:
    # Best effort: keep returning the partial output, but make the failure
    # visible instead of swallowing it silently.
    print(f"Command failed with exit code {e.returncode}: {command}")
    return e.output

In [None]:
# Filter the GADM layer `ADM_0` by the extent mask and add size attributes.

gadm = gpd.read_file(f'{BASE_DIR}/raw/gadm-eez/gadm_410-levels.gpkg', layer='ADM_0')
# Filter the layer loaded above; `gadm_eez` is only created in a later cell,
# so using it here fails on a fresh kernel.
filtered_gadm = mask_gdf(gadm, extent_mask_path)

# Reproject once to EPSG:3410 and reuse the result for both measurements.
# NOTE(review): the column names imply the CRS units are metres — confirm.
filtered_gadm_projected = filtered_gadm.to_crs('epsg:3410')
filtered_gadm['area_m2'] = filtered_gadm_projected.geometry.area
filtered_gadm['perimeter_m'] = filtered_gadm_projected.geometry.length

filtered_gadm.to_file(f'{BASE_DIR}/processed/locations/gadm_filter_by_extent.gpkg', driver='GPKG')

In [None]:
# Load eez data and merge with gadm

In [None]:
# Run mapshaper on `dataPath` (flags as passed below: snap on import, clean
# with gap-fill-area=30km2, dissolve by GID_0) and write the result as a
# shapefile to `<dataPath>_clean.shp`.
# NOTE(review): `dataPath` is not defined in any visible cell — confirm where
# it is set before running this cell on a fresh kernel.
o = f"{dataPath}_clean.shp"
execute_command(f'mapshaper-xl 16 -i snap {dataPath} \
        -clean gap-fill-area=30km2 \
        -dissolve GID_0 \
        -o format=shapefile {o} force')

In [None]:
# Clip the GADM_EEZ layer to the extent mask, preview it and save the result.

gadm_eez_source = f'{IN_FOLDER}/gadm_eez.gpkg'
gadm_eez_target = f'{IN_FOLDER}/gadm_eez_filter_by_extent.gpkg'

gadm_eez = gpd.read_file(gadm_eez_source)
sub = mask_gdf(df=gadm_eez, extent_mask_path=extent_mask_path)

# Quick visual check of the filtered layer before it is written to disk.
sub.plot()
sub.to_file(gadm_eez_target, driver='GPKG')