### The way to calculate the coastal length of mangroves is:
1.- Buffer extent data by 200m  
2.- Clip the buffered extent with the target feature  
3.- Clip the coastal extent with the clipped buffered extent  
4.- Calculate the length of the clipped coastal extent  
5.- Repeat the process for each year's extent and each location  

In [1]:
import os
import logging
from pathlib import Path
import requests

import json
import multiprocessing as mp

from shapely.geometry import Polygon, box, mapping
import fiona
import geopandas as gpd
import shapely.speedups
shapely.speedups.enable()
%run utils.ipynb

In [2]:
# --- Local paths -------------------------------------------------------------
# FIXME: BASE_DIR is derived from the kernel's current working directory, so its
# value depends on where the notebook kernel is started from.
WORK_DIR = Path.cwd()
BASE_DIR = f'{WORK_DIR.parents[3]}/datasets'

# Fail fast when the kernel was started from an unexpected location.
# @TODO: Add expected data files source as an environment variable.
assert BASE_DIR == '/home/jovyan/work/datasets', (
    f'{BASE_DIR} is not the correct directory'
)

# --- Inputs ------------------------------------------------------------------
# GeoPackage with the mangrove extent; `layers` holds the names of the layers it
# contains, and every later cell iterates over that list.
mangrove_extent_path = Path(f'{BASE_DIR}/raw/extent-layer-creation/gmw_v3_fnl_mjr_v314.gpkg')
layers = fiona.listlayers(mangrove_extent_path)



rtree.index.Index(bounds=[-179.99998854118687, -58.44946994568893, 179.99998854118684, 74.70884000396096], size=70)

### This creates a buffered version of the data for each year

In [4]:
# The clip window is identical for every layer, so it is built and reprojected
# once here instead of on every loop iteration. The box is expressed in lon/lat
# (EPSG:4326) and moved to EPSG:3410, the CRS in which the buffer is computed.
clip_window = gpd.GeoSeries(
    {"geometry": box(-180, -50, 180, 40)}, crs='EPSG:4326'
).to_crs('EPSG:3410')

for layer_name in layers:
    print(layer_name)
    # Reproject the layer to EPSG:3410 and buffer every geometry by 200 CRS units
    # (the 200 m buffer described at the top of the notebook).
    mangrove_extent_df = gpd.read_file(
        mangrove_extent_path, layer=layer_name
    ).to_crs('epsg:3410').buffer(200)
    # Clip to the window, go back to lon/lat and dissolve into a single geometry.
    merged_geometry = (
        mangrove_extent_df.clip(clip_window).to_crs('EPSG:4326').unary_union
    )
    # The "bufered" spelling is kept on purpose: the simplification step reads
    # the files back under exactly this name.
    gpd.GeoDataFrame(
        {"geometry": merged_geometry}, crs='EPSG:4326'
    ).to_file(f'{BASE_DIR}/raw/extent-layer-creation/{layer_name}-bufered.shp')

mng_mjr_1996
mng_mjr_2007
mng_mjr_2008
mng_mjr_2009
mng_mjr_2010
mng_mjr_2015
mng_mjr_2016
mng_mjr_2017
mng_mjr_2018
mng_mjr_2019
mng_mjr_2020


### This slightly reduces the number of vertices so that the spatial intersections run faster

In [5]:
for layer_name in layers:
    # Source is the buffered shapefile of this layer; target is its simplified copy.
    input_file = f'{BASE_DIR}/raw/extent-layer-creation/{layer_name}-bufered.shp'
    output_file = f'{BASE_DIR}/processed/{layer_name}-bufered-simp.shp'
    # The mapshaper command is assembled from three pieces. The runs of spaces at
    # the start of the 2nd and 3rd piece reproduce the single-string layout this
    # command has always had, so the resulting text is unchanged.
    cmd = ''.join((
        f'mapshaper-xl 16gb -i {input_file} snap ',
        '        -simplify 0.7 dp planar keep-shapes ',
        f'        -o  {output_file} format=shapefile force',
    ))
    # execute_command is not defined in this cell (presumably loaded by
    # `%run utils.ipynb` — verify); it receives the full command line as one string.
    execute_command(cmd)

Allocating 16 GB of heap memory
[i] Snapped 10119 points
[simplify] Repaired 29 intersections
[o] Wrote /home/jovyan/work/datasets/processed/mng_mjr_1996-bufered-simp.shp
[o] Wrote /home/jovyan/work/datasets/processed/mng_mjr_1996-bufered-simp.shx
[o] Wrote /home/jovyan/work/datasets/processed/mng_mjr_1996-bufered-simp.dbf
[o] Wrote /home/jovyan/work/datasets/processed/mng_mjr_1996-bufered-simp.prj
INFO:root:Task created
Allocating 16 GB of heap memory
[i] Snapped 10391 points
[simplify] Repaired 14 intersections
[o] Wrote /home/jovyan/work/datasets/processed/mng_mjr_2007-bufered-simp.shp
[o] Wrote /home/jovyan/work/datasets/processed/mng_mjr_2007-bufered-simp.shx
[o] Wrote /home/jovyan/work/datasets/processed/mng_mjr_2007-bufered-simp.dbf
[o] Wrote /home/jovyan/work/datasets/processed/mng_mjr_2007-bufered-simp.prj
INFO:root:Task created
Allocating 16 GB of heap memory
[i] Snapped 10453 points
[simplify] Repaired 30 intersections
[o] Wrote /home/jovyan/work/datasets/processed/mng_mjr_2

## The rest of the calculations are run in PostgreSQL (PostGIS) because of their complexity.

```sql
-- Builds coastline_subset: each coastline geometry is cut to the location polygon
-- it intersects and stored together with that location's id (location_i).
-- Only coastline features that also intersect test_extent_2000_simp_convex are kept.
-- NOTE(review): a coastline feature that intersects several rows of
-- test_extent_2000_simp_convex is emitted once per match; confirm that table
-- cannot produce duplicates here.
SELECT
    ST_Intersection(c.the_geom, lvg.the_geom) AS the_geom,
    location_i
INTO coastline_subset
FROM coastline AS c
INNER JOIN test_extent_2000_simp_convex AS tesc
    ON ST_Intersects(c.the_geom, tesc.the_geom)
INNER JOIN locations_v3_gee AS lvg
    ON ST_Intersects(c.the_geom, lvg.the_geom);

-- Add a spatial (SP-GiST) index on the geometry column once the table exists
CREATE INDEX coastline_subset_geom_spgist_x
    ON coastline_subset
    USING spgist (the_geom);
```

```sql
-- Coastline length covered by the buffered mangrove extent, per location, for one year.
-- Run this once per year of data: change the table name and the literal year together.
-- ST_Length on a plain geometry returns the length in the units of its spatial
-- reference, which for lon/lat (EPSG:4326) data is degrees. Casting the
-- intersection to geography makes ST_Length return metres instead.
-- NOTE(review): assumes the_geom is EPSG:4326 geometry in both tables — confirm.
-- The output column keeps its existing spelling so downstream consumers still match.
SELECT
    SUM(ST_Length(ST_Intersection(s.the_geom, f.the_geom)::geography)) AS mangrove_coastal_lenght,
    location_i,
    2016 AS year
FROM coastline_subset_simp_70_p AS s
INNER JOIN "mng_mjr_2016-bufered" AS f
    ON ST_Intersects(s.the_geom, f.the_geom)
GROUP BY location_i;
```

```sql
-- Don't forget to enable parallel execution before running the heavy queries.
-- These settings raise the worker limits and lower the table-size threshold
-- at which a parallel scan is considered.
SET max_parallel_workers = 24;
SET max_parallel_workers_per_gather = 24;
SET min_parallel_table_scan_size = '1kB';
```

```bash
# For data ingest in the DB: loads one extent file into the PostGIS table mng_mjr_2015.
# Repeat with a different -nln value and input file for every other year.
# OGR_TRUNCATE=YES empties the target table before the append, so a re-run
# replaces the rows instead of duplicating them.
# The PG: connection string must be closed with a double quote before the input
# path; otherwise the path becomes part of the connection string.
# NOTE(review): credentials are hard-coded here; prefer PGPASSWORD or ~/.pgpass
# outside of local development.
ogr2ogr -makevalid \
	-update -append \
	-geomfield the_geom \
	--config OGR_TRUNCATE YES \
	-nln mng_mjr_2015 -nlt PROMOTE_TO_MULTI \
	-t_srs EPSG:4326 -a_srs EPSG:4326 \
	-f PostgreSQL PG:"dbname=postgres host=localhost port=4321 user=postgres password=postgres" \
	/mng_mjr_2015.json
 ```