# On the fly analysis

### Data ingest from google cloud storage

```bash
ogr2ogr -progress \
  -makevalid -overwrite \
  -nln eez_minus_mpa -nlt PROMOTE_TO_MULTI \
  -lco GEOMETRY_NAME=the_geom \
  -lco PRECISION=FALSE \
  -lco SPATIAL_INDEX=GIST \
  -lco FID=id \
  -t_srs EPSG:4326 -a_srs EPSG:4326 \
  -f PostgreSQL PG:"host=$POSTGRES_HOST port=$POSTGRES_PORT \
   user=$POSTGRES_USER password=$POSTGRES_PASSWORD \
   dbname=$POSTGRES_DB active_schema=$POSTGRES_SCHEMA" \
   -doo "PRELUDE_STATEMENTS=CREATE SCHEMA IF NOT EXISTS $POSTGRES_SCHEMA AUTHORIZATION CURRENT_USER;" "/vsizip/vsigs/$URL";
```


## Data analysis
Input call:

```bash
curl 'https://30x30.skytruth.org/functions/analysis/' \
  -H 'content-type: application/json' \
  --data-raw '{"id":"d7c9978f92fff5a373f2dec55e17bbab","type":"Feature","properties":{},"geometry":{"coordinates":[[[-22.791446197507895,9.57642078480319],[-13.563358667514933,11.633521660754937],[0.7068797809280056,5.048301327696478],[-7.5698585191689745,-4.074667696937624],[-22.791446197507895,9.57642078480319]]],"type":"Polygon"}}' \
  --compressed
```

Response:
    
```json
{locations_area:{"code":<location_iso>, "protected_area": <area>, "area":<location_marine_area>}, "total_area":<total_area>}
```


## Data preprocessing

We are going to use the intermidiate data from mpas and from eez, in order to create a dataset that can be used for spatial analysis.
The steps are:
1. Load both datasets
2. Create a difference dataset from the two (Substract the mpas from the eez)
3. disaggregate the eez dataset based on the iso3 codes
4. upload the data to google cloud storage


In [46]:
import geopandas as gpd
from pathlib import Path
from tqdm.asyncio import tqdm
import multiprocessing
import psutil
from functools import lru_cache
import math
import asyncio
from shapely.geometry import Polygon
from shapely.ops import unary_union

In [2]:
# Paths
basepath = Path("/home/mambauser/notebooks")
eez_path = basepath.parent.joinpath("data/eez_intermediate/eez_intermediate/eez_intermediate.shp")
mpas_path = basepath.parent.joinpath("data/mpa_intermediate/mpa_intermediate/mpa_intermediate.shp")
output_file = basepath.parent.joinpath("data/analysis/eez_minus_mpa/eez_minus_mpa.shp")
output_file.parent.mkdir(parents=True, exist_ok=True)

In [3]:
@lru_cache
def get_system_info(MIN_MEMORY_WORKER=8):
    """Get system info."""
    cpu_count = multiprocessing.cpu_count()
    available_memory = psutil.virtual_memory().available / (1024.0**3)
    recommended_partitions = math.floor(available_memory / MIN_MEMORY_WORKER)
    return {
        "cpu_count": cpu_count,
        "total_memory": psutil.virtual_memory().total / (1024.0**3),
        "available_memory": available_memory,
        "recommended_partitions": recommended_partitions
        if recommended_partitions < cpu_count
        else (cpu_count-1),
    }

In [4]:
def background(f):
    def wrapped(*args, **kwargs):
        return asyncio.get_event_loop().run_in_executor(None, f, *args, **kwargs)

    return wrapped

In [5]:
def load_shapefile(path: str, np: int = 1):
    """Load shapefile into dask GeoDataFrame."""
    df = gpd.read_file(path)
    df.geometry = df.geometry.make_valid()
    return df

In [50]:
def collection_to_multipolygon(geometry_collection):
    """Convert collection of polygons to multipolygon."""
    print(type(geometry_collection.geoms))
    geom_list = [
        geom
        for geom in geometry_collection.geoms
        if geom.geom_type == "Polygon" or geom.geom_type == "MultiPolygon"
    ]
    return unary_union(geom_list)


def repair_geometry(geom):
    if not geom:
        return Polygon()
    elif not geom.is_valid:
        geom = collection_to_multipolygon(geom.buffer(0.0).make_valid())
    elif geom.geom_type == "GeometryCollection":
        geom = collection_to_multipolygon(geom)
    return geom

In [51]:
def get_matches(geom, df):
    """Get matches."""
    candidates = df.iloc[df.sindex.intersection(geom.bounds)]
    if len(candidates) > 0:
        candidates = candidates[candidates.intersects(geom)]
    return candidates

In [52]:
@background
def difference(geom_col_value, df, pbar):
    candidates = get_matches(geom_col_value, df)
    if len(candidates) > 0:
        geometry = repair_geometry(geom_col_value.difference(
            candidates.geometry.unary_union
        ))
    else:
        geometry = geom_col_value
    pbar.update(1)

    return geometry

In [53]:
async def create_difference(geodataframe1, geodataframe2):
    """Create difference between two GeoDataFrames."""
    # we build the spatial index for the larger GeoDataFrame

    result = geodataframe1.copy()
    with tqdm(total=result.shape[0]) as pbar:  # we create a progress bar
        result["geometry"] = await asyncio.gather(
            *(difference(val, geodataframe2[["geometry"]], pbar) for val in result["geometry"])
        )

    return result

In [10]:
eez = load_shapefile(eez_path.as_posix(), 10)  # 200nm EEZ
mpas = load_shapefile(mpas_path.as_posix(), 100) # MPAs

In [54]:
a_minus_b = await create_difference(eez, mpas)

100%|█████████▉| 281/282 [05:12<00:40, 40.70s/it]

<class 'shapely.geometry.base.GeometrySequence'>


100%|██████████| 282/282 [06:58<00:00,  1.49s/it]


In [58]:
a_minus_b

Unnamed: 0,MRGID,GEONAME,POL_TYPE,AREA_KM2,ISO_SOV1,ISO_SOV2,ISO_SOV3,geometry
0,8444.0,American Samoa Exclusive Economic Zone,200NM,405830.0,USA,,,"MULTIPOLYGON (((-166.64194 -17.55500, -166.651..."
1,8379.0,Ascension Exclusive Economic Zone,200NM,446005.0,GBR,,,"MULTIPOLYGON (((-10.93296 -7.90389, -10.93294 ..."
2,8446.0,Cook Islands Exclusive Economic Zone,200NM,1969553.0,NZL,,,"MULTIPOLYGON (((-158.75396 -6.13852, -159.2757..."
3,8389.0,Overlapping claim Falkland / Malvinas Islands:...,Overlapping claim,550566.0,GBR,ARG,,"MULTIPOLYGON (((-59.14325 -55.75011, -58.00000..."
4,8440.0,French Polynesian Exclusive Economic Zone,200NM,4766689.0,FRA,,,"MULTIPOLYGON (((-135.92905 -7.89648, -135.9282..."
...,...,...,...,...,...,...,...,...
277,62589.0,Chagos Archipelago Exclusive Economic Zone,200 NM,650804.0,MUS,,,"MULTIPOLYGON (((75.83260 -5.31997, 75.82830 -5..."
278,8383.0,Overlapping claim South Georgia and South Sand...,Overlapping claim,1237783.0,GBR,ARG,,"MULTIPOLYGON (((-35.63012 -50.83060, -35.61631..."
279,8402.0,Bermudian Exclusive Economic Zone,200NM,464389.0,GBR,,,"MULTIPOLYGON (((-60.70499 32.39067, -60.70500 ..."
280,8456.0,United States Exclusive Economic Zone,200NM,2451023.0,USA,,,"MULTIPOLYGON (((-67.28403 45.19125, -67.28400 ..."


In [57]:
a_minus_b[
    ~(a_minus_b.geometry.is_empty == True)
].to_file(output_file.as_posix())