In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import logging
import sys
from pathlib import Path
from typing import Union
import time
import pandas as pd
import geopandas as gpd
import numpy as np
import json

scripts_dir = Path("../..").joinpath("src")
if scripts_dir not in sys.path:
    sys.path.insert(0, scripts_dir.resolve().as_posix())

from helpers.strapi import Strapi
from helpers.settings import get_settings
from helpers.file_handler import FileConventionHandler
from helpers.utils import download_and_unzip_if_needed

from pipelines.output_schemas import (
    FPLSchema,
    ProtectionLevelSchema,
    MPAsSchema,
    HabitatsSchema,
    LocationSchema,
    ProtectedAreaExtentSchema,
)
from pipelines.processors import (
    add_envelope,
    add_location_iso,
    expand_multiple_locations,
    add_region_iso,
    calculate_eez_area,
    add_bbox,
    add_groups_and_members,
    add_location_name,
    output,
    clean_geometries,
    filter_by_exluding_propossed_mpas,
    spatial_join,
    process_mpa_data,
    assign_iso3,
    calculate_global_area,
    separate_parent_iso,
    calculate_stats_cov,
    coverage_stats,
    mpaatlas_filter_stablishment,
    process_mpaatlas_data,
    calculate_stats,
    fix_monaco,
    batch_export,
    calculate_area,
)

logging.basicConfig(level=logging.DEBUG)
logging.getLogger("requests").setLevel(logging.WARNING)
logging.getLogger("urllib3").setLevel(logging.WARNING)
logging.getLogger("fiona").setLevel(logging.WARNING)

In [3]:
mysettings = get_settings()
prev_step = "preprocess"
current_step = "stats"

In [4]:
# Strapi setup
strapi = Strapi(url=mysettings.STRAPI_URL)
strapi.login(jwt=mysettings.STRAPI_JWT)

ValueError: {'status': 400, 'name': 'ValidationError', 'message': '2 errors occurred', 'details': {'errors': [{'path': ['identifier'], 'message': 'identifier is a required field', 'name': 'ValidationError'}, {'path': ['password'], 'message': 'password is a required field', 'name': 'ValidationError'}]}}

In [5]:
# after generated the locations file for the first time:
location_code = pd.read_csv(mysettings.DATA_DIR.joinpath("eez/processed/stats/locations_code.csv"))

### Locations (eez + regions)

In [3]:
pipe = "eez"
strapi_collection = ""
pipe_dir = FileConventionHandler(pipe)

output_file = pipe_dir.get_processed_step_path(current_step).joinpath("locations.json")

# Download the EEZ file && unzip it
download_and_unzip_if_needed(pipe_dir, prev_step, mysettings)

/home/mambauser/data/eez/processed/eez_preprocess.zip
/home/mambauser/data/eez/processed/preprocess


PosixPath('/home/mambauser/data/eez/processed/preprocess')

In [5]:
locations = (
    gpd.read_file(pipe_dir.get_step_fmt_file_path(prev_step, "shp"))
    .pipe(add_envelope)
    .pipe(add_location_iso)
    .pipe(expand_multiple_locations)
    .pipe(add_region_iso,'iso')
    .pipe(calculate_eez_area)
    .pipe(add_bbox)
    .pipe(add_groups_and_members)
    .pipe(add_location_name)
    .rename(
        columns={
            "iso": "code",
            "AREA_KM2": "totalMarineArea",
            "location_type": "type",
        }
    )
)

locations.drop(
    columns=list(
        set(locations.columns) - 
        set(["code", "name", "totalMarineArea", "type", "groups", "bounds", "id"])),
    inplace=True,
)

output_locations = {
    "version": 2,
    "data": {
        "api::location.location": LocationSchema(pd.DataFrame(locations)).to_dict(
            orient="index"
        )
    },
}
with open(output_file, "w") as f:
    json.dump(output_locations, f)

del output_locations

DEBUG:pyproj:PROJ_ERROR: proj_create: unrecognized format / unknown name
DEBUG:pyproj:PROJ_ERROR: proj_create: unrecognized format / unknown name
DEBUG:pyproj:PROJ_ERROR: proj_create: unrecognized format / unknown name


In [9]:
##
(locations[['id', 'code']]
 .to_csv(pipe_dir.get_processed_step_path(current_step)
     .joinpath('locations_code.csv'), index=False))

del locations

In [None]:
strapi.deleteCollectionData(strapi_collection, list(range(1, 300)))

In [None]:
strapi.importCollectionData(
    strapi_collection,
    output_file,
)

### Habitats

The habitat data came from:


In [8]:
pipe = "habitats"
strapi_collection = ''
pipe_dir = FileConventionHandler(pipe)
output_file = pipe_dir.get_step_fmt_file_path(current_step, "csv")

# Download the habitat file
# download_and_unzip_if_needed(pipe_dir, prev_step, mysettings)

habitats_intermediate = pd.read_csv(
    pipe_dir.processed_path.joinpath("habitats4.csv"), keep_default_na=False
)

In [None]:
habitat_stats = habitats_intermediate.pipe(
    output,
    iso_column="location_id",
    rep_d={
        "habitat_name": {
            "saltmarshes": 1,
            "seagrasses": 2,
            "warm-water corals": 3,
            "cold-water corals": 4,
            "mangroves": 5,
            "seamounts": 6,
        },
        "protected_area": {"": 0},
    },
    rename={
        "protected_area": "protectedArea",
        "total_area": "totalArea",
        "habitat_name": "habitat",
    },
    drop_cols=["location_id"],
)
HabitatsSchema(habitat_stats).to_csv(
    output_file.as_posix(), index=True
)

In [None]:
strapi.deleteCollectionData(strapi_collection, list(range(1, 300)))

In [None]:
strapi.importCollectionData(
    strapi_collection,
    output_file,
)

### Coverage stats - Mpas

We are going to use the intermediate data from eez, in order to create a dataset that can be used as a land mask.
The steps are:
1. Load eez
2. Spatial inner Join the eez dataset with the Mpas one
3. Assign the location iso
4. dissolve by location iso and cummulative year
5. calculate the area for global regions and eez countries
6. prepare the data to be ingested in strapi
7. upload the data to strapi

In [5]:
pipe = "mpa"
strapi_collection = ""

pipe_dir_eez = FileConventionHandler("eez")
pipe_dir_mpas = FileConventionHandler(pipe)
output_file = pipe_dir_mpas.get_processed_step_path(current_step).joinpath(
    "mpa_landmask_strapi.csv"
)

# Download the EEZ file && unzip it
download_and_unzip_if_needed(pipe_dir_eez, prev_step, mysettings)
# Download the mpas file && unzip it
download_and_unzip_if_needed(pipe_dir_mpas, prev_step, mysettings)

# Load the data
eez = gpd.read_file(pipe_dir_eez.get_step_fmt_file_path(prev_step, "shp")).pipe(clean_geometries)
mpas = gpd.read_file(pipe_dir_mpas.get_step_fmt_file_path(prev_step, "shp")).pipe(clean_geometries)

/home/mambauser/data/eez/processed/eez_preprocess.zip
/home/mambauser/data/eez/processed/preprocess
/home/mambauser/data/mpa/processed/mpa_preprocess.zip
/home/mambauser/data/mpa/processed/preprocess


In [6]:
eez_mpas_data_join = await spatial_join(eez, mpas.pipe(filter_by_exluding_propossed_mpas))

100%|█████████▉| 281/282 [06:12<01:03, 63.36s/it]

<class 'shapely.geometry.base.GeometrySequence'>


100%|██████████| 282/282 [06:33<00:00,  1.39s/it]

<class 'shapely.geometry.base.GeometrySequence'>
<class 'shapely.geometry.base.GeometrySequence'>
<class 'shapely.geometry.base.GeometrySequence'>
<class 'shapely.geometry.base.GeometrySequence'>





In [7]:
# To get an idea of the spatial join results
eez_mpas_data_join.pipe(add_location_iso).pipe(assign_iso3).to_file(
    pipe_dir_mpas.get_processed_step_path(current_step).joinpath("mpas_sjoin.shp"), driver="ESRI Shapefile"
)

In [9]:
final_data = await process_mpa_data(
    eez_mpas_data_join.pipe(add_location_iso).pipe(assign_iso3),
    range(2011, time.localtime().tm_year + 1),
    ["PA_DEF", "iso_3"],
    {"protectedAreasCount": "sum"},
)

DEBUG:pyproj:PROJ_ERROR: proj_create: unrecognized format / unknown name
100%|██████████| 14/14 [03:03<00:00, 13.12s/it]


DEBUG:pyproj:PROJ_ERROR: proj_create: unrecognized format / unknown name
  7%|▋         | 1/14 [01:14<16:05, 74.30s/it]DEBUG:pyproj:PROJ_ERROR: proj_create: unrecognized format / unknown name
 14%|█▍        | 2/14 [01:19<06:42, 33.52s/it]DEBUG:pyproj:PROJ_ERROR: proj_create: unrecognized format / unknown name
 21%|██▏       | 3/14 [01:45<05:30, 30.01s/it]DEBUG:pyproj:PROJ_ERROR: proj_create: unrecognized format / unknown name
DEBUG:pyproj:PROJ_ERROR: proj_create: unrecognized format / unknown name
 29%|██▊       | 4/14 [02:00<04:02, 24.29s/it]DEBUG:pyproj:PROJ_ERROR: proj_create: unrecognized format / unknown name
 43%|████▎     | 6/14 [02:03<01:29, 11.15s/it]DEBUG:pyproj:PROJ_ERROR: proj_create: unrecognized format / unknown name
 50%|█████     | 7/14 [02:12<01:13, 10.47s/it]DEBUG:pyproj:PROJ_ERROR: proj_create: unrecognized format / unknown name
 57%|█████▋    | 8/14 [02:16<00:50,  8.40s/it]DEBUG:pyproj:PROJ_ERROR: proj_create: unrecognized format / unknown name
DEBUG:pyproj:PROJ_ERR

In [15]:
coverage = (
    final_data.pipe(calculate_global_area, ["year", "PA_DEF"], {"area": "sum"}, "iso_3")
    .pipe(separate_parent_iso, "iso_3")
    .pipe(add_region_iso, "iso_3")
    .replace(
        {
            "iso_3": {
                "ATA": "ABNJ",
                "COK": "NZL",
                "IOT": "GBR",
                "NIU": "NZL",
                "SHN": "GBR",
                "SJM": "NOR",
                "UMI": "USA",
                "NCL": "FRA",
                "GIB": "GBR",
            }
        }
    )
    .pipe(calculate_stats_cov, ["year", "PA_DEF"], "iso_3")
    .pipe(coverage_stats)
)

ProtectedAreaExtentSchema(
    coverage.pipe(
        output,
        "iso_3",
        {"PA_DEF": {"0": 2, "1": 1}},
        {"PARENT_NAME": "location", "PA_DEF": "protection_status"},
        ["area", "iso_3"],
    )
).to_csv(
    output_file,
    index=True,
)

In [None]:
strapi.deleteCollectionData(strapi_collection, list(range(1, 300)))

In [None]:
strapi.importCollectionData(
    strapi_collection,
    output_file,
)

### Mpa atlas - country stats Fully or highly protected

We are going to use the intermediate data from eez, in order to create a dataset that can be used as a land mask.
The steps are:
1. Load eez
2. Spatial inner Join the eez dataset with the Mpaatlas one
3. iso assign using the sovereign one provided by mpaatlas
4. dissolve by location
5. calculate the area for global regions and eez countries ussing mollwide projection
6. prepare the data to be ingested in strapi
7. upload the data to strapi

In [16]:
pipe = "mpaatlas"
strapi_collection = "mpaa-protection-level-stat"

pipe_dir_eez = FileConventionHandler("eez")
pipe_dir_mpaatlas = FileConventionHandler(pipe)
output_file = pipe_dir_mpaatlas.get_processed_step_path(current_step).joinpath(
    "mpaatlas_protection_level.csv"
)

# Download the EEZ file && unzip it
download_and_unzip_if_needed(pipe_dir_eez, prev_step, mysettings)
# Download the mpas file && unzip it
download_and_unzip_if_needed(pipe_dir_mpaatlas, prev_step, mysettings)

# Load the data
eez = gpd.read_file(pipe_dir_eez.get_step_fmt_file_path(prev_step, "shp")).pipe(clean_geometries)
mpaatlas_intermediate = gpd.read_file(
    pipe_dir_mpaatlas.get_step_fmt_file_path(prev_step, "shp")
).pipe(clean_geometries)

/home/mambauser/data/eez/processed/eez_preprocess.zip
/home/mambauser/data/eez/processed/preprocess
/home/mambauser/data/mpaatlas/processed/mpaatlas_preprocess.zip
/home/mambauser/data/mpaatlas/processed/preprocess


In [17]:
eez_mpaatlas_data_join = await spatial_join(
    eez, mpaatlas_intermediate.pipe(mpaatlas_filter_stablishment)
)

  0%|          | 0/282 [00:00<?, ?it/s]

<class 'shapely.geometry.base.GeometrySequence'>


100%|██████████| 282/282 [00:27<00:00,  4.06s/it]

100%|██████████| 282/282 [00:27<00:00, 10.21it/s]


<class 'shapely.geometry.base.GeometrySequence'>


In [40]:
# To get an idea of the spatial join results
# eez_mpaatlas_data_join.to_file(
#     pipe_dir_mpaatlas.get_processed_step_path(current_step).joinpath("mpaatlas_sjoin.shp"),
#     driver="ESRI Shapefile",
# )

In [18]:
eez_mpaatlas_data_join.dissolve(by=["protecti_1", "location_i"], aggfunc={"name": "count"}).reset_index().to_file(
pipe_dir_mpaatlas.get_processed_step_path(current_step).joinpath("mpaatlas_sjoin_dissolved.shp"),
driver="ESRI Shapefile",
)

DEBUG:pyproj:PROJ_ERROR: proj_create: unrecognized format / unknown name


In [19]:
result = (
    eez_mpaatlas_data_join.pipe(process_mpaatlas_data)
    .pipe(calculate_global_area, gby_col=["protecti_1"], iso_column="location_i")
    .pipe(separate_parent_iso)
    .replace(
        {
            "location_i": {
                "COK": "NZL",
                "IOT": "GBR",
                "NIU": "NZL",
                "SHN": "GBR",
                "SJM": "NOR",
                "UMI": "USA",
                "NCL": "FRA",
            }
        }
    )
    .pipe(add_region_iso, iso_column="location_i")
    .pipe(calculate_stats, gby_col=["protecti_1"], iso_column="location_i")
    .pipe(fix_monaco, iso_column="location_i", area_column="area_km2")
    .pipe(
        output,
        iso_column="location_i",
        rep_d={
            "protecti_1": {
                "fully or highly protected": 1,
                "less protected or unknown": 2,
            }
        },
        rename={"protecti_1": "mpaa_protection_level", "area_km2": "area"},
        drop_cols=[],
    )
)

ProtectionLevelSchema(result[~result.location.isna()].assign(year=2023)).to_csv(
    output_file, index=True
)

DEBUG:pyproj:PROJ_ERROR: proj_create: unrecognized format / unknown name


In [None]:
strapi.deleteCollectionData(strapi_collection, list(range(1, 300)))

In [None]:
strapi.importCollectionData(
    strapi_collection,
    output_file,
)

### Protected seas  - fishing protection level

In [None]:
pipe = "protectedseas"
strapi_collection = ""

pipe_dir = FileConventionHandler(pipe)
output_file = pipe_dir.get_processed_step_path(current_step).joinpath("lfp.csv")

# Download the protected seas file && unzip it
download_and_unzip_if_needed(pipe_dir, prev_step, mysettings)

# Load the data
protectedseas_intermediate = gpd.read_file(pipe_dir.get_step_fmt_file_path(prev_step, "shp")).pipe(
    clean_geometries
)

In [None]:
final = (
    protectedseas_intermediate.pipe(calculate_area)
    .pipe(calculate_global_area, gby_col=["FPS_cat"], iso_column="iso")
    .pipe(separate_parent_iso, iso_column="iso")
    .replace(
        {
            "iso": {
                "COK": "NZL",
                "IOT": "GBR",
                "NIU": "NZL",
                "SHN": "GBR",
                "SJM": "NOR",
                "UMI": "USA",
                "NCL": "FRA",
            }
        }
    )
    .pipe(add_region_iso, iso_column="iso")
    .pipe(calculate_stats, gby_col=["FPS_cat"], iso_column="location_i")
    .pipe(fix_monaco, iso_column="iso", area_column="area_km2")
    .pipe(
        output,
        iso_column="iso",
        rep_d={
            "FPS_cat": {
                "highly": 1,
                "moderately": 2,
                "less": 3,
            }
        },
        rename={"FPS_cat": "fishing_protection_level", "area_km2": "area"},
        drop_cols=["iso"],
    )
)

FPLSchema(final).to_csv(output_file, index=True)

In [None]:
strapi.deleteCollectionData(strapi_collection, list(range(1, 300)))

In [None]:
strapi.importCollectionData(
    strapi_collection,
    output_file,
)

### Country detail table data

In [60]:
## TODO: this whole pipe will change base on the new designs.

pipe = "mpa"
strapi_collection_mpas = ""

pipe_dir = FileConventionHandler(pipe)
pipe_dir_mpaatlas = FileConventionHandler("mpaatlas")
output_file_mpas = pipe_dir.get_processed_step_path(current_step).joinpath("mpa_detail.csv")

# Download the protected atlas file && unzip it
download_and_unzip_if_needed(pipe_dir, prev_step, mysettings)
# Download the mpaatlas file 
download_and_unzip_if_needed(pipe_dir_mpaatlas, prev_step, mysettings)

/home/mambauser/data/mpa/processed/mpa_preprocess.zip
/home/mambauser/data/mpa/processed/preprocess
/home/mambauser/data/mpaatlas/processed/mpaatlas_preprocess.zip
/home/mambauser/data/mpaatlas/processed/preprocess


PosixPath('/home/mambauser/data/mpaatlas/processed/preprocess')

In [61]:
# Load the data
mpa_intermediate = gpd.read_file(pipe_dir.get_step_fmt_file_path(prev_step, "shp")).pipe(
    clean_geometries
)
mpaatlas_intermediate = gpd.read_file(
    pipe_dir_mpaatlas.get_step_fmt_file_path(prev_step, "shp")
).pipe(clean_geometries)

In [18]:
def extract_wdpaid_mpaatlas(gdf):
    return gdf.assign(wdpaid=gdf.wdpa_id.str.extract(r"(\d+(?:_\d+)*)").astype("Int64"))

def columns_to_lower(gdf):
    gdf.columns = gdf.columns.str.lower()
    return gdf

In [62]:
init_table = (
    pd.concat(
        [
            (
                mpa_intermediate.pipe(columns_to_lower)
                .pipe(separate_parent_iso, iso_column="parent_iso")
                .rename(
                    columns={
                        "parent_iso": "iso",
                        "status_yr": "year",
                        "gis_m_area": "area_km2",
                    }
                )
            ).assign(source="protected_planet"),
            (
                mpaatlas_intermediate.pipe(calculate_area)
                .pipe(extract_wdpaid_mpaatlas)
                .pipe(separate_parent_iso, iso_column="location_i")
                .rename(
                    columns={
                        "location_i": "iso",
                        "wdpa_id": "wdpa_pid",
                    }
                )
            ).assign(source="mpaatlas"),
        ],
        ignore_index=True,
    )
    .reset_index(drop=True)
    .replace(
        {
            "iso": {
                "COK": "NZL",
                "IOT": "GBR",
                "NIU": "NZL",
                "SHN": "GBR",
                "SJM": "NOR",
                "UMI": "USA",
                "NCL": "FRA",
            }
        }
    )
    .assign(is_child=True)
)

In [63]:
filters =((init_table.wdpaid.astype(str) != init_table.wdpa_pid)
    & (
        init_table.wdpaid.astype(str)
        == init_table.wdpa_pid.str.extract(r"(\d+(?:_\d+)*)", expand=False)
    )
    )

childs = init_table[filters]

parents = init_table[~filters]

In [71]:
with_child = init_table.groupby(["wdpaid"]).size()

In [86]:
init_table[init_table.wdpaid.isin(with_child[with_child > 1].index.values)].sort_values(
    by=["wdpaid", "source"], ascending=[True, False]
).drop_duplicates("wdpaid")

Unnamed: 0,wdpaid,wdpa_pid,pa_def,name,area_km2,status,year,iso,geometry,source,designatio,establishm,protection,protecti_1
17,170,170,1,Isla del Coco,55055.984162,Designated,1978,CRI,"POLYGON ((-86.36701 6.23701, -86.36701 4.52901...",protected_planet,,,,
20,191,191,1,Galápagos Islands,138850.038134,Inscribed,1978,ECU,"POLYGON ((-89.37005 -2.02384, -89.625 -2.07455...",protected_planet,,,,
54,763,763,1,Kisite,27.838077,Designated,1978,KEN,"POLYGON ((39.39004 -4.68572, 39.40071 -4.71694...",protected_planet,,,,
79,1033,1033,1,Channel Islands National Park,463.128622,Designated,1938,USA,"MULTIPOLYGON (((-119.02417 33.50503, -119.0220...",protected_planet,,,,
82,1065,1065,1,Padre Island National Seashore,320.994765,Designated,1962,USA,"POLYGON ((-97.31832 26.56199, -97.32464 26.580...",protected_planet,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18357,555758130,555758130_A,1,Musquash Estuary,0.000000,Established,2002,CAN,"POLYGON ((-66.25779 45.1845, -66.25801 45.1844...",protected_planet,,,,
18368,555758167,555758167_A,1,Oliver Cove Marine Park,0.019056,Designated,1992,CAN,"MULTIPOLYGON (((-128.35241 52.31264, -128.3525...",protected_planet,,,,
18371,555758211,555758211_A,1,Richibucto Dunes,0.093888,Established,2009,CAN,"POLYGON ((-64.735 46.6892, -64.75115 46.68206,...",protected_planet,,,,
18379,555758331,555758331_A,1,Zumtela Bay Conservancy,0.042509,Designated,2008,CAN,"MULTIPOLYGON (((-130.36603 54.58262, -130.366 ...",protected_planet,,,,


In [64]:
parents[parents.wdpaid.isin(childs.wdpaid.unique())]

Unnamed: 0,wdpaid,wdpa_pid,pa_def,name,area_km2,status,year,iso,geometry,source,designatio,establishm,protection,protecti_1
834,18314,18314,1.0,Baeria Rocks Ecological Reserve,1.382728,Designated,1971.0,CAN,"POLYGON ((-125.14853 48.95796, -125.14799 48.9...",protected_planet,,,,
14801,555624306,555624306,1.0,Caribe Mexicano,57814.157521,Designated,2016.0,MEX,"POLYGON ((-86.37226 21.93714, -86.37502 21.295...",protected_planet,,,,
14802,555624307,555624307,1.0,Pacífico Mexicano Profundo,435749.767538,Designated,2016.0,MEX,"MULTIPOLYGON (((-93.77985 12.73334, -94.4341 1...",protected_planet,,,,
15013,555629385,555629385,1.0,Revillagigedo,148516.125913,Designated,2017.0,MEX,"POLYGON ((-114.517 17.65523, -115.47142 17.655...",protected_planet,,,,
19034,555624306,555624306,,Caribe Mexicano Core Zone Benthic Protection,19325.63,,2016.0,MEX,"MULTIPOLYGON (((-85.90873 17.95643, -86.15511 ...",mpaatlas,Biosphere Reserve,actively managed,incompatible,less protected or unknown
19035,555548204,555548204,,Great Barrier Reef Coast - Habitat Protection ...,17275.08,,,AUS,"MULTIPOLYGON (((152.00901 -24.43617, 152.00796...",mpaatlas,Marine Park,actively managed,light,less protected or unknown
19036,555548204,555548204,,Great Barrier Reef Coast - Conservation Park Zone,4059.91,,,AUS,"MULTIPOLYGON (((151.94032 -24.23285, 151.94542...",mpaatlas,Marine Park,actively managed,light,less protected or unknown
19037,555548204,555548204,,Great Barrier Reef Coast - Buffer Zone,39.24,,,AUS,"MULTIPOLYGON (((145.53308 -14.80205, 145.53308...",mpaatlas,Marine Park,actively managed,high,fully or highly protected
19038,555548204,555548204,,Great Barrier Reef Coast - Scientific Research...,117.95,,,AUS,"MULTIPOLYGON (((152.10886 -23.47565, 152.10886...",mpaatlas,Marine Park,actively managed,high,fully or highly protected
19039,555548204,555548204,,Great Barrier Reef Coast - Marine National Par...,15730.7,,,AUS,"MULTIPOLYGON (((152.11465 -24.49841, 152.1 -24...",mpaatlas,Marine Park,actively managed,high,fully or highly protected


In [54]:
childs

Unnamed: 0,wdpaid,wdpa_pid,pa_def,name,area_km2,status,year,iso,geometry,designatio,establishm,protection,protecti_1
160,2628,2628_A,1,Great Barrier Reef,856.431255,Designated,2004,AUS,"MULTIPOLYGON (((145.97213 -16.76038, 145.97223...",,,,
161,2628,2628_E,1,Great Barrier Reef,214688.793012,Designated,2004,AUS,"MULTIPOLYGON (((151.10253 -21.39999, 151.11739...",,,,
162,2628,2628_B,1,Great Barrier Reef,114762.543771,Designated,2004,AUS,"MULTIPOLYGON (((146.33101 -18.21595, 146.31653...",,,,
163,2628,2628_D,1,Great Barrier Reef,14979.946901,Designated,2004,AUS,"MULTIPOLYGON (((146.03442 -17.35992, 146.03442...",,,,
230,4144,4144_B,1,Cape St. Mary's Ecological Reserve,52.292550,Designated,1964,CAN,"POLYGON ((-54.19167 46.83474, -54.19201 46.834...",,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
19491,555629385,555629385_A,,Revillagigedo - Zona Núcleo/Uso Restringido In...,148161.110000,,2019,MEX,"POLYGON ((-110.07809 20.00863, -110.07809 17.6...",National Park,actively managed,full,fully or highly protected
19492,555624307,555624307_A,,Pacífico Mexicano Profundo [Buffer Zone],315060.250000,,2018,MEX,"MULTIPOLYGON (((-94.70558 15.1973, -92.76017 1...",Biosphere Reserve,implemented,incompatible,less protected or unknown
19493,555624307,555624307_B,,Pacífico Mexicano Profundo [Core Zone],122083.090000,,2018,MEX,"MULTIPOLYGON (((-93.29993 13.95, -93.29993 13....",Biosphere Reserve,implemented,incompatible,less protected or unknown
19494,555624306,555624306_A,,Caribe Mexicano Buffer Zone,38722.240000,,2016,MEX,"MULTIPOLYGON (((-86.06299 18.48397, -86.0548 1...",Biosphere Reserve,actively managed,incompatible,less protected or unknown


In [40]:
mpaatlas_intermediate.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 815 entries, 0 to 814
Data columns (total 10 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   wdpa_id     724 non-null    object  
 1   designatio  815 non-null    object  
 2   name        815 non-null    object  
 3   location_i  814 non-null    object  
 4   establishm  815 non-null    object  
 5   protection  815 non-null    object  
 6   protecti_1  815 non-null    object  
 7   year        530 non-null    object  
 8   geometry    813 non-null    geometry
 9   WDPAID      724 non-null    Int64   
dtypes: Int64(1), geometry(1), object(8)
memory usage: 64.6+ KB


In [41]:
mpa_intermediate.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 18671 entries, 0 to 18670
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   WDPAID      18671 non-null  int32   
 1   WDPA_PID    18671 non-null  object  
 2   PA_DEF      18671 non-null  object  
 3   NAME        18671 non-null  object  
 4   GIS_M_AREA  18257 non-null  float64 
 5   STATUS      18671 non-null  object  
 6   STATUS_YR   18671 non-null  int64   
 7   PARENT_ISO  18671 non-null  object  
 8   geometry    18671 non-null  geometry
dtypes: float64(1), geometry(1), int32(1), int64(1), object(5)
memory usage: 1.2+ MB


In [13]:
mpa_intermediate

Unnamed: 0,WDPAID,WDPA_PID,PA_DEF,NAME,GIS_M_AREA,STATUS,STATUS_YR,PARENT_ISO,geometry
0,1,1,1,Diamond Reef and Salt Fish Tail Reef,14.636135,Designated,1973,ATG,"POLYGON ((-61.82494 17.18497, -61.82497 17.184..."
1,2,2,1,Palaster Reef,3.845623,Designated,1973,ATG,"POLYGON ((-61.74007 17.52001, -61.77174 17.526..."
2,27,27,1,Folkstone,9.989930,Designated,1980,BRB,"POLYGON ((-59.63212 13.1737, -59.63263 13.1687..."
3,46,46,1,Reserva Biológica Atol Das Rocas,353.837622,Designated,1979,BRA,"POLYGON ((-33.64137 -3.79099, -33.64151 -3.791..."
4,57,57,1,Parque Nacional Do Cabo Orange,2270.594697,Designated,1980,BRA,"POLYGON ((-50.85381 2.81364, -50.85443 2.81359..."
...,...,...,...,...,...,...,...,...,...
18666,555594046,555594046,1,Mashtan Island,,Designated,2002,BHR,"POLYGON ((4759827.90237 3149355.27177, 4759827..."
18667,313506,313506,1,Arad Bay,,Designated,2003,BHR,"POLYGON ((4743197.11699 3203279.17554, 4743197..."
18668,145813,145813,1,Hawar Island and the Surrounding Terretorial Sea,,Designated,1996,BHR,"POLYGON ((4771270.71072 3129994.93264, 4771269..."
18669,145812,145812,1,Tubli Bay,,Designated,2006,BHR,"POLYGON ((4738737.37964 3195543.87956, 4738737..."


In [24]:
mpa_intermediate['test'] = mpa_intermediate.WDPA_PID.str.extract(r"(\d+(?:_\d+)*)").astype(int)

In [29]:
mpa_intermediate[((mpa_intermediate.WDPAID == 555577419) )]

Unnamed: 0,WDPAID,WDPA_PID,PA_DEF,NAME,GIS_M_AREA,STATUS,STATUS_YR,PARENT_ISO,geometry,test
12674,555577419,555556881,1,Gifford,5852.016732,Designated,2018,AUS,"POLYGON ((159.65001 -27.12949, 159.65001 -27.1...",555556881


In [30]:
mpa_intermediate[(mpa_intermediate.WDPAID == mpa_intermediate.test)]
# [
#     (mpa_intermediate.WDPAID.astype(str) != mpa_intermediate.WDPA_PID)
#     | (mpa_intermediate.WDPA_PID.str.extract(r"(\d+(?:_\d+)*)").astype(int) != mpa_intermediate.WDPAID)
# ]

Unnamed: 0,WDPAID,WDPA_PID,PA_DEF,NAME,GIS_M_AREA,STATUS,STATUS_YR,PARENT_ISO,geometry,test
0,1,1,1,Diamond Reef and Salt Fish Tail Reef,14.636135,Designated,1973,ATG,"POLYGON ((-61.82494 17.18497, -61.82497 17.184...",1
1,2,2,1,Palaster Reef,3.845623,Designated,1973,ATG,"POLYGON ((-61.74007 17.52001, -61.77174 17.526...",2
2,27,27,1,Folkstone,9.989930,Designated,1980,BRB,"POLYGON ((-59.63212 13.1737, -59.63263 13.1687...",27
3,46,46,1,Reserva Biológica Atol Das Rocas,353.837622,Designated,1979,BRA,"POLYGON ((-33.64137 -3.79099, -33.64151 -3.791...",46
4,57,57,1,Parque Nacional Do Cabo Orange,2270.594697,Designated,1980,BRA,"POLYGON ((-50.85381 2.81364, -50.85443 2.81359...",57
...,...,...,...,...,...,...,...,...,...,...
18666,555594046,555594046,1,Mashtan Island,,Designated,2002,BHR,"POLYGON ((4759827.90237 3149355.27177, 4759827...",555594046
18667,313506,313506,1,Arad Bay,,Designated,2003,BHR,"POLYGON ((4743197.11699 3203279.17554, 4743197...",313506
18668,145813,145813,1,Hawar Island and the Surrounding Terretorial Sea,,Designated,1996,BHR,"POLYGON ((4771270.71072 3129994.93264, 4771269...",145813
18669,145812,145812,1,Tubli Bay,,Designated,2006,BHR,"POLYGON ((4738737.37964 3195543.87956, 4738737...",145812


In [None]:
test4 = mpa_intermediate.fillna(0).replace(
    {
        "PARENT_ISO": {
            "COK": "NZL",
            "IOT": "GBR",
            "NIU": "NZL",
            "SHN": "GBR",
            "SJM": "NOR",
            "UMI": "USA",
            "NCL": "FRA",
        }
    }
)
test4_final = test4.pipe(
    output,
    iso_column="PARENT_ISO",
    rep_d={
        "STATUS": {
            "Adopted": 4,
            "implemented": 6,
            "Established": 6,
            "Designated": 5,
            "Proposed": 3,
            "Inscribed": 3,
            "unknown": 1,
        },
        "PA_DEF": {"0": 2, "1": 1},
        "STATUS_YR": {0: pd.NA},
    },
    rename={
        "PARENT_ISO": "iso",
        "PA_DEF": "protection_status",
        "GIS_M_AREA": "area",
        "STATUS_YR": "year",
        "WDPA_PID": "wdpaid",
        "NAME": "name",
    },
    drop_cols=["geometry", "iso", "STATUS"],  # "WDPAID",
).astype({"year": "Int64"})

batch_export(test4_final, 5000, MPAsSchema, mpa_folder, "mpa_join_mpatlas_prot")

#### joined work table

In [None]:
## TODO: this whole pipe will change base on the new designs.

In [None]:
mpa_atlas_merge = mpa_atlas_table.pipe(separate_parent_iso, iso_column="location_i").replace(
    {
        "location_i": {
            "COK": "NZL",
            "IOT": "GBR",
            "NIU": "NZL",
            "SHN": "GBR",
            "SJM": "NOR",
            "UMI": "USA",
            "NCL": "FRA",
        }
    }
)

protectedseas_merge = protected_seas_table.pipe(separate_parent_iso, iso_column="iso").replace(
    {
        "iso": {
            "COK": "NZL",
            "IOT": "GBR",
            "NIU": "NZL",
            "SHN": "GBR",
            "SJM": "NOR",
            "UMI": "USA",
            "NCL": "FRA",
        }
    }
)

Final = (
    test4_final.assign(mpa=test4_final.index)
    .merge(
        mpa_atlas_merge[["establishm", "wdpa_id", "protection", "area_km2"]],
        left_on="wdpaid",
        right_on="wdpa_id",
        how="left",
    )
    .merge(
        protectedseas_merge[["site_id", "wdpa_id", "area_km2", "FPS_cat"]],
        left_on="wdpaid",
        right_on="wdpa_id",
        how="left",
    )
    .pipe(set_area)
    .pipe(filter_location)
    .drop_duplicates()
    .reset_index(drop=True)
)
Final[["area_km2_x", "area_km2_y", "area"]].bfill(axis=1)

Final_output = Final.pipe(
    output,
    iso_column=None,
    rep_d={
        "protection": {
            "full": 3,
            "light": 4,
            "incompatible": 5,
            "high": 6,
            "minimal": 7,
            "unknown": 8,
            "unknown/to be determined": 8,
        },
        "FPS_cat": {
            "highly": 1,
            "moderately": 2,
            "less": 3,
        },
        "establishm": {
            "actively managed": 4,
            "implemented": 6,
            "designated": 5,
            "proposed or committed": 3,
        },
    },
    rename={
        "establishm": "mpaa_establishment_stage",
        "protection": "mpaa_protection_level",
        "FPS_cat": "fishing_protection_level",
    },
    drop_cols=[
        "wdpaid",
        "wdpa_id_x",
        "wdpa_id_y",
        "area_km2_x",
        "area_km2_y",
        "protection_status",
        "name",
        "site_id",
        "year",
        "area",
    ],
).rename(columns={"area_km2": "area"})

batch_export(Final_output, 5000, MPAsTableStatsSchema, mpa_folder, "mpa_join_mpatlas_prot")

#### upload data to strapi

In [None]:
strapi.deleteCollectionData("mpa", list(range(1, 18914)))

In [None]:
strapi.importCollectionData(
    strapi_collection_mpas,
    output_file,
)