In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import logging
import sys
from pathlib import Path
import time
import pandas as pd
import geopandas as gpd
import numpy as np
import json

scripts_dir = Path("../..").joinpath("src")
if scripts_dir not in sys.path:
    sys.path.insert(0, scripts_dir.resolve().as_posix())

from helpers.strapi import Strapi
from helpers.settings import get_settings, Settings
from helpers.file_handler import FileConventionHandler
from helpers.utils import download_and_unzip_if_needed, writeReadGCP

from pipelines.output_schemas import (
    FPLSchema,
    ProtectionLevelSchema,
    MPAsSchema,
    HabitatsSchema,
    LocationSchema,
    ProtectedAreaExtentSchema,
)
from pipelines.processors import (
    add_envelope,
    add_location_iso,
    expand_multiple_locations,
    add_region_iso,
    calculate_eez_area,
    add_bbox,
    add_groups_and_members,
    add_location_name,
    output,
    clean_geometries,
    filter_by_exluding_propossed_mpas,
    spatial_join,
    process_mpa_data,
    assign_iso3,
    calculate_global_area,
    separate_parent_iso,
    calculate_stats_cov,
    coverage_stats,
    mpaatlas_filter_stablishment,
    process_mpaatlas_data,
    calculate_stats,
    fix_monaco,
    batch_export,
    calculate_area,
    define_is_child,
    set_child_id,
    add_child_parent_relationship,
    columns_to_lower,
    extract_wdpaid_mpaatlas,
    simplify_async,
    process_tpa_data,
)

logging.basicConfig(level=logging.DEBUG)
logging.getLogger("requests").setLevel(logging.WARNING)
logging.getLogger("urllib3").setLevel(logging.WARNING)
logging.getLogger("fiona").setLevel(logging.WARNING)

In [3]:
mysettings = get_settings()
prev_step = "preprocess"
current_step = "stats"

In [4]:
# Strapi setup
strapi = Strapi(url=mysettings.STRAPI_URL)
strapi.login(jwt=mysettings.STRAPI_JWT)

<helpers.strapi.Strapi at 0x7f913ef00680>

In [5]:
# after generated the locations file for the first time:
location_code = pd.read_csv(mysettings.DATA_DIR.joinpath("eez/processed/stats/locations_code.csv"))

### Locations (eez + regions)

In [6]:
pipe = "eez"
strapi_collection = ""
pipe_dir = FileConventionHandler(pipe)

output_file = pipe_dir.get_processed_step_path(current_step).joinpath("locations.json")

# Download the EEZ file && unzip it
download_and_unzip_if_needed(pipe_dir, prev_step, mysettings)

/home/mambauser/data/eez/processed/eez_preprocess.zip
/home/mambauser/data/eez/processed/preprocess


PosixPath('/home/mambauser/data/eez/processed/preprocess')

In [13]:
locations = (
    gpd.read_file(pipe_dir.get_step_fmt_file_path(prev_step, "shp"))
    .pipe(add_envelope)
    .pipe(add_location_iso)
    .pipe(expand_multiple_locations)
    .pipe(add_region_iso,'iso')
    .pipe(calculate_eez_area)
    .pipe(add_bbox)
    .pipe(add_groups_and_members)
    .pipe(add_location_name)
    .rename(
        columns={
            "iso": "code",
            "AREA_KM2": "totalMarineArea",
            "location_type": "type",
        }
    )
)

locations.drop(
    columns=list(
        set(locations.columns) - 
        set(["code", "name", "totalMarineArea", "type", "groups", "bounds", "id"])),
    inplace=True,
)

output_locations = {
    "version": 2,
    "data": {
        "api::location.location": LocationSchema(pd.DataFrame(locations)).to_dict(
            orient="index"
        )
    },
}
with open(output_file, "w") as f:
    json.dump(output_locations, f)

del output_locations

In [8]:
##
(locations[['id', 'code']]
 .to_csv(pipe_dir.get_processed_step_path(current_step)
     .joinpath('locations_code.csv'), index=False))

del locations

In [None]:
strapi.deleteCollectionData(strapi_collection, list(range(1, 300)))

In [None]:
strapi.importCollectionData(
    strapi_collection,
    output_file,
)

### Habitats

The habitat data came from:


In [12]:
pipe = "habitats"
strapi_collection = ''
pipe_dir = FileConventionHandler(pipe)
output_file = pipe_dir.get_step_fmt_file_path(current_step, "csv")

# Download the habitat file
# download_and_unzip_if_needed(pipe_dir, prev_step, mysettings)

habitats_intermediate = pd.read_csv(
    pipe_dir.processed_path.joinpath("habitats5.csv"), keep_default_na=False
)

In [13]:
habitat_stats = habitats_intermediate.pipe(
    output,
    iso_column="location_id",
    rep_d={
        "habitat_name": {
            "saltmarshes": 1,
            "seagrasses": 2,
            "warm-water corals": 3,
            "cold-water corals": 4,
            "mangroves": 5,
            "seamounts": 6,
        },
        "protected_area": {"": 0},
    },
    rename={
        "protected_area": "protectedArea",
        "total_area": "totalArea",
        "habitat_name": "habitat",
    },
    drop_cols=["location_id"],
)
HabitatsSchema(habitat_stats).to_csv(
    output_file.as_posix(), index=True
)

  df.replace(rep_d)


In [None]:
strapi.deleteCollectionData(strapi_collection, list(range(1, 300)))

In [None]:
strapi.importCollectionData(
    strapi_collection,
    output_file,
)

### Coverage stats - Mpas

We are going to use the intermediate data from eez, in order to create a dataset that can be used as a land mask.
The steps are:
1. Load eez
2. Spatial inner Join the eez dataset with the Mpas one
3. Assign the location iso
4. dissolve by location iso and cummulative year
5. calculate the area for global regions and eez countries
6. prepare the data to be ingested in strapi
7. upload the data to strapi

In [7]:
pipe = "mpa"
strapi_collection = ""

pipe_dir_eez = FileConventionHandler("eez")
pipe_dir_mpas = FileConventionHandler(pipe)
output_file = pipe_dir_mpas.get_processed_step_path(current_step).joinpath(
    "mpa_landmask_strapi.csv"
)

# Download the EEZ file && unzip it
download_and_unzip_if_needed(pipe_dir_eez, prev_step, mysettings)
# Download the mpas file && unzip it
download_and_unzip_if_needed(pipe_dir_mpas, prev_step, mysettings)

# Load the data
eez = gpd.read_file(pipe_dir_eez.get_step_fmt_file_path(prev_step, "shp")).pipe(clean_geometries)
mpas = gpd.read_file(pipe_dir_mpas.get_step_fmt_file_path(prev_step, "shp")).pipe(clean_geometries)

/home/mambauser/data/eez/processed/eez_preprocess.zip
/home/mambauser/data/eez/processed/preprocess
/home/mambauser/data/mpa/processed/mpa_preprocess.zip
/home/mambauser/data/mpa/processed/preprocess


In [8]:
eez_mpas_data_join = await spatial_join(eez, mpas.pipe(filter_by_exluding_propossed_mpas))

100%|██████████| 282/282 [07:28<00:00,  1.59s/it]

<class 'shapely.geometry.base.GeometrySequence'>
<class 'shapely.geometry.base.GeometrySequence'>





In [9]:
# To get an idea of the spatial join results
eez_mpas_data_join.pipe(add_location_iso).pipe(assign_iso3).to_file(
    pipe_dir_mpas.get_processed_step_path(current_step).joinpath("mpas_sjoin.shp"), driver="ESRI Shapefile"
)

INFO:pyogrio._io:Created 17,697 records


In [10]:
final_data = await process_mpa_data(
    eez_mpas_data_join.pipe(add_location_iso).pipe(assign_iso3),
    range(2011, time.localtime().tm_year + 1),
    ["PA_DEF", "iso_3"],
    {"protectedAreasCount": "sum"},
)

100%|██████████| 14/14 [03:59<00:00, 17.14s/it]


100%|█████████▉| 281/282 [00:19<00:02,  2.21s/it]

In [11]:
coverage = (
    final_data.pipe(calculate_global_area, ["year", "PA_DEF"], {"area": "sum"}, "iso_3")
    .pipe(separate_parent_iso, "iso_3")
    .pipe(add_region_iso, "iso_3")
    .replace(
        {
            "iso_3": {
                "ATA": "ABNJ",
                "COK": "NZL",
                "IOT": "GBR",
                "NIU": "NZL",
                "SHN": "GBR",
                "SJM": "NOR",
                "UMI": "USA",
                "NCL": "FRA",
                "GIB": "GBR",
            }
        }
    )
    .pipe(calculate_stats_cov, ["year", "PA_DEF"], "iso_3")
    .pipe(coverage_stats)
)

ProtectedAreaExtentSchema(
    coverage.pipe(
        output,
        "iso_3",
        {"PA_DEF": {"0": 2, "1": 1}},
        {"PARENT_NAME": "location", "PA_DEF": "protection_status"},
        ["area", "iso_3"],
    )
).to_csv(
    output_file,
    index=True,
)

In [6]:
strapi_collection = "protection-coverage-stat"

In [8]:
strapi.deleteCollectionData(strapi_collection, list(range(1, 2300)))

<helpers.strapi.Strapi at 0x7fda8ddb8860>

In [None]:
strapi.importCollectionData(
    strapi_collection,
    output_file,
)

### Mpa atlas - country stats Fully or highly protected

We are going to use the intermediate data from eez, in order to create a dataset that can be used as a land mask.
The steps are:
1. Load eez
2. Spatial inner Join the eez dataset with the Mpaatlas one
3. iso assign using the sovereign one provided by mpaatlas
4. dissolve by location
5. calculate the area for global regions and eez countries ussing mollwide projection
6. prepare the data to be ingested in strapi
7. upload the data to strapi

In [12]:
pipe = "mpaatlas"
strapi_collection = "mpaa-protection-level-stat"

pipe_dir_eez = FileConventionHandler("eez")
pipe_dir_mpaatlas = FileConventionHandler(pipe)
output_file = pipe_dir_mpaatlas.get_processed_step_path(current_step).joinpath(
    "mpaatlas_protection_level.csv"
)

# Download the EEZ file && unzip it
download_and_unzip_if_needed(pipe_dir_eez, prev_step, mysettings)
# Download the mpas file && unzip it
download_and_unzip_if_needed(pipe_dir_mpaatlas, prev_step, mysettings)

# Load the data
eez = gpd.read_file(pipe_dir_eez.get_step_fmt_file_path(prev_step, "shp")).pipe(clean_geometries)
mpaatlas_intermediate = gpd.read_file(
    pipe_dir_mpaatlas.get_step_fmt_file_path(prev_step, "shp")
).pipe(clean_geometries)

/home/mambauser/data/eez/processed/eez_preprocess.zip
/home/mambauser/data/eez/processed/preprocess
/home/mambauser/data/mpaatlas/processed/mpaatlas_preprocess.zip
/home/mambauser/data/mpaatlas/processed/preprocess


In [13]:
eez_mpaatlas_data_join = await spatial_join(
    eez, mpaatlas_intermediate.pipe(mpaatlas_filter_stablishment)
)

  0%|          | 0/282 [00:00<?, ?it/s]

<class 'shapely.geometry.base.GeometrySequence'>


100%|██████████| 282/282 [00:26<00:00,  3.35s/it]

100%|██████████| 282/282 [00:26<00:00, 10.47it/s]


<class 'shapely.geometry.base.GeometrySequence'>


In [40]:
# To get an idea of the spatial join results
# eez_mpaatlas_data_join.to_file(
#     pipe_dir_mpaatlas.get_processed_step_path(current_step).joinpath("mpaatlas_sjoin.shp"),
#     driver="ESRI Shapefile",
# )

In [14]:
eez_mpaatlas_data_join.dissolve(by=["protecti_1", "location_i"], aggfunc={"name": "count"}).reset_index().to_file(
pipe_dir_mpaatlas.get_processed_step_path(current_step).joinpath("mpaatlas_sjoin_dissolved.shp"),
driver="ESRI Shapefile",
)

INFO:pyogrio._io:Created 54 records


In [15]:
result = (
    eez_mpaatlas_data_join.pipe(process_mpaatlas_data)
    .pipe(calculate_global_area, gby_col=["protecti_1"], iso_column="location_i")
    .pipe(separate_parent_iso)
    .replace(
        {
            "location_i": {
                "COK": "NZL",
                "IOT": "GBR",
                "NIU": "NZL",
                "SHN": "GBR",
                "SJM": "NOR",
                "UMI": "USA",
                "NCL": "FRA",
            }
        }
    )
    .pipe(add_region_iso, iso_column="location_i")
    .pipe(calculate_stats, gby_col=["protecti_1"], iso_column="location_i")
    .pipe(fix_monaco, iso_column="location_i", area_column="area_km2")
    .pipe(
        output,
        iso_column="location_i",
        rep_d={
            "protecti_1": {
                "fully or highly protected": 1,
                "less protected or unknown": 2,
            }
        },
        rename={"protecti_1": "mpaa_protection_level", "area_km2": "area"},
        drop_cols=[],
    )
)

ProtectionLevelSchema(result[~result.location.isna()].assign(year=2023)).to_csv(
    output_file, index=True
)

In [10]:
strapi_collection = "mpaa-protection-level-stat"

In [11]:
strapi.deleteCollectionData(strapi_collection, list(range(1, 300)))

<helpers.strapi.Strapi at 0x7fda8ddb8860>

In [None]:
strapi.importCollectionData(
    strapi_collection,
    output_file,
)

### Protected seas  - fishing protection level

In [6]:
pipe = "protectedseas"
strapi_collection = "fishing-protection-level-stat"

pipe_dir = FileConventionHandler(pipe)
input_file = pipe_dir.get_processed_step_path(prev_step).joinpath("protectedseas_stats.xlsx")
output_file = pipe_dir.get_processed_step_path(current_step).joinpath("lfp.csv")

# Download the protected seas file && unzip it
writeReadGCP(
    credentials=mysettings.GCS_KEYFILE_JSON,
    bucket_name=mysettings.GCS_BUCKET,
    blob_name="vizzuality_processed_data/protectedseas/preprocess/protectedseas_stats.xlsx",
    file=input_file,
    operation="r",
)

# Load the data
protectedseas_intermediate = pd.read_excel(input_file)

DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token


In [52]:
protectedseas_intermediate[
    (
        protectedseas_intermediate.iso_ter.isna()
        & protectedseas_intermediate.includes_multi_jurisdictional_areas.eq(True)
    )
    | (
        protectedseas_intermediate.iso_ter.isna()
        & protectedseas_intermediate.includes_multi_jurisdictional_areas.eq(False)
        & ~protectedseas_intermediate.iso_sov.isin(
            protectedseas_intermediate[
                protectedseas_intermediate.includes_multi_jurisdictional_areas.eq(True)
            ].iso_sov.unique()
        )
    )
][protectedseas_intermediate.iso_sov.eq("ESP")]

  protectedseas_intermediate[


Unnamed: 0,iso_ter,iso_sov,includes_multi_jurisdictional_areas,lfp,area_sqkm,total_area,pct_total
320,,ESP,True,5,142.97301,1011023.776,0.014141
321,,ESP,True,4,1639.682076,1011023.776,0.16218
322,,ESP,True,3,214532.8498,1011023.776,21.219367
323,,ESP,True,2,15064.13277,1011023.776,1.489988
324,,ESP,True,1,779644.1388,1011023.776,77.114323


In [54]:
final = (
    protectedseas_intermediate[
        (
            protectedseas_intermediate.iso_ter.isna()
            & protectedseas_intermediate.includes_multi_jurisdictional_areas.eq(True)
        )
        | (
            protectedseas_intermediate.iso_ter.isna()
            & protectedseas_intermediate.includes_multi_jurisdictional_areas.eq(False)
            & ~protectedseas_intermediate.iso_sov.isin(
                protectedseas_intermediate[
                    protectedseas_intermediate.includes_multi_jurisdictional_areas.eq(True)
                ].iso_sov.unique()
            )
        )
    ].replace(
        {
            "lfp": {
                5: "highly",
                4: "highly",
                3: "moderately",
                2: "less",
                1: "less",
            },
        }
    ).groupby(["iso_sov", "lfp"]).agg({"area_sqkm": "sum", "total_area": "max"}).reset_index()
    .pipe(
        calculate_global_area,
        gby_col=["lfp"],
        iso_column="iso_sov",
        agg_ops={"area_sqkm": "sum", "total_area": "sum"},
    )
    .pipe(add_region_iso, iso_column="iso_sov")
    .pipe(
        calculate_stats,
        gby_col=["lfp"],
        ops={"area_sqkm": "sum", "total_area": "sum"},
        iso_column="iso_sov",
    )
    .pipe(lambda x: x.assign(pct=round((x.area_sqkm / x.total_area)*100, 2)))
    .pipe(
        output,
        iso_column="iso_sov",
        rep_d={
            "lfp": {
                "highly": 1,
                "moderately": 2,
                "less": 3,
            }
        },
        rename={"lfp": "fishing_protection_level", "area_sqkm": "area"},
        drop_cols=["iso_sov", "total_area"],
    )
)
FPLSchema(final[final.location.notna()]).to_csv(output_file, index=True)

  df.replace(rep_d)


In [55]:
strapi.deleteCollectionData(strapi_collection, list(range(1, 500)))

<helpers.strapi.Strapi at 0x7f2ecc13ba10>

In [56]:
strapi.importCollectionData(
    strapi_collection,
    output_file,
)

HTTPError: 401 Client Error: Unauthorized for url: https://30x30-dev.skytruth.org/cms/api/import-export-entries/content/import

### Country mpas detail table data

  1- lower case the columns   
2- separate location that its regime is in dispute or on join regime  
3- calcualte area for mpaatlas data  
4- rename columns for merge  
5- merge maaatlas and mpa data identifying the source  
6- identify child resources and set them as childs  
7- calculate bbox  
8- set child resources  
9- prepare output for batch export  
10- upload data to strapi  

In [6]:
pipe = "mpa"
strapi_collection_mpas = "mpa"

pipe_dir = FileConventionHandler(pipe)
pipe_dir_mpaatlas = FileConventionHandler("mpaatlas")
output_file_mpas = pipe_dir.get_processed_step_path(current_step).joinpath("mpa_detail.csv")

# Download the protected atlas file && unzip it
download_and_unzip_if_needed(pipe_dir, prev_step, mysettings)
# Download the mpaatlas file 
download_and_unzip_if_needed(pipe_dir_mpaatlas, prev_step, mysettings)

/home/mambauser/data/mpa/processed/mpa_preprocess.zip
/home/mambauser/data/mpa/processed/preprocess
/home/mambauser/data/mpaatlas/processed/mpaatlas_preprocess.zip
/home/mambauser/data/mpaatlas/processed/preprocess


PosixPath('/home/mambauser/data/mpaatlas/processed/preprocess')

In [17]:
# Load the data
mpa_intermediate = gpd.read_file(pipe_dir.get_step_fmt_file_path(prev_step, "shp")).pipe(
    clean_geometries
)
mpaatlas_intermediate = gpd.read_file(
    pipe_dir_mpaatlas.get_step_fmt_file_path(prev_step, "shp")
).pipe(clean_geometries)

In [18]:
mpa_intermediate.head(2)

Unnamed: 0,WDPAID,WDPA_PID,PA_DEF,NAME,DESIG_ENG,IUCN_CAT,STATUS,STATUS_YR,PARENT_ISO,GIS_M_AREA,geometry
0,33800,33800,1,Hoi Ha Wan,Site of Special Scientific Interest (SSSI),Not Reported,Designated,1989,CHN,,"POLYGON ((10914096.80917 2716480.57181, 109140..."
1,33801,33801,1,Hok Tsui (Cape d'Aguilar),Site of Special Scientific Interest (SSSI),Not Reported,Designated,1990,CHN,,"POLYGON ((10933980.32964 2718482.04387, 109339..."


In [19]:
init_table = (
    pd.concat(
        [
            (
                mpa_intermediate.pipe(columns_to_lower)
                .pipe(separate_parent_iso, iso_column="parent_iso")
                .rename(
                    columns={
                        "parent_iso": "iso",
                        "status_yr": "year",
                        "gis_m_area": "area_km2",
                    }
                ).drop(columns=['status'])
            ).assign(source="protected_planet"),
            (
                mpaatlas_intermediate.pipe(calculate_area)
                .pipe(extract_wdpaid_mpaatlas)
                .pipe(separate_parent_iso, iso_column="location_i")
                .rename(
                    columns={
                        "location_i": "iso",
                        "wdpa_id": "wdpa_pid",
                        "designatio": "desig_eng",
                    }
                )
            ).assign(source="mpaatlas"
            ).astype({"mpa_zone_i": "Int64"}),
        ],
        ignore_index=True,
    )
    .reset_index(drop=True)
    .replace(
        {
            "iso": {
                "COK": "NZL",
                "IOT": "GBR",
                "NIU": "NZL",
                "SHN": "GBR",
                "SJM": "NOR",
                "UMI": "USA",
                "NCL": "FRA",
            }
        }
    )
    .sort_values(by=["wdpa_pid", "wdpa_pid", "source"], ascending=[True, True, False])
)

In [20]:
#  to be run if things change a lot in the future
# iucn_cat = pd.DataFrame(
#     {"slug": init_table.iucn_cat.dropna().unique(), "name": init_table.iucn_cat.dropna().unique()},
#     index=pd.Index(np.arange(1, len(init_table.iucn_cat.dropna().unique()) + 1)),
# )
# iucn_cat.to_csv(pipe_dir.get_processed_step_path(current_step).joinpath("iucn_categories.csv"), index=True)

iucn_cat = pd.read_csv(
    pipe_dir.get_processed_step_path(current_step).joinpath("iucn_categories.csv"), index_col=0
)

In [21]:
mpa_table = (
    init_table.pipe(add_bbox, "bbox")
    .pipe(define_is_child)
    .pipe(set_child_id)
    .sort_values(by=["wdpaid", "is_child"], ascending=[True, True])
    .reset_index(drop=True)
    .pipe(
        output,
        iso_column="iso",
        rep_d={
            "status": {
                "Adopted": 4,
                "implemented": 6,
                "Established": 6,
                "Designated": 5,
                "Proposed": 3,
                "Inscribed": 3,
                "unknown": 1,
            },
            "pa_def": {"0": 2, "1": 1},
            "year": {0: pd.NA},
            "iucn_cat": dict(
                iucn_cat[["slug"]]
                .reset_index(drop=False)
                .iloc[:, [1, 0]]
                .to_dict(orient="tight")["data"]
            ),
            "source": {"protected_planet": 3, "mpaatlas": 1},
            "protection": {
                "full": 3,
                "light": 4,
                "incompatible": 5,
                "high": 6,
                "minimal": 7,
                "unknown": 8,
                "unknown/to be determined": 8,
            },
            "establishm": {
                "actively managed": 4,
                "implemented": 6,
                "designated": 5,
                "Designated": 5,
                "proposed or committed": 3,
                "Proposed": 3,
                "Inscribed": 3,
                "Established": 5,
                "Adopted": 5,
                "unknown": 1,
            },
        },
        rename={
            "pa_def": "protection_status",
            "area_km2": "area",
            "iucn_cat": "mpa_iucn_category",
            "desig_eng": "designation",
            "protection": "mpaa_protection_level",
            "establishm": "mpaa_establishment_stage",
            "source": "data_source",
        },
        drop_cols=["geometry", "iso", "protecti_1"],  # "WDPAID",
    )
    .pipe(add_child_parent_relationship)
    .astype(
        {
            "year": "Int32",
            "mpa_iucn_category": "Int64",
            "protection_status": "Int64",
        }
    )
    .sort_index()
)

In [22]:
mpa_table.to_csv(output_file_mpas, index=True)

In [80]:
# todo investigate the issue with area as null

In [25]:
batch_export(
    mpa_table[mpa_table.area.notna()],
    5000,
    MPAsSchema,
    pipe_dir.get_processed_step_path(current_step),
    "mpa_detail",
    format="json",
    strapi_colection=strapi_collection_mpas,
)

In [33]:
# # This code is to be able to identify groups that has wdpa_pid so in the future if needed we could combine the group geometries to generate a wdpa coverage geometry
# init_table[
#     (
#         init_table.sort_values(by=["wdpaid", "source"], ascending=[True, False])
#         .groupby("wdpaid")
#         .transform("size")
#         .gt(1)
#     )
#     & (init_table.wdpa_pid.str.extract(r"([A-Za-z]+)", expand=False).notna())
# ].groupby("wdpaid")
# .geometry.apply(lambda x: x.union_all())

#### upload data to strapi

In [7]:
strapi.deleteCollectionData("mpa", list(range(1, 20914)))

<helpers.strapi.Strapi at 0x7f913ef00680>

In [8]:
for i in range(0, 4):
    strapi.importCollectionData(
        strapi_collection_mpas,
        mpa_folder.joinpath(f"mpa_detail_{i}.csv"),
    )

NameError: name 'mpa_folder' is not defined

### Coverage stats - Tpas

We are going to use the intermediate data from eez, in order to create a dataset that can be used as a land mask.
The steps are:
1. Load eez
2. Spatial inner Join the eez dataset with the Mpas one
3. Assign the location iso
4. dissolve by location iso and cummulative year
5. calculate the area for global regions and eez countries
6. prepare the data to be ingested in strapi
7. upload the data to strapi

In [6]:
pipe = "mpa-terrestrial"
strapi_collection = ""

pipe_dir_gadm = FileConventionHandler("gadm")
pipe_dir_pas = FileConventionHandler(pipe)
output_file = pipe_dir_pas.get_processed_step_path(current_step).joinpath(
    "pa_landmask_strapi.csv"
)

# Download the EEZ file && unzip it
download_and_unzip_if_needed(pipe_dir_gadm, prev_step, mysettings)
# Download the mpas file && unzip it
download_and_unzip_if_needed(pipe_dir_pas, prev_step, mysettings)

# Load the data
gadm = gpd.read_file(pipe_dir_gadm.get_step_fmt_file_path(prev_step, "shp")).pipe(clean_geometries)
pas = gpd.read_file(pipe_dir_pas.get_step_fmt_file_path(prev_step, "shp")).pipe(clean_geometries)

/home/mambauser/data/gadm/processed/gadm_preprocess.zip
/home/mambauser/data/gadm/processed/preprocess
/home/mambauser/data/mpa-terrestrial/processed/mpa-terrestrial_preprocess.zip
/home/mambauser/data/mpa-terrestrial/processed/preprocess


In [8]:
gadm_simp = await simplify_async(gadm)

100%|██████████| 206/206 [02:20<00:00,  1.47it/s]


In [9]:
gadm_pas_data_join = await spatial_join(gadm_simp, pas.pipe(filter_by_exluding_propossed_mpas))

100%|██████████| 206/206 [7:47:52<00:00, 136.27s/it] 


 16%|█▌        | 32/206 [00:04<00:32,  5.29it/s]

<class 'shapely.geometry.base.GeometrySequence'>


 34%|███▍      | 71/206 [00:18<01:00,  2.24it/s]

<class 'shapely.geometry.base.GeometrySequence'>


 73%|███████▎  | 150/206 [03:14<07:39,  8.21s/it]

<class 'shapely.geometry.base.GeometrySequence'>


100%|██████████| 206/206 [7:47:52<00:00, 5051.81s/it]  

In [13]:
gadm_pas_data_join

Unnamed: 0,WDPAID,WDPA_PID,PA_DEF,NAME,DESIG_ENG,IUCN_CAT,MARINE,GIS_AREA,STATUS,STATUS_YR,PARENT_ISO,geometry,index_right,COUNTRY,GID_0,area_km2,COUNTRY_ES,COUNTRY_FR
0,17131.0,17131,1,Hamoun,Protected Area,V,0,3022.952813,Designated,1968.0,IRN,"POLYGON ((61.19578 31.44834, 61.36976 31.42576...",0.0,Afghanistan,AFG,644050.28,Afganistán,Afghanistan
1,17160.0,17160,1,"Hamun-e-Puzak, south end","Ramsar Site, Wetland of International Importance",Not Reported,0,172.551965,Designated,1975.0,IRN,"POLYGON ((61.75037 31.33179, 61.72363 31.35944...",0.0,Afghanistan,AFG,644050.28,Afganistán,Afghanistan
2,1118.0,1118,1,Dasht-i-Nawar,Waterfowl Sanctuary,IV,0,375.359815,Designated,2020.0,AFG,"POLYGON ((67.76059 33.78497, 67.75989 33.78315...",0.0,Afghanistan,AFG,644050.28,Afganistán,Afghanistan
3,15133.0,15133,1,Kol-i-Hashmat Khan,Waterfowl Sanctuary,IV,0,1.665554,Designated,2017.0,AFG,"MULTIPOLYGON (((69.20214 34.49681, 69.2028 34....",0.0,Afghanistan,AFG,644050.28,Afganistán,Afghanistan
4,555705308.0,555705308,1,Koh-e Baba (Shah Foladi),Protected Landscape,V,0,341.997539,Designated,2019.0,AFG,"POLYGON ((67.99935 34.66263, 67.9932 34.66009,...",0.0,Afghanistan,AFG,644050.28,Afganistán,Afghanistan
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
289347,2531.0,2531,1,Mana Pools,National Park,II,0,2134.271397,Designated,1975.0,ZWE,"POLYGON ((29.56479 -15.6758, 29.56611 -15.6757...",0.0,Zimbabwe,ZWE,391234.88,Zimbabue,Zimbabwe
289348,2526.0,2526,1,Sapi,Safari Area,VI,0,1200.644367,Designated,1975.0,ZWE,"POLYGON ((29.88011 -15.67272, 29.87637 -15.679...",0.0,Zimbabwe,ZWE,391234.88,Zimbabue,Zimbabwe
289349,62095.0,62095,1,Chiawa,Game Management Area,VI,0,2413.162703,Designated,1989.0,ZMB,"MULTIPOLYGON (((29.10233 -15.86402, 29.09482 -...",0.0,Zimbabwe,ZWE,391234.88,Zimbabue,Zimbabwe
289350,7962.0,7962,1,Lower Zambezi,National Park,II,0,4161.873753,Designated,1983.0,ZMB,"MULTIPOLYGON (((30.2016 -15.65147, 30.20137 -1...",0.0,Zimbabwe,ZWE,391234.88,Zimbabue,Zimbabwe


In [17]:
# To get an idea of the spatial join results
gadm_pas_data_join.pipe(assign_iso3, False).to_file(
    pipe_dir_pas.get_processed_step_path(current_step).joinpath("mpas_sjoin.shp"),
    driver="ESRI Shapefile",
)

INFO:pyogrio._io:Created 289,352 records


In [22]:
final_data = await process_tpa_data(
    gadm_pas_data_join.pipe(assign_iso3, False),
    range(2011, time.localtime().tm_year + 1),
    ["PA_DEF", "iso_3"],
    {"protectedAreasCount": "sum"},
)

  0%|          | 0/14 [00:00<?, ?it/s]

In [None]:
coverage = (
    final_data.pipe(calculate_global_area, ["year", "PA_DEF"], {"area": "sum"}, "iso_3")
    .pipe(separate_parent_iso, "iso_3")
    .pipe(add_region_iso, "iso_3")
    .replace(
        {
            "iso_3": {
                "ATA": "ABNJ",
                "COK": "NZL",
                "IOT": "GBR",
                "NIU": "NZL",
                "SHN": "GBR",
                "SJM": "NOR",
                "UMI": "USA",
                "NCL": "FRA",
                "GIB": "GBR",
            }
        }
    )
    .pipe(calculate_stats_cov, ["year", "PA_DEF"], "iso_3")
    .pipe(coverage_stats)
)

ProtectedAreaExtentSchema(
    coverage.pipe(
        output,
        "iso_3",
        {"PA_DEF": {"0": 2, "1": 1}},
        {"PARENT_NAME": "location", "PA_DEF": "protection_status"},
        ["area", "iso_3"],
    )
).to_csv(
    output_file,
    index=True,
)

In [None]:
strapi_collection = "protection-coverage-stat"

In [None]:
strapi.deleteCollectionData(strapi_collection, list(range(1, 2300)))

<helpers.strapi.Strapi at 0x7fda8ddb8860>

In [None]:
strapi.importCollectionData(
    strapi_collection,
    output_file,
)