## Locations table for marine and terrestrial territories

This notebook gathers and organizes all the data needed to generate the locations table, encompassing both marine and terrestrial territories. Run it after intermediate.ipynb and before precalculations.ipynb.

### Set up

In [42]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [1]:
import logging
import sys
from pathlib import Path
import time
import pandas as pd
import geopandas as gpd
import numpy as np
import json
import dotenv

# Load variables from a local .env file (if one is found) into the process environment.
# NOTE(review): presumably consumed by get_settings() below — confirm.
dotenv.load_dotenv()

# Put the local `src` directory on the import path; the `helpers` and `pipelines`
# imports that follow rely on it.
scripts_dir = Path(".").joinpath("src")
# sys.path holds strings, so membership has to be tested with the resolved string.
# Testing the Path object itself never matches, which re-inserted the same entry
# every time the cell was executed.
_scripts_dir_str = scripts_dir.resolve().as_posix()
if _scripts_dir_str not in sys.path:
    sys.path.insert(0, _scripts_dir_str)

from helpers.strapi import Strapi
from helpers.settings import get_settings, Settings
from helpers.file_handler import FileConventionHandler
from helpers.utils import download_and_unzip_if_needed, writeReadGCP

from pipelines.output_schemas import (
    LocationSchema,
)
from pipelines.processors import (
    add_envelope,
    add_location_iso,
    expand_multiple_locations,
    add_region_iso,
    calculate_eez_area,
    add_bbox,
    add_groups_and_members,
    add_location_name,
    add_translations,
    calculate_gadm_area,
    map_and_generate_ids,
    drop_unnecessary_columns,
    combine_and_clean_columns,
    process_and_merge_commitments,
    set_index_and_sort,
)

# Root logger at DEBUG; the third-party loggers listed below are capped at WARNING.
logging.basicConfig(level=logging.DEBUG)
for _capped_logger in ("requests", "urllib3", "fiona"):
    logging.getLogger(_capped_logger).setLevel(logging.WARNING)

In [2]:
# Project settings object; its GCS_KEYFILE_JSON / GCS_BUCKET fields are used for the upload.
mysettings = get_settings()
# Pipeline step names: inputs are read from `prev_step`, the output is written
# under `current_step`.
prev_step, current_step = "preprocess", "stats"

### Create locations from EEZ and GADM

In [3]:
# One file-convention handler per source pipeline (EEZ and GADM).
pipe_eez, pipe_gadm = "eez", "gadm"
pipe_eez_dir = FileConventionHandler(pipe_eez)
pipe_gadm_dir = FileConventionHandler(pipe_gadm)

# The combined locations table is written under the GADM handler's `current_step` folder.
output_file = pipe_gadm_dir.get_processed_step_path(current_step) / "locations_all.json"

# Download and unzip the `prev_step` archive of each pipeline.
# The final call stays as the cell's trailing expression so its return value is displayed.
download_and_unzip_if_needed(pipe_eez_dir, prev_step, mysettings)
download_and_unzip_if_needed(pipe_gadm_dir, prev_step, mysettings)

/home/sofia/dev/skytruth-30x30/data/data/eez/processed/eez_preprocess.zip
/home/sofia/dev/skytruth-30x30/data/data/eez/processed/preprocess
/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/gadm_preprocess.zip
/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/preprocess


PosixPath('/home/sofia/dev/skytruth-30x30/data/data/gadm/processed/preprocess')

In [4]:
# Locate the country translations and national commitments inputs.
working_folder = FileConventionHandler(pipe_gadm)
input_path = working_folder.pipe_raw_path

# Remote blob names. The local copies keep only the file-name component and are
# placed in the handler's raw folder (`pipe_raw_path`).
translations_csv_url = "vizzuality_processed_data/gadm/preprocess/locations_translated.csv"
country_commitments_url = "30x30 National Commitments - MPAtlas Country Targets.csv"
translations_csv_output = input_path.joinpath(translations_csv_url.rsplit("/", 1)[-1])
country_commitments_output = input_path.joinpath(country_commitments_url.rsplit("/", 1)[-1])

# Download data from GCS if needed (disabled). The names used below are defined
# above, so the calls can simply be uncommented.
# writeReadGCP(
#     credentials=mysettings.GCS_KEYFILE_JSON,
#     bucket_name=mysettings.GCS_BUCKET,
#     blob_name=translations_csv_url,
#     file=translations_csv_output,
#     operation="r",
# )

# writeReadGCP(
#     credentials=mysettings.GCS_KEYFILE_JSON,
#     bucket_name=mysettings.GCS_BUCKET,
#     blob_name=country_commitments_url,
#     file=country_commitments_output,
#     operation="r",
# )



**Note:** GADM includes some extra ISO codes that had to be added to regions_data.json (provided by Protected Planet) in order to process the terrestrial stats: 'XCA' (Caspian Sea, included in the Asia & Pacific region), 'XKO' (Kosovo, included in the Europe region) and 'ZNC' (Northern Cyprus, included in the Europe region).

In [5]:
# Column renames applied to each source before the two tables are merged.
marine_renames = {
    "AREA_KM2": "total_marine_area",
    "location_type": "type",
    "bounds": "marine_bounds",
}
land_renames = {
    "AREA_KM2": "total_terrestrial_area",
    "location_type": "type",
    "bounds": "terrestrial_bounds",
}

# Columns kept in each intermediate table.
marine_columns = {
    "code", "name", "name_es", "name_fr",
    "total_marine_area", "marine_bounds", "type", "groups", "id",
}
land_columns = [
    "code", "name", "name_es", "name_fr",
    "total_terrestrial_area", "type", "groups", "terrestrial_bounds", "id",
]

# Marine locations: built from the EEZ shapefile of the previous step.
locations_marine = (
    gpd.read_file(pipe_eez_dir.get_step_fmt_file_path(prev_step, "shp"))
    .pipe(add_envelope)
    .pipe(add_location_iso)
    .pipe(expand_multiple_locations)
    .pipe(add_region_iso, "iso")
    .pipe(calculate_eez_area)
    .pipe(add_bbox)
    .pipe(add_groups_and_members, "marine")
    .pipe(add_location_name)
    .pipe(add_translations, translations_csv_output)
    .rename(columns=marine_renames)
    # Discard every column that is not part of the marine output.
    .pipe(
        lambda frame: frame.drop(
            columns=[col for col in frame.columns if col not in marine_columns]
        )
    )
    .reset_index(drop=True)
)

# Land locations: built from the GADM shapefile of the previous step.
# Its columns are first renamed to the names the shared processors are given
# for the marine table ("iso", "AREA_KM2").
locations_land = (
    gpd.read_file(pipe_gadm_dir.get_step_fmt_file_path(prev_step, "shp"))
    .rename(columns={"GID_0": "iso", "area_km2": "AREA_KM2"})
    .pipe(add_envelope)
    .pipe(add_region_iso, "iso")
    .pipe(calculate_gadm_area)
    .pipe(add_bbox)
    .pipe(add_groups_and_members, "land")
    .pipe(add_location_name)
    .pipe(add_translations, translations_csv_output)
    .rename(columns=land_renames)
    # The marine table is passed in when ids are generated for the land table.
    .pipe(map_and_generate_ids, locations_marine)
    .pipe(drop_unnecessary_columns, land_columns)
    .reset_index(drop=True)
)

# Outer-merge both tables on (code, id), then attach the marine national commitments.
combined_locations = (
    pd.merge(
        left=locations_marine,
        right=locations_land,
        how="outer",
        on=["code", "id"],
        suffixes=("_marine", "_land"),
    )
    .pipe(combine_and_clean_columns)
    .reset_index(drop=True)
    # header=1: column names are taken from the second row of the commitments CSV.
    .pipe(
        lambda merged: process_and_merge_commitments(
            merged, pd.read_csv(country_commitments_output, header=1)
        )
    )
    .pipe(set_index_and_sort)
)

combined_locations

Unnamed: 0_level_0,total_marine_area,id,marine_bounds,code,total_terrestrial_area,terrestrial_bounds,type,groups,name,name_es,name_fr,marine_target,marine_target_year
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1,361000000,1,"[-180.0, -85.5625, 180.0, 90.0]",GLOB,134954835,"[-180.0, -90.0, 180.0, 83.65833]",worldwide,[],Global,Global,Global,30,2030
2,212881389,2,"[-180.0, -76.80012, 180.0, 90.0]",ABNJ,0,,highseas,[],Areas Beyond National Jurisdiction,Áreas fuera de la jurisdicción nacional,Zones au-delà de la juridiction nationale,,
3,14878058,3,"[-28.84709, -50.31506, 75.85287, 38.80087]",AF,29993095,"[-25.3618, -34.83514, 63.50347, 37.55986]",region,[],Africa,África,Afrique,,
4,54088687,4,"[-180.0, -58.44947, 180.0, 47.73081]",AS,31625556,"[-180.0, -55.11694, 180.0, 53.56086]",region,[],Asia & Pacific,Asia y Pacífico,Asie et Pacifique,,
5,9618978,5,"[-180.0, -85.5625, 180.0, -57.18865]",AT,12088230,"[-180.0, -90.0, 180.0, -59.59375]",region,[],Antarctica,Antártida,Antarctique,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
207,0,207,,XCA,371055,"[46.75388, 36.5723, 54.04378, 47.01562]",country,[4],Caspian Sea,Mar Caspio,Mer Caspienne,,
208,0,208,,XKO,10819,"[19.97939, 41.84826, 21.79305, 43.24613]",country,[6],Kosovo,Kosovo,Kosovo,,
209,0,209,,ZMB,753990,"[21.98004, -18.07918, 33.71244, -8.27198]",country,[3],Zambia,Zambia,Zambie,,
210,0,210,,ZNC,3314,"[32.602, 35.00272, 34.60792, 35.71208]",country,[6],Northern Cyprus,Chipre del Norte,Chypre du Nord,,


In [6]:
# Pass the table through LocationSchema and key the records by the frame's index.
_location_records = LocationSchema(pd.DataFrame(combined_locations)).to_dict(orient="index")

# Final JSON payload: a version tag plus the records under the location key.
output_locations_combined = {
    "version": 2,
    "data": {"api::location.location": _location_records},
}

# Write the payload to `output_file` (stored in the gadm folder).
with output_file.open("w") as json_file:
    json.dump(output_locations_combined, json_file)

# Release the payload and the intermediate records once they are on disk.
del output_locations_combined, _location_records

In [7]:
# Save the id -> code lookup (with `id` exposed as `location`) in data_commons/data.
_locations_code = combined_locations[["id", "code"]].rename(columns={"id": "location"})
_locations_code.to_csv(scripts_dir / "data_commons/data/locations_code.csv", index=False)

In [17]:
# Upload both generated files to the bucket.
# NOTE(review): the remote blob is named `location_code.csv` while the local file is
# `locations_code.csv` — confirm the difference is intentional.
remote_path_code = 'vizzuality_processed_data/strapi_tables/location_code.csv'
remote_path_table = 'vizzuality_processed_data/strapi_tables/locations.json'

# Arguments shared by both uploads ("w" = write to the bucket).
_gcs_write_kwargs = {
    "credentials": mysettings.GCS_KEYFILE_JSON,
    "bucket_name": mysettings.GCS_BUCKET,
    "operation": "w",
}

# Locations code lookup (CSV).
writeReadGCP(
    blob_name=remote_path_code,
    file=scripts_dir.joinpath('data_commons/data/locations_code.csv'),
    **_gcs_write_kwargs,
)

# Full locations table (JSON).
writeReadGCP(
    blob_name=remote_path_table,
    file=output_file,
    **_gcs_write_kwargs,
)

DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token
DEBUG:google.auth.transport.requests:Making request: POST https://oauth2.googleapis.com/token
