### Set up

In [8]:
import os
from datetime import datetime

import geopandas as gpd
import pandas as pd

In [9]:
# Data directories used throughout the notebook. The default is the original
# author's local checkout; set the SKYTRUTH_DATA_DIR environment variable to
# point the notebook at another copy of the data without editing this cell.
# Both values stay plain strings because later cells build file names with "+".
data_dir = os.environ.get(
    "SKYTRUTH_DATA_DIR",
    "/Users/sofia/Documents/Repos/skytruth-30x30/data/data",
).rstrip("/")
path_in = data_dir + "/raw"  # input files read by this notebook
path_out = data_dir + "/processed"  # cleaned layer and summary tables are written here

### Read and prepare data

In [None]:
# Data source (MPAtlas zone GeoJSON API): https://guide.mpatlas.org/api/v1/zone/geojson/
# Kept as a comment: a bare URL is not valid Python and would stop "Run All".

In [35]:
# Load the MPAtlas zone assessment layer from the raw data folder
mpatlas_file = path_in + "/mpatlas_assess_zone.geojson"
mpatlas = gpd.read_file(mpatlas_file)

In [36]:
# Where 'wdpa_pid' is missing, fall back to the 'wdpa_id' of the same row
fallback_ids = mpatlas['wdpa_id']
mpatlas['wdpa_pid'] = mpatlas['wdpa_pid'].fillna(value=fallback_ids)

In [37]:
# Reclassification of the protection level into two groups
def map_protection_level(value):
    """Collapse a protection level into one of two reporting classes.

    Returns "fully or highly protected" when ``value`` is exactly "full" or
    "high"; every other value (including missing ones) is reported as
    "less protected or unknown".
    """
    is_strong = value in ("full", "high")
    return "fully or highly protected" if is_strong else "less protected or unknown"

# Derive 'protection_level' by reclassifying each 'protection_mpaguide_level' value
mpaguide_levels = mpatlas['protection_mpaguide_level']
mpatlas['protection_level'] = mpaguide_levels.apply(map_protection_level)

In [38]:
# Spell out the combined stage label: 'proposed/committed' -> 'proposed or committed'
stage_labels = {'proposed/committed': 'proposed or committed'}
mpatlas['establishment_stage'] = mpatlas['establishment_stage'].replace(stage_labels)

In [39]:
# Reduce 'proposed_date', 'designated_date' and 'implemented_date' to a year:
# keep the first four characters of each value and store them as nullable
# Int64 so that rows without a date stay <NA>.
for date_column in ['proposed_date', 'designated_date', 'implemented_date']:
    mpatlas[date_column] = mpatlas[date_column].str[:4].astype('Int64')

# 'year' is the most recent of the three years in each row
latest_year = mpatlas[['proposed_date', 'designated_date', 'implemented_date']].max(axis=1)

# Convert year to int to be able to save it later (Int64 not allowed).
# The filled values are assigned back explicitly: calling
# mpatlas['year'].fillna(0, inplace=True) acts on a temporary Series and does
# not update the frame when pandas Copy-on-Write is active, which would leave
# <NA> in place and make the int conversion fail.
mpatlas['year'] = latest_year.fillna(0).astype(int)

# 0 was only a placeholder for "no date"; turn it back into a missing value.
# NOTE(review): this presumably leaves 'year' as an object column mixing ints
# and NaT -- confirm this is what the GeoJSON export expects.
mpatlas['year'] = mpatlas['year'].replace(0, pd.NaT)


In [40]:
# Area in km2: reproject to ESRI:54009, take the geometry area and divide by
# 10**6, then reproject the layer to EPSG:4326.
area_crs = 'ESRI:54009'
output_crs = 'EPSG:4326'
area_divisor = 10**6

mpatlas.to_crs(area_crs, inplace=True)
projected_area = mpatlas['geometry'].area
mpatlas['area_km2'] = projected_area / area_divisor
mpatlas.to_crs(output_crs, inplace=True)

In [41]:
# Columns carried into the cleaned layer, in output order
kept_columns = [
    'wdpa_pid',
    'name',
    'designation',
    'sovereign',
    'area_km2',
    'establishment_stage',
    'protection_level',
    'year',
    'geometry',
]
# Names used downstream: 'sovereign' becomes 'location_id', 'wdpa_pid' becomes 'wdpa_id'
output_names = {'sovereign': 'location_id', 'wdpa_pid': 'wdpa_id'}
mpatlas2 = mpatlas[kept_columns].rename(columns=output_names)

# Write the cleaned layer as GeoJSON (to keep full names)
cleaned_file = path_out + "/mpatlas/mpatlas_assess_zone_cleaned.geojson"
mpatlas2.to_file(cleaned_file, driver='GeoJSON')

In [42]:
# Give every country of a multi-country area its own row. 'location_id' is
# split and exploded on ';' first and then on ':'; all other columns
# (including area_km2) are repeated on each resulting row.
mpatlas_iso = mpatlas2.copy()
for separator in (';', ':'):
    mpatlas_iso['location_id'] = mpatlas_iso['location_id'].str.split(separator)
    mpatlas_iso = mpatlas_iso.explode('location_id')

### Global stats

In [43]:
# Global total area for each protection level, tagged with the 'GLOB' location
# code and the year in which the notebook is run
prot_global = (
    mpatlas2
    .groupby('protection_level')
    .agg(area=('area_km2', 'sum'))
    .reset_index()
    .assign(location_id='GLOB', last_updated=datetime.now().year)
)

In [44]:
# Global total area for each establishment stage, tagged with the 'GLOB'
# location code and the year in which the notebook is run
stage_totals = mpatlas2.groupby(['establishment_stage'])[['area_km2']].sum()
stage_global = stage_totals.reset_index().rename(columns={'area_km2': 'area'})
stage_global = stage_global.assign(location_id='GLOB', last_updated=datetime.now().year)

### Country stats

In [45]:
# Total area per location and protection level, stamped with the current year
prot_iso = (
    mpatlas_iso
    .groupby(['location_id', 'protection_level'])
    .agg(area=('area_km2', 'sum'))
    .reset_index()
    .assign(last_updated=datetime.now().year)
)

In [47]:
# Total area per location and establishment stage, stamped with the current year
stage_by_location = mpatlas_iso.groupby(['location_id', 'establishment_stage'])[['area_km2']].sum()
stage_iso = stage_by_location.reset_index().rename(columns={'area_km2': 'area'})
stage_iso = stage_iso.assign(last_updated=datetime.now().year)

### Region stats

In [49]:
# Region lookup table transcribed from Region_ISO3_PP.txt (list of regions used
# in the Protected Planet database). One dictionary per region (seven in total):
#   'region_iso'     - short region code
#   'region_name'    - human-readable region name
#   'country_iso_3s' - three-letter country codes assigned to the region
regions_data = [
    {
        'region_iso': 'AS',
        'region_name': 'Asia & Pacific',
        'country_iso_3s': [
            "AFG", "ASM", "AUS", "BGD", "BRN", "BTN", "CCK", "CHN", "COK", "CXR", "FJI", "FSM", "GUM", "HKG", "IDN",
            "IND", "IOT", "IRN", "JPN", "KHM", "KIR", "KOR", "LAO", "LKA", "MAC", "MDV", "MHL", "MMR", "MNG", "MNP",
            "MYS", "NCL", "NFK", "NIU", "NPL", "NRU", "NZL", "PAK", "PCN", "PHL", "PLW", "PNG", "PRK", "PYF", "SGP",
            "SLB", "THA", "TKL", "TLS", "TON", "TUV", "TWN", "VNM", "VUT", "WLF", "WSM"
        ]
    },
    {
        'region_iso': 'AF',
        'region_name': 'Africa',
        'country_iso_3s': [
            "AGO", "BDI", "BEN", "BFA", "BWA", "CAF", "CIV", "CMR", "COD", "COG", "COM", "CPV", "DJI", "DZA", "EGY",
            "ERI", "ESH", "ETH", "GAB", "GHA", "GIN", "GMB", "GNB", "GNQ", "KEN", "LBR", "LBY", "LSO", "MAR", "MDG",
            "MLI", "MOZ", "MRT", "MUS", "MWI", "MYT", "NAM", "NER", "NGA", "REU", "RWA", "SDN", "SEN", "SHN", "SLE",
            "SOM", "SSD", "STP", "SWZ", "SYC", "TCD", "TGO", "TUN", "TZA", "UGA", "ZAF", "ZMB", "ZWE"
        ]
    },
    {
        'region_iso': 'EU',
        'region_name': 'Europe',
        'country_iso_3s': [
            "ALA", "ALB", "AND", "ARM", "AUT", "AZE", "BEL", "BGR", "BIH", "BLR", "CHE", "CYP", "CZE", "DEU", "DNK",
            "ESP", "EST", "FIN", "FRA", "FRO", "GBR", "GEO", "GGY", "GIB", "GRC", "HRV", "HUN", "IMN", "IRL", "ISL",
            "ISR", "ITA", "JEY", "KAZ", "KGZ", "LIE", "LTU", "LUX", "LVA", "MCO", "MDA", "MKD", "MLT", "MNE", "NLD",
            "NOR", "POL", "PRT", "ROU", "RUS", "SJM", "SMR", "SRB", "SVK", "SVN", "SWE", "TJK", "TKM", "TUR", "UKR",
            "UZB", "VAT"
        ]
    },
    {
        'region_iso': 'SA',
        'region_name': 'Latin America & Caribbean',
        'country_iso_3s': [
            "ABW", "AIA", "ARG", "ATG", "BES", "BHS", "BLM", "BLZ", "BMU", "BOL", "BRA", "BRB", "CHL", "COL", "CRI",
            "CUB", "CUW", "CYM", "DMA", "DOM", "ECU", "FLK", "GLP", "GRD", "GTM", "GUF", "GUY", "HND", "HTI", "JAM",
            "KNA", "LCA", "MAF", "MEX", "MSR", "MTQ", "NIC", "PAN", "PER", "PRI", "PRY", "SLV", "SUR", "SXM", "TCA",
            "TTO", "UMI", "URY", "VCT", "VEN", "VGB", "VIR"
        ]
    },
    {
        'region_iso': 'PO',
        'region_name': 'Polar',
        'country_iso_3s': [
            "ATF", "BVT", "GRL", "HMD", "SGS"
        ]
    },
    {
        # NOTE(review): this code is later written to CSV as a location_id; readers
        # such as pandas.read_csv treat the literal text 'NA' as a missing value by
        # default, so consumers may need keep_default_na=False -- confirm downstream.
        'region_iso': 'NA',
        'region_name': 'North America',
        'country_iso_3s': [
            "CAN", "SPM", "USA"
        ]
    },
    {
        'region_iso': 'WA',
        'region_name': 'West Asia',
        'country_iso_3s': [
            "ARE", "BHR", "IRQ", "JOR", "KWT", "LBN", "OMN", "PSE", "QAT", "SAU", "SYR", "YEM"
        ]
    }
]

# Flatten the region table into a lookup from country code to region code
# (the 'region_iso' value of the region that lists the country)
country_to_region = {
    country: region['region_iso']
    for region in regions_data
    for country in region['country_iso_3s']
}

# Tag each row of mpatlas_iso with the region of its location_id; codes that
# are not in the lookup get a missing value
region_codes = mpatlas_iso['location_id'].map(country_to_region)
mpatlas_iso['regions'] = region_codes

In [51]:
# Total area per region and protection level; the region code is reported in
# the 'location_id' column and the table is stamped with the current year
prot_region = (
    mpatlas_iso
    .groupby(['regions', 'protection_level'])
    .agg(area=('area_km2', 'sum'))
    .reset_index()
    .rename(columns={'regions': 'location_id'})
    .assign(last_updated=datetime.now().year)
)

In [53]:
# Total area per region and establishment stage; the region code is reported
# in the 'location_id' column and the table is stamped with the current year
stage_by_region = mpatlas_iso.groupby(['regions', 'establishment_stage'])[['area_km2']].sum()
stage_region = stage_by_region.reset_index().rename(columns={'area_km2': 'area', 'regions': 'location_id'})
stage_region = stage_region.assign(last_updated=datetime.now().year)

In [54]:
# Stack the country, global and region tables of each statistic into a single
# table, then write one CSV per statistic (without the index column)
prot = pd.concat([prot_iso, prot_global, prot_region], ignore_index=True)
stage = pd.concat([stage_iso, stage_global, stage_region], ignore_index=True)

csv_outputs = (
    (prot, "/tables/mpatlas_protection_level.csv"),
    (stage, "/tables/mpatlas_establishment_stage.csv"),
)
for table, relative_path in csv_outputs:
    table.to_csv(path_out + relative_path, index=False)