### Set up

In [11]:
import os
from pathlib import Path
import geopandas as gpd
import pandas as pd

In [12]:
# Project root: the parent of the directory the notebook is run from
base = Path.cwd().parent

In [13]:
# Preview of the data folder path.
# NOTE(review): the folder is spelled 'protectedseas' here but 'ProtectedSeas'
# where path_in / path_out are defined — confirm which one exists on disk.
base / 'data' / 'protectedseas'

PosixPath('/home/mambauser/data/protectedseas')

In [19]:
# Raw inputs are read from, and processed outputs written to, the same folder
path_in = base / "data" / "ProtectedSeas"
path_out = base / "data" / "ProtectedSeas"

### Processing

In [21]:
# Load the shapefile that holds the site geometries
ps = gpd.read_file(
    (path_in / "ProtectedSeas_ProtectedSeas_06142023_shp_ProtectedSeas_06142023_shp.shp").as_posix()
)

In [22]:
# Load the csv that holds the attribute information for each site
protectedseas = pd.read_csv(path_in / "ProtectedSeas_ProtectedSeas_06142023.csv")

In [23]:
# Keep only rows in which wdpa_id is not null and is different from 0.
# The zero check is done numerically so the placeholder is dropped whether
# pandas parsed it as the string '0', the integer 0 or the float 0.0
# (comparing a numeric value against the string '0' never matches, so such
# rows would silently be kept). Ids that are not plain numbers are coerced
# to NaN by to_numeric and are therefore kept.
wdpa_numeric = pd.to_numeric(protectedseas['wdpa_id'], errors='coerce')
has_wdpa_id = protectedseas['wdpa_id'].notna() & wdpa_numeric.ne(0)
protectedseas = protectedseas[has_wdpa_id]

In [24]:
# Attach the csv attributes to the geometries. The inner join keeps only the
# sites that survived the wdpa_id filter applied to the csv.
ps_gdf = ps.merge(
    protectedseas,
    left_on='SITE_ID',
    right_on='site_id',
    how='inner',
)

In [25]:
# Restrict to the columns of interest and shorten the fishing-protection
# column name to 'FPS'
ps_gdf = ps_gdf[[
    'site_id', 'site_name', 'country', 'wdpa_id',
    'removal_of_marine_life_is_prohibited', 'total_area', 'geometry',
]].rename(columns={'removal_of_marine_life_is_prohibited': 'FPS'})

In [26]:
# ProtectedSeas only provides country names, not country codes. We need to add country codes to the dataframe.
# Lookup table: ProtectedSeas country name -> 3-letter country code.
# - Several overseas territories are mapped to the code of their parent state
#   (e.g. Gibraltar -> GBR, New Caledonia -> FRA).
# - Sites shared by several countries carry all codes separated by ';'.
# - Keys must match the raw 'country' values exactly, so misspelled / padded
#   variants are listed alongside the correct spelling (presumably because they
#   occur in the source data — confirm before removing them).
country_iso_dict = {
    'Antigua and Barbuda': 'ATG',
    'USA': 'USA',
    'Albania': 'ALB',
    'Netherlands Antilles': 'NLD',
    'United Arab Emirates': 'ARE',
    'Argentina': 'ARG',
    'France': 'FRA',
    'Australia': 'AUS',
    'Barbados': 'BRB',
    'Belgium': 'BEL',
    'Bangladesh': 'BGD',
    'Bulgaria': 'BGR',
    'Belize': 'BLZ',
    'Brazil': 'BRA',
    'Bahamas': 'BHS',
    'British Virgin Islands': 'GBR',
    'Canada': 'CAN',
    'Chile': 'CHL',
    'Cameroon': 'CMR',
    'Colombia': 'COL',
    'Comoros': 'COM',
    'Costa Rica': 'CRI',
    'Cuba': 'CUB',
    'Cyprus': 'CYP',
    'Germany': 'DEU',
    'Djibouti': 'DJI',
    # Misspelled variant of 'Djibouti'
    'Djbouti': 'DJI',
    'Dominica': 'DMA',
    'Denmark': 'DNK',
    'Dominican Republic': 'DOM',
    'Algeria': 'DZA',
    'Ecuador': 'ECU',
    'Egypt': 'EGY',
    'Spain': 'ESP',
    'Estonia': 'EST',
    'Finland': 'FIN',
    # Multi-country site: ';'-separated codes
    'France, Italy, Monaco': 'FRA;ITA;MCO',
    'French Antilles': 'FRA',
    'Gabon': 'GAB',
    'United Kingdom': 'GBR',
    'Grenada': 'GRD',
    'Ghana': 'GHA',
    'Gibraltar': 'GBR',
    'Guinea': 'GIN',
    'The Gambia': 'GMB',
    'Guinea Bissau': 'GNB',
    'Greece': 'GRC',
    'Guatemala': 'GTM',
    'French Guyana': 'FRA',
    'Honduras': 'HND',
    'Croatia': 'HRV',
    'Indonesia': 'IDN',
    # Variant of 'Indonesia' with a trailing space
    'Indonesia ': 'IDN',
    'India': 'IND',
    'Ireland': 'IRL',
    'Iceland': 'ISL',
    'Israel': 'ISR',
    'Italy': 'ITA',
    'Jamaica': 'JAM',
    'Jordan': 'JOR',
    'Japan': 'JPN',
    'Kenya': 'KEN',
    'Cambodia': 'KHM',
    'South Korea': 'KOR',
    'Cayman Islands': 'GBR',
    'Lebanon': 'LBN',
    'Liberia': 'LBR',
    'Saint Lucia': 'LCA',
    'Sri Lanka': 'LKA',
    'Lithuania': 'LTU',
    'Latvia': 'LVA',
    'Morocco': 'MAR',
    'Monaco': 'MCO',
    'Madagascar': 'MDG',
    'Republic of Maldives': 'MDV',
    'Malta': 'MLT',
    'Myanmar': 'MMR',
    'Mozambique': 'MOZ',
    'Mauritania': 'MRT',
    'Malaysia': 'MYS',
    'Namibia': 'NAM',
    'New Caledonia': 'FRA',
    'Niue': 'NIU',
    'The Netherlands': 'NLD',
    'Netherlands': 'NLD',
    'Norway': 'NOR',
    'New Zealand': 'NZL',
    'Panama': 'PAN',
    'British Overseas Territory - Pitcairn': 'GBR',
    'Peru': 'PER',
    'Philippines': 'PHL',
    'Republic of Palau': 'PLW',
    'Poland': 'POL',
    'Portugal': 'PRT',
    'Qatar': 'QAT',
    'Russia': 'RUS',
    'Senegal': 'SEN',
    'Saint Helena, Ascension and Tristan da Cunha Overseas Territory of the United Kingdom of Great Britain and Northern Ireland': 'GBR',
    # Same name as above with 'Territory' misspelled as 'Teritory'
    'Saint Helena, Ascension and Tristan da Cunha Overseas Teritory of the United Kingdom of Great Britain and Northern Ireland': 'GBR',
    'Solomon Islands': 'SLB',
    'El Salvador': 'SLV',
    'São Tomé and Príncipe': 'STP',
    'Suriname': 'SUR',
    'Slovenia': 'SVN',
    'Sweden': 'SWE',
    'Seychelles': 'SYC',
    'Turks and Caicos Islands': 'GBR',
    'Thailand': 'THA',
    'East Timor': 'TLS',
    'Tonga': 'TON',
    'Trinidad and Tobago': 'TTO',
    'Tunisia': 'TUN',
    'Tanzania': 'TZA',
    'Uruguay': 'URY',
    'Saint Vincent and the Grenadines': 'VCT',
    'Vietnam': 'VNM',
    'Yemen': 'YEM',
    'South Africa': 'ZAF',
    # Multi-country site: ';'-separated codes
    'USA; Haiti; Jamaica': 'USA;HTI;JAM',
}


In [27]:
def get_parent_iso(country):
    """Look up the country code(s) for a ProtectedSeas country name.

    Returns the value stored in ``country_iso_dict`` for ``country``
    (possibly several codes separated by ';'), or ``None`` when the name
    has no entry in the table.
    """
    return country_iso_dict.get(country)

# Derive the 'parent_iso' column from the country name of every site
ps_gdf['parent_iso'] = ps_gdf['country'].map(get_parent_iso)

In [28]:
# One row ends up without a parent_iso; give it the corresponding country code.
# NOTE(review): this assigns 'FRA' to *every* row that is still missing a code,
# so any country name absent from country_iso_dict would also become 'FRA' —
# confirm that only the expected row is affected.
ps_gdf['parent_iso'] = ps_gdf['parent_iso'].fillna('FRA')

In [29]:
# Collapse the FPS scores listed below into three protection categories
fps_classes = {
    1: 'less', 2: 'less',
    3: 'moderately',
    4: 'highly', 5: 'highly',
}

# Derive 'FPS_cat' from the score; scores with no entry in fps_classes
# (including missing values) become None
ps_gdf['FPS_cat'] = ps_gdf['FPS'].map(fps_classes.get)
ps_gdf.head(2)


Unnamed: 0,site_id,site_name,country,wdpa_id,FPS,total_area,geometry,parent_iso,FPS_cat
0,AIAG10,Low Bay Sanctuary,Antigua and Barbuda,555587197,5.0,48.321285,"POLYGON ((-61.91090 17.57960, -61.91096 17.579...",ATG,highly
1,AIAG11,Nelson's Dockyard National Park,Antigua and Barbuda,555587192,1.0,40.705369,"POLYGON ((-61.75807 17.03541, -61.73745 17.021...",ATG,less


In [33]:
# Check the final set of columns before exporting
ps_gdf.columns.to_numpy()

array(['site_id', 'site_name', 'country', 'wdpa_id', 'FPS', 'total_area',
       'geometry', 'parent_iso', 'FPS_cat'], dtype=object)

In [31]:
# Save the processed sites (attributes + geometry) as a shapefile
ps_gdf.to_file((path_out / "protectedseas.shp").as_posix())

### Global stats

In [16]:
# Total area per fishing protection level over all sites, tagged with the
# global location id. Computed on ps_gdf, which has one row per site.
global_area = (
    ps_gdf
    .groupby(['FPS_cat'], as_index=False)['total_area']
    .sum()
    .rename(columns={'FPS_cat': 'fishing_protection_level', 'total_area': 'area'})
    .assign(location_id='GLOB')
)

### Country and region stats

In [17]:
# Flag the rows whose 'parent_iso' holds several ';'-separated codes
mask = ps_gdf['parent_iso'].str.contains(';', na=False)

# Rows with a single code are kept unchanged
single_value_rows = ps_gdf[~mask]

# Multi-code rows are repeated once per code, each copy carrying one code
split_rows = (
    ps_gdf[mask]
    .assign(parent_iso=lambda rows: rows['parent_iso'].str.split(';'))
    .explode('parent_iso')
)

# Stack both parts: one row per (site, country code)
ps_iso = pd.concat([single_value_rows, split_rows], ignore_index=True)

In [18]:
# Regions from Region_ISO3_PP.txt (the region list used in the Protected Planet
# database): one dictionary per region with its code, name and member countries
regions_data = [
    {
        'region_iso': 'AS',
        'region_name': 'Asia & Pacific',
        'country_iso_3s': [
            "AFG", "ASM", "AUS", "BGD", "BRN", "BTN", "CCK", "CHN", "COK", "CXR", "FJI", "FSM", "GUM", "HKG", "IDN",
            "IND", "IOT", "IRN", "JPN", "KHM", "KIR", "KOR", "LAO", "LKA", "MAC", "MDV", "MHL", "MMR", "MNG", "MNP",
            "MYS", "NCL", "NFK", "NIU", "NPL", "NRU", "NZL", "PAK", "PCN", "PHL", "PLW", "PNG", "PRK", "PYF", "SGP",
            "SLB", "THA", "TKL", "TLS", "TON", "TUV", "TWN", "VNM", "VUT", "WLF", "WSM"
        ]
    },
    {
        'region_iso': 'AF',
        'region_name': 'Africa',
        'country_iso_3s': [
            "AGO", "BDI", "BEN", "BFA", "BWA", "CAF", "CIV", "CMR", "COD", "COG", "COM", "CPV", "DJI", "DZA", "EGY",
            "ERI", "ESH", "ETH", "GAB", "GHA", "GIN", "GMB", "GNB", "GNQ", "KEN", "LBR", "LBY", "LSO", "MAR", "MDG",
            "MLI", "MOZ", "MRT", "MUS", "MWI", "MYT", "NAM", "NER", "NGA", "REU", "RWA", "SDN", "SEN", "SHN", "SLE",
            "SOM", "SSD", "STP", "SWZ", "SYC", "TCD", "TGO", "TUN", "TZA", "UGA", "ZAF", "ZMB", "ZWE"
        ]
    },
    {
        'region_iso': 'EU',
        'region_name': 'Europe',
        'country_iso_3s': [
            "ALA", "ALB", "AND", "ARM", "AUT", "AZE", "BEL", "BGR", "BIH", "BLR", "CHE", "CYP", "CZE", "DEU", "DNK",
            "ESP", "EST", "FIN", "FRA", "FRO", "GBR", "GEO", "GGY", "GIB", "GRC", "HRV", "HUN", "IMN", "IRL", "ISL",
            "ISR", "ITA", "JEY", "KAZ", "KGZ", "LIE", "LTU", "LUX", "LVA", "MCO", "MDA", "MKD", "MLT", "MNE", "NLD",
            "NOR", "POL", "PRT", "ROU", "RUS", "SJM", "SMR", "SRB", "SVK", "SVN", "SWE", "TJK", "TKM", "TUR", "UKR",
            "UZB", "VAT"
        ]
    },
    {
        'region_iso': 'SA',
        'region_name': 'Latin America & Caribbean',
        'country_iso_3s': [
            "ABW", "AIA", "ARG", "ATG", "BES", "BHS", "BLM", "BLZ", "BMU", "BOL", "BRA", "BRB", "CHL", "COL", "CRI",
            "CUB", "CUW", "CYM", "DMA", "DOM", "ECU", "FLK", "GLP", "GRD", "GTM", "GUF", "GUY", "HND", "HTI", "JAM",
            "KNA", "LCA", "MAF", "MEX", "MSR", "MTQ", "NIC", "PAN", "PER", "PRI", "PRY", "SLV", "SUR", "SXM", "TCA",
            "TTO", "UMI", "URY", "VCT", "VEN", "VGB", "VIR"
        ]
    },
    {
        'region_iso': 'PO',
        'region_name': 'Polar',
        'country_iso_3s': [
            "ATF", "BVT", "GRL", "HMD", "SGS"
        ]
    },
    {
        'region_iso': 'NA',
        'region_name': 'North America',
        'country_iso_3s': [
            "CAN", "SPM", "USA"
        ]
    },
    {
        'region_iso': 'WA',
        'region_name': 'West Asia',
        'country_iso_3s': [
            "ARE", "BHR", "IRQ", "JOR", "KWT", "LBN", "OMN", "PSE", "QAT", "SAU", "SYR", "YEM"
        ]
    },
    {
        'region_iso': 'AT', # this region is not in the Protected Planet database
        'region_name': 'Antartica',
        'country_iso_3s': [
            "ATA"
        ]
    }
]

# Flatten the region list into a lookup: country code -> region code ('region_iso').
# If a country were listed under several regions, the last one would win.
country_to_region = {
    iso3: entry['region_iso']
    for entry in regions_data
    for iso3 in entry['country_iso_3s']
}

# Create a new column 'region' holding the region code of each row's country.
# Country codes that are not keys of country_to_region get NaN.
ps_iso['region'] = ps_iso['parent_iso'].map(country_to_region)

In [20]:
# Total area per country and fishing protection level. ps_iso has one row per
# (site, country), so a multi-country site adds its full total_area to each
# of its countries.
country_area = (
    ps_iso
    .groupby(['parent_iso', 'FPS_cat'], as_index=False)['total_area']
    .sum()
    .rename(columns={
        'parent_iso': 'location_id',
        'FPS_cat': 'fishing_protection_level',
        'total_area': 'area',
    })
)

In [21]:
# Total area per region and fishing protection level.
# ps_iso holds one row per (site, country), so a transboundary site whose
# countries fall in the same region (e.g. 'FRA;ITA;MCO', all in 'EU') appears
# several times with the same total_area. Keep a single row per site and
# region so its area is counted only once in each regional total.
# NOTE(review): assumes site_id uniquely identifies a site — confirm.
region_area = (
    ps_iso
    .drop_duplicates(subset=['site_id', 'region'])
    .groupby(['region', 'FPS_cat'], as_index=False)['total_area']
    .sum()
    .rename(columns={
        'region': 'location_id',
        'FPS_cat': 'fishing_protection_level',
        'total_area': 'area',
    })
)

In [22]:
# Stack the country, region and global tables into one coverage table
ps_coverage = pd.concat(
    (country_area, region_area, global_area),
    ignore_index=True,
)

In [23]:
# Write the combined coverage table to <path_out>/tables/fishing_protection_level.csv.
# path_out is a pathlib.Path, so the target is built with joinpath:
# `path_out + '/tables/...'` raises TypeError (Path does not support `+` with str).
tables_dir = path_out.joinpath('tables')
tables_dir.mkdir(parents=True, exist_ok=True)  # to_csv does not create missing folders
ps_coverage.to_csv(tables_dir.joinpath('fishing_protection_level.csv'), index=False)