### Set up

In [1]:
import geopandas as gpd
import pandas as pd

In [2]:
path_in = "/Users/sofia/Documents/Repos/skytruth-30x30/data/data/raw/"
path_out = "/Users/sofia/Documents/Repos/skytruth-30x30/data/data/processed/"

### Processing

In [3]:
# Import shp containing geometries
ps = gpd.read_file(path_in + "ProtectedSeas/ProtectedSeas_ProtectedSeas_06142023_shp_ProtectedSeas_06142023_shp.shp")
ps.head(2)

Unnamed: 0,SITE_ID,SITE_NAME,geometry
0,AIAG1,Antigua & Barbuda EEZ (0-200NM),"POLYGON ((-58.87762 19.58008, -58.82167 19.502..."
1,AIAG10,Low Bay Sanctuary,"POLYGON ((-61.91090 17.57960, -61.91096 17.579..."


In [4]:
# Import csv containing information
protectedseas = pd.read_csv(path_in + "ProtectedSeas/ProtectedSeas_ProtectedSeas_06142023.csv")
protectedseas.columns

Index(['site_id', 'site_name', 'url', 'country', 'state', 'managing_authority',
       'designation', 'purpose', 'restrictions', 'allowed', 'season',
       'effective_from', 'effective_to', 'report_violations', 'latest_updates',
       'protection_focus', 'species_of_concern',
       'removal_of_marine_life_is_prohibited', 'entry_prohibited',
       'speed_restricted', 'discharge_prohibited', 'diving_prohibited',
       'removal_of_historic_artifacts_prohibited', 'stopping_prohibited',
       'anchoring_prohibited', 'landing_prohibited', 'dragging_prohibited',
       'dredging_prohibited', 'industrial_or_mineral_exploration_prohibited',
       'construction_prohibited', 'drilling_prohibited',
       'overflight_or_drones_prohibited', 'tribal', 'bottom_trawl',
       'gillnetting', 'hook_n_line', 'trolling', 'nets', 'traps_n_pots',
       'spear_fishing', 'longlining', 'misc_gear', 'recreational_restrictions',
       'commercial_restrictions', 'wdpa_id', 'iucn_cat', 'year_est',
       

In [5]:
protectedseas.shape

(21197, 53)

In [6]:
# Keep only rows in which wdpa_id is not null and it's different than 0
protectedseas = protectedseas[protectedseas['wdpa_id'].notna()]
protectedseas = protectedseas[protectedseas['wdpa_id']!= '0']
protectedseas.shape

(10879, 53)

In [7]:
# Join csv with shapefile and keep only wdpa geometries
ps_gdf = ps.merge(protectedseas, how='inner', left_on='SITE_ID', right_on='site_id')
ps_gdf.shape

(10879, 56)

In [8]:
ps_gdf = ps_gdf[['site_id','site_name', 'country', 'wdpa_id', 'removal_of_marine_life_is_prohibited','total_area','geometry']]
ps_gdf = ps_gdf.rename(columns={'removal_of_marine_life_is_prohibited':'FPS'})
ps_gdf.head(5)

Unnamed: 0,site_id,site_name,country,wdpa_id,FPS,total_area,geometry
0,AIAG10,Low Bay Sanctuary,Antigua and Barbuda,555587197,5.0,48.321285,"POLYGON ((-61.91090 17.57960, -61.91096 17.579..."
1,AIAG11,Nelson's Dockyard National Park,Antigua and Barbuda,555587192,1.0,40.705369,"POLYGON ((-61.75807 17.03541, -61.73745 17.021..."
2,AIAG13,Palastar Reef Sanctuary,Antigua and Barbuda,555587195,5.0,22.754514,"POLYGON ((-61.73350 17.52440, -61.78360 17.497..."
3,AIAG14,Palaster Reef Marine National Park,Antigua and Barbuda,2,1.0,3.207554,"POLYGON ((-61.74275 17.51737, -61.77440 17.523..."
4,AIAG16,Two Foot Bay Sanctuary,Antigua and Barbuda,555587198,5.0,47.9332,"POLYGON ((-61.70064 17.66752, -61.70039 17.667..."


In [9]:
# Save txt file with unique names for countries
countries = ps_gdf['country'].unique()
countries

array(['Antigua and Barbuda', 'USA', 'Albania', 'Netherlands Antilles',
       'United Arab Emirates', 'Argentina', 'France', 'Australia',
       'Barbados', 'Belgium', 'Bangladesh', 'Bulgaria', 'Belize',
       'Brazil', 'Bahamas', 'British Virgin Islands', 'Canada', 'Chile',
       'Cameroon', 'Colombia', 'Comoros', 'Costa Rica', 'Cuba', 'Cyprus',
       'Germany', 'Djbouti', 'Dominica', 'Denmark', 'Dominican Republic',
       'Algeria', 'Ecuador', 'Egypt', 'Spain', 'Estonia', 'Finland',
       'France, Italy, Monaco', 'French Antilles', nan, 'Gabon',
       'United Kingdom', 'Grenada', 'Ghana', 'Gibraltar', 'Guinea',
       'The Gambia', 'Guinea Bissau', 'Greece', 'Guatemala',
       'French Guyana', 'Honduras', 'Croatia', 'Indonesia', 'Indonesia ',
       'India', 'Ireland', 'Iceland', 'Israel', 'Italy', 'Jamaica',
       'Jordan', 'Japan', 'Kenya', 'Cambodia', 'South Korea',
       'Cayman Islands', 'Lebanon', 'Liberia', 'Saint Lucia', 'Sri Lanka',
       'Lithuania', 'Latvia', 'M

In [18]:
country_iso_dict = {
    'Antigua and Barbuda': 'ATG',
    'USA': 'USA',
    'Albania': 'ALB',
    'Netherlands Antilles': 'NLD',
    'United Arab Emirates': 'ARE',
    'Argentina': 'ARG',
    'France': 'FRA',
    'Australia': 'AUS',
    'Barbados': 'BRB',
    'Belgium': 'BEL',
    'Bangladesh': 'BGD',
    'Bulgaria': 'BGR',
    'Belize': 'BLZ',
    'Brazil': 'BRA',
    'Bahamas': 'BHS',
    'British Virgin Islands': 'GBR',
    'Canada': 'CAN',
    'Chile': 'CHL',
    'Cameroon': 'CMR',
    'Colombia': 'COL',
    'Comoros': 'COM',
    'Costa Rica': 'CRI',
    'Cuba': 'CUB',
    'Cyprus': 'CYP',
    'Germany': 'DEU',
    'Djibouti': 'DJI',
    'Djbouti': 'DJI',
    'Dominica': 'DMA',
    'Denmark': 'DNK',
    'Dominican Republic': 'DOM',
    'Algeria': 'DZA',
    'Ecuador': 'ECU',
    'Egypt': 'EGY',
    'Spain': 'ESP',
    'Estonia': 'EST',
    'Finland': 'FIN',
    'France, Italy, Monaco': 'FRA;ITA;MCO',
    'French Antilles': 'FRA',
    'Gabon': 'GAB',
    'United Kingdom': 'GBR',
    'Grenada': 'GRD',
    'Ghana': 'GHA',
    'Gibraltar': 'GBR',
    'Guinea': 'GIN',
    'The Gambia': 'GMB',
    'Guinea Bissau': 'GNB',
    'Greece': 'GRC',
    'Guatemala': 'GTM',
    'French Guyana': 'FRA',
    'Honduras': 'HND',
    'Croatia': 'HRV',
    'Indonesia': 'IDN',
    'Indonesia ': 'IDN',
    'India': 'IND',
    'Ireland': 'IRL',
    'Iceland': 'ISL',
    'Israel': 'ISR',
    'Italy': 'ITA',
    'Jamaica': 'JAM',
    'Jordan': 'JOR',
    'Japan': 'JPN',
    'Kenya': 'KEN',
    'Cambodia': 'KHM',
    'South Korea': 'KOR',
    'Cayman Islands': 'GBR',
    'Lebanon': 'LBN',
    'Liberia': 'LBR',
    'Saint Lucia': 'LCA',
    'Sri Lanka': 'LKA',
    'Lithuania': 'LTU',
    'Latvia': 'LVA',
    'Morocco': 'MAR',
    'Monaco': 'MCO',
    'Madagascar': 'MDG',
    'Republic of Maldives': 'MDV',
    'Malta': 'MLT',
    'Myanmar': 'MMR',
    'Mozambique': 'MOZ',
    'Mauritania': 'MRT',
    'Malaysia': 'MYS',
    'Namibia': 'NAM',
    'New Caledonia': 'FRA',
    'Niue': 'NIU',
    'The Netherlands': 'NLD',
    'Netherlands': 'NLD',
    'Norway': 'NOR',
    'New Zealand': 'NZL',
    'Panama': 'PAN',
    'British Overseas Territory - Pitcairn': 'GBR',
    'Peru': 'PER',
    'Philippines': 'PHL',
    'Republic of Palau': 'PLW',
    'Poland': 'POL',
    'Portugal': 'PRT',
    'Qatar': 'QAT',
    'Russia': 'RUS',
    'Senegal': 'SEN',
    'Saint Helena, Ascension and Tristan da Cunha Overseas Territory of the United Kingdom of Great Britain and Northern Ireland': 'GBR',
    'Saint Helena, Ascension and Tristan da Cunha Overseas Teritory of the United Kingdom of Great Britain and Northern Ireland': 'GBR',
    'Solomon Islands': 'SLB',
    'El Salvador': 'SLV',
    'São Tomé and Príncipe': 'STP',
    'Suriname': 'SUR',
    'Slovenia': 'SVN',
    'Sweden': 'SWE',
    'Seychelles': 'SYC',
    'Turks and Caicos Islands': 'GBR',
    'Thailand': 'THA',
    'East Timor': 'TLS',
    'Tonga': 'TON',
    'Trinidad and Tobago': 'TTO',
    'Tunisia': 'TUN',
    'Tanzania': 'TZA',
    'Uruguay': 'URY',
    'Saint Vincent and the Grenadines': 'VCT',
    'Vietnam': 'VNM',
    'Yemen': 'YEM',
    'South Africa': 'ZAF',
    'USA; Haiti; Jamaica': 'USA;HTI;JAM',
}


In [19]:
def get_parent_iso(country):
    return country_iso_dict.get(country, None)

# Apply the function to create the 'PARENT_ISO' column
ps_gdf['parent_iso'] = ps_gdf['country'].apply(get_parent_iso)

In [20]:
ps_gdf.shape

(10879, 8)

In [21]:
ps_gdf[ps_gdf['parent_iso'].isna()]

Unnamed: 0,site_id,site_name,country,wdpa_id,FPS,total_area,geometry,parent_iso
3372,AIFRCAR91,pointe Pasquereau - pointe Saint-Vaast,,392107,2.0,1.642913,"MULTIPOLYGON (((-61.59887 16.27728, -61.59861 ...",


In [22]:
ps_gdf['parent_iso'][ps_gdf['parent_iso'].isna()] = 'FRA'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ps_gdf['parent_iso'][ps_gdf['parent_iso'].isna()] = 'FRA'


In [23]:
len(ps_gdf['country'][ps_gdf['parent_iso'].isna()])

0

In [24]:
ps_gdf[ps_gdf['parent_iso'].str.contains(';')]

Unnamed: 0,site_id,site_name,country,wdpa_id,FPS,total_area,geometry,parent_iso
3246,AIFRA3,Pelagos / Pelagos Sanctuary For The Conservati...,"France, Italy, Monaco",365015,1.0,87830.341648,"POLYGON ((8.76729 44.42728, 8.77066 44.42690, ...",FRA;ITA;MCO
10598,NWR184,Navassa Island National Wildlife Refuge,USA; Haiti; Jamaica,555608120,5.0,1473.656765,"POLYGON ((-75.00384 18.20744, -75.01645 18.207...",USA;HTI;JAM


In [25]:
# Create a mask for rows with multiple values in 'iso_code'
mask = ps_gdf['parent_iso'].str.contains(';', na=False)

# Split the 'iso_code' values and create separate rows only for rows with multiple values
split_rows = ps_gdf[mask].copy()
split_rows['parent_iso'] = split_rows['parent_iso'].str.split(';')
split_rows = split_rows.explode('parent_iso')

# Keep rows with single values in 'iso_code'
single_value_rows = ps_gdf[~mask]

# Concatenate the exploded rows with the single value rows
ps_new = pd.concat([single_value_rows, split_rows], ignore_index=True)

ps_new.shape

(10883, 8)

In [26]:
ps_new[ps_new['parent_iso'].str.contains(';')]

Unnamed: 0,site_id,site_name,country,wdpa_id,FPS,total_area,geometry,parent_iso


In [27]:
ps_new[ps_new['country']=='USA; Haiti; Jamaica']

Unnamed: 0,site_id,site_name,country,wdpa_id,FPS,total_area,geometry,parent_iso
10880,NWR184,Navassa Island National Wildlife Refuge,USA; Haiti; Jamaica,555608120,5.0,1473.656765,"POLYGON ((-75.00384 18.20744, -75.01645 18.207...",USA
10881,NWR184,Navassa Island National Wildlife Refuge,USA; Haiti; Jamaica,555608120,5.0,1473.656765,"POLYGON ((-75.00384 18.20744, -75.01645 18.207...",HTI
10882,NWR184,Navassa Island National Wildlife Refuge,USA; Haiti; Jamaica,555608120,5.0,1473.656765,"POLYGON ((-75.00384 18.20744, -75.01645 18.207...",JAM


In [28]:
# Reclassify FPS values
fps_classes = {
    1: 'Less',
    2: 'Less',
    3: 'Moderately',
    4: 'Highly',
    5: 'Highly'
}

# Create a new column 'FPS_cat' based on the mapping
ps_new['FPS_cat'] = ps_new['FPS'].apply(lambda x: fps_classes.get(x, None))
ps_new.head(2)


Unnamed: 0,site_id,site_name,country,wdpa_id,FPS,total_area,geometry,parent_iso,FPS_cat
0,AIAG10,Low Bay Sanctuary,Antigua and Barbuda,555587197,5.0,48.321285,"POLYGON ((-61.91090 17.57960, -61.91096 17.579...",ATG,Highly
1,AIAG11,Nelson's Dockyard National Park,Antigua and Barbuda,555587192,1.0,40.705369,"POLYGON ((-61.75807 17.03541, -61.73745 17.021...",ATG,Less


In [29]:
# List of dictionaries for data in Region_ISO3_PP.txt (list of regions used in the Protected Planet database)
regions_data = [
    {
        'region_iso': 'AS',
        'region_name': 'Asia & Pacific',
        'country_iso_3s': [
            "AFG", "ASM", "AUS", "BGD", "BRN", "BTN", "CCK", "CHN", "COK", "CXR", "FJI", "FSM", "GUM", "HKG", "IDN",
            "IND", "IOT", "IRN", "JPN", "KHM", "KIR", "KOR", "LAO", "LKA", "MAC", "MDV", "MHL", "MMR", "MNG", "MNP",
            "MYS", "NCL", "NFK", "NIU", "NPL", "NRU", "NZL", "PAK", "PCN", "PHL", "PLW", "PNG", "PRK", "PYF", "SGP",
            "SLB", "THA", "TKL", "TLS", "TON", "TUV", "TWN", "VNM", "VUT", "WLF", "WSM"
        ]
    },
    {
        'region_iso': 'AF',
        'region_name': 'Africa',
        'country_iso_3s': [
            "AGO", "BDI", "BEN", "BFA", "BWA", "CAF", "CIV", "CMR", "COD", "COG", "COM", "CPV", "DJI", "DZA", "EGY",
            "ERI", "ESH", "ETH", "GAB", "GHA", "GIN", "GMB", "GNB", "GNQ", "KEN", "LBR", "LBY", "LSO", "MAR", "MDG",
            "MLI", "MOZ", "MRT", "MUS", "MWI", "MYT", "NAM", "NER", "NGA", "REU", "RWA", "SDN", "SEN", "SHN", "SLE",
            "SOM", "SSD", "STP", "SWZ", "SYC", "TCD", "TGO", "TUN", "TZA", "UGA", "ZAF", "ZMB", "ZWE"
        ]
    },
    {
        'region_iso': 'EU',
        'region_name': 'Europe',
        'country_iso_3s': [
            "ALA", "ALB", "AND", "ARM", "AUT", "AZE", "BEL", "BGR", "BIH", "BLR", "CHE", "CYP", "CZE", "DEU", "DNK",
            "ESP", "EST", "FIN", "FRA", "FRO", "GBR", "GEO", "GGY", "GIB", "GRC", "HRV", "HUN", "IMN", "IRL", "ISL",
            "ISR", "ITA", "JEY", "KAZ", "KGZ", "LIE", "LTU", "LUX", "LVA", "MCO", "MDA", "MKD", "MLT", "MNE", "NLD",
            "NOR", "POL", "PRT", "ROU", "RUS", "SJM", "SMR", "SRB", "SVK", "SVN", "SWE", "TJK", "TKM", "TUR", "UKR",
            "UZB", "VAT"
        ]
    },
    {
        'region_iso': 'SA',
        'region_name': 'Latin America & Caribbean',
        'country_iso_3s': [
            "ABW", "AIA", "ARG", "ATG", "BES", "BHS", "BLM", "BLZ", "BMU", "BOL", "BRA", "BRB", "CHL", "COL", "CRI",
            "CUB", "CUW", "CYM", "DMA", "DOM", "ECU", "FLK", "GLP", "GRD", "GTM", "GUF", "GUY", "HND", "HTI", "JAM",
            "KNA", "LCA", "MAF", "MEX", "MSR", "MTQ", "NIC", "PAN", "PER", "PRI", "PRY", "SLV", "SUR", "SXM", "TCA",
            "TTO", "UMI", "URY", "VCT", "VEN", "VGB", "VIR"
        ]
    },
    {
        'region_iso': 'PO',
        'region_name': 'Polar',
        'country_iso_3s': [
            "ATF", "BVT", "GRL", "HMD", "SGS"
        ]
    },
    {
        'region_iso': 'NA',
        'region_name': 'North America',
        'country_iso_3s': [
            "CAN", "SPM", "USA"
        ]
    },
    {
        'region_iso': 'GL',
        'region_name': 'Global',
        'country_iso_3s': ['GLOB']
    },
    {
        'region_iso': 'WA',
        'region_name': 'West Asia',
        'country_iso_3s': [
            "ARE", "BHR", "IRQ", "JOR", "KWT", "LBN", "OMN", "PSE", "QAT", "SAU", "SYR", "YEM"
        ]
    },
    {
        'region_iso': 'AT', # this region is not in the Protected Planet database
        'region_name': 'Antartica',
        'country_iso_3s': [
            "ATA"
        ]
    },
    {
        'region_iso': 'ABNJ', # this region is not in the Protected Planet database
        'region_name': 'Areas Beyond National Jurisdiction',
        'country_iso_3s': [
            "ABNJ"
        ]
    }
]

# Convert the region data to a dictionary that maps each country to its region name
country_to_region = {}
for region in regions_data:
    for country in region['country_iso_3s']:
        country_to_region[country] = region['region_iso']

In [30]:
ps_new['region'] = ps_new['parent_iso'].map(country_to_region)
ps_new.shape

(10883, 10)

In [31]:
ps_new.head(2)

Unnamed: 0,site_id,site_name,country,wdpa_id,FPS,total_area,geometry,parent_iso,FPS_cat,region
0,AIAG10,Low Bay Sanctuary,Antigua and Barbuda,555587197,5.0,48.321285,"POLYGON ((-61.91090 17.57960, -61.91096 17.579...",ATG,Highly,SA
1,AIAG11,Nelson's Dockyard National Park,Antigua and Barbuda,555587192,1.0,40.705369,"POLYGON ((-61.75807 17.03541, -61.73745 17.021...",ATG,Less,SA


In [32]:
ps_new.to_file(path_out + "protectedseas/protectedseas.shp")

#### Calculate coverage

In [33]:
ps_country = ps_new.groupby(['parent_iso', 'FPS_cat'], as_index=False)['total_area'].sum()
ps_country = ps_country.rename(columns={'parent_iso':'location_id', 'FPS_cat':'fishing_protection_level', 'total_area':'area'})
ps_country.head(10)

Unnamed: 0,location_id,fishing_protection_level,area
0,ALB,Less,197.296039
1,ALB,Moderately,344.028483
2,ARE,Highly,6962.49753
3,ARG,Highly,5958.615237
4,ARG,Less,38419.623899
5,ARG,Moderately,7546.694893
6,ATG,Highly,168.263212
7,ATG,Less,45.247978
8,ATG,Moderately,21.163152
9,AUS,Highly,723824.4631


In [34]:
ps_region = ps_new.groupby(['region', 'FPS_cat'], as_index=False)['total_area'].sum()
ps_region = ps_region.rename(columns={'region':'location_id', 'FPS_cat':'fishing_protection_level', 'total_area':'area'})
ps_region.head(10)

Unnamed: 0,location_id,fishing_protection_level,area
0,AF,Highly,35181.55
1,AF,Less,286264.3
2,AF,Moderately,42531.44
3,AS,Highly,884647.9
4,AS,Less,1659106.0
5,AS,Moderately,282526.4
6,EU,Highly,2679297.0
7,EU,Less,3761438.0
8,EU,Moderately,1884751.0
9,,Highly,3961031.0


In [35]:
ps_global = ps_new.groupby(['FPS_cat'], as_index=False)['total_area'].sum()
ps_global['location_id'] = 'GLOB'
ps_global = ps_global.rename(columns={'FPS_cat':'fishing_protection_level', 'total_area':'area'})
ps_global

Unnamed: 0,fishing_protection_level,area,location_id
0,Highly,8180599.0,GLOB
1,Less,9942402.0,GLOB
2,Moderately,3177137.0,GLOB


In [36]:
ps_coverage = pd.concat([ps_country, ps_region, ps_global], ignore_index=True)
ps_coverage

Unnamed: 0,location_id,fishing_protection_level,area
0,ALB,Less,1.972960e+02
1,ALB,Moderately,3.440285e+02
2,ARE,Highly,6.962498e+03
3,ARG,Highly,5.958615e+03
4,ARG,Less,3.841962e+04
...,...,...,...
216,WA,Highly,1.024296e+04
217,WA,Less,2.129971e+04
218,GLOB,Highly,8.180599e+06
219,GLOB,Less,9.942402e+06


In [40]:
ps_coverage.to_csv(path_out + '/tables/fishing_protection_level.csv', index=False)