### Set up

In [6]:
!pip install openpyxl

Collecting openpyxl
  Downloading openpyxl-3.1.2-py2.py3-none-any.whl (249 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m250.0/250.0 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m[36m0:00:01[0m[36m0:00:01[0m:01[0m
[?25hCollecting et-xmlfile (from openpyxl)
  Downloading et_xmlfile-1.1.0-py3-none-any.whl (4.7 kB)
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-1.1.0 openpyxl-3.1.2


In [7]:
import os

import geopandas as gpd
import openpyxl
import pandas as pd

In [8]:
# Input (raw) and output (processed) data folders.
# The defaults are the original local paths; set SKYTRUTH_DATA_RAW and/or
# SKYTRUTH_DATA_PROCESSED to run the notebook on another machine without
# editing this cell.
# os.path.join(<dir>, "") guarantees the trailing separator that the
# `path_in + "folder/file"` concatenations in the cells below rely on.
path_in = os.path.join(
    os.environ.get("SKYTRUTH_DATA_RAW", "/Users/sofia/Documents/Repos/skytruth-30x30/data/data/raw/"),
    "",
)
path_out = os.path.join(
    os.environ.get("SKYTRUTH_DATA_PROCESSED", "/Users/sofia/Documents/Repos/skytruth-30x30/data/data/processed/"),
    "",
)

### Process habitats from [Ocean+](https://habitats.oceanplus.org/) (except mangroves)

In [59]:
# Load the four Ocean+ per-country habitat tables and the global statistics workbook.
ocean_plus_dir = path_in + "Ocean+HabitatsDownload_Global/"

cold = pd.read_csv(ocean_plus_dir + "coldwatercorals.csv")
salt = pd.read_csv(ocean_plus_dir + "saltmarshes.csv")
sea = pd.read_csv(ocean_plus_dir + "seagrasses.csv")
warm = pd.read_csv(ocean_plus_dir + "warmwatercorals.csv")
# NOTE(review): `glob` is not used again in this notebook and shadows the stdlib module name.
glob = pd.read_excel(ocean_plus_dir + "global-stats.xlsx")

In [64]:
# Keep only the country code and the two area fields; the code becomes 'location_id'.
habitat_columns = ['ISO3', 'protected_area', 'total_area']
iso_to_location = {'ISO3': 'location_id'}

cold2 = cold[habitat_columns].rename(columns=iso_to_location)
salt2 = salt[habitat_columns].rename(columns=iso_to_location)
sea2 = sea[habitat_columns].rename(columns=iso_to_location)
warm2 = warm[habitat_columns].rename(columns=iso_to_location)

In [65]:
# Remove rows whose 'protected_area' or 'total_area' is not a number
# (the source files use '-' as a placeholder) and store both fields as numbers.
def _drop_missing_areas(frame):
    """Return a copy of `frame` with numeric area columns and no missing areas.

    Both area columns are converted with ``pd.to_numeric(errors='coerce')``, so
    placeholders such as '-' become NaN and the affected rows are dropped.
    Unlike a substring test for '-', this keeps valid numbers written in
    scientific notation (e.g. ``1e-05``), works when a column was already
    parsed as numeric, and leaves the columns ready for ``sum()``.
    """
    area_columns = ['protected_area', 'total_area']
    cleaned = frame.copy()
    cleaned[area_columns] = cleaned[area_columns].apply(pd.to_numeric, errors='coerce')
    return cleaned.dropna(subset=area_columns)


cold2 = _drop_missing_areas(cold2)
salt2 = _drop_missing_areas(salt2)
sea2 = _drop_missing_areas(sea2)
warm2 = _drop_missing_areas(warm2)


In [39]:
# Load the merged WDPA layer; it is used here only for its ISO3 -> PARENT_ISO equivalences.
wdpa = gpd.read_file(path_out + "wdpa/merged_mpa.shp")

# Flag rows that list several ';'-separated codes in either column.
# na=False makes a missing code count as "no separator" instead of producing
# NaN in the mask, which would make the `~` negation raise a TypeError.
multi_iso3 = wdpa['ISO3'].str.contains(';', regex=False, na=False)
multi_parent = wdpa['PARENT_ISO'].str.contains(';', regex=False, na=False)
wdpa = wdpa[~(multi_iso3 | multi_parent)]

# Unique ISO3-PARENT_ISO pairs; pairs with a missing code cannot be used as a mapping.
unique_pairs = wdpa[['ISO3', 'PARENT_ISO']].dropna().drop_duplicates()

In [66]:
# Build the ISO3 -> PARENT_ISO lookup and use it to rewrite 'location_id'
# in each habitat dataframe.
mapping_dict = dict(zip(unique_pairs['ISO3'], unique_pairs['PARENT_ISO']))


def _to_parent_iso(frame):
    """Return a copy of `frame` with 'location_id' replaced by its parent ISO code.

    Codes that are absent from `mapping_dict` are kept unchanged. A plain
    ``.map(mapping_dict)`` would turn them into NaN, and the groupby on
    'location_id' that follows would then silently drop those rows.
    """
    parent_codes = frame['location_id'].map(lambda code: mapping_dict.get(code, code))
    # assign() returns a new frame, so no column is set on a slice of another frame.
    return frame.assign(location_id=parent_codes)


cold2 = _to_parent_iso(cold2)
salt2 = _to_parent_iso(salt2)
sea2 = _to_parent_iso(sea2)
warm2 = _to_parent_iso(warm2)

In [85]:
# Total 'protected_area' and 'total_area' per 'location_id' for each habitat.
cold2_grouped = cold2.groupby('location_id', as_index=False).sum()
salt2_grouped = salt2.groupby('location_id', as_index=False).sum()
sea2_grouped = sea2.groupby('location_id', as_index=False).sum()
warm2_grouped = warm2.groupby('location_id', as_index=False).sum()

In [91]:
# Label every grouped table with the habitat it describes.
labelled_tables = (
    (cold2_grouped, 'cold-water corals'),
    (salt2_grouped, 'saltmarshes'),
    (sea2_grouped, 'seagrasses'),
    (warm2_grouped, 'warm-water corals'),
)
for grouped_table, habitat_label in labelled_tables:
    grouped_table['habitat_name'] = habitat_label

In [198]:
# Stack the four habitat tables and stamp them with the reference year.
# The per-table indexes are kept, so index labels repeat across habitats.
habitats = pd.concat([cold2_grouped, salt2_grouped, sea2_grouped, warm2_grouped]).assign(year=2023)
habitats.head(2)

Unnamed: 0,location_id,protected_area,total_area,habitat_name,year
0,ABNJ,421.629373,1874.982214,cold-water corals,2023
1,AGO,0.0,3.395671,cold-water corals,2023


In [199]:
# Global totals per habitat: the sum of all per-location rows, labelled 'GLOB'.
habitats_global = (
    habitats.groupby(['habitat_name'])
    .agg({'protected_area': 'sum', 'total_area': 'sum'})
    .reset_index()
    .assign(location_id='GLOB', year=2023)
)
habitats_global

Unnamed: 0,habitat_name,protected_area,total_area,location_id,year
0,cold-water corals,4214.897203,15057.016684,GLOB,2023
1,saltmarshes,111540.045205,217798.398466,GLOB,2023
2,seagrasses,71543.667168,295004.516919,GLOB,2023
3,warm-water corals,62074.76855,147100.573092,GLOB,2023


In [200]:
# Append the 'GLOB' rows to the per-location table.
# NOTE(review): this reassigns `habitats`, so running the cell twice appends the global rows twice.
habitats = pd.concat(objs=[habitats, habitats_global], axis=0)
habitats

Unnamed: 0,location_id,protected_area,total_area,habitat_name,year
0,ABNJ,421.629373,1874.982214,cold-water corals,2023
1,AGO,0.000000,3.395671,cold-water corals,2023
2,ALB,0.000000,5.986479,cold-water corals,2023
3,ARG,6.984226,61.826344,cold-water corals,2023
4,ATG,0.000000,0.997747,cold-water corals,2023
...,...,...,...,...,...
81,ZAF,1.398813,1.398813,warm-water corals,2023
0,GLOB,4214.897203,15057.016684,cold-water corals,2023
1,GLOB,111540.045205,217798.398466,saltmarshes,2023
2,GLOB,71543.667168,295004.516919,seagrasses,2023


In [201]:
# Regions used in the Protected Planet database (data from Region_ISO3_PP.txt),
# one dictionary per region: its code, its display name and its member ISO3 codes.
# NOTE: the North America code is the string 'NA'. pandas.read_csv parses 'NA'
# as a missing value by default, so anything reading the exported CSVs back
# needs keep_default_na=False (or an explicit na_values list) to keep it.
regions_data = [
    {
        'region_iso': 'AS',
        'region_name': 'Asia & Pacific',
        'country_iso_3s': [
            "AFG", "ASM", "AUS", "BGD", "BRN", "BTN", "CCK", "CHN", "COK", "CXR", "FJI", "FSM", "GUM", "HKG", "IDN",
            "IND", "IOT", "IRN", "JPN", "KHM", "KIR", "KOR", "LAO", "LKA", "MAC", "MDV", "MHL", "MMR", "MNG", "MNP",
            "MYS", "NCL", "NFK", "NIU", "NPL", "NRU", "NZL", "PAK", "PCN", "PHL", "PLW", "PNG", "PRK", "PYF", "SGP",
            "SLB", "THA", "TKL", "TLS", "TON", "TUV", "TWN", "VNM", "VUT", "WLF", "WSM"
        ]
    },
    {
        'region_iso': 'AF',
        'region_name': 'Africa',
        'country_iso_3s': [
            "AGO", "BDI", "BEN", "BFA", "BWA", "CAF", "CIV", "CMR", "COD", "COG", "COM", "CPV", "DJI", "DZA", "EGY",
            "ERI", "ESH", "ETH", "GAB", "GHA", "GIN", "GMB", "GNB", "GNQ", "KEN", "LBR", "LBY", "LSO", "MAR", "MDG",
            "MLI", "MOZ", "MRT", "MUS", "MWI", "MYT", "NAM", "NER", "NGA", "REU", "RWA", "SDN", "SEN", "SHN", "SLE",
            "SOM", "SSD", "STP", "SWZ", "SYC", "TCD", "TGO", "TUN", "TZA", "UGA", "ZAF", "ZMB", "ZWE"
        ]
    },
    {
        'region_iso': 'EU',
        'region_name': 'Europe',
        'country_iso_3s': [
            "ALA", "ALB", "AND", "ARM", "AUT", "AZE", "BEL", "BGR", "BIH", "BLR", "CHE", "CYP", "CZE", "DEU", "DNK",
            "ESP", "EST", "FIN", "FRA", "FRO", "GBR", "GEO", "GGY", "GIB", "GRC", "HRV", "HUN", "IMN", "IRL", "ISL",
            "ISR", "ITA", "JEY", "KAZ", "KGZ", "LIE", "LTU", "LUX", "LVA", "MCO", "MDA", "MKD", "MLT", "MNE", "NLD",
            "NOR", "POL", "PRT", "ROU", "RUS", "SJM", "SMR", "SRB", "SVK", "SVN", "SWE", "TJK", "TKM", "TUR", "UKR",
            "UZB", "VAT"
        ]
    },
    {
        'region_iso': 'SA',
        'region_name': 'Latin America & Caribbean',
        'country_iso_3s': [
            "ABW", "AIA", "ARG", "ATG", "BES", "BHS", "BLM", "BLZ", "BMU", "BOL", "BRA", "BRB", "CHL", "COL", "CRI",
            "CUB", "CUW", "CYM", "DMA", "DOM", "ECU", "FLK", "GLP", "GRD", "GTM", "GUF", "GUY", "HND", "HTI", "JAM",
            "KNA", "LCA", "MAF", "MEX", "MSR", "MTQ", "NIC", "PAN", "PER", "PRI", "PRY", "SLV", "SUR", "SXM", "TCA",
            "TTO", "UMI", "URY", "VCT", "VEN", "VGB", "VIR"
        ]
    },
    {
        'region_iso': 'PO',
        'region_name': 'Polar',
        'country_iso_3s': [
            "ATF", "BVT", "GRL", "HMD", "SGS"
        ]
    },
    {
        'region_iso': 'NA',
        'region_name': 'North America',
        'country_iso_3s': [
            "CAN", "SPM", "USA"
        ]
    },
    {
        'region_iso': 'WA',
        'region_name': 'West Asia',
        'country_iso_3s': [
            "ARE", "BHR", "IRQ", "JOR", "KWT", "LBN", "OMN", "PSE", "QAT", "SAU", "SYR", "YEM"
        ]
    },
    {
        'region_iso': 'AT', # this region is not in the Protected Planet database
        'region_name': 'Antarctica',
        'country_iso_3s': [
            "ATA"
        ]
    }
]

# Lookup from each country ISO3 code to the code ('region_iso') of its region.
# If a country were listed under two regions, the later region would win.
country_to_region = {
    country: region['region_iso']
    for region in regions_data
    for country in region['country_iso_3s']
}

In [202]:
# Regional totals per habitat. Locations without a region (e.g. 'GLOB', 'ABNJ')
# get a missing region and are left out by the groupby.
habitats_regions = (
    habitats.assign(region=habitats['location_id'].map(country_to_region))
    .groupby(['region', 'habitat_name'])
    .agg({'protected_area': 'sum', 'total_area': 'sum'})
    .reset_index()
    .assign(year=2023)
    .rename(columns={'region': 'location_id'})
)
habitats_regions


Unnamed: 0,location_id,habitat_name,protected_area,total_area,year
0,AF,cold-water corals,37.761626,381.993234,2023
1,AF,saltmarshes,6688.702879,19845.915,2023
2,AF,seagrasses,6319.099491,61939.484904,2023
3,AF,warm-water corals,6591.340083,15216.393947,2023
4,AS,cold-water corals,433.7771,1733.448452,2023
5,AS,saltmarshes,11965.69391,44696.365149,2023
6,AS,seagrasses,29085.739962,123207.628344,2023
7,AS,warm-water corals,41327.715018,100106.465948,2023
8,EU,cold-water corals,2657.645874,7253.054271,2023
9,EU,saltmarshes,11397.003598,18425.728461,2023


In [204]:
# Append the regional rows to the country and global rows.
habitats = pd.concat(objs=[habitats, habitats_regions], axis=0)

In [205]:
# Sanity check: every location code present (countries, 'GLOB' and region codes).
pd.unique(habitats['location_id'])

array(['ABNJ', 'AGO', 'ALB', 'ARG', 'ATG', 'AUS', 'BHS', 'BLZ', 'BRA',
       'BRB', 'CAN', 'CHL', 'CHN', 'CIV', 'COK', 'COL', 'CPV', 'CRI',
       'CUB', 'CYP', 'DMA', 'DNK', 'DOM', 'DZA', 'ECU', 'ERI', 'ESP',
       'FJI', 'FRA', 'FSM', 'GBR', 'GHA', 'GIN', 'GNB', 'GNQ', 'GRC',
       'GRD', 'GTM', 'GUY', 'HND', 'HRV', 'HTI', 'IDN', 'IND', 'IRL',
       'ISL', 'ITA', 'JAM', 'JPN', 'KEN', 'KIR', 'KNA', 'LBR', 'LCA',
       'LKA', 'MAR', 'MDG', 'MEX', 'MHL', 'MLT', 'MMR', 'MNE', 'MOZ',
       'MRT', 'MUS', 'MYS', 'NAM', 'NGA', 'NIC', 'NLD', 'NOR', 'NZL',
       'OMN', 'PAN', 'PER', 'PHL', 'PLW', 'PNG', 'PRT', 'RUS', 'SAU',
       'SEN', 'SHN', 'SJM', 'STP', 'SUR', 'SWE', 'SYC', 'THA', 'TLS',
       'TON', 'TTO', 'TUN', 'TUV', 'UMI', 'URY', 'USA', 'VCT', 'VEN',
       'VNM', 'VUT', 'WSM', 'YEM', 'ZAF', 'ARE', 'AZE', 'BEL', 'BGR',
       'BHR', 'DEU', 'DJI', 'EGY', 'EST', 'FIN', 'GEO', 'GMB', 'IRN',
       'KHM', 'KOR', 'KWT', 'LBY', 'LTU', 'LVA', 'PAK', 'POL', 'QAT',
       'ROU', 'SDN'

In [206]:
# Save the Ocean+ habitat statistics without the (repeating) index column.
ocean_plus_csv = path_out + "habitats/ocean+.csv"
habitats.to_csv(ocean_plus_csv, index=False)

### Process seamounts from [UNEP-WCMC](https://data.unep-wcmc.org/datasets/41)

In [213]:
# Load the layers needed for the seamount statistics.
seamounts_shp = path_in + "Seamounts/DownloadPack-14_001_ZSL002_ModelledSeamounts2011_v1_01_Data_Seamounts_Seamounts.shp"
eez_shp = path_out + "/administrative/eez_area_mollweide.shp"
high_seas_shp = path_in + "/high_seas/high_seas.shp"
protected_shp = path_out + "wdpa/timeseries/protected_dissolved_2023.shp"

seamounts = gpd.read_file(seamounts_shp)
# NOTE(review): only the protected areas are reprojected to EPSG:4326; the spatial
# joins below presumably need `eez` and `hs` in the same CRS as `seamounts` — confirm.
eez = gpd.read_file(eez_shp)
hs = gpd.read_file(high_seas_shp)
protected_areas = gpd.read_file(protected_shp).to_crs("EPSG:4326")

In [214]:
# Keep the sovereignty fields of the EEZ layer, tag the high-seas polygons as
# 'ABNJ', and stack both layers into one GeoDataFrame.
eez = eez[['SOVEREIGN1', 'SOVEREIGN2', 'SOVEREIGN3','ISO_SOV1', 'ISO_SOV2', 'ISO_SOV3', 'geometry']]

# assign() returns a new frame, so no column is set on a slice of the original `hs`.
hs = hs[['geometry']].assign(SOVEREIGN1='High Seas', ISO_SOV1='ABNJ')

# Stacking layers with different coordinate systems would mix incompatible
# coordinates, so bring the high seas into the EEZ CRS when both are known.
if eez.crs is not None and hs.crs is not None and hs.crs != eez.crs:
    hs = hs.to_crs(eez.crs)

# Row-wise concatenation. The previous outer merge produced the same union of
# rows, but only by joining on every shared column, geometry included.
eez_hs = pd.concat([eez, hs], ignore_index=True)

In [215]:
# Attach EEZ / high-seas attributes to each seamount that lies within one of those polygons.
# how="left" keeps seamounts without a match (their EEZ fields are then missing).
seamounts_eez = seamounts.sjoin(eez_hs, how="left", predicate="within")

In [216]:
# Keep only seamounts that were matched to an EEZ or to the high seas.
has_sovereign = seamounts_eez['ISO_SOV1'].notna()
seamounts_eez = seamounts_eez[has_sovereign]

In [217]:
# Helper that builds the "iso" value of a row from its ISO_SOV* codes
def concatenate_iso(row):
    """Join the sovereign ISO codes of `row` into one ';'-separated string.

    'ISO_SOV1' is always included; 'ISO_SOV2' and 'ISO_SOV3' are appended
    only when they are not missing.
    """
    optional_codes = [row[field] for field in ('ISO_SOV2', 'ISO_SOV3') if pd.notna(row[field])]
    return ';'.join([row['ISO_SOV1'], *optional_codes])

# One ';'-separated code string per seamount row, stored in the new "iso" column.
iso_codes = seamounts_eez.apply(concatenate_iso, axis="columns")
seamounts_eez['iso'] = iso_codes

In [218]:
# Give every sovereign its own row: rows whose 'iso' holds several
# ';'-separated codes are split into one row per code.
has_multiple_iso = seamounts_eez['iso'].str.contains(';', na=False)

multi_iso_rows = seamounts_eez.loc[has_multiple_iso].copy()
multi_iso_rows['iso'] = multi_iso_rows['iso'].str.split(';')
multi_iso_rows = multi_iso_rows.explode('iso')

# Rows with a single code are used as they are.
single_iso_rows = seamounts_eez.loc[~has_multiple_iso]

# Single-code rows first, then the split rows, with a fresh index.
# NOTE(review): a seamount shared by several sovereigns now counts once per
# sovereign, so sums over all locations include it more than once — confirm intended.
seamounts_eez_new = pd.concat([single_iso_rows, multi_iso_rows], ignore_index=True)

In [219]:
# Total seamount area ('AREA2D') per sovereign code.
seamounts_iso = (
    seamounts_eez_new.groupby(['iso'])
    .agg({'AREA2D': 'sum'})
    .reset_index()
    .rename(columns={'AREA2D': 'total_area', 'iso': 'location_id'})
    .assign(habitat_name='seamounts', year=2011)
)
seamounts_iso

Unnamed: 0,location_id,total_area,habitat_name,year
0,ABNJ,1.483098e+07,seamounts,2011
1,AGO,9.556242e+03,seamounts,2011
2,ARG,3.110730e+05,seamounts,2011
3,ATA,3.551629e+05,seamounts,2011
4,ATG,6.215895e+03,seamounts,2011
...,...,...,...,...
88,VNM,4.421338e+04,seamounts,2011
89,VUT,1.199475e+05,seamounts,2011
90,WSM,4.117997e+04,seamounts,2011
91,YEM,6.294974e+04,seamounts,2011


In [220]:
# Flag each seamount as protected ("yes") when it lies within a protected-area polygon.
seamounts_wdpa = gpd.sjoin(seamounts, protected_areas, how="left", predicate="within")
# 'index_right' is filled only for rows that matched a protected area.
is_protected = seamounts_wdpa['index_right'].notna()
seamounts_wdpa['protection'] = is_protected.map({True: "yes", False: "no"})


In [221]:
# Discard the seamounts flagged as not protected.
not_unprotected = seamounts_wdpa['protection'].ne("no")
seamounts_wdpa = seamounts_wdpa.loc[not_unprotected]

In [222]:
# Give every country its own row: rows whose 'PARENT_ISO' holds several
# ';'-separated codes are split into one row per code.
has_multiple_parents = seamounts_wdpa['PARENT_ISO'].str.contains(';', na=False)

multi_parent_rows = seamounts_wdpa.loc[has_multiple_parents].copy()
multi_parent_rows['PARENT_ISO'] = multi_parent_rows['PARENT_ISO'].str.split(';')
multi_parent_rows = multi_parent_rows.explode('PARENT_ISO')

# Rows with a single code are used as they are.
single_parent_rows = seamounts_wdpa.loc[~has_multiple_parents]

# Single-code rows first, then the split rows, with a fresh index.
seamounts_wdpa_new = pd.concat([single_parent_rows, multi_parent_rows], ignore_index=True)
pd.unique(seamounts_wdpa_new['PARENT_ISO'])

array(['CAN', 'ABNJ', 'FRA', 'JPN', 'USA', 'PRT', 'ESP', 'BHS', 'MEX',
       'DOM', 'HND', 'NLD', 'PHL', 'VEN', 'MHL', 'YEM', 'COL', 'PLW',
       'CRI', 'PAN', 'BRA', 'ECU', 'GNQ', 'KIR', 'GBR', 'IDN', 'SYC',
       'COK', 'AUS', 'COM', 'FJI', 'NIU', 'CHL', 'NZL', 'ZAF', 'ARG',
       'ITA', 'GRC', 'CUB', 'TUV', 'PER', 'SHN', 'NOR', 'MCO'],
      dtype=object)

In [223]:
# Protected seamount area ('AREA2D') per country code.
seamounts_protected = (
    seamounts_wdpa_new.groupby(['PARENT_ISO'])
    .agg({'AREA2D': 'sum'})
    .reset_index()
    .rename(columns={'AREA2D': 'protected_area', 'PARENT_ISO': 'location_id'})
)
seamounts_protected.head(2)

Unnamed: 0,location_id,protected_area
0,ABNJ,226253.932283
1,ARG,38773.659962


In [224]:
# Bring 'protected_area' into the per-location totals. The left join keeps every
# location of seamounts_iso; those without protected seamounts get a missing value.
seamounts_iso2 = seamounts_iso.merge(seamounts_protected, on='location_id', how='left')
seamounts_iso2.head(5)

Unnamed: 0,location_id,total_area,habitat_name,year,protected_area
0,ABNJ,14830980.0,seamounts,2011,226253.932283
1,AGO,9556.242,seamounts,2011,
2,ARG,311073.0,seamounts,2011,38773.659962
3,ATA,355162.9,seamounts,2011,
4,ATG,6215.895,seamounts,2011,


In [225]:
# Locations without any protected seamount have a protected area of 0.
seamounts_iso2 = seamounts_iso2.fillna({'protected_area': 0})
seamounts_iso2.head(5)

Unnamed: 0,location_id,total_area,habitat_name,year,protected_area
0,ABNJ,14830980.0,seamounts,2011,226253.932283
1,AGO,9556.242,seamounts,2011,0.0
2,ARG,311073.0,seamounts,2011,38773.659962
3,ATA,355162.9,seamounts,2011,0.0
4,ATG,6215.895,seamounts,2011,0.0


In [226]:
# Global seamount totals: the sum of all per-location rows, labelled 'GLOB'.
seamounts_global = (
    seamounts_iso2.groupby(['habitat_name'])
    .agg({'protected_area': 'sum', 'total_area': 'sum'})
    .reset_index()
    .assign(location_id='GLOB', year=2011)
)
seamounts_global

Unnamed: 0,habitat_name,protected_area,total_area,location_id,year
0,seamounts,3186900.0,27291130.0,GLOB,2011


In [227]:
# Append the 'GLOB' row to the per-location seamount table.
seamounts_iso2 = pd.concat(objs=[seamounts_iso2, seamounts_global], axis=0)

In [228]:
# Regional seamount totals. Locations without a region (e.g. 'GLOB', 'ABNJ')
# get a missing region and are left out by the groupby.
seamounts_regions = (
    seamounts_iso2.assign(region=seamounts_iso2['location_id'].map(country_to_region))
    .groupby(['region', 'habitat_name'])
    .agg({'protected_area': 'sum', 'total_area': 'sum'})
    .reset_index()
    .assign(year=2011)
    .rename(columns={'region': 'location_id'})
)
seamounts_regions

Unnamed: 0,location_id,habitat_name,protected_area,total_area,year
0,AF,seamounts,94385.178958,616235.1,2011
1,AS,seamounts,832497.783937,5433433.0,2011
2,AT,seamounts,0.0,355162.9,2011
3,EU,seamounts,894514.910255,2641119.0,2011
4,,seamounts,555588.210725,1664794.0,2011
5,SA,seamounts,581172.154389,1655552.0,2011
6,WA,seamounts,2487.42805,93847.65,2011


In [229]:
# Append the regional rows to the country and global seamount rows.
seamounts_iso2 = pd.concat(objs=[seamounts_iso2, seamounts_regions], axis=0)

In [230]:
# Sanity check: every location code present (countries, 'GLOB' and region codes).
pd.unique(seamounts_iso2['location_id'])

array(['ABNJ', 'AGO', 'ARG', 'ATA', 'ATG', 'AUS', 'BHS', 'BLZ', 'BRA',
       'BRB', 'BRN', 'CAN', 'CHL', 'CHN', 'CIV', 'COL', 'COM', 'CPV',
       'CRI', 'CUB', 'DMA', 'DNK', 'DOM', 'ECU', 'ESH', 'ESP', 'FJI',
       'FRA', 'FSM', 'GBR', 'GHA', 'GIN', 'GNB', 'GNQ', 'GRC', 'HND',
       'HTI', 'IDN', 'IND', 'ISL', 'ITA', 'JAM', 'JPN', 'KIR', 'KOR',
       'LBR', 'LBY', 'LKA', 'MAR', 'MDG', 'MDV', 'MEX', 'MHL', 'MLT',
       'MMR', 'MOZ', 'MUS', 'MYS', 'NAM', 'NIC', 'NLD', 'NOR', 'NRU',
       'NZL', 'OMN', 'PAK', 'PAN', 'PER', 'PHL', 'PLW', 'PNG', 'PRK',
       'PRT', 'RUS', 'SEN', 'SLB', 'SOM', 'STP', 'SYC', 'TON', 'TUR',
       'TUV', 'TWN', 'TZA', 'URY', 'USA', 'VCT', 'VEN', 'VNM', 'VUT',
       'WSM', 'YEM', 'ZAF', 'GLOB', 'AF', 'AS', 'AT', 'EU', 'NA', 'SA',
       'WA'], dtype=object)

In [231]:
# Save the seamount statistics without the index column.
seamounts_csv = path_out + "habitats/seamounts.csv"
seamounts_iso2.to_csv(seamounts_csv, index=False)

### Process mangroves from Global Mangrove Watch (GMW)

In [232]:
# Load the mangrove statistics from the processed habitats folder.
mangroves_csv = path_out + "habitats/mangroves.csv"
mangroves = pd.read_csv(mangroves_csv)

In [233]:
# Global mangrove totals: the sum of all per-location rows, labelled 'GLOB'.
mangroves_global = (
    mangroves.groupby(['habitat_name'])
    .agg({'protected_area': 'sum', 'total_area': 'sum'})
    .reset_index()
    .assign(location_id='GLOB', year=2020)
)
mangroves_global

Unnamed: 0,habitat_name,protected_area,total_area,location_id,year
0,mangroves,61287.20375,147358.990971,GLOB,2020


In [234]:
# Append the 'GLOB' row to the per-location mangrove table.
mangroves = pd.concat(objs=[mangroves, mangroves_global], axis=0)

In [235]:
# Regional mangrove totals. Locations without a region (e.g. 'GLOB')
# get a missing region and are left out by the groupby.
mangroves_regions = (
    mangroves.assign(region=mangroves['location_id'].map(country_to_region))
    .groupby(['region', 'habitat_name'])
    .agg({'protected_area': 'sum', 'total_area': 'sum'})
    .reset_index()
    .assign(year=2020)
    .rename(columns={'region': 'location_id'})
)
mangroves_regions

Unnamed: 0,location_id,habitat_name,protected_area,total_area,year
0,AF,mangroves,10006.97,29344.404399,2020
1,AS,mangroves,21378.75,74629.194446,2020
2,,mangroves,2055.4,2329.115505,2020
3,PO,mangroves,6.72,6.723018,2020
4,SA,mangroves,27811.53375,40875.932666,2020
5,WA,mangroves,27.83,173.620938,2020


In [236]:
# Append the regional rows to the country and global mangrove rows.
mangroves = pd.concat(objs=[mangroves, mangroves_regions], axis=0)

In [237]:
# Sanity check: every location code present (countries, 'GLOB' and region codes).
pd.unique(mangroves['location_id'])

array(['ZAF', 'YEM', 'WSM', 'WLF', 'VUT', 'VNM', 'VIR', 'VGB', 'VEN',
       'VCT', 'USA', 'TZA', 'TWN', 'TUV', 'TTO', 'TON', 'TLS', 'THA',
       'TGO', 'TCA', 'SYC', 'SXM', 'SUR', 'STP', 'SOM', 'SLV', 'SLE',
       'SLB', 'SGP', 'SEN', 'SDN', 'SAU', 'QAT', 'PYF', 'PRI', 'PNG',
       'PLW', 'PHL', 'PER', 'PAN', 'PAK', 'OMN', 'NZL', 'NIC', 'NGA',
       'NCL', 'MYT', 'MYS', 'MUS', 'MTQ', 'MRT', 'MOZ', 'MMR', 'MHL',
       'MEX', 'MDV', 'MDG', 'MAF', 'LKA', 'LCA', 'LBR', 'KNA', 'KIR',
       'KHM', 'KEN', 'JPN', 'JAM', 'IRN', 'IND', 'IDN', 'HTI', 'HND',
       'GUY', 'GUM', 'GUF', 'GTM', 'GRD', 'GNQ', 'GNB', 'GMB', 'GLP',
       'GIN', 'GHA', 'GAB', 'FSM', 'FJI', 'ERI', 'EGY', 'ECU', 'DOM',
       'DMA', 'DJI', 'CYM', 'CUW', 'CUB', 'CRI', 'COM', 'COL', 'COK',
       'COG', 'COD', 'CMR', 'CIV', 'CHN', 'BRN', 'BRB', 'BRA', 'BMU',
       'BLZ', 'BHS', 'BHR', 'BGD', 'BES', 'BEN', 'AUS', 'ATG', 'ATF',
       'ASM', 'ARE', 'AIA', 'AGO', 'ABW', 'GLOB', 'AF', 'AS', 'NA', 'PO',
       'SA', 'WA

### Concatenate all habitats

In [238]:
# Stack the Ocean+ habitats, the seamounts and the mangroves into one table.
# The original indexes are kept, so index labels repeat across sources.
all_sources = [habitats, seamounts_iso2, mangroves]
habitats_all = pd.concat(all_sources)
habitats_all

Unnamed: 0,location_id,protected_area,total_area,habitat_name,year
0,ABNJ,421.629373,1874.982214,cold-water corals,2023
1,AGO,0.000000,3.395671,cold-water corals,2023
2,ALB,0.000000,5.986479,cold-water corals,2023
3,ARG,6.984226,61.826344,cold-water corals,2023
4,ATG,0.000000,0.997747,cold-water corals,2023
...,...,...,...,...,...
1,AS,21378.750000,74629.194446,mangroves,2020
2,,2055.400000,2329.115505,mangroves,2020
3,PO,6.720000,6.723018,mangroves,2020
4,SA,27811.533750,40875.932666,mangroves,2020


In [239]:
# Save the combined habitat statistics without the (repeating) index column.
habitats_csv = path_out + "habitats/habitats.csv"
habitats_all.to_csv(habitats_csv, index=False)