In [1]:
import os
import warnings

import geopandas as gpd
import pandas as pd

In [2]:
# Root of the data tree. Set the SKYTRUTH_DATA_DIR environment variable to run the
# notebook on another machine; when it is unset (or empty) the previous hard-coded
# location is used, so existing behaviour is unchanged.
DATA_ROOT = (
    os.environ.get("SKYTRUTH_DATA_DIR")
    or "/Users/sofia/Documents/Repos/skytruth-30x30/data/data"
).rstrip("/")

# Kept as plain strings ending in "/" because the other cells build file paths
# by string concatenation (path_in + "...").
path_in = f"{DATA_ROOT}/raw/"
path_out = f"{DATA_ROOT}/processed/"

In [3]:
# Locations of the four input layers (raw downloads and previously processed files)
seamounts_file = path_in + "Seamounts/DownloadPack-14_001_ZSL002_ModelledSeamounts2011_v1_01_Data_Seamounts_Seamounts.shp"
eez_file = path_out + "/administrative/eez_area_mollweide.shp"
hs_file = path_in + "/high_seas/high_seas.shp"
wdpa_file = path_out + "wdpa/timeseries/protected_dissolved_2023.shp"

# Load each layer as a GeoDataFrame
seamounts = gpd.read_file(seamounts_file)
eez = gpd.read_file(eez_file)
hs = gpd.read_file(hs_file)
# Only the protected-areas layer is reprojected here (to EPSG:4326); the other
# layers keep whatever CRS is stored in their files.
protected_areas = gpd.read_file(wdpa_file).to_crs("EPSG:4326")

In [4]:
# Keep the relevant fields in eez and hs, then stack them into one dataframe.
eez = eez[['SOVEREIGN1', 'SOVEREIGN2', 'SOVEREIGN3', 'ISO_SOV1', 'ISO_SOV2', 'ISO_SOV3', 'geometry']]

# Label every high-seas polygon as 'High Seas' / 'ABNJ'. .assign() returns a new
# frame, so no columns are set on a subset of the original (which pandas flags
# as chained assignment).
hs = hs[['geometry']].assign(SOVEREIGN1='High Seas', ISO_SOV1='ABNJ')

# Put both layers in the same CRS before combining them; skipped when either CRS
# is undefined because there is nothing to reproject from/to.
if eez.crs is not None and hs.crs is not None and hs.crs != eez.crs:
    hs = hs.to_crs(eez.crs)

# Row-wise stack: the high-seas rows are appended after the EEZ rows and the
# columns hs does not have (SOVEREIGN2/3, ISO_SOV2/3) are filled with NaN.
# This replaces an outer merge with no `on=`, which matched on every shared
# column, geometry included, to achieve the same stacking.
eez_hs = pd.concat([eez, hs], ignore_index=True)

In [5]:
# Attach the EEZ / high-seas attributes to every seamount lying within one of the polygons.
# how="left" keeps seamounts with no containing polygon; their joined fields are NaN.
# NOTE(review): a left join yields one row per matching polygon, so a seamount inside
# overlapping polygons would appear more than once — compare with len(seamounts).
seamounts_eez = seamounts.sjoin(eez_hs, how="left", predicate="within")
seamounts_eez.shape[0]

33461

In [6]:
# Count the joined rows that have no ISO_SOV1 value (no EEZ / high-seas polygon matched)
seamounts_eez['ISO_SOV1'].isnull().sum()

43

In [7]:
# Discard seamounts that matched neither an EEZ nor the high-seas polygon
has_location = seamounts_eez['ISO_SOV1'].notna()
seamounts_eez = seamounts_eez.loc[has_location]

In [8]:
# Create new column "iso" with the iso_sov codes
def concatenate_iso(row):
    """Join the sovereign ISO codes of one row into a ';'-separated string.

    ISO_SOV1 is always included; ISO_SOV2 and ISO_SOV3 are appended, in that
    order, only when they are not missing.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide the keys 'ISO_SOV1', 'ISO_SOV2' and 'ISO_SOV3'.

    Returns
    -------
    str
        For example 'FRA' or 'FRA;ITA'.
    """
    extra_codes = [row[field] for field in ('ISO_SOV2', 'ISO_SOV3') if pd.notna(row[field])]
    return ';'.join([row['ISO_SOV1'], *extra_codes])

# Build the combined code string for every row, then store it in the new 'iso' column
iso_strings = seamounts_eez.apply(concatenate_iso, axis=1)
seamounts_eez['iso'] = iso_strings

In [9]:
# Flag the rows whose 'iso' value holds more than one ';'-separated code
mask = seamounts_eez['iso'].str.contains(';', na=False)

# Rows with a single code are passed through unchanged
single_value_rows = seamounts_eez.loc[~mask]

# Multi-code rows: turn 'iso' into a list of codes and emit one row per code
split_rows = (
    seamounts_eez.loc[mask]
    .assign(iso=lambda frame: frame['iso'].str.split(';'))
    .explode('iso')
)

# Single-code rows first, then the exploded rows, with a fresh 0..n-1 index
seamounts_eez_new = pd.concat([single_value_rows, split_rows], ignore_index=True)

In [10]:
# Distinct location codes present after the split (sanity check: none should contain ';')
location_codes = seamounts_eez_new['iso']
location_codes.unique()

array(['DNK', 'ABNJ', 'RUS', 'NOR', 'CAN', 'USA', 'FRA', 'ESP', 'JPN',
       'PRT', 'ITA', 'KOR', 'GRC', 'LBY', 'MLT', 'GBR', 'MAR', 'MEX',
       'BHS', 'CUB', 'DOM', 'OMN', 'PHL', 'HND', 'HTI', 'JAM', 'TWN',
       'ATG', 'NLD', 'CPV', 'MHL', 'COL', 'SEN', 'VEN', 'DMA', 'VNM',
       'CHN', 'IND', 'YEM', 'BRB', 'SOM', 'FSM', 'NIC', 'PLW', 'CRI',
       'MYS', 'BRN', 'KIR', 'IDN', 'PAN', 'MDV', 'BRA', 'ECU', 'LKA',
       'GNQ', 'PNG', 'NRU', 'MUS', 'PER', 'SYC', 'TUV', 'SLB', 'NZL',
       'AUS', 'FJI', 'MDG', 'COM', 'MOZ', 'WSM', 'VUT', 'TON', 'CHL',
       'ZAF', 'ARG', 'ATA', 'ISL', 'PRK', 'TUR', 'PAK', 'BLZ', 'MMR',
       'VCT', 'GNB', 'GIN', 'CIV', 'GHA', 'LBR', 'STP', 'TZA', 'AGO',
       'NAM', 'URY', 'ESH'], dtype=object)

In [12]:
# Total seamount area (sum of AREA2D) per location code, shaped for the output table
seamounts_iso = (
    seamounts_eez_new
    .groupby('iso')['AREA2D']
    .sum()
    .reset_index()
    .rename(columns={'AREA2D': 'total_area', 'iso': 'location_id'})
    .assign(habitat_name='seamounts', year=2023)
)
seamounts_iso

Unnamed: 0,location_id,total_area,habitat_name,year
0,ABNJ,1.483098e+07,seamounts,2023
1,AGO,9.556242e+03,seamounts,2023
2,ARG,3.110730e+05,seamounts,2023
3,ATA,3.551629e+05,seamounts,2023
4,ATG,6.215895e+03,seamounts,2023
...,...,...,...,...
88,VNM,4.421338e+04,seamounts,2023
89,VUT,1.199475e+05,seamounts,2023
90,WSM,4.117997e+04,seamounts,2023
91,YEM,6.294974e+04,seamounts,2023


In [28]:
# Attach protected-area attributes to every seamount lying within a protected polygon
seamounts_wdpa = seamounts.sjoin(protected_areas, how="left", predicate="within")

# 'index_right' is filled only where a protected polygon was matched,
# so it doubles as the protected / not-protected indicator.
is_protected = seamounts_wdpa['index_right'].notna()
seamounts_wdpa['protection'] = is_protected.map({True: "yes", False: "no"})


In [29]:
# Keep only the seamounts that were not flagged "no", i.e. the protected ones
not_protected = seamounts_wdpa['protection'].eq("no")
seamounts_wdpa = seamounts_wdpa.loc[~not_protected]

In [30]:
# Distinct PARENT_ISO values among protected seamounts (some hold several ';'-separated codes)
parent_iso_values = seamounts_wdpa['PARENT_ISO']
parent_iso_values.unique()

array(['CAN', 'ABNJ', 'FRA', 'FRA;ITA;MCO', 'JPN', 'USA', 'PRT', 'ESP',
       'BHS', 'MEX', 'DOM', 'HND', 'NLD', 'PHL', 'VEN', 'MHL', 'YEM',
       'COL', 'PLW', 'CRI', 'PAN', 'BRA', 'ECU', 'GNQ', 'KIR', 'GBR',
       'IDN', 'SYC', 'COK', 'AUS', 'COM', 'FJI', 'NIU', 'CHL', 'NZL',
       'ZAF', 'ARG', 'ITA', 'GRC', 'CUB', 'TUV', 'PER', 'SHN', 'NOR'],
      dtype=object)

In [32]:
# Flag the rows whose 'PARENT_ISO' value holds more than one ';'-separated code
mask = seamounts_wdpa['PARENT_ISO'].str.contains(';', na=False)

# Rows with a single code are passed through unchanged
single_value_rows = seamounts_wdpa.loc[~mask]

# Multi-code rows: turn 'PARENT_ISO' into a list of codes and emit one row per code
split_rows = (
    seamounts_wdpa.loc[mask]
    .assign(PARENT_ISO=lambda frame: frame['PARENT_ISO'].str.split(';'))
    .explode('PARENT_ISO')
)

# Single-code rows first, then the exploded rows, with a fresh 0..n-1 index
seamounts_wdpa_new = pd.concat([single_value_rows, split_rows], ignore_index=True)

# Check that no combined codes remain
seamounts_wdpa_new['PARENT_ISO'].unique()

array(['CAN', 'ABNJ', 'FRA', 'JPN', 'USA', 'PRT', 'ESP', 'BHS', 'MEX',
       'DOM', 'HND', 'NLD', 'PHL', 'VEN', 'MHL', 'YEM', 'COL', 'PLW',
       'CRI', 'PAN', 'BRA', 'ECU', 'GNQ', 'KIR', 'GBR', 'IDN', 'SYC',
       'COK', 'AUS', 'COM', 'FJI', 'NIU', 'CHL', 'NZL', 'ZAF', 'ARG',
       'ITA', 'GRC', 'CUB', 'TUV', 'PER', 'SHN', 'NOR', 'MCO'],
      dtype=object)

In [38]:
# Protected seamount area (sum of AREA2D) per PARENT_ISO code, shaped like seamounts_iso
seamounts_protected = (
    seamounts_wdpa_new
    .groupby('PARENT_ISO')['AREA2D']
    .sum()
    .reset_index()
    .rename(columns={'AREA2D': 'protected_area', 'PARENT_ISO': 'location_id'})
    .assign(habitat_name='seamounts', year=2023)
)
seamounts_protected

Unnamed: 0,location_id,protected_area,habitat_name,year
0,ABNJ,226253.932283,seamounts,2023
1,ARG,38773.659962,seamounts,2023
2,AUS,250507.827932,seamounts,2023
3,BHS,9405.718473,seamounts,2023
4,BRA,89687.890132,seamounts,2023
5,CAN,66235.357502,seamounts,2023
6,CHL,239414.964764,seamounts,2023
7,COK,238289.821637,seamounts,2023
8,COL,30080.163652,seamounts,2023
9,COM,1584.80965,seamounts,2023


In [39]:
# Attach 'protected_area' from seamounts_protected to seamounts_iso by location_id.
#
# total_area is keyed by the EEZ sovereign codes (ISO_SOV*), whereas protected_area is
# keyed by the protected areas' PARENT_ISO, so the two code sets need not coincide.
# A left join silently discards the protected area of any code that has no total_area
# row; report those codes so the loss is visible instead of going unnoticed.
protected_only = sorted(
    set(seamounts_protected['location_id']) - set(seamounts_iso['location_id'])
)
if protected_only:
    warnings.warn(
        "protected_area dropped for location_id(s) without a total_area row: "
        + ", ".join(map(str, protected_only))
    )

# Both inputs are group-by results with one row per location_id; validate makes the
# merge fail loudly if that ever stops being true instead of duplicating rows.
seamounts_iso2 = seamounts_iso.merge(
    seamounts_protected[['location_id', 'protected_area']],
    on='location_id',
    how='left',
    validate='one_to_one',
)

In [41]:
# Locations without any protected seamount got NaN from the left merge; record them as 0
seamounts_iso2 = seamounts_iso2.fillna({'protected_area': 0})
seamounts_iso2

Unnamed: 0,location_id,total_area,habitat_name,year,protected_area
0,ABNJ,1.483098e+07,seamounts,2023,226253.932283
1,AGO,9.556242e+03,seamounts,2023,0.000000
2,ARG,3.110730e+05,seamounts,2023,38773.659962
3,ATA,3.551629e+05,seamounts,2023,0.000000
4,ATG,6.215895e+03,seamounts,2023,0.000000
...,...,...,...,...,...
88,VNM,4.421338e+04,seamounts,2023,0.000000
89,VUT,1.199475e+05,seamounts,2023,0.000000
90,WSM,4.117997e+04,seamounts,2023,0.000000
91,YEM,6.294974e+04,seamounts,2023,2487.428050


In [None]:
# Write the per-location table. The output folder is created first so that a
# fresh checkout without processed/habitat/ does not make to_csv fail.
seamounts_csv = path_out + "habitat/seamounts.csv"
os.makedirs(os.path.dirname(seamounts_csv), exist_ok=True)
seamounts_iso2.to_csv(seamounts_csv, index=False)