# Incorporate marine species to NRC
### In this notebook we incorporate marine data to the updated terrestrial data calculated in the Terrestrial_NRC notebook
April 2022

In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
import arcgis
from arcgis.gis import GIS
import json
import pandas as pd
from arcgis.features import FeatureLayerCollection
import requests as re
from copy import deepcopy
from itertools import repeat
import functools

## Import, explore and prepare the data

In [2]:
path = '/Users/sofia/Documents/HE_Data/NRC/NRC_Marine'

In [3]:
# Import tables
gadm = pd.read_csv('/Users/sofia/Documents/HE_Data/NRC/NRC_Terrestrial/NRC_Terrestrial_20220420.csv') # This is the updated gadm_centroid
spi = pd.read_csv(f'{path}/Marine_SPI_by_country_202203323_ter-1.csv')# table with spi and protection
mar = pd.read_csv(f'{path}/NRC_marine_species_data_20220323_ter-1.csv') # Species able provided by Alex (MOL)
pop = pd.read_csv(f'{path}/Pop2020_EEZ.csv') # calculated in arcgis pro using the population2020.crf and the EEZ shapefile
hm = pd.read_csv(f'{path}/marine_perc_human.csv') # human modification table
EEZ = pd.read_csv(f'{path}/eez_dissolve_centroid.csv') # centroids of the EEZ created in ArcGIS pro (inside option deactivated so the centroid can fall outside the geometry)

In [4]:
gadm.head(1)

Unnamed: 0.1,Unnamed: 0,GID_0,NAME_0,jpg_url,GNI_PPP,max_amph,max_bird,max_mamm,max_rept,max_cact,...,birds,mammals,reptiles,endemic_amphibians,endemic_birds,endemic_mammals,endemic_reptiles,nspecies_ter,total_endemic_ter,filter_similar_ter
0,0,ABW,Aruba,https://live.staticflickr.com/1952/31416683438...,3.9,3,1,10,10,10,...,193,4,32,0,0,0,3,232,3,"{""filter_Area_Country"": [""JEY"", ""CXR"", ""WLF"", ..."


In [8]:
len(gadm)

252

In [9]:
spi.head(1)

Unnamed: 0,SOVEREIGN1,iso_ter1,year,nspecies,SPI_low,SPI_high,percentprotected_low,percentprotected_high
0,Albania,ALB,1980,347,0.0,0.0,0.0,0.0


In [5]:
len(spi['iso_ter1'].unique()) # There are 183 unique countries/territories with SPI data

183

In [11]:
mar.head(1)

Unnamed: 0,speciesgroup,species,countryname,iso_ter1,percentprotected,NSPS,stewardship
0,marine fishes,Abalistes filamentosus,Australia,AUS,25-50%,75-100,22


In [6]:
len(mar['iso_ter1'].unique()) # There are 183 unique countries/territories with marine species

183

In [13]:
pop.head(1)

Unnamed: 0,OID_,GID_0,ZONE_CODE,COUNT,AREA,SUM
0,1,ABW,1,55.0,0.003819,1547.605201


In [14]:
len(pop['GID_0'].unique()) # There are 200 countries/territories that have population in their EEZ

200

In [15]:
hm.head(1)

Unnamed: 0.1,Unnamed: 0,ISO_TER1_F,no_human,human,very_high
0,2,ABW,0.0,0.783047,0.216953


In [16]:
len(hm['ISO_TER1_F'].unique()) # There are 203 countries/territories that have human modification data in their EEZ

203

In [17]:
EEZ.head()

Unnamed: 0,OID_,GID_0,AREA_KM2,ORIG_FID,Latitude,Longitude
0,1,ABW,29970.299588,1,13.74138,-69.673412
1,2,AGO,495859.762742,2,-11.701098,11.035371
2,3,AIA,90157.964205,3,20.01803,-62.543285
3,4,ALB,12165.548773,4,40.927084,19.113001
4,5,ARE,57838.146798,5,25.058911,54.03058


In [18]:
len(EEZ['GID_0'].unique()) # There are 201 countries/territories that have EEZ

201

#### Check the GID_0 to see if there are discrepancies with gadm

In [7]:
# Number of countries in species list according to GID_0
country_list = list(mar['iso_ter1'].unique())
len(country_list) ## 183 territories in the species table

183

In [8]:
# Find GID_0 that are in species table but not in gadm 
list1= list(gadm['GID_0'])
list2=list(mar['iso_ter1'].unique())
list(set(list2).difference(list1))  # XXZ is international waters and UMI is the United States Minor Outlying Islands. None of them are in gadm so let's remove them

['UMI', 'XXZ']

In [9]:
# Remove 'XXZ', 'UMI' from species table to have only GID_0 that are in gadm
l = list(set(list2).difference(list1))
mar = mar[~mar['iso_ter1'].isin(l)]
len(mar.iso_ter1.unique())

181

In [10]:
# Find GID_0 that are in spi table but not in gadm:
list1= list(gadm['GID_0'])
list2=list(spi['iso_ter1'].unique())
list(set(list2).difference(list1)) 

['UMI', 'XXZ']

In [11]:
# Remove 'XXZ', 'UMI' from species table to have only GID_0 that are in gadm
l = list(set(list2).difference(list1))
spi = spi[~spi['iso_ter1'].isin(l)]
len(mar.iso_ter1.unique())

181

In [12]:
# Find GID_0 that are in hm but not in gadm:
list1= list(gadm['GID_0'])
list2=list(hm['ISO_TER1_F'].unique())
list(set(list2).difference(list1)) 

['TMP', 'UMI']

In [13]:
# Remove 'XXZ', 'UMI' from hm table to have only GID_0 that are in gadm
l = list(set(list2).difference(list1))
hm = hm[~hm['ISO_TER1_F'].isin(l)]
len(hm.ISO_TER1_F.unique())

201

In [14]:
# Find GID_0 that are in EEZ table but not in gadm
list1= list(gadm['GID_0'])
list2=list(EEZ['GID_0'].unique())
list(set(list2).difference(list1)) 

[]

In [15]:
# Find GID_0 that are in population but not in gadm:
list1= list(gadm['GID_0'])
list2=list(pop['GID_0'].unique())
list(set(list2).difference(list1)) 

[]

#### Note: Although there are 201 EEZ (and population and human modification for those), the species and the spi datasets only have info for 181 countries. So we'll only consider those for the marine NRC. Let's remove the GID_0 that are not present in the species table

In [16]:
# Find mismatches in GID_0 between mar table and hm table:
list1= list(mar['iso_ter1'])
list2=list(hm['ISO_TER1_F'].unique())
list3=list(set(list2).difference(list1))
# Keep only records which GID_0 is in the species dataset (mar)
hm2 = hm[~hm['ISO_TER1_F'].isin(list3)]
len(hm2)

181

In [17]:
list1= list(mar['iso_ter1'])
list2=list(hm2['ISO_TER1_F'].unique())
list(set(list2).difference(list1)) # Now all hm have species data

[]

In [18]:
# Change name of human modification fields to prepare it for join
hm2 = hm2.rename(columns={'ISO_TER1_F':'GID_0', 'no_human':'hm_no_mar', 'human':'hm_mar', 'very_high':'hm_vh_mar'}).drop(columns= {'Unnamed: 0'})
hm2.head()

Unnamed: 0,GID_0,hm_no_mar,hm_mar,hm_vh_mar
0,ABW,0.0,0.783047,0.216953
2,AIA,0.0,0.943677,0.056323
3,ALB,0.000263,0.003746,0.995991
4,ARE,0.000116,0.034045,0.965838
5,ARG,0.000635,0.906946,0.092419


In [19]:
# The human modification table was given in 0-1 values, let's make it in % to match the terrestrial data
hm2['hm_no_mar'] = hm2['hm_no_mar'].apply(lambda x: x*100)
hm2['hm_mar'] = hm2['hm_mar'].apply(lambda x: x*100)
hm2['hm_vh_mar'] = hm2['hm_vh_mar'].apply(lambda x: x*100)
hm2.head()

Unnamed: 0,GID_0,hm_no_mar,hm_mar,hm_vh_mar
0,ABW,0.0,78.30475,21.69525
2,AIA,0.0,94.367741,5.632259
3,ALB,0.026286,0.374581,99.599133
4,ARE,0.011637,3.404538,96.583825
5,ARG,0.063502,90.69461,9.241887


In [20]:
# Find mismatches in GID_0 between mar table and population table:
list1= list(mar['iso_ter1'])
list2=list(pop['GID_0'].unique())
list3=list(set(list2).difference(list1)) 
# Keep only records which GID_0 is in the species dataset (mar)
pop2 = pop[~pop['GID_0'].isin(list3)]
len(pop2)

180

In [21]:
list1= list(mar['iso_ter1'])
list2=list(pop2['GID_0'].unique())
list(set(list1).difference(list2)) # ATA is the only GID_0 in the species dataset that is not included in pop table (pop=0)

['ATA']

In [22]:
# Change name pop fields to prepare it for join
pop2 = pop2.drop(columns={'OID_', 'ZONE_CODE', 'COUNT', 'AREA'}).rename(columns = {'SUM':'Pop2020_EEZ'})
pop2.head(2)

Unnamed: 0,GID_0,Pop2020_EEZ
0,ABW,1547.605201
2,AIA,1693.719824


In [23]:
# Find mismatches in GID_0 between mar table and EEZ table:
list1= list(mar['iso_ter1'])
list2=list(EEZ['GID_0'].unique())
list3=list(set(list2).difference(list1))
# Keep only records which GID_0 is in the species dataset (mar)
eez2 = EEZ[~EEZ['GID_0'].isin(list3)]
len(eez2)

181

------------------------------------------------------------------------------------------------------------------
## Overview tab: generate general information
### Add Population data 
Population by EEZ was calculated in ArcGIS Pro using the EEZ_dissolved (by GID_0) shp and the population2020.crf

In [24]:
gadm.columns

Index(['Unnamed: 0', 'GID_0', 'NAME_0', 'jpg_url', 'GNI_PPP', 'max_amph',
       'max_bird', 'max_mamm', 'max_rept', 'max_cact', 'max_coni', 'max_all',
       'sentence', 'Global_SPI_ter', 'GlobalID', 'continent', 'has_priority',
       'has_raisg', 'Area_Country', 'protection_needed_ter', 'iso2',
       'max_highlited_sp', 'x', 'y', 'hm_ter', 'hm_no_ter', 'hm_vh_ter',
       'Pop2020', 'SPI_ter', 'prop_protected_ter', 'amphibians', 'birds',
       'mammals', 'reptiles', 'endemic_amphibians', 'endemic_birds',
       'endemic_mammals', 'endemic_reptiles', 'nspecies_ter',
       'total_endemic_ter', 'filter_similar_ter'],
      dtype='object')

In [25]:
len(pop2)

180

In [26]:
# Add new population data and remove old field (SUM)
df = pd.merge(gadm, pop2,  how='left', left_on=['GID_0'], right_on = ['GID_0']).drop(columns={'Unnamed: 0'})
df.columns

Index(['GID_0', 'NAME_0', 'jpg_url', 'GNI_PPP', 'max_amph', 'max_bird',
       'max_mamm', 'max_rept', 'max_cact', 'max_coni', 'max_all', 'sentence',
       'Global_SPI_ter', 'GlobalID', 'continent', 'has_priority', 'has_raisg',
       'Area_Country', 'protection_needed_ter', 'iso2', 'max_highlited_sp',
       'x', 'y', 'hm_ter', 'hm_no_ter', 'hm_vh_ter', 'Pop2020', 'SPI_ter',
       'prop_protected_ter', 'amphibians', 'birds', 'mammals', 'reptiles',
       'endemic_amphibians', 'endemic_birds', 'endemic_mammals',
       'endemic_reptiles', 'nspecies_ter', 'total_endemic_ter',
       'filter_similar_ter', 'Pop2020_EEZ'],
      dtype='object')

In [27]:
df[df.GID_0=='ATA']

Unnamed: 0,GID_0,NAME_0,jpg_url,GNI_PPP,max_amph,max_bird,max_mamm,max_rept,max_cact,max_coni,...,mammals,reptiles,endemic_amphibians,endemic_birds,endemic_mammals,endemic_reptiles,nspecies_ter,total_endemic_ter,filter_similar_ter,Pop2020_EEZ
11,ATA,Antarctica,https://live.staticflickr.com/1590/25126847203...,,0,10,10,0,0,0,...,0,0,0,0,0,0,97,0,"{""filter_Area_Country"": [""COD"", ""KAZ"", ""ARG"", ...",


### SPI and % Protected
MOL sent a first version with these values, which is in https://eowilson.maps.arcgis.com/home/item.html?id=d48f5ea1e59a42048f57e4c44c1a82a3. However, in this case they grouped the EEZ by sovereign1, so they didn't match the terrestrial NRC, which are divided by territories (French Guiana is shown independently from France). So MOL sent a new table dividing the EEZ by iso_ter1 (when this was null they used the field iso_sov1). This was done for both tables (SPI&Protection and species)

In [28]:
# Take the last value (2021) for the general overview
last = spi[spi['year']==2021].copy()
last.head(5)

Unnamed: 0,SOVEREIGN1,iso_ter1,year,nspecies,SPI_low,SPI_high,percentprotected_low,percentprotected_high
41,Albania,ALB,2021,347,1.86,1.86,0.69,0.69
83,Algeria,DZA,2021,489,0.22,0.22,0.02,0.02
125,Antarctica,ATA,2021,121,50.79,50.79,35.55,35.55
167,Antigua and Barbuda,ATG,2021,1519,9.04,9.04,0.3,0.3
209,Argentina,ARG,2021,671,7.0,7.0,9.25,9.25


In [29]:
# Take only relevant fields and change their names (include marine in name to distinguish them from terrestrial)
last = last[['iso_ter1','SPI_high', 'percentprotected_high']]
last = last.rename(columns= {'SPI_high':'SPI_mar', 'percentprotected_high':'prop_protected_mar', 'iso_ter1':'GID_0'})
last.head(1)

Unnamed: 0,GID_0,SPI_mar,prop_protected_mar
41,ALB,1.86,0.69


In [30]:
len(last) # There are 181 territories with marine SPI and protection data

181

In [31]:
# Create new dataframe with the merge of the gadm_centroid and the new values
df= pd.merge(df, last ,how='left', left_on=['GID_0'], right_on = ['GID_0'])
df.head(1)

Unnamed: 0,GID_0,NAME_0,jpg_url,GNI_PPP,max_amph,max_bird,max_mamm,max_rept,max_cact,max_coni,...,endemic_amphibians,endemic_birds,endemic_mammals,endemic_reptiles,nspecies_ter,total_endemic_ter,filter_similar_ter,Pop2020_EEZ,SPI_mar,prop_protected_mar
0,ABW,Aruba,https://live.staticflickr.com/1952/31416683438...,3.9,3,1,10,10,10,0,...,0,0,0,3,232,3,"{""filter_Area_Country"": [""JEY"", ""CXR"", ""WLF"", ...",1547.605201,0.0,0.0


### Add fake % protection needed

In [40]:
df.columns

Index(['GID_0', 'NAME_0', 'jpg_url', 'GNI_PPP', 'max_amph', 'max_bird',
       'max_mamm', 'max_rept', 'max_cact', 'max_coni', 'max_all', 'sentence',
       'Global_SPI_ter', 'GlobalID', 'continent', 'has_priority', 'has_raisg',
       'Area_Country', 'protection_needed_ter', 'iso2', 'max_highlited_sp',
       'x', 'y', 'hm_ter', 'hm_no_ter', 'hm_vh_ter', 'Pop2020', 'SPI_ter',
       'prop_protected_ter', 'amphibians', 'birds', 'mammals', 'reptiles',
       'endemic_amphibians', 'endemic_birds', 'endemic_mammals',
       'endemic_reptiles', 'nspecies_ter', 'total_endemic_ter',
       'filter_similar_ter', 'Pop2020_EEZ', 'SPI_mar', 'prop_protected_mar'],
      dtype='object')

In [41]:
df['protection_needed_mar']= 90-df['prop_protected_mar']
df

Unnamed: 0,GID_0,NAME_0,jpg_url,GNI_PPP,max_amph,max_bird,max_mamm,max_rept,max_cact,max_coni,...,endemic_birds,endemic_mammals,endemic_reptiles,nspecies_ter,total_endemic_ter,filter_similar_ter,Pop2020_EEZ,SPI_mar,prop_protected_mar,protection_needed_mar
0,ABW,Aruba,https://live.staticflickr.com/1952/31416683438...,3.9,3,1,10,10,10,0,...,0,0,3,232,3,"{""filter_Area_Country"": [""JEY"", ""CXR"", ""WLF"", ...",1547.605201,0.00,0.00,90.00
1,AFG,Afghanistan,https://p1.pxfuel.com/preview/967/12/53/afghan...,70.6,10,9,10,10,0,9,...,0,0,4,1085,5,"{""filter_Area_Country"": [""MDG"", ""UKR"", ""CAF"", ...",,,,
2,AGO,Angola,https://live.staticflickr.com/3787/13698381215...,192.2,10,10,10,10,3,5,...,18,9,23,2596,66,"{""filter_Area_Country"": [""BOL"", ""ETH"", ""COL"", ...",,,,
3,AIA,Anguilla,https://live.staticflickr.com/8063/8194570372_...,,9,7,10,10,6,0,...,0,0,1,230,1,"{""filter_Area_Country"": [""PCN"", ""MAF"", ""SMR"", ...",1693.719824,4.68,0.28,89.72
4,ALA,Åland,https://p1.pxfuel.com/preview/294/670/561/alan...,,1,1,1,1,0,1,...,0,0,0,301,0,"{""filter_Area_Country"": [""STP"", ""KIR"", ""MTQ"", ...",,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
247,ZMB,Zambia,https://live.staticflickr.com/8468/28685898864...,65.9,10,9,10,10,3,9,...,2,6,3,2019,13,"{""filter_Area_Country"": [""CAF"", ""SSD"", ""SOM"", ...",,,,
248,ZWE,Zimbabwe,https://live.staticflickr.com/929/43605929641_...,36.8,10,9,9,9,3,9,...,0,0,0,1729,2,"{""filter_Area_Country"": [""MYS"", ""FIN"", ""COG"", ...",,,,
249,KNA,Saint Kitts and Nevis,https://upload.wikimedia.org/wikipedia/commons...,1.4,6,10,10,10,6,10,...,0,0,0,254,0,"{""filter_Area_Country"": [""ASM"", ""SPM"", ""XAD"", ...",5722.801333,11.26,1.38,88.62
250,NAM,Namibia,https://live.staticflickr.com/8403/29279259886...,25.9,10,10,10,10,0,0,...,2,1,30,1747,37,"{""filter_Area_Country"": [""MMR"", ""ZMB"", ""CHL"", ...",1758.179405,7.82,1.81,88.19


### Add human modification data

In [42]:
hm2.head()

Unnamed: 0,GID_0,hm_no_mar,hm_mar,hm_vh_mar
0,ABW,0.0,78.30475,21.69525
2,AIA,0.0,94.367741,5.632259
3,ALB,0.026286,0.374581,99.599133
4,ARE,0.011637,3.404538,96.583825
5,ARG,0.063502,90.69461,9.241887


In [43]:
len(hm2)

181

In [44]:
# Create new dataframe with the merge of the gadm_centroid and the new values
df= pd.merge(df, hm2 ,how='left', left_on=['GID_0'], right_on = ['GID_0'])
df.head(1)

Unnamed: 0,GID_0,NAME_0,jpg_url,GNI_PPP,max_amph,max_bird,max_mamm,max_rept,max_cact,max_coni,...,nspecies_ter,total_endemic_ter,filter_similar_ter,Pop2020_EEZ,SPI_mar,prop_protected_mar,protection_needed_mar,hm_no_mar,hm_mar,hm_vh_mar
0,ABW,Aruba,https://live.staticflickr.com/1952/31416683438...,3.9,3,1,10,10,10,0,...,232,3,"{""filter_Area_Country"": [""JEY"", ""CXR"", ""WLF"", ...",1547.605201,0.0,0.0,90.0,0.0,78.30475,21.69525


In [45]:
df.head()

Unnamed: 0,GID_0,NAME_0,jpg_url,GNI_PPP,max_amph,max_bird,max_mamm,max_rept,max_cact,max_coni,...,nspecies_ter,total_endemic_ter,filter_similar_ter,Pop2020_EEZ,SPI_mar,prop_protected_mar,protection_needed_mar,hm_no_mar,hm_mar,hm_vh_mar
0,ABW,Aruba,https://live.staticflickr.com/1952/31416683438...,3.9,3,1,10,10,10,0,...,232,3,"{""filter_Area_Country"": [""JEY"", ""CXR"", ""WLF"", ...",1547.605201,0.0,0.0,90.0,0.0,78.30475,21.69525
1,AFG,Afghanistan,https://p1.pxfuel.com/preview/967/12/53/afghan...,70.6,10,9,10,10,0,9,...,1085,5,"{""filter_Area_Country"": [""MDG"", ""UKR"", ""CAF"", ...",,,,,,,
2,AGO,Angola,https://live.staticflickr.com/3787/13698381215...,192.2,10,10,10,10,3,5,...,2596,66,"{""filter_Area_Country"": [""BOL"", ""ETH"", ""COL"", ...",,,,,,,
3,AIA,Anguilla,https://live.staticflickr.com/8063/8194570372_...,,9,7,10,10,6,0,...,230,1,"{""filter_Area_Country"": [""PCN"", ""MAF"", ""SMR"", ...",1693.719824,4.68,0.28,89.72,0.0,94.367741,5.632259
4,ALA,Åland,https://p1.pxfuel.com/preview/294/670/561/alan...,,1,1,1,1,0,1,...,301,0,"{""filter_Area_Country"": [""STP"", ""KIR"", ""MTQ"", ...",,,,,,,


### Calculate number of marine species and endemic species

In [46]:
mar.head(1)

Unnamed: 0,speciesgroup,species,countryname,iso_ter1,percentprotected,NSPS,stewardship
0,marine fishes,Abalistes filamentosus,Australia,AUS,25-50%,75-100,22


In [47]:
## Get number of species (by taxa) per country
mar2 = mar[['speciesgroup', 'species','countryname', 'iso_ter1']]
mar_num = mar2.groupby(by = ['speciesgroup', 'countryname', 'iso_ter1']).count().reset_index()
mar_num.head(5)

Unnamed: 0,speciesgroup,countryname,iso_ter1,species
0,marine fishes,Albania,ALB,345
1,marine fishes,Algeria,DZA,470
2,marine fishes,Antarctica,ATA,106
3,marine fishes,Antigua and Barbuda,ATG,1493
4,marine fishes,Argentina,ARG,632


In [48]:
## Distinguish between taxa
mamm = mar_num[mar_num['speciesgroup']=='marine mammals']
fish = mar_num[mar_num['speciesgroup']=='marine fishes']

In [46]:
mamm.head(1)

Unnamed: 0,speciesgroup,countryname,iso_ter1,species
181,marine mammals,Albania,ALB,2


In [47]:
fish.head(1)

Unnamed: 0,speciesgroup,countryname,iso_ter1,species
0,marine fishes,Albania,ALB,345


In [49]:
## Include number of species in dataframe
df= pd.merge(df, mamm,  how='left', left_on=['GID_0'], right_on = ['iso_ter1']).drop(columns = {'speciesgroup','iso_ter1','countryname'}).rename(columns={'species': 'mammals_mar'})
df= pd.merge(df, fish,  how='left', left_on=['GID_0'], right_on = ['iso_ter1']).drop(columns = {'speciesgroup','iso_ter1','countryname'}).rename(columns={'species': 'fishes_mar'})
df.head(5)

Unnamed: 0,GID_0,NAME_0,jpg_url,GNI_PPP,max_amph,max_bird,max_mamm,max_rept,max_cact,max_coni,...,filter_similar_ter,Pop2020_EEZ,SPI_mar,prop_protected_mar,protection_needed_mar,hm_no_mar,hm_mar,hm_vh_mar,mammals_mar,fishes_mar
0,ABW,Aruba,https://live.staticflickr.com/1952/31416683438...,3.9,3,1,10,10,10,0,...,"{""filter_Area_Country"": [""JEY"", ""CXR"", ""WLF"", ...",1547.605201,0.0,0.0,90.0,0.0,78.30475,21.69525,19.0,1466.0
1,AFG,Afghanistan,https://p1.pxfuel.com/preview/967/12/53/afghan...,70.6,10,9,10,10,0,9,...,"{""filter_Area_Country"": [""MDG"", ""UKR"", ""CAF"", ...",,,,,,,,,
2,AGO,Angola,https://live.staticflickr.com/3787/13698381215...,192.2,10,10,10,10,3,5,...,"{""filter_Area_Country"": [""BOL"", ""ETH"", ""COL"", ...",,,,,,,,,
3,AIA,Anguilla,https://live.staticflickr.com/8063/8194570372_...,,9,7,10,10,6,0,...,"{""filter_Area_Country"": [""PCN"", ""MAF"", ""SMR"", ...",1693.719824,4.68,0.28,89.72,0.0,94.367741,5.632259,25.0,1494.0
4,ALA,Åland,https://p1.pxfuel.com/preview/294/670/561/alan...,,1,1,1,1,0,1,...,"{""filter_Area_Country"": [""STP"", ""KIR"", ""MTQ"", ...",,,,,,,,,


In [50]:
## Calculate number of endemic species per country: mammals
mamm_e = mar[(mar['speciesgroup']=='marine mammals')&(mar['stewardship']==1)]
mamm_e = mamm_e.groupby(['speciesgroup','iso_ter1']).sum()
mamm_e = mamm_e.reset_index().rename(columns={'stewardship':'endemic_mammals_mar'})
mamm_e.head(5)

Unnamed: 0,speciesgroup,iso_ter1,endemic_mammals_mar
0,marine mammals,AUS,1
1,marine mammals,ECU,2
2,marine mammals,NZL,1
3,marine mammals,USA,1


In [51]:
## Calculate number of endemic species per country: fishes
fish_e = mar[(mar['speciesgroup']=='marine fishes')&(mar['stewardship']==1)]
fish_e = fish_e.groupby(['speciesgroup','iso_ter1']).sum()
fish_e = fish_e.reset_index().rename(columns={'stewardship':'endemic_fishes_mar'})
fish_e.head(5)

Unnamed: 0,speciesgroup,iso_ter1,endemic_fishes_mar
0,marine fishes,ARG,2
1,marine fishes,ATA,5
2,marine fishes,ATF,3
3,marine fishes,AUS,379
4,marine fishes,BHS,2


In [52]:
## Merge endemic data in dataframe
df= pd.merge(df, mamm_e,  how='left', left_on=['GID_0'], right_on = ['iso_ter1']).drop(columns = {'speciesgroup','iso_ter1'})
df= pd.merge(df, fish_e,  how='left', left_on=['GID_0'], right_on = ['iso_ter1']).drop(columns = {'speciesgroup','iso_ter1'})
df.head(5)

Unnamed: 0,GID_0,NAME_0,jpg_url,GNI_PPP,max_amph,max_bird,max_mamm,max_rept,max_cact,max_coni,...,SPI_mar,prop_protected_mar,protection_needed_mar,hm_no_mar,hm_mar,hm_vh_mar,mammals_mar,fishes_mar,endemic_mammals_mar,endemic_fishes_mar
0,ABW,Aruba,https://live.staticflickr.com/1952/31416683438...,3.9,3,1,10,10,10,0,...,0.0,0.0,90.0,0.0,78.30475,21.69525,19.0,1466.0,,
1,AFG,Afghanistan,https://p1.pxfuel.com/preview/967/12/53/afghan...,70.6,10,9,10,10,0,9,...,,,,,,,,,,
2,AGO,Angola,https://live.staticflickr.com/3787/13698381215...,192.2,10,10,10,10,3,5,...,,,,,,,,,,
3,AIA,Anguilla,https://live.staticflickr.com/8063/8194570372_...,,9,7,10,10,6,0,...,4.68,0.28,89.72,0.0,94.367741,5.632259,25.0,1494.0,,
4,ALA,Åland,https://p1.pxfuel.com/preview/294/670/561/alan...,,1,1,1,1,0,1,...,,,,,,,,,,


In [53]:
sum(df['endemic_fishes_mar'].notnull())

44

In [54]:
## Make columns type integer
cols = ['mammals_mar', 'endemic_mammals_mar', 'fishes_mar', 'endemic_fishes_mar']
df[cols] = df[cols].fillna(0) 
df[cols] = df[cols].astype(int)

In [55]:
df.columns

Index(['GID_0', 'NAME_0', 'jpg_url', 'GNI_PPP', 'max_amph', 'max_bird',
       'max_mamm', 'max_rept', 'max_cact', 'max_coni', 'max_all', 'sentence',
       'Global_SPI_ter', 'GlobalID', 'continent', 'has_priority', 'has_raisg',
       'Area_Country', 'protection_needed_ter', 'iso2', 'max_highlited_sp',
       'x', 'y', 'hm_ter', 'hm_no_ter', 'hm_vh_ter', 'Pop2020', 'SPI_ter',
       'prop_protected_ter', 'amphibians', 'birds', 'mammals', 'reptiles',
       'endemic_amphibians', 'endemic_birds', 'endemic_mammals',
       'endemic_reptiles', 'nspecies_ter', 'total_endemic_ter',
       'filter_similar_ter', 'Pop2020_EEZ', 'SPI_mar', 'prop_protected_mar',
       'protection_needed_mar', 'hm_no_mar', 'hm_mar', 'hm_vh_mar',
       'mammals_mar', 'fishes_mar', 'endemic_mammals_mar',
       'endemic_fishes_mar'],
      dtype='object')

In [56]:
# Calculate total number of species and total number of endemic marine
df['nspecies_mar']= df['mammals_mar']+df['fishes_mar']
df['total_endemic_mar']= df['endemic_mammals_mar']+df['endemic_fishes_mar']
df.head(5)

Unnamed: 0,GID_0,NAME_0,jpg_url,GNI_PPP,max_amph,max_bird,max_mamm,max_rept,max_cact,max_coni,...,protection_needed_mar,hm_no_mar,hm_mar,hm_vh_mar,mammals_mar,fishes_mar,endemic_mammals_mar,endemic_fishes_mar,nspecies_mar,total_endemic_mar
0,ABW,Aruba,https://live.staticflickr.com/1952/31416683438...,3.9,3,1,10,10,10,0,...,90.0,0.0,78.30475,21.69525,19,1466,0,0,1485,0
1,AFG,Afghanistan,https://p1.pxfuel.com/preview/967/12/53/afghan...,70.6,10,9,10,10,0,9,...,,,,,0,0,0,0,0,0
2,AGO,Angola,https://live.staticflickr.com/3787/13698381215...,192.2,10,10,10,10,3,5,...,,,,,0,0,0,0,0,0
3,AIA,Anguilla,https://live.staticflickr.com/8063/8194570372_...,,9,7,10,10,6,0,...,89.72,0.0,94.367741,5.632259,25,1494,0,0,1519,0
4,ALA,Åland,https://p1.pxfuel.com/preview/294/670/561/alan...,,1,1,1,1,0,1,...,,,,,0,0,0,0,0,0


In [57]:
# How many countries have marine data?
len(df[df['nspecies_mar']>0]) # 181

181

In [58]:
# How many countries do not have marine data?
len(df[df['nspecies_mar']==0]) # 71

71

In [59]:
df.columns

Index(['GID_0', 'NAME_0', 'jpg_url', 'GNI_PPP', 'max_amph', 'max_bird',
       'max_mamm', 'max_rept', 'max_cact', 'max_coni', 'max_all', 'sentence',
       'Global_SPI_ter', 'GlobalID', 'continent', 'has_priority', 'has_raisg',
       'Area_Country', 'protection_needed_ter', 'iso2', 'max_highlited_sp',
       'x', 'y', 'hm_ter', 'hm_no_ter', 'hm_vh_ter', 'Pop2020', 'SPI_ter',
       'prop_protected_ter', 'amphibians', 'birds', 'mammals', 'reptiles',
       'endemic_amphibians', 'endemic_birds', 'endemic_mammals',
       'endemic_reptiles', 'nspecies_ter', 'total_endemic_ter',
       'filter_similar_ter', 'Pop2020_EEZ', 'SPI_mar', 'prop_protected_mar',
       'protection_needed_mar', 'hm_no_mar', 'hm_mar', 'hm_vh_mar',
       'mammals_mar', 'fishes_mar', 'endemic_mammals_mar',
       'endemic_fishes_mar', 'nspecies_mar', 'total_endemic_mar'],
      dtype='object')

In [60]:
eez2.head()

Unnamed: 0,OID_,GID_0,AREA_KM2,ORIG_FID,Latitude,Longitude
0,1,ABW,29970.3,1,13.74138,-69.673412
2,3,AIA,90157.96,3,20.01803,-62.543285
3,4,ALB,12165.55,4,40.927084,19.113001
4,5,ARE,57838.15,5,25.058911,54.03058
5,6,ARG,1072577.0,6,-47.089988,-62.159504


In [61]:
len(eez2)

181

In [62]:
eez2= eez2.drop(columns={'OID_','ORIG_FID'}).rename(columns={'AREA_KM2':'Area_EEZ'})
eez2.head()

Unnamed: 0,GID_0,Area_EEZ,Latitude,Longitude
0,ABW,29970.3,13.74138,-69.673412
2,AIA,90157.96,20.01803,-62.543285
3,ALB,12165.55,40.927084,19.113001
4,ARE,57838.15,25.058911,54.03058
5,ARG,1072577.0,-47.089988,-62.159504


In [63]:
df = pd.merge(left = df, right = eez2, left_on = "GID_0", right_on = "GID_0", how = "left").drop(columns = {'Latitude','Longitude'})
df.columns

Index(['GID_0', 'NAME_0', 'jpg_url', 'GNI_PPP', 'max_amph', 'max_bird',
       'max_mamm', 'max_rept', 'max_cact', 'max_coni', 'max_all', 'sentence',
       'Global_SPI_ter', 'GlobalID', 'continent', 'has_priority', 'has_raisg',
       'Area_Country', 'protection_needed_ter', 'iso2', 'max_highlited_sp',
       'x', 'y', 'hm_ter', 'hm_no_ter', 'hm_vh_ter', 'Pop2020', 'SPI_ter',
       'prop_protected_ter', 'amphibians', 'birds', 'mammals', 'reptiles',
       'endemic_amphibians', 'endemic_birds', 'endemic_mammals',
       'endemic_reptiles', 'nspecies_ter', 'total_endemic_ter',
       'filter_similar_ter', 'Pop2020_EEZ', 'SPI_mar', 'prop_protected_mar',
       'protection_needed_mar', 'hm_no_mar', 'hm_mar', 'hm_vh_mar',
       'mammals_mar', 'fishes_mar', 'endemic_mammals_mar',
       'endemic_fishes_mar', 'nspecies_mar', 'total_endemic_mar', 'Area_EEZ'],
      dtype='object')

In [64]:
df.head()

Unnamed: 0,GID_0,NAME_0,jpg_url,GNI_PPP,max_amph,max_bird,max_mamm,max_rept,max_cact,max_coni,...,hm_no_mar,hm_mar,hm_vh_mar,mammals_mar,fishes_mar,endemic_mammals_mar,endemic_fishes_mar,nspecies_mar,total_endemic_mar,Area_EEZ
0,ABW,Aruba,https://live.staticflickr.com/1952/31416683438...,3.9,3,1,10,10,10,0,...,0.0,78.30475,21.69525,19,1466,0,0,1485,0,29970.299588
1,AFG,Afghanistan,https://p1.pxfuel.com/preview/967/12/53/afghan...,70.6,10,9,10,10,0,9,...,,,,0,0,0,0,0,0,
2,AGO,Angola,https://live.staticflickr.com/3787/13698381215...,192.2,10,10,10,10,3,5,...,,,,0,0,0,0,0,0,
3,AIA,Anguilla,https://live.staticflickr.com/8063/8194570372_...,,9,7,10,10,6,0,...,0.0,94.367741,5.632259,25,1494,0,0,1519,0,90157.964205
4,ALA,Åland,https://p1.pxfuel.com/preview/294/670/561/alan...,,1,1,1,1,0,1,...,,,,0,0,0,0,0,0,


### Add fake global SPI marine

In [65]:
df.columns

Index(['GID_0', 'NAME_0', 'jpg_url', 'GNI_PPP', 'max_amph', 'max_bird',
       'max_mamm', 'max_rept', 'max_cact', 'max_coni', 'max_all', 'sentence',
       'Global_SPI_ter', 'GlobalID', 'continent', 'has_priority', 'has_raisg',
       'Area_Country', 'protection_needed_ter', 'iso2', 'max_highlited_sp',
       'x', 'y', 'hm_ter', 'hm_no_ter', 'hm_vh_ter', 'Pop2020', 'SPI_ter',
       'prop_protected_ter', 'amphibians', 'birds', 'mammals', 'reptiles',
       'endemic_amphibians', 'endemic_birds', 'endemic_mammals',
       'endemic_reptiles', 'nspecies_ter', 'total_endemic_ter',
       'filter_similar_ter', 'Pop2020_EEZ', 'SPI_mar', 'prop_protected_mar',
       'protection_needed_mar', 'hm_no_mar', 'hm_mar', 'hm_vh_mar',
       'mammals_mar', 'fishes_mar', 'endemic_mammals_mar',
       'endemic_fishes_mar', 'nspecies_mar', 'total_endemic_mar', 'Area_EEZ'],
      dtype='object')

In [68]:
df['Global_SPI_mar']= np.where(df['Area_EEZ']>0, 45, 'NaN')
df

Unnamed: 0,GID_0,NAME_0,jpg_url,GNI_PPP,max_amph,max_bird,max_mamm,max_rept,max_cact,max_coni,...,hm_mar,hm_vh_mar,mammals_mar,fishes_mar,endemic_mammals_mar,endemic_fishes_mar,nspecies_mar,total_endemic_mar,Area_EEZ,Global_SPI_mar
0,ABW,Aruba,https://live.staticflickr.com/1952/31416683438...,3.9,3,1,10,10,10,0,...,78.304750,21.695250,19,1466,0,0,1485,0,29970.299588,45
1,AFG,Afghanistan,https://p1.pxfuel.com/preview/967/12/53/afghan...,70.6,10,9,10,10,0,9,...,,,0,0,0,0,0,0,,
2,AGO,Angola,https://live.staticflickr.com/3787/13698381215...,192.2,10,10,10,10,3,5,...,,,0,0,0,0,0,0,,
3,AIA,Anguilla,https://live.staticflickr.com/8063/8194570372_...,,9,7,10,10,6,0,...,94.367741,5.632259,25,1494,0,0,1519,0,90157.964205,45
4,ALA,Åland,https://p1.pxfuel.com/preview/294/670/561/alan...,,1,1,1,1,0,1,...,,,0,0,0,0,0,0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
247,ZMB,Zambia,https://live.staticflickr.com/8468/28685898864...,65.9,10,9,10,10,3,9,...,,,0,0,0,0,0,0,,
248,ZWE,Zimbabwe,https://live.staticflickr.com/929/43605929641_...,36.8,10,9,9,9,3,9,...,,,0,0,0,0,0,0,,
249,KNA,Saint Kitts and Nevis,https://upload.wikimedia.org/wikipedia/commons...,1.4,6,10,10,10,6,10,...,59.032359,40.967641,25,1466,0,0,1491,0,9501.814149,45
250,NAM,Namibia,https://live.staticflickr.com/8403/29279259886...,25.9,10,10,10,10,0,0,...,97.461351,2.501774,45,651,0,1,696,1,562215.381131,45


---------------------------------------------------------------------------------------------------------------------------------------------
## Process to create the challenges tab (create array with similar filters)
### Create matrix to identify countries with shared stewardship to create the stewardship filter
This code is more efficient than that described in the notebook "shared_stewardship", which was used during the first iteration of the NRC

In [70]:
mar.head()

Unnamed: 0,speciesgroup,species,countryname,iso_ter1,percentprotected,NSPS,stewardship
0,marine fishes,Abalistes filamentosus,Australia,AUS,25-50%,75-100,22
1,marine fishes,Abalistes filamentosus,Australia,CCK,0-25%,0-25,22
2,marine fishes,Abalistes filamentosus,Australia,NFK,75-100%,75-100,22
3,marine fishes,Abalistes filamentosus,East Timor,TLS,0-25%,0-25,22
4,marine fishes,Abalistes filamentosus,Fiji,FJI,0-25%,50-75,22


In [71]:
# Create a copy with only the species name and the iso3
mar2 = mar[['iso_ter1','species']].copy()
mar2.head(5)

Unnamed: 0,iso_ter1,species
0,AUS,Abalistes filamentosus
1,CCK,Abalistes filamentosus
2,NFK,Abalistes filamentosus
3,TLS,Abalistes filamentosus
4,FJI,Abalistes filamentosus


In [72]:
%%time
# Create a matrix that has, for each country, the number of shared species with each of the other countries
m = mar2.merge(mar2, on='species') # perform a self-merge based on the species
mat = pd.crosstab(m.iso_ter1_x, m.iso_ter1_y) # perform crosstabulation operation
mat.reset_index(inplace=True)
mat= mat.rename(columns = {'iso_ter1_x':'index'})
mat.head(5)

CPU times: user 2.67 s, sys: 732 ms, total: 3.4 s
Wall time: 3.58 s


iso_ter1_y,index,ABW,AIA,ALB,ARE,ARG,ASM,ATA,ATF,ATG,...,USA,VCT,VEN,VGB,VIR,VNM,VUT,WSM,YEM,ZAF
0,ABW,1485,1327,61,133,187,257,3,329,1328,...,1435,1380,1485,1280,1329,300,325,279,116,441
1,AIA,1327,1519,58,118,204,300,4,339,1503,...,1462,1374,1492,1441,1498,273,352,319,111,460
2,ALB,61,58,347,31,38,24,0,49,58,...,78,59,61,54,57,36,28,27,23,99
3,ARE,133,118,31,1142,50,327,0,905,121,...,485,132,133,103,123,823,522,435,1023,816
4,ARG,187,204,38,50,671,136,32,308,203,...,393,217,248,182,205,93,175,162,33,386


In [73]:
mat.shape 

(181, 182)

In [51]:
# Save local copy
# mat.to_csv('/Users/sofia/Documents/HE_Data/NRC/NRC_Marine/stewardship_matrix.csv',index=False)

### Get shared stewardship countries
Using the stewardship matrix. 

In [63]:
mat.columns.values

array(['index', 'ABW', 'AIA', 'ALB', 'ARE', 'ARG', 'ASM', 'ATA', 'ATF',
       'ATG', 'AUS', 'BEL', 'BES', 'BGD', 'BGR', 'BHS', 'BIH', 'BLM',
       'BLZ', 'BMU', 'BRA', 'BRB', 'BRN', 'CAN', 'CCK', 'CHL', 'CHN',
       'CMR', 'COD', 'COG', 'COK', 'COL', 'COM', 'CPV', 'CRI', 'CUB',
       'CUW', 'CYM', 'CYP', 'DEU', 'DMA', 'DNK', 'DOM', 'DZA', 'ECU',
       'EGY', 'ESH', 'ESP', 'EST', 'FIN', 'FJI', 'FLK', 'FRA', 'FRO',
       'FSM', 'GAB', 'GBR', 'GEO', 'GGY', 'GIN', 'GLP', 'GMB', 'GNB',
       'GNQ', 'GRC', 'GRD', 'GRL', 'GTM', 'GUF', 'GUM', 'GUY', 'HMD',
       'HND', 'HRV', 'HTI', 'IDN', 'IND', 'IRL', 'IRN', 'IRQ', 'ISL',
       'ISR', 'ITA', 'JAM', 'JEY', 'JOR', 'JPN', 'KEN', 'KHM', 'KIR',
       'KNA', 'KOR', 'KWT', 'LBN', 'LBR', 'LCA', 'LKA', 'LTU', 'LVA',
       'MAF', 'MAR', 'MCO', 'MDG', 'MDV', 'MEX', 'MHL', 'MLT', 'MMR',
       'MNP', 'MOZ', 'MRT', 'MSR', 'MTQ', 'MUS', 'MYS', 'MYT', 'NAM',
       'NCL', 'NFK', 'NGA', 'NIC', 'NIU', 'NLD', 'NOR', 'NZL', 'OMN',
       'PAK', 'PAN

In [74]:
mat.columns.values[1:183]

array(['ABW', 'AIA', 'ALB', 'ARE', 'ARG', 'ASM', 'ATA', 'ATF', 'ATG',
       'AUS', 'BEL', 'BES', 'BGD', 'BGR', 'BHS', 'BIH', 'BLM', 'BLZ',
       'BMU', 'BRA', 'BRB', 'BRN', 'CAN', 'CCK', 'CHL', 'CHN', 'CMR',
       'COD', 'COG', 'COK', 'COL', 'COM', 'CPV', 'CRI', 'CUB', 'CUW',
       'CYM', 'CYP', 'DEU', 'DMA', 'DNK', 'DOM', 'DZA', 'ECU', 'EGY',
       'ESH', 'ESP', 'EST', 'FIN', 'FJI', 'FLK', 'FRA', 'FRO', 'FSM',
       'GAB', 'GBR', 'GEO', 'GGY', 'GIN', 'GLP', 'GMB', 'GNB', 'GNQ',
       'GRC', 'GRD', 'GRL', 'GTM', 'GUF', 'GUM', 'GUY', 'HMD', 'HND',
       'HRV', 'HTI', 'IDN', 'IND', 'IRL', 'IRN', 'IRQ', 'ISL', 'ISR',
       'ITA', 'JAM', 'JEY', 'JOR', 'JPN', 'KEN', 'KHM', 'KIR', 'KNA',
       'KOR', 'KWT', 'LBN', 'LBR', 'LCA', 'LKA', 'LTU', 'LVA', 'MAF',
       'MAR', 'MCO', 'MDG', 'MDV', 'MEX', 'MHL', 'MLT', 'MMR', 'MNP',
       'MOZ', 'MRT', 'MSR', 'MTQ', 'MUS', 'MYS', 'MYT', 'NAM', 'NCL',
       'NFK', 'NGA', 'NIC', 'NIU', 'NLD', 'NOR', 'NZL', 'OMN', 'PAK',
       'PAN', 'PCN',

In [75]:
# Get only the values (skip index)
df_mat = mat[mat.columns.values[1:183]]
df_mat.head(5)

iso_ter1_y,ABW,AIA,ALB,ARE,ARG,ASM,ATA,ATF,ATG,AUS,...,USA,VCT,VEN,VGB,VIR,VNM,VUT,WSM,YEM,ZAF
0,1485,1327,61,133,187,257,3,329,1328,400,...,1435,1380,1485,1280,1329,300,325,279,116,441
1,1327,1519,58,118,204,300,4,339,1503,425,...,1462,1374,1492,1441,1498,273,352,319,111,460
2,61,58,347,31,38,24,0,49,58,48,...,78,59,61,54,57,36,28,27,23,99
3,133,118,31,1142,50,327,0,905,121,843,...,485,132,133,103,123,823,522,435,1023,816
4,187,204,38,50,671,136,32,308,203,396,...,393,217,248,182,205,93,175,162,33,386


In [76]:
# set index using countries
df_mat = df_mat.set_index(mat['index'].values) 
df_mat.head(5)

iso_ter1_y,ABW,AIA,ALB,ARE,ARG,ASM,ATA,ATF,ATG,AUS,...,USA,VCT,VEN,VGB,VIR,VNM,VUT,WSM,YEM,ZAF
ABW,1485,1327,61,133,187,257,3,329,1328,400,...,1435,1380,1485,1280,1329,300,325,279,116,441
AIA,1327,1519,58,118,204,300,4,339,1503,425,...,1462,1374,1492,1441,1498,273,352,319,111,460
ALB,61,58,347,31,38,24,0,49,58,48,...,78,59,61,54,57,36,28,27,23,99
ARE,133,118,31,1142,50,327,0,905,121,843,...,485,132,133,103,123,823,522,435,1023,816
ARG,187,204,38,50,671,136,32,308,203,396,...,393,217,248,182,205,93,175,162,33,386


In [77]:
df_mat.columns = mat['index'].values
df_mat.head(5)

Unnamed: 0,ABW,AIA,ALB,ARE,ARG,ASM,ATA,ATF,ATG,AUS,...,USA,VCT,VEN,VGB,VIR,VNM,VUT,WSM,YEM,ZAF
ABW,1485,1327,61,133,187,257,3,329,1328,400,...,1435,1380,1485,1280,1329,300,325,279,116,441
AIA,1327,1519,58,118,204,300,4,339,1503,425,...,1462,1374,1492,1441,1498,273,352,319,111,460
ALB,61,58,347,31,38,24,0,49,58,48,...,78,59,61,54,57,36,28,27,23,99
ARE,133,118,31,1142,50,327,0,905,121,843,...,485,132,133,103,123,823,522,435,1023,816
ARG,187,204,38,50,671,136,32,308,203,396,...,393,217,248,182,205,93,175,162,33,386


In [78]:
# Now it has the same shape
df_mat.shape

(181, 181)

In [79]:
# Create stewardship dictionary: for each country identify the 10 countries that share more species with it
df_sort = df_mat.copy()
steward_dict = dict.fromkeys(df_sort.columns.values)
for key in steward_dict:
    df_sort = df_sort.sort_values(by = [key], ascending=False)
    sub = df_sort[key][1:11] # to skip the same country
    vals = sub.index.values.tolist()
    vals.append(key)
    
    steward_dict[key] = json.dumps(vals)

In [80]:
# Convert stewardship dictionary into dataframe
steward_df = pd.DataFrame(steward_dict.items(), columns = ["GID_0","filter_steward"])
steward_df.head(5)

Unnamed: 0,GID_0,filter_steward
0,ABW,"[""VEN"", ""COL"", ""CUW"", ""DOM"", ""BES"", ""USA"", ""BH..."
1,AIA,"[""DOM"", ""BLM"", ""GLP"", ""PRI"", ""ATG"", ""BES"", ""MA..."
2,ALB,"[""GRC"", ""ITA"", ""ESP"", ""HRV"", ""DZA"", ""FRA"", ""TU..."
3,ARE,"[""IRN"", ""SAU"", ""OMN"", ""PAK"", ""SYC"", ""YEM"", ""MD..."
4,ARG,"[""CHL"", ""BRA"", ""NZL"", ""AUS"", ""USA"", ""ZAF"", ""NF..."


In [81]:
steward_df.shape

(181, 2)

### Get nearest EEZ
Similar to what was done in the Terrestrial_NRC notebook for the countries' centroids: the idea here is to find, for each of the displayed EEZ, the closest ones using the distance between their centroids. 

Note that the original EEZ layer often has several EEZ associated with the same GID_0 (being GID_0 the iso_ter1 or, when this is missing, the iso_sov1). This leads to problems when trying to match the EEZ and the country polygons currently displayed in the NRC. For this reason, using ArcGIS Pro, we first dissolved the EEZ layer according to the GID_0 field, so that all polygons with same GID_0 code were considered as 1. Then, the centroids were calculated using the "Feature to Point tool". The centroids were not forced to fall within the feature since most are made of several polygons and forcing the centroid to fall just in one of them can lead to weird situation such as the centroid of the Spanish EEZ falling in the Canary Islands. For this reason, the "inside" option of the tool was disabled and the resulting centroids are located at the center of all the polygons conforming each of those EEZ, even if thar means that they fall, somethimes, outside the corresponding EEZ (in the ocean or even in a different EEZ)

In [82]:
from math import radians
import pandas as pd
import numpy as np
from sklearn.metrics import DistanceMetric

In [83]:
eez2.head()

Unnamed: 0,GID_0,Area_EEZ,Latitude,Longitude
0,ABW,29970.3,13.74138,-69.673412
2,AIA,90157.96,20.01803,-62.543285
3,ALB,12165.55,40.927084,19.113001
4,ARE,57838.15,25.058911,54.03058
5,ARG,1072577.0,-47.089988,-62.159504


In [84]:
len(eez2)

181

In [85]:
df_coord = pd.DataFrame(data = eez2['GID_0'])
len(df_coord)

181

In [86]:
df_coord.head()

Unnamed: 0,GID_0
0,ABW
2,AIA
3,ALB
4,ARE
5,ARG


In [87]:
len(df['GID_0'].values)

252

In [88]:
# Get the coordinates of each EEZ centroid
y_list = []
x_list = []
for index, i in eez2.iterrows():
    y_list.append(i['Latitude'])
    x_list.append(i['Longitude'])
    
df_coord['x'] = x_list
df_coord['y'] = y_list
df_coord.head(5)

Unnamed: 0,GID_0,x,y
0,ABW,-69.673412,13.74138
2,AIA,-62.543285,20.01803
3,ALB,19.113001,40.927084
4,ARE,54.03058,25.058911
5,ARG,-62.159504,-47.089988


In [89]:
# Convert the coordinates to radians
df_coord['lat'] = np.radians(df_coord['y'])
df_coord['lon'] = np.radians(df_coord['x'])
df_coord.head(5)

Unnamed: 0,GID_0,x,y,lat,lon
0,ABW,-69.673412,13.74138,0.239832,-1.21603
2,AIA,-62.543285,20.01803,0.349381,-1.091586
3,ALB,19.113001,40.927084,0.714312,0.333585
4,ARE,54.03058,25.058911,0.437361,0.943012
5,ARG,-62.159504,-47.089988,-0.821875,-1.084888


In [90]:
# Get distance metric and use it to calculate the distance between coordinates of each country
dist = DistanceMetric.get_metric('haversine')
dist_df = pd.DataFrame(dist.pairwise(df_coord[['lat','lon']].to_numpy())*6373,  columns=df_coord.GID_0.unique(), index=df_coord.GID_0.unique())
dist_df.head(5)

Unnamed: 0,ABW,AIA,ALB,ARE,ARG,ASM,ATA,ATF,ATG,AUS,...,USA,VCT,VEN,VGB,VIR,VNM,VUT,WSM,YEM,ZAF
ABW,0.0,1030.808723,8914.511641,12547.816812,6807.636739,11363.979416,10402.096103,14419.463293,1119.079832,16480.608889,...,5516.075937,865.55441,419.428711,921.11543,641.667561,17196.124674,13781.883918,11743.613019,12950.292114,11709.958023
AIA,1030.808723,0.0,7885.881036,11527.545191,7464.512951,12230.8902,10872.544121,14304.121273,251.553043,17497.492407,...,5649.937921,768.373044,880.534088,208.905758,396.759468,16376.658731,14630.528192,12595.968453,11985.848882,11508.89188
ALB,8914.511641,7885.881036,0.0,3670.470318,12645.491889,16907.327294,12690.409407,10176.126307,7809.772415,14634.491854,...,9906.514331,8290.467684,8669.939067,8022.311156,8273.171738,9138.47689,16035.353774,16738.656936,4467.695397,8856.152641
ARE,12547.816812,11527.545191,3670.470318,0.0,13972.846658,15354.787082,11798.337743,7500.825686,11431.579298,11014.248601,...,12616.292443,11862.212594,12270.933759,11674.109276,11906.979081,5928.339267,13212.30832,14961.932367,1369.32308,7497.564528
ARG,6807.636739,7464.512951,12645.491889,13972.846658,0.0,10117.891654,3757.815605,8684.571735,7323.187947,11165.678004,...,11219.997133,6701.539762,6671.524486,7509.462226,7162.428327,15984.14587,11318.944369,10422.740599,12883.478703,7102.284726


In [91]:
dist_df.shape

(181, 181)

In [92]:
# Sort the table for each point. 0 values correspond to the same country, so take the 1:11 and add the row names to a dictionary to have the names of the 10 closest EEZ. 
dist_df_sort = dist_df.copy()
neighbour_dict = dict.fromkeys(dist_df_sort.columns.values)
for key in neighbour_dict:
    dist_df_sort = dist_df_sort.sort_values(by = [key]) # sort countries from closest to farthest
    sub = dist_df_sort[key][1:11] # keep the 10 closest ones but skip the same country (0 distance)
    vals = sub.index.values.tolist() # take the values
    neighbour_dict[key] = json.dumps(vals) # include them in dictionary

In [93]:
# Convert neighboring dictionary into dataframe
neigh_df = pd.DataFrame(neighbour_dict.items(), columns = ["GID_0","filter_neigh"])
neigh_df.head(5)

Unnamed: 0,GID_0,filter_neigh
0,ABW,"[""CUW"", ""BES"", ""VEN"", ""PRI"", ""DOM"", ""HTI"", ""VI..."
1,AIA,"[""VGB"", ""MAF"", ""BLM"", ""SXM"", ""ATG"", ""KNA"", ""VI..."
2,ALB,"[""BIH"", ""HRV"", ""ITA"", ""GRC"", ""SVN"", ""MLT"", ""BG..."
3,ARE,"[""QAT"", ""IRN"", ""KWT"", ""IRQ"", ""OMN"", ""PAK"", ""YE..."
4,ARG,"[""FLK"", ""URY"", ""CHL"", ""SGS"", ""ATA"", ""PER"", ""BR..."


In [94]:
neigh_df[neigh_df['GID_0']=='USA'].values

array([['USA',
        '["MEX", "CAN", "BLZ", "CUB", "GTM", "HND", "CYM", "SLV", "BHS", "NIC"]']],
      dtype=object)

In [95]:
neigh_df.shape

(181, 2)

In [96]:
# See in which countries the steward and neighbour dictionaries differ
list1= list(neigh_df['GID_0'])
list2=list(steward_df['GID_0'])
list3 = list(set(list1).difference(list2))
list3

[]

In [97]:
# Merge these two dataframes together
df_dict = pd.merge(left = neigh_df, right = steward_df, left_on = "GID_0", right_on = "GID_0", how = "left")
df_dict.head(5)

Unnamed: 0,GID_0,filter_neigh,filter_steward
0,ABW,"[""CUW"", ""BES"", ""VEN"", ""PRI"", ""DOM"", ""HTI"", ""VI...","[""VEN"", ""COL"", ""CUW"", ""DOM"", ""BES"", ""USA"", ""BH..."
1,AIA,"[""VGB"", ""MAF"", ""BLM"", ""SXM"", ""ATG"", ""KNA"", ""VI...","[""DOM"", ""BLM"", ""GLP"", ""PRI"", ""ATG"", ""BES"", ""MA..."
2,ALB,"[""BIH"", ""HRV"", ""ITA"", ""GRC"", ""SVN"", ""MLT"", ""BG...","[""GRC"", ""ITA"", ""ESP"", ""HRV"", ""DZA"", ""FRA"", ""TU..."
3,ARE,"[""QAT"", ""IRN"", ""KWT"", ""IRQ"", ""OMN"", ""PAK"", ""YE...","[""IRN"", ""SAU"", ""OMN"", ""PAK"", ""SYC"", ""YEM"", ""MD..."
4,ARG,"[""FLK"", ""URY"", ""CHL"", ""SGS"", ""ATA"", ""PER"", ""BR...","[""CHL"", ""BRA"", ""NZL"", ""AUS"", ""USA"", ""ZAF"", ""NF..."


In [98]:
df_dict.shape

(181, 3)

### Get below and above countries for each field in challenges

In [99]:
df.columns

Index(['GID_0', 'NAME_0', 'jpg_url', 'GNI_PPP', 'max_amph', 'max_bird',
       'max_mamm', 'max_rept', 'max_cact', 'max_coni', 'max_all', 'sentence',
       'Global_SPI_ter', 'GlobalID', 'continent', 'has_priority', 'has_raisg',
       'Area_Country', 'protection_needed_ter', 'iso2', 'max_highlited_sp',
       'x', 'y', 'hm_ter', 'hm_no_ter', 'hm_vh_ter', 'Pop2020', 'SPI_ter',
       'prop_protected_ter', 'amphibians', 'birds', 'mammals', 'reptiles',
       'endemic_amphibians', 'endemic_birds', 'endemic_mammals',
       'endemic_reptiles', 'nspecies_ter', 'total_endemic_ter',
       'filter_similar_ter', 'Pop2020_EEZ', 'SPI_mar', 'prop_protected_mar',
       'protection_needed_mar', 'hm_no_mar', 'hm_mar', 'hm_vh_mar',
       'mammals_mar', 'fishes_mar', 'endemic_mammals_mar',
       'endemic_fishes_mar', 'nspecies_mar', 'total_endemic_mar', 'Area_EEZ',
       'Global_SPI_mar'],
      dtype='object')

In [100]:
df.head()

Unnamed: 0,GID_0,NAME_0,jpg_url,GNI_PPP,max_amph,max_bird,max_mamm,max_rept,max_cact,max_coni,...,hm_mar,hm_vh_mar,mammals_mar,fishes_mar,endemic_mammals_mar,endemic_fishes_mar,nspecies_mar,total_endemic_mar,Area_EEZ,Global_SPI_mar
0,ABW,Aruba,https://live.staticflickr.com/1952/31416683438...,3.9,3,1,10,10,10,0,...,78.30475,21.69525,19,1466,0,0,1485,0,29970.299588,45.0
1,AFG,Afghanistan,https://p1.pxfuel.com/preview/967/12/53/afghan...,70.6,10,9,10,10,0,9,...,,,0,0,0,0,0,0,,
2,AGO,Angola,https://live.staticflickr.com/3787/13698381215...,192.2,10,10,10,10,3,5,...,,,0,0,0,0,0,0,,
3,AIA,Anguilla,https://live.staticflickr.com/8063/8194570372_...,,9,7,10,10,6,0,...,94.367741,5.632259,25,1494,0,0,1519,0,90157.964205,45.0
4,ALA,Åland,https://p1.pxfuel.com/preview/294/670/561/alan...,,1,1,1,1,0,1,...,,,0,0,0,0,0,0,,


In [101]:
## Fill Nan in pop with zeroes
df['Pop2020_EEZ'] = df['Pop2020_EEZ'].fillna(0) 
df.head()

Unnamed: 0,GID_0,NAME_0,jpg_url,GNI_PPP,max_amph,max_bird,max_mamm,max_rept,max_cact,max_coni,...,hm_mar,hm_vh_mar,mammals_mar,fishes_mar,endemic_mammals_mar,endemic_fishes_mar,nspecies_mar,total_endemic_mar,Area_EEZ,Global_SPI_mar
0,ABW,Aruba,https://live.staticflickr.com/1952/31416683438...,3.9,3,1,10,10,10,0,...,78.30475,21.69525,19,1466,0,0,1485,0,29970.299588,45.0
1,AFG,Afghanistan,https://p1.pxfuel.com/preview/967/12/53/afghan...,70.6,10,9,10,10,0,9,...,,,0,0,0,0,0,0,,
2,AGO,Angola,https://live.staticflickr.com/3787/13698381215...,192.2,10,10,10,10,3,5,...,,,0,0,0,0,0,0,,
3,AIA,Anguilla,https://live.staticflickr.com/8063/8194570372_...,,9,7,10,10,6,0,...,94.367741,5.632259,25,1494,0,0,1519,0,90157.964205,45.0
4,ALA,Åland,https://p1.pxfuel.com/preview/294/670/561/alan...,,1,1,1,1,0,1,...,,,0,0,0,0,0,0,,


In [102]:
fields = ['GID_0', 'NAME_0', 'Area_EEZ', 'Pop2020_EEZ', 'prop_protected_mar', 'hm_vh_mar', 'protection_needed_mar', 'total_endemic_mar', 'nspecies_mar', 'SPI_mar', "continent"]

##### % of protection needed still pending from MOL

In [103]:
df_fields = df[fields].copy()
df_fields.head(2)

Unnamed: 0,GID_0,NAME_0,Area_EEZ,Pop2020_EEZ,prop_protected_mar,hm_vh_mar,protection_needed_mar,total_endemic_mar,nspecies_mar,SPI_mar,continent
0,ABW,Aruba,29970.299588,1547.605201,0.0,21.69525,90.0,0,1485,0.0,North America
1,AFG,Afghanistan,,0.0,,,,0,0,,Asia


In [104]:
filter_fields = ['Area_EEZ','Pop2020_EEZ', 'hm_vh_mar', 'prop_protected_mar', 'protection_needed_mar', 'total_endemic_mar','nspecies_mar', 'SPI_mar']

In [105]:
# Get dictionaries for the other fields
df_sort = df.copy()
nber_index = 5
max_index = len(df_sort.index) - 1

new_fields = []
for field in filter_fields:
    df_sort = df_sort.sort_values(by = [field]).reset_index(drop=True)
    collapse_list = []
    for index, i in df_sort.iterrows():
        country_gid = df_sort.GID_0[index]
        above_index = index - nber_index
        below_index = index + nber_index + 1
        if above_index < 0:
            below_index = nber_index * 2 
            above_index = 0
        if below_index > max_index:
            above_index = max_index - (nber_index * 2)
            below_index = max_index

        sub_pd = df_sort.GID_0[above_index:below_index]
        val_list = sub_pd.values.tolist()
        collapse_list.append(json.dumps(val_list))

        #val_list_rem = val_list.remove(country_gid)
    filter_field = f"filter_{field}"
    new_fields.append(filter_field)
    df_sort[filter_field] = collapse_list

In [106]:
df_sort.head(2)

Unnamed: 0,GID_0,NAME_0,jpg_url,GNI_PPP,max_amph,max_bird,max_mamm,max_rept,max_cact,max_coni,...,Area_EEZ,Global_SPI_mar,filter_Area_EEZ,filter_Pop2020_EEZ,filter_hm_vh_mar,filter_prop_protected_mar,filter_protection_needed_mar,filter_total_endemic_mar,filter_nspecies_mar,filter_SPI_mar
0,NGA,Nigeria,https://live.staticflickr.com/3799/10667053324...,1089.4,10,10,10,10,3,5,...,179049.037315,45,"[""KEN"", ""COM"", ""STP"", ""PRI"", ""MRT"", ""NGA"", ""BR...","[""ITA"", ""MMR"", ""KOR"", ""TUR"", ""ESP"", ""NGA"", ""BG...","[""SPM"", ""LKA"", ""PAK"", ""GBR"", ""TWN"", ""NGA"", ""KO...","[""TKL"", ""SLE"", ""PRK"", ""REU"", ""ABW"", ""NGA"", ""CP...","[""TKL"", ""SLE"", ""PRK"", ""REU"", ""ABW"", ""NGA"", ""CP...","[""YEM"", ""TKL"", ""SLE"", ""PRK"", ""ABW"", ""NGA"", ""IN...","[""HMD"", ""DNK"", ""PCN"", ""ISL"", ""ISR"", ""NGA"", ""TK...","[""NGA"", ""TTO"", ""ISR"", ""BMU"", ""PRK"", ""SLE"", ""CO..."
1,TTO,Trinidad and Tobago,https://upload.wikimedia.org/wikipedia/commons...,43.7,10,9,10,10,10,10,...,76574.299151,45,"[""HRV"", ""DEU"", ""ARE"", ""NLD"", ""MYT"", ""TTO"", ""TL...","[""GNQ"", ""GNB"", ""GEO"", ""PYF"", ""ISL"", ""TTO"", ""TL...","[""SLE"", ""VIR"", ""MEX"", ""COG"", ""JAM"", ""TTO"", ""SD...","[""TTO"", ""LBR"", ""YEM"", ""TKL"", ""SLE"", ""PRK"", ""RE...","[""SHN"", ""VGB"", ""MCO"", ""COD"", ""STP"", ""TTO"", ""XS...","[""QAT"", ""ISR"", ""SHN"", ""VGB"", ""MCO"", ""TTO"", ""XS...","[""NIU"", ""MAR"", ""SHN"", ""GBR"", ""CPV"", ""TTO"", ""BM...","[""NGA"", ""TTO"", ""ISR"", ""BMU"", ""PRK"", ""SLE"", ""CO..."


In [107]:
len(df_sort)

252

In [108]:
df_sort = df_sort[df_sort['Area_EEZ']>0]
len(df_sort)

181

### Get countries from same continent

In [109]:
df_sort.continent.unique()

array(['Africa', 'North America', 'Asia', 'Europe', 'South America',
       'Oceania', 'Antarctica'], dtype=object)

In [110]:
continent_dict = {}

In [111]:
for continent in df_sort.continent.unique():
    countries = df_sort.loc[df_sort['continent'] == continent].GID_0
    continent_dict[continent] = countries.tolist()

In [112]:
# Create same continent dictionary
same_continent_dict = dict.fromkeys(df_sort.GID_0.unique())
for key in same_continent_dict:
    continent_name = df_sort.loc[df_sort['GID_0'] == key, "continent"].to_list()[0]
    vals = continent_dict[continent_name]
    same_continent_dict[key] = json.dumps(vals)

In [113]:
# Convert to dataframe
continent_df = pd.DataFrame(same_continent_dict.items(), columns = ["GID_0","filter_continent"])
continent_df.head()

Unnamed: 0,GID_0,filter_continent
0,NGA,"[""NGA"", ""ISR"", ""SLE"", ""COD"", ""REU"", ""STP"", ""CP..."
1,TTO,"[""TTO"", ""BMU"", ""ABW"", ""VGB"", ""GRD"", ""SPM"", ""LC..."
2,ISR,"[""NGA"", ""ISR"", ""SLE"", ""COD"", ""REU"", ""STP"", ""CP..."
3,BMU,"[""TTO"", ""BMU"", ""ABW"", ""VGB"", ""GRD"", ""SPM"", ""LC..."
4,PRK,"[""PRK"", ""IRQ"", ""IND"", ""QAT"", ""SGP"", ""YEM"", ""BR..."


In [114]:
continent_df.shape

(181, 2)

In [115]:
# Merge the 3 filters we have so far into one dataframe
df_dict = pd.merge(left = continent_df, 
                   right = df_dict, left_on = "GID_0", right_on = "GID_0", how = "left")

In [116]:
df_dict.shape

(181, 4)

In [117]:
df_dict.head()

Unnamed: 0,GID_0,filter_continent,filter_neigh,filter_steward
0,NGA,"[""NGA"", ""ISR"", ""SLE"", ""COD"", ""REU"", ""STP"", ""CP...","[""STP"", ""CMR"", ""GNQ"", ""GAB"", ""FRA"", ""COG"", ""CO...","[""LBR"", ""SLE"", ""GIN"", ""GNQ"", ""GAB"", ""GNB"", ""SE..."
1,TTO,"[""TTO"", ""BMU"", ""ABW"", ""VGB"", ""GRD"", ""SPM"", ""LC...","[""GRD"", ""VCT"", ""BRB"", ""LCA"", ""GUY"", ""MTQ"", ""DM...","[""VEN"", ""COL"", ""GRD"", ""VCT"", ""PAN"", ""NIC"", ""CU..."
2,ISR,"[""NGA"", ""ISR"", ""SLE"", ""COD"", ""REU"", ""STP"", ""CP...","[""LBN"", ""CYP"", ""JOR"", ""EGY"", ""TUR"", ""GRC"", ""BG...","[""EGY"", ""SAU"", ""SDN"", ""IRN"", ""GRC"", ""JOR"", ""ES..."
3,BMU,"[""TTO"", ""BMU"", ""ABW"", ""VGB"", ""GRD"", ""SPM"", ""LC...","[""TCA"", ""BHS"", ""VGB"", ""AIA"", ""DOM"", ""ATG"", ""PR...","[""USA"", ""BHS"", ""VEN"", ""DOM"", ""CUW"", ""MEX"", ""CU..."
4,PRK,"[""PRK"", ""IRQ"", ""IND"", ""QAT"", ""SGP"", ""YEM"", ""BR...","[""KOR"", ""JPN"", ""CHN"", ""TWN"", ""MNP"", ""PHL"", ""GU...","[""KOR"", ""JPN"", ""CHN"", ""TWN"", ""RUS"", ""USA"", ""CA..."


In [118]:
df_dict[df_dict['filter_continent'].isnull()]

Unnamed: 0,GID_0,filter_continent,filter_neigh,filter_steward


### Create the `filter_similar_marine` field with all the filters together

In [119]:
new_fields

['filter_Area_EEZ',
 'filter_Pop2020_EEZ',
 'filter_hm_vh_mar',
 'filter_prop_protected_mar',
 'filter_protection_needed_mar',
 'filter_total_endemic_mar',
 'filter_nspecies_mar',
 'filter_SPI_mar']

In [120]:
new_fields.append("filter_neigh")
new_fields.append("filter_steward")
new_fields.append("filter_continent")
new_fields

['filter_Area_EEZ',
 'filter_Pop2020_EEZ',
 'filter_hm_vh_mar',
 'filter_prop_protected_mar',
 'filter_protection_needed_mar',
 'filter_total_endemic_mar',
 'filter_nspecies_mar',
 'filter_SPI_mar',
 'filter_neigh',
 'filter_steward',
 'filter_continent']

In [121]:
df_sort.shape

(181, 63)

In [122]:
df_sort.head(1)

Unnamed: 0,GID_0,NAME_0,jpg_url,GNI_PPP,max_amph,max_bird,max_mamm,max_rept,max_cact,max_coni,...,Area_EEZ,Global_SPI_mar,filter_Area_EEZ,filter_Pop2020_EEZ,filter_hm_vh_mar,filter_prop_protected_mar,filter_protection_needed_mar,filter_total_endemic_mar,filter_nspecies_mar,filter_SPI_mar
0,NGA,Nigeria,https://live.staticflickr.com/3799/10667053324...,1089.4,10,10,10,10,3,5,...,179049.037315,45,"[""KEN"", ""COM"", ""STP"", ""PRI"", ""MRT"", ""NGA"", ""BR...","[""ITA"", ""MMR"", ""KOR"", ""TUR"", ""ESP"", ""NGA"", ""BG...","[""SPM"", ""LKA"", ""PAK"", ""GBR"", ""TWN"", ""NGA"", ""KO...","[""TKL"", ""SLE"", ""PRK"", ""REU"", ""ABW"", ""NGA"", ""CP...","[""TKL"", ""SLE"", ""PRK"", ""REU"", ""ABW"", ""NGA"", ""CP...","[""YEM"", ""TKL"", ""SLE"", ""PRK"", ""ABW"", ""NGA"", ""IN...","[""HMD"", ""DNK"", ""PCN"", ""ISL"", ""ISR"", ""NGA"", ""TK...","[""NGA"", ""TTO"", ""ISR"", ""BMU"", ""PRK"", ""SLE"", ""CO..."


In [123]:
df_filter = pd.merge(left = df_sort, right = df_dict, left_on = "GID_0", right_on = "GID_0", how = "left")
df_filter.head(2)

Unnamed: 0,GID_0,NAME_0,jpg_url,GNI_PPP,max_amph,max_bird,max_mamm,max_rept,max_cact,max_coni,...,filter_Pop2020_EEZ,filter_hm_vh_mar,filter_prop_protected_mar,filter_protection_needed_mar,filter_total_endemic_mar,filter_nspecies_mar,filter_SPI_mar,filter_continent,filter_neigh,filter_steward
0,NGA,Nigeria,https://live.staticflickr.com/3799/10667053324...,1089.4,10,10,10,10,3,5,...,"[""ITA"", ""MMR"", ""KOR"", ""TUR"", ""ESP"", ""NGA"", ""BG...","[""SPM"", ""LKA"", ""PAK"", ""GBR"", ""TWN"", ""NGA"", ""KO...","[""TKL"", ""SLE"", ""PRK"", ""REU"", ""ABW"", ""NGA"", ""CP...","[""TKL"", ""SLE"", ""PRK"", ""REU"", ""ABW"", ""NGA"", ""CP...","[""YEM"", ""TKL"", ""SLE"", ""PRK"", ""ABW"", ""NGA"", ""IN...","[""HMD"", ""DNK"", ""PCN"", ""ISL"", ""ISR"", ""NGA"", ""TK...","[""NGA"", ""TTO"", ""ISR"", ""BMU"", ""PRK"", ""SLE"", ""CO...","[""NGA"", ""ISR"", ""SLE"", ""COD"", ""REU"", ""STP"", ""CP...","[""STP"", ""CMR"", ""GNQ"", ""GAB"", ""FRA"", ""COG"", ""CO...","[""LBR"", ""SLE"", ""GIN"", ""GNQ"", ""GAB"", ""GNB"", ""SE..."
1,TTO,Trinidad and Tobago,https://upload.wikimedia.org/wikipedia/commons...,43.7,10,9,10,10,10,10,...,"[""GNQ"", ""GNB"", ""GEO"", ""PYF"", ""ISL"", ""TTO"", ""TL...","[""SLE"", ""VIR"", ""MEX"", ""COG"", ""JAM"", ""TTO"", ""SD...","[""TTO"", ""LBR"", ""YEM"", ""TKL"", ""SLE"", ""PRK"", ""RE...","[""SHN"", ""VGB"", ""MCO"", ""COD"", ""STP"", ""TTO"", ""XS...","[""QAT"", ""ISR"", ""SHN"", ""VGB"", ""MCO"", ""TTO"", ""XS...","[""NIU"", ""MAR"", ""SHN"", ""GBR"", ""CPV"", ""TTO"", ""BM...","[""NGA"", ""TTO"", ""ISR"", ""BMU"", ""PRK"", ""SLE"", ""CO...","[""TTO"", ""BMU"", ""ABW"", ""VGB"", ""GRD"", ""SPM"", ""LC...","[""GRD"", ""VCT"", ""BRB"", ""LCA"", ""GUY"", ""MTQ"", ""DM...","[""VEN"", ""COL"", ""GRD"", ""VCT"", ""PAN"", ""NIC"", ""CU..."


In [124]:
df_filter.shape

(181, 66)

In [125]:
similar_list = []
for index, i in df_filter.iterrows():
    filter_dict = i[new_fields].to_dict()   
    vals = json.dumps(filter_dict).replace('NaN','"NaN"').replace('"[', '[').replace(']"', ']').replace('\\', '')
    similar_list.append(vals)
    #similar_list.append(json.loads(json.dumps(filter_dict)))    
df_filter['filter_similar_mar'] = similar_list
df_filter.head(2)

Unnamed: 0,GID_0,NAME_0,jpg_url,GNI_PPP,max_amph,max_bird,max_mamm,max_rept,max_cact,max_coni,...,filter_hm_vh_mar,filter_prop_protected_mar,filter_protection_needed_mar,filter_total_endemic_mar,filter_nspecies_mar,filter_SPI_mar,filter_continent,filter_neigh,filter_steward,filter_similar_mar
0,NGA,Nigeria,https://live.staticflickr.com/3799/10667053324...,1089.4,10,10,10,10,3,5,...,"[""SPM"", ""LKA"", ""PAK"", ""GBR"", ""TWN"", ""NGA"", ""KO...","[""TKL"", ""SLE"", ""PRK"", ""REU"", ""ABW"", ""NGA"", ""CP...","[""TKL"", ""SLE"", ""PRK"", ""REU"", ""ABW"", ""NGA"", ""CP...","[""YEM"", ""TKL"", ""SLE"", ""PRK"", ""ABW"", ""NGA"", ""IN...","[""HMD"", ""DNK"", ""PCN"", ""ISL"", ""ISR"", ""NGA"", ""TK...","[""NGA"", ""TTO"", ""ISR"", ""BMU"", ""PRK"", ""SLE"", ""CO...","[""NGA"", ""ISR"", ""SLE"", ""COD"", ""REU"", ""STP"", ""CP...","[""STP"", ""CMR"", ""GNQ"", ""GAB"", ""FRA"", ""COG"", ""CO...","[""LBR"", ""SLE"", ""GIN"", ""GNQ"", ""GAB"", ""GNB"", ""SE...","{""filter_Area_EEZ"": [""KEN"", ""COM"", ""STP"", ""PRI..."
1,TTO,Trinidad and Tobago,https://upload.wikimedia.org/wikipedia/commons...,43.7,10,9,10,10,10,10,...,"[""SLE"", ""VIR"", ""MEX"", ""COG"", ""JAM"", ""TTO"", ""SD...","[""TTO"", ""LBR"", ""YEM"", ""TKL"", ""SLE"", ""PRK"", ""RE...","[""SHN"", ""VGB"", ""MCO"", ""COD"", ""STP"", ""TTO"", ""XS...","[""QAT"", ""ISR"", ""SHN"", ""VGB"", ""MCO"", ""TTO"", ""XS...","[""NIU"", ""MAR"", ""SHN"", ""GBR"", ""CPV"", ""TTO"", ""BM...","[""NGA"", ""TTO"", ""ISR"", ""BMU"", ""PRK"", ""SLE"", ""CO...","[""TTO"", ""BMU"", ""ABW"", ""VGB"", ""GRD"", ""SPM"", ""LC...","[""GRD"", ""VCT"", ""BRB"", ""LCA"", ""GUY"", ""MTQ"", ""DM...","[""VEN"", ""COL"", ""GRD"", ""VCT"", ""PAN"", ""NIC"", ""CU...","{""filter_Area_EEZ"": [""HRV"", ""DEU"", ""ARE"", ""NLD..."


In [126]:
df_merge = df_filter[["GID_0", "filter_similar_mar"]]

In [127]:
df_merge.shape

(181, 2)

In [128]:
df_merge.head(2)

Unnamed: 0,GID_0,filter_similar_mar
0,NGA,"{""filter_Area_EEZ"": [""KEN"", ""COM"", ""STP"", ""PRI..."
1,TTO,"{""filter_Area_EEZ"": [""HRV"", ""DEU"", ""ARE"", ""NLD..."


In [129]:
df.columns

Index(['GID_0', 'NAME_0', 'jpg_url', 'GNI_PPP', 'max_amph', 'max_bird',
       'max_mamm', 'max_rept', 'max_cact', 'max_coni', 'max_all', 'sentence',
       'Global_SPI_ter', 'GlobalID', 'continent', 'has_priority', 'has_raisg',
       'Area_Country', 'protection_needed_ter', 'iso2', 'max_highlited_sp',
       'x', 'y', 'hm_ter', 'hm_no_ter', 'hm_vh_ter', 'Pop2020', 'SPI_ter',
       'prop_protected_ter', 'amphibians', 'birds', 'mammals', 'reptiles',
       'endemic_amphibians', 'endemic_birds', 'endemic_mammals',
       'endemic_reptiles', 'nspecies_ter', 'total_endemic_ter',
       'filter_similar_ter', 'Pop2020_EEZ', 'SPI_mar', 'prop_protected_mar',
       'protection_needed_mar', 'hm_no_mar', 'hm_mar', 'hm_vh_mar',
       'mammals_mar', 'fishes_mar', 'endemic_mammals_mar',
       'endemic_fishes_mar', 'nspecies_mar', 'total_endemic_mar', 'Area_EEZ',
       'Global_SPI_mar'],
      dtype='object')

In [130]:
df = pd.merge(left = df, right = df_merge, left_on = "GID_0", right_on = "GID_0", how = "left")
df.head(1)

Unnamed: 0,GID_0,NAME_0,jpg_url,GNI_PPP,max_amph,max_bird,max_mamm,max_rept,max_cact,max_coni,...,hm_vh_mar,mammals_mar,fishes_mar,endemic_mammals_mar,endemic_fishes_mar,nspecies_mar,total_endemic_mar,Area_EEZ,Global_SPI_mar,filter_similar_mar
0,ABW,Aruba,https://live.staticflickr.com/1952/31416683438...,3.9,3,1,10,10,10,0,...,21.69525,19,1466,0,0,1485,0,29970.299588,45,"{""filter_Area_EEZ"": [""CUW"", ""GRD"", ""LVA"", ""DMA..."


In [131]:
df.to_csv(f'{path}/NRC_Marine_20220426.csv')