# Incorporate marine species to NRC
### In this notebook we incorporate marine data to the updated terrestrial data calculated in the Terrestrial_NRC notebook
April 2022

In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
import arcgis
from arcgis.gis import GIS
import json
import pandas as pd
from arcgis.features import FeatureLayerCollection
import requests as re
from copy import deepcopy
from itertools import repeat
import functools

## Import, explore and prepare the data

In [2]:
path = '/Users/sofia/Documents/HE_Data/NRC/NRC_Marine'

In [3]:
# Import tables
gadm = pd.read_csv('/Users/sofia/Documents/HE_Data/NRC/NRC_Terrestrial/NRC_Terrestrial_20220426.csv') # This is the updated gadm_centroid
spi = pd.read_csv(f'{path}/Marine_SPI_by_country_202203323_ter-1.csv')# table with spi and protection
mar = pd.read_csv(f'{path}/NRC_marine_species_data_20220323_ter-1.csv') # Species able provided by Alex (MOL)
pop = pd.read_csv(f'{path}/Pop2020_eez.csv') # calculated in arcgis pro using the population2020.crf and the EEZ shapefile
hm = pd.read_csv(f'{path}/marine_perc_human.csv') # human modification table
eez = gpd.read_file(f'{path}/eez_centroids/eez_centroids.shp')
prt = pd.read_csv(f'{path}/marine_protection_needed.csv')

In [4]:
gadm.head(1)

Unnamed: 0.1,Unnamed: 0,GID_0,NAME_0,Area_Country,geometry,x,y,jpg_url,has_priority,has_raisg,...,birds,mammals,reptiles,endemic_amphibians,endemic_birds,endemic_mammals,endemic_reptiles,nspecies_ter,total_endemic_ter,filter_similar_ter
0,0,ABW,Aruba,181.938403,"POLYGON ((-69.9782 12.4699, -69.97790000000001...",-69.970245,12.509136,https://live.staticflickr.com/1952/31416683438...,1.0,0.0,...,193,4,32,0,0,0,3,232,3,"{""filter_Area_Country"": [""JEY"", ""CXR"", ""WLF"", ..."


In [5]:
len(gadm)

254

In [7]:
spi.head(1)

Unnamed: 0,SOVEREIGN1,iso_ter1,year,nspecies,SPI_low,SPI_high,percentprotected_low,percentprotected_high,GID_0
0,Albania,ALB,1980,347,0.0,0.0,0.0,0.0,ALB


In [6]:
len(spi['iso_ter1'].unique()) # There are 183 unique countries/territories with SPI data

183

In [9]:
mar.head(1)

Unnamed: 0,speciesgroup,species,countryname,iso_ter1,percentprotected,NSPS,stewardship
0,marine fishes,Abalistes filamentosus,Australia,AUS,25-50%,75-100,22


In [10]:
len(mar['iso_ter1'].unique()) # There are 183 unique countries/territories with marine species

183

In [11]:
pop.head(1)

Unnamed: 0,OID_,GID_0,ZONE_CODE,COUNT,AREA,SUM
0,1,ABW,1,55.0,0.003819,1547.605201


In [5]:
len(pop['GID_0'].unique()) # There are 201 countries/territories that have population in their EEZ

201

In [13]:
hm.head(1)

Unnamed: 0.1,Unnamed: 0,ISO_TER1_F,no_human,human,very_high
0,2,ABW,0.0,0.783047,0.216953


In [6]:
len(hm['ISO_TER1_F'].unique()) # There are 203 countries/territories that have human modification data in their EEZ

203

In [9]:
eez.head()

Unnamed: 0,GID_0,AREA_KM,ORIG_FID,geometry
0,ABW,29970.299588,1,POINT (-69.67341 13.74138)
1,AGO,495859.762742,2,POINT (11.03537 -11.70110)
2,AIA,90157.964205,3,POINT (-62.54329 20.01803)
3,ALB,12165.548773,4,POINT (19.11300 40.92708)
4,ARE,57838.146798,5,POINT (54.03058 25.05891)


In [7]:
len(eez['GID_0'].unique()) # There are 202 countries/territories that have EEZ

202

In [5]:
# There are more hm countries than eez countries??
list1 = list(eez['GID_0'].unique())
list2 = list(hm['ISO_TER1_F'].unique())
list(set(list2).difference(list1)) # 'TMP' is not inlcuded in gadm and EEZ datasets, so let's remove it

['TMP']

In [6]:
hm[hm.ISO_TER1_F=='TMP']

Unnamed: 0.1,Unnamed: 0,ISO_TER1_F,no_human,human,very_high
181,183,TMP,0.00166,0.0,0.99834


In [7]:
hm = hm.drop(index=[181])
len(hm)

202

In [8]:
prt.head(2)

Unnamed: 0.1,Unnamed: 0,ISO_TER1_F,Perc_Prot_Need
0,1,ABW,7.356267
1,2,AGO,7.608244


In [9]:
len(prt)

203

In [12]:
list1 = list(eez['GID_0'].unique())
list2 = list(prt['ISO_TER1_F'].unique())
list(set(list2).difference(list1)) 

['TMP']

In [13]:
prt[prt['ISO_TER1_F']=='TMP']

Unnamed: 0.1,Unnamed: 0,ISO_TER1_F,Perc_Prot_Need
181,182,TMP,0.0


In [14]:
prt = prt.drop(index=[181])
len(prt)

202

#### Check the GID_0 to see if there are discrepancies with gadm

In [15]:
# Find GID_0 that are in species table but not in gadm 
list1= list(gadm['GID_0'])
list2=list(mar['iso_ter1'].unique())
list(set(list2).difference(list1))  # XXZ is international waters

['XXZ']

In [16]:
# Remove 'XXZ'
l = list(set(list2).difference(list1))
mar = mar[~mar['iso_ter1'].isin(l)]
len(mar.iso_ter1.unique())

182

In [17]:
# Find GID_0 that are in spi table but not in gadm:
list1= list(gadm['GID_0'])
list2=list(spi['iso_ter1'].unique())
list(set(list2).difference(list1)) 

['XXZ']

In [18]:
# Remove 'XXZ'
l = list(set(list2).difference(list1))
spi = spi[~spi['iso_ter1'].isin(l)]
len(spi.iso_ter1.unique())

182

In [19]:
# Find GID_0 that are in hm but not in gadm:
list1= list(gadm['GID_0'])
list2=list(hm['ISO_TER1_F'].unique())
list(set(list2).difference(list1)) 

[]

In [20]:
# Find GID_0 that are in population but not in gadm:
list1= list(gadm['GID_0'])
list2=list(pop['GID_0'].unique())
list(set(list2).difference(list1)) 

[]

In [21]:
# Find GID_0 that are in EEZ table but not in gadm
list1= list(gadm['GID_0'])
list2=list(eez['GID_0'].unique())
list(set(list2).difference(list1)) 

[]

### Create "Marine" field in gadm table to identify countries that have eez and those that are only terrestrial

In [20]:
# Create list with countries that have eez
l = list(eez['GID_0'])
len(eez)

202

In [21]:
# Give true or false to marine field according to the presence of eez
gadm['Marine'] = gadm['GID_0'].apply(lambda x: any([k in x for k in l]))
gadm['Marine'] = gadm['Marine'].map({True: 'True', False: 'False'})
type(gadm['Marine'][0])

str

#### Prepare tables

In [22]:
# Change name of human modification fields to prepare it for join
hm = hm.rename(columns={'ISO_TER1_F':'GID_0', 'no_human':'hm_no_mar', 'human':'hm_mar', 'very_high':'hm_vh_mar'}).drop(columns= {'Unnamed: 0'})
hm.head()

Unnamed: 0,GID_0,hm_no_mar,hm_mar,hm_vh_mar
0,ABW,0.0,0.783047,0.216953
1,AGO,0.000223,0.778475,0.221302
2,AIA,0.0,0.943677,0.056323
3,ALB,0.000263,0.003746,0.995991
4,ARE,0.000116,0.034045,0.965838


In [23]:
# The human modification table was given in 0-1 values, let's make it in % to match the terrestrial data
hm['hm_no_mar'] = hm['hm_no_mar'].apply(lambda x: x*100)
hm['hm_mar'] = hm['hm_mar'].apply(lambda x: x*100)
hm['hm_vh_mar'] = hm['hm_vh_mar'].apply(lambda x: x*100)
hm.head()

Unnamed: 0,GID_0,hm_no_mar,hm_mar,hm_vh_mar
0,ABW,0.0,78.30475,21.69525
1,AGO,0.022313,77.847478,22.130209
2,AIA,0.0,94.367741,5.632259
3,ALB,0.026286,0.374581,99.599133
4,ARE,0.011637,3.404538,96.583825


In [24]:
# Change name pop fields to prepare it for join
pop = pop.drop(columns={'OID_', 'ZONE_CODE', 'COUNT', 'AREA'}).rename(columns = {'SUM':'Pop2020_EEZ'})
pop.head(2)

Unnamed: 0,GID_0,Pop2020_EEZ
0,ABW,1547.605201
1,AGO,350682.727157


In [25]:
eez = eez.rename(columns={'AREA_KM':'Area_EEZ'})
eez.head()

Unnamed: 0,GID_0,Area_EEZ,ORIG_FID,geometry
0,ABW,29970.299588,1,POINT (-69.67341 13.74138)
1,AGO,495859.762742,2,POINT (11.03537 -11.70110)
2,AIA,90157.964205,3,POINT (-62.54329 20.01803)
3,ALB,12165.548773,4,POINT (19.11300 40.92708)
4,ARE,57838.146798,5,POINT (54.03058 25.05891)


In [26]:
prt.head(1)

Unnamed: 0.1,Unnamed: 0,ISO_TER1_F,Perc_Prot_Need
0,1,ABW,7.356267


In [28]:
prt = prt.rename(columns={'ISO_TER1_F':'GID_0', 'Perc_Prot_Need':'protection_needed_mar'})

------------------------------------------------------------------------------------------------------------------
## Overview tab: generate general information
### Add Population data 
Population by EEZ was calculated in ArcGIS Pro using the EEZ_dissolved (by GID_0) shp and the population2020.crf

In [29]:
gadm.columns

Index(['Unnamed: 0', 'GID_0', 'NAME_0', 'Area_Country', 'geometry', 'x', 'y',
       'jpg_url', 'has_priority', 'has_raisg', 'GlobalID', 'max_highlited_sp',
       'continent', 'GNI_PPP', 'sentence', 'Global_SPI_ter', 'hm_ter',
       'hm_no_ter', 'hm_vh_ter', 'Pop2020', 'SPI_ter', 'prop_protected_ter',
       'protection_needed_ter', 'amphibians', 'birds', 'mammals', 'reptiles',
       'endemic_amphibians', 'endemic_birds', 'endemic_mammals',
       'endemic_reptiles', 'nspecies_ter', 'total_endemic_ter',
       'filter_similar_ter', 'Marine'],
      dtype='object')

In [42]:
len(pop)

201

In [30]:
# Add new population data 
df = pd.merge(gadm, pop,  how='left', left_on=['GID_0'], right_on = ['GID_0']).drop(columns={'Unnamed: 0'})
df.columns

Index(['GID_0', 'NAME_0', 'Area_Country', 'geometry', 'x', 'y', 'jpg_url',
       'has_priority', 'has_raisg', 'GlobalID', 'max_highlited_sp',
       'continent', 'GNI_PPP', 'sentence', 'Global_SPI_ter', 'hm_ter',
       'hm_no_ter', 'hm_vh_ter', 'Pop2020', 'SPI_ter', 'prop_protected_ter',
       'protection_needed_ter', 'amphibians', 'birds', 'mammals', 'reptiles',
       'endemic_amphibians', 'endemic_birds', 'endemic_mammals',
       'endemic_reptiles', 'nspecies_ter', 'total_endemic_ter',
       'filter_similar_ter', 'Marine', 'Pop2020_EEZ'],
      dtype='object')

### Add human modification data

In [44]:
hm.head()

Unnamed: 0,GID_0,hm_no_mar,hm_mar,hm_vh_mar
0,ABW,0.0,78.30475,21.69525
1,AGO,0.022313,77.847478,22.130209
2,AIA,0.0,94.367741,5.632259
3,ALB,0.026286,0.374581,99.599133
4,ARE,0.011637,3.404538,96.583825


In [45]:
len(hm)

202

In [31]:
# Add human modification data
df= pd.merge(df, hm ,how='left', left_on=['GID_0'], right_on = ['GID_0'])
df.head(1)

Unnamed: 0,GID_0,NAME_0,Area_Country,geometry,x,y,jpg_url,has_priority,has_raisg,GlobalID,...,endemic_mammals,endemic_reptiles,nspecies_ter,total_endemic_ter,filter_similar_ter,Marine,Pop2020_EEZ,hm_no_mar,hm_mar,hm_vh_mar
0,ABW,Aruba,181.938403,"POLYGON ((-69.9782 12.4699, -69.97790000000001...",-69.970245,12.509136,https://live.staticflickr.com/1952/31416683438...,1.0,0.0,fe9f6eb0-f4f8-4f29-875a-5cbb3219e4e5,...,0,3,232,3,"{""filter_Area_Country"": [""JEY"", ""CXR"", ""WLF"", ...",True,1547.605201,0.0,78.30475,21.69525


### Add SPI and % Protected (from MOL). 
MOL sent a first version with these values, which is in https://eowilson.maps.arcgis.com/home/item.html?id=d48f5ea1e59a42048f57e4c44c1a82a3. However, in this case they grouped the EEZ by sovereign1, so they didn't match the terrestrial NRC, which are divided by territories (French Guiana is shown independently from France). So MOL sent a new table dividing the EEZ by iso_ter1 (when this was null they used the field iso_sov1). This was done for both tables (SPI&Protection and species)

In [32]:
# Take the last value (2021) for the general overview
last = spi[spi['year']==2021].copy()
last.head(5)

Unnamed: 0,SOVEREIGN1,iso_ter1,year,nspecies,SPI_low,SPI_high,percentprotected_low,percentprotected_high,GID_0
41,Albania,ALB,2021,347,1.86,1.86,0.69,0.69,ALB
83,Algeria,DZA,2021,489,0.22,0.22,0.02,0.02,DZA
125,Antarctica,ATA,2021,121,50.79,50.79,35.55,35.55,ATA
167,Antigua and Barbuda,ATG,2021,1519,9.04,9.04,0.3,0.3,ATG
209,Argentina,ARG,2021,671,7.0,7.0,9.25,9.25,ARG


In [33]:
# Take only relevant fields and change their names (include marine in name to distinguish them from terrestrial)
last = last[['iso_ter1','SPI_high', 'percentprotected_high']]
last = last.rename(columns= {'SPI_high':'SPI_mar', 'percentprotected_high':'prop_protected_mar', 'iso_ter1':'GID_0'})
last.head(1)

Unnamed: 0,GID_0,SPI_mar,prop_protected_mar
41,ALB,1.86,0.69


In [34]:
len(last) # There are 182 territories with marine SPI and protection data

182

In [35]:
# Add data to dataframe
df= pd.merge(df, last ,how='left', left_on=['GID_0'], right_on = ['GID_0'])
df.head(1)

Unnamed: 0,GID_0,NAME_0,Area_Country,geometry,x,y,jpg_url,has_priority,has_raisg,GlobalID,...,nspecies_ter,total_endemic_ter,filter_similar_ter,Marine,Pop2020_EEZ,hm_no_mar,hm_mar,hm_vh_mar,SPI_mar,prop_protected_mar
0,ABW,Aruba,181.938403,"POLYGON ((-69.9782 12.4699, -69.97790000000001...",-69.970245,12.509136,https://live.staticflickr.com/1952/31416683438...,1.0,0.0,fe9f6eb0-f4f8-4f29-875a-5cbb3219e4e5,...,232,3,"{""filter_Area_Country"": [""JEY"", ""CXR"", ""WLF"", ...",True,1547.605201,0.0,78.30475,21.69525,0.0,0.0


In [36]:
df[df.GID_0=='ATA']

Unnamed: 0,GID_0,NAME_0,Area_Country,geometry,x,y,jpg_url,has_priority,has_raisg,GlobalID,...,nspecies_ter,total_endemic_ter,filter_similar_ter,Marine,Pop2020_EEZ,hm_no_mar,hm_mar,hm_vh_mar,SPI_mar,prop_protected_mar
11,ATA,Antarctica,12365050.0,"MULTIPOLYGON (((-99.8479 -74.9229, -99.8583 -7...",20.814124,-80.561889,https://live.staticflickr.com/1590/25126847203...,1.0,0.0,31d4f242-3c5b-4e94-bc65-e288880d9dda,...,97,0,"{""filter_Area_Country"": [""COD"", ""KAZ"", ""ARG"", ...",True,,77.049071,22.390602,0.560327,50.79,35.55


In [37]:
df.Pop2020_EEZ[df.GID_0=='ATA']=0
df[df.GID_0=='ATA']

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.Pop2020_EEZ[df.GID_0=='ATA']=0


Unnamed: 0,GID_0,NAME_0,Area_Country,geometry,x,y,jpg_url,has_priority,has_raisg,GlobalID,...,nspecies_ter,total_endemic_ter,filter_similar_ter,Marine,Pop2020_EEZ,hm_no_mar,hm_mar,hm_vh_mar,SPI_mar,prop_protected_mar
11,ATA,Antarctica,12365050.0,"MULTIPOLYGON (((-99.8479 -74.9229, -99.8583 -7...",20.814124,-80.561889,https://live.staticflickr.com/1590/25126847203...,1.0,0.0,31d4f242-3c5b-4e94-bc65-e288880d9dda,...,97,0,"{""filter_Area_Country"": [""COD"", ""KAZ"", ""ARG"", ...",True,0.0,77.049071,22.390602,0.560327,50.79,35.55


### Add % protection needed (from MOL, sent on 09/05/2022 as "marine_protection_needed.csv")

In [38]:
prt.head()

Unnamed: 0,GID_0,protection_needed_mar
0,ABW,7.356267
1,AGO,7.608244
2,AIA,1.264193
3,ALB,32.743156
4,ARE,56.964522


In [39]:
# Create new dataframe with the merge of the gadm_centroid and the new values
df= pd.merge(df, prt ,how='left', left_on=['GID_0'], right_on = ['GID_0'])
df.head(1)

Unnamed: 0,GID_0,NAME_0,Area_Country,geometry,x,y,jpg_url,has_priority,has_raisg,GlobalID,...,total_endemic_ter,filter_similar_ter,Marine,Pop2020_EEZ,hm_no_mar,hm_mar,hm_vh_mar,SPI_mar,prop_protected_mar,protection_needed_mar
0,ABW,Aruba,181.938403,"POLYGON ((-69.9782 12.4699, -69.97790000000001...",-69.970245,12.509136,https://live.staticflickr.com/1952/31416683438...,1.0,0.0,fe9f6eb0-f4f8-4f29-875a-5cbb3219e4e5,...,3,"{""filter_Area_Country"": [""JEY"", ""CXR"", ""WLF"", ...",True,1547.605201,0.0,78.30475,21.69525,0.0,0.0,7.356267


In [44]:
df.NAME_0[df['prop_protected_mar']+df['protection_needed_mar']>100]

20                 Bonaire, Sint Eustatius and Saba
47                                 Saint-Barthélemy
105                                      Guadeloupe
156                                    Saint-Martin
174                                      Martinique
197                                           Palau
215    South Georgia and the South Sandwich Islands
Name: NAME_0, dtype: object

In [51]:
df[df['prop_protected_mar']+df['protection_needed_mar']>100]

Unnamed: 0,GID_0,NAME_0,Area_Country,geometry,x,y,jpg_url,has_priority,has_raisg,GlobalID,...,total_endemic_ter,filter_similar_ter,Marine,Pop2020_EEZ,hm_no_mar,hm_mar,hm_vh_mar,SPI_mar,prop_protected_mar,protection_needed_mar
20,BES,"Bonaire, Sint Eustatius and Saba",323.694202,"MULTIPOLYGON (((-68.2196 12.1024, -68.22150000...",-68.30805,12.227559,https://live.staticflickr.com/4243/34769022732...,1.0,0.0,32708093-e1ab-4de5-a812-40d97b7eb1bb,...,2,"{""filter_Area_Country"": [""KNA"", ""CYM"", ""MDV"", ...",True,2288.150663,0.008161,45.1014,54.890439,100.0,99.6,47.585776
47,BLM,Saint-Barthélemy,24.707771,"MULTIPOLYGON (((-62.796 17.8971, -62.8099 17.8...",-62.829,17.895739,https://live.staticflickr.com/7902/46654798595...,1.0,0.0,25745b33-e5d4-4e43-8fb2-a796e4d261c4,...,0,"{""filter_Area_Country"": [""XCL"", ""CCK"", ""TKL"", ...",True,1986.575879,0.0,5.312427,94.687573,99.95,98.14,14.993366
105,GLP,Guadeloupe,1650.236866,"MULTIPOLYGON (((-61.6754 15.9524, -61.6801 15....",-61.651575,16.144102,https://live.staticflickr.com/4633/38515950705...,1.0,0.0,8478faa3-a366-4182-bffd-2b3d24c3220b,...,19,"{""filter_Area_Country"": [""KIR"", ""MTQ"", ""HKG"", ...",True,19163.302327,0.0,88.765162,11.234838,100.0,99.46,0.561081
156,MAF,Saint-Martin,56.170152,"MULTIPOLYGON (((-63.1394 18.0524, -63.1407 18....",-63.0452,18.087927,https://upload.wikimedia.org/wikipedia/commons...,1.0,0.0,552ae9ce-ecc5-449b-a8b8-5c1d3fa7f142,...,0,"{""filter_Area_Country"": [""SXM"", ""NFK"", ""TUV"", ...",True,3449.908873,0.0,8.105873,91.894127,99.97,95.87,21.575249
174,MTQ,Martinique,1117.982471,"MULTIPOLYGON (((-60.8182 14.4683, -60.8162 14....",-60.913925,14.566617,https://upload.wikimedia.org/wikipedia/commons...,1.0,0.0,9b3b7327-38c7-410c-88a2-713cff39609b,...,8,"{""filter_Area_Country"": [""TON"", ""FSM"", ""TCA"", ...",True,10189.737475,0.0,82.063211,17.936789,100.0,99.71,3.712245
197,PLW,Palau,487.475074,"MULTIPOLYGON (((134.592 7.4113, 134.59 7.3997,...",134.565239,7.499537,https://live.staticflickr.com/5321/9971805846_...,1.0,0.0,48925f15-05d0-41e1-8f77-43e67bcce72b,...,31,"{""filter_Area_Country"": [""SHN"", ""BRB"", ""ATG"", ...",True,5218.013152,5.326723,78.016975,16.656302,100.0,93.68,11.720121
215,SGS,South Georgia and the South Sandwich Islands,4096.678453,"MULTIPOLYGON (((-35.9672 -54.8164, -35.99 -54....",-36.93635,-54.21488,https://upload.wikimedia.org/wikipedia/commons...,1.0,0.0,310c3875-b803-4470-ae78-821a0f4e3495,...,2,"{""filter_Area_Country"": [""LUX"", ""WSM"", ""XNC"", ...",True,0.0,79.086726,20.880691,0.032582,99.52,99.16,1.660595


In [52]:
prt.head()

Unnamed: 0,GID_0,protection_needed_mar
0,ABW,7.356267
1,AGO,7.608244
2,AIA,1.264193
3,ALB,32.743156
4,ARE,56.964522


In [53]:
last.head()

Unnamed: 0,GID_0,SPI_mar,prop_protected_mar
41,ALB,1.86,0.69
83,DZA,0.22,0.02
125,ATA,50.79,35.55
167,ATG,9.04,0.3
209,ARG,7.0,9.25


In [59]:
spi[spi.iso_ter1=='TMP']

Unnamed: 0,SOVEREIGN1,iso_ter1,year,nspecies,SPI_low,SPI_high,percentprotected_low,percentprotected_high,GID_0


In [None]:
# Create new dataframe with the merge of the gadm_centroid and the new values
last2= pd.merge(last, prt ,how='left', left_on=['GID_0'], right_on = ['GID_0'])
df.head(1)

In [48]:
spi[spi['GID_0']=='SGS']

Unnamed: 0,SOVEREIGN1,iso_ter1,year,nspecies,SPI_low,SPI_high,percentprotected_low,percentprotected_high,GID_0
6972,United Kingdom,SGS,1980,208,0.0,0.0,0.0,0.0,SGS
6973,United Kingdom,SGS,1981,208,0.0,0.0,0.0,0.0,SGS
6974,United Kingdom,SGS,1982,208,0.0,0.0,0.0,0.0,SGS
6975,United Kingdom,SGS,1983,208,0.0,0.0,0.0,0.0,SGS
6976,United Kingdom,SGS,1984,208,0.0,0.0,0.0,0.0,SGS
6977,United Kingdom,SGS,1985,208,0.0,0.0,0.0,0.0,SGS
6978,United Kingdom,SGS,1986,208,0.0,0.0,0.0,0.0,SGS
6979,United Kingdom,SGS,1987,208,0.0,0.0,0.0,0.0,SGS
6980,United Kingdom,SGS,1988,208,0.0,0.0,0.0,0.0,SGS
6981,United Kingdom,SGS,1989,208,0.0,0.0,0.0,0.0,SGS


#### REMEMBER % protection needed was fake, we need to add correct value when we get it from MOL

In [43]:
df.columns

Index(['GID_0', 'NAME_0', 'Area_Country', 'geometry', 'x', 'y', 'jpg_url',
       'has_priority', 'has_raisg', 'GlobalID', 'max_highlited_sp',
       'continent', 'GNI_PPP', 'sentence', 'Global_SPI_ter', 'hm_ter',
       'hm_no_ter', 'hm_vh_ter', 'Pop2020', 'SPI_ter', 'prop_protected_ter',
       'protection_needed_ter', 'amphibians', 'birds', 'mammals', 'reptiles',
       'endemic_amphibians', 'endemic_birds', 'endemic_mammals',
       'endemic_reptiles', 'nspecies_ter', 'total_endemic_ter',
       'filter_similar_ter', 'Marine', 'Pop2020_EEZ', 'hm_no_mar', 'hm_mar',
       'hm_vh_mar', 'SPI_mar', 'prop_protected_mar', 'protection_needed_mar'],
      dtype='object')

### Calculate number of marine species and endemic species
This info comes from MOL

In [38]:
mar.head(1)

Unnamed: 0,speciesgroup,species,countryname,iso_ter1,percentprotected,NSPS,stewardship
0,marine fishes,Abalistes filamentosus,Australia,AUS,25-50%,75-100,22


In [44]:
## Get number of species (by taxa) per country
mar2 = mar[['speciesgroup', 'species','countryname', 'iso_ter1']]
mar_num = mar2.groupby(by = ['speciesgroup', 'countryname', 'iso_ter1']).count().reset_index()
mar_num.head(5)

Unnamed: 0,speciesgroup,countryname,iso_ter1,species
0,marine fishes,Albania,ALB,345
1,marine fishes,Algeria,DZA,470
2,marine fishes,Antarctica,ATA,106
3,marine fishes,Antigua and Barbuda,ATG,1493
4,marine fishes,Argentina,ARG,632


In [45]:
## Distinguish between taxa
mamm = mar_num[mar_num['speciesgroup']=='marine mammals']
fish = mar_num[mar_num['speciesgroup']=='marine fishes']

In [81]:
mamm.head(1)

Unnamed: 0,speciesgroup,countryname,iso_ter1,species
182,marine mammals,Albania,ALB,2


In [82]:
fish.head(1)

Unnamed: 0,speciesgroup,countryname,iso_ter1,species
0,marine fishes,Albania,ALB,345


In [46]:
## Include number of species in dataframe
df= pd.merge(df, mamm,  how='left', left_on=['GID_0'], right_on = ['iso_ter1']).drop(columns = {'speciesgroup','iso_ter1','countryname'}).rename(columns={'species': 'mammals_mar'})
df= pd.merge(df, fish,  how='left', left_on=['GID_0'], right_on = ['iso_ter1']).drop(columns = {'speciesgroup','iso_ter1','countryname'}).rename(columns={'species': 'fishes_mar'})
df.head(5)

Unnamed: 0,GID_0,NAME_0,Area_Country,geometry,x,y,jpg_url,has_priority,has_raisg,GlobalID,...,Marine,Pop2020_EEZ,hm_no_mar,hm_mar,hm_vh_mar,SPI_mar,prop_protected_mar,protection_needed_mar,mammals_mar,fishes_mar
0,ABW,Aruba,181.9384,"POLYGON ((-69.9782 12.4699, -69.97790000000001...",-69.970245,12.509136,https://live.staticflickr.com/1952/31416683438...,1.0,0.0,fe9f6eb0-f4f8-4f29-875a-5cbb3219e4e5,...,True,1547.605201,0.0,78.30475,21.69525,0.0,0.0,99.8,19.0,1466.0
1,AFG,Afghanistan,643857.5,"POLYGON ((68.5385 31.7546, 68.58199999999999 3...",66.029601,33.828432,https://p1.pxfuel.com/preview/967/12/53/afghan...,1.0,0.0,193ba976-0e5a-4cf6-9b09-d00bf83f4557,...,False,,,,,,,,,
2,AGO,Angola,1247422.0,"MULTIPOLYGON (((11.8993 -17.2103, 11.8816 -17....",17.578022,-12.338271,https://live.staticflickr.com/3787/13698381215...,1.0,0.0,174ce788-4f67-4ae0-922f-d2ddac87f8c3,...,True,350682.727157,0.022313,77.847478,22.130209,,,,,
3,AIA,Anguilla,83.30331,"MULTIPOLYGON (((-63.0685 18.2368, -63.054 18.2...",-63.054023,18.214919,https://live.staticflickr.com/8063/8194570372_...,1.0,0.0,9f5f24d8-8b21-49a8-8f55-90b47cf63e7b,...,True,1693.719824,0.0,94.367741,5.632259,4.68,0.28,99.52,25.0,1494.0
4,ALA,Åland,1506.261,"MULTIPOLYGON (((20.1734 60.2873, 20.1828 60.28...",19.9677,60.241295,https://p1.pxfuel.com/preview/294/670/561/alan...,1.0,0.0,2b45351b-a335-490e-914e-7748d4f41f66,...,False,,,,,,,,,


In [47]:
## Calculate number of endemic species per country: mammals
mamm_e = mar[(mar['speciesgroup']=='marine mammals')&(mar['stewardship']==1)]
mamm_e = mamm_e.groupby(['speciesgroup','iso_ter1']).sum()
mamm_e = mamm_e.reset_index().rename(columns={'stewardship':'endemic_mammals_mar'})
mamm_e.head(5)

Unnamed: 0,speciesgroup,iso_ter1,endemic_mammals_mar
0,marine mammals,AUS,1
1,marine mammals,ECU,2
2,marine mammals,NZL,1
3,marine mammals,USA,1


In [48]:
## Calculate number of endemic species per country: fishes
fish_e = mar[(mar['speciesgroup']=='marine fishes')&(mar['stewardship']==1)]
fish_e = fish_e.groupby(['speciesgroup','iso_ter1']).sum()
fish_e = fish_e.reset_index().rename(columns={'stewardship':'endemic_fishes_mar'})
fish_e.head(5)

Unnamed: 0,speciesgroup,iso_ter1,endemic_fishes_mar
0,marine fishes,ARG,2
1,marine fishes,ATA,5
2,marine fishes,ATF,3
3,marine fishes,AUS,379
4,marine fishes,BHS,2


In [49]:
## Merge endemic data in dataframe
df= pd.merge(df, mamm_e,  how='left', left_on=['GID_0'], right_on = ['iso_ter1']).drop(columns = {'speciesgroup','iso_ter1'})
df= pd.merge(df, fish_e,  how='left', left_on=['GID_0'], right_on = ['iso_ter1']).drop(columns = {'speciesgroup','iso_ter1'})
df.head(5)

Unnamed: 0,GID_0,NAME_0,Area_Country,geometry,x,y,jpg_url,has_priority,has_raisg,GlobalID,...,hm_no_mar,hm_mar,hm_vh_mar,SPI_mar,prop_protected_mar,protection_needed_mar,mammals_mar,fishes_mar,endemic_mammals_mar,endemic_fishes_mar
0,ABW,Aruba,181.9384,"POLYGON ((-69.9782 12.4699, -69.97790000000001...",-69.970245,12.509136,https://live.staticflickr.com/1952/31416683438...,1.0,0.0,fe9f6eb0-f4f8-4f29-875a-5cbb3219e4e5,...,0.0,78.30475,21.69525,0.0,0.0,99.8,19.0,1466.0,,
1,AFG,Afghanistan,643857.5,"POLYGON ((68.5385 31.7546, 68.58199999999999 3...",66.029601,33.828432,https://p1.pxfuel.com/preview/967/12/53/afghan...,1.0,0.0,193ba976-0e5a-4cf6-9b09-d00bf83f4557,...,,,,,,,,,,
2,AGO,Angola,1247422.0,"MULTIPOLYGON (((11.8993 -17.2103, 11.8816 -17....",17.578022,-12.338271,https://live.staticflickr.com/3787/13698381215...,1.0,0.0,174ce788-4f67-4ae0-922f-d2ddac87f8c3,...,0.022313,77.847478,22.130209,,,,,,,
3,AIA,Anguilla,83.30331,"MULTIPOLYGON (((-63.0685 18.2368, -63.054 18.2...",-63.054023,18.214919,https://live.staticflickr.com/8063/8194570372_...,1.0,0.0,9f5f24d8-8b21-49a8-8f55-90b47cf63e7b,...,0.0,94.367741,5.632259,4.68,0.28,99.52,25.0,1494.0,,
4,ALA,Åland,1506.261,"MULTIPOLYGON (((20.1734 60.2873, 20.1828 60.28...",19.9677,60.241295,https://p1.pxfuel.com/preview/294/670/561/alan...,1.0,0.0,2b45351b-a335-490e-914e-7748d4f41f66,...,,,,,,,,,,


In [50]:
sum(df['endemic_fishes_mar'].notnull())

45

In [51]:
df.columns

Index(['GID_0', 'NAME_0', 'Area_Country', 'geometry', 'x', 'y', 'jpg_url',
       'has_priority', 'has_raisg', 'GlobalID', 'max_highlited_sp',
       'continent', 'GNI_PPP', 'sentence', 'Global_SPI_ter', 'hm_ter',
       'hm_no_ter', 'hm_vh_ter', 'Pop2020', 'SPI_ter', 'prop_protected_ter',
       'protection_needed_ter', 'amphibians', 'birds', 'mammals', 'reptiles',
       'endemic_amphibians', 'endemic_birds', 'endemic_mammals',
       'endemic_reptiles', 'nspecies_ter', 'total_endemic_ter',
       'filter_similar_ter', 'Marine', 'Pop2020_EEZ', 'hm_no_mar', 'hm_mar',
       'hm_vh_mar', 'SPI_mar', 'prop_protected_mar', 'protection_needed_mar',
       'mammals_mar', 'fishes_mar', 'endemic_mammals_mar',
       'endemic_fishes_mar'],
      dtype='object')

In [52]:
# Calculate total number of species and total number of endemic marine
df['nspecies_mar']= df['mammals_mar']+df['fishes_mar']
df['total_endemic_mar']= df['endemic_mammals_mar']+df['endemic_fishes_mar']
df.head(5)

Unnamed: 0,GID_0,NAME_0,Area_Country,geometry,x,y,jpg_url,has_priority,has_raisg,GlobalID,...,hm_vh_mar,SPI_mar,prop_protected_mar,protection_needed_mar,mammals_mar,fishes_mar,endemic_mammals_mar,endemic_fishes_mar,nspecies_mar,total_endemic_mar
0,ABW,Aruba,181.9384,"POLYGON ((-69.9782 12.4699, -69.97790000000001...",-69.970245,12.509136,https://live.staticflickr.com/1952/31416683438...,1.0,0.0,fe9f6eb0-f4f8-4f29-875a-5cbb3219e4e5,...,21.69525,0.0,0.0,99.8,19.0,1466.0,,,1485.0,
1,AFG,Afghanistan,643857.5,"POLYGON ((68.5385 31.7546, 68.58199999999999 3...",66.029601,33.828432,https://p1.pxfuel.com/preview/967/12/53/afghan...,1.0,0.0,193ba976-0e5a-4cf6-9b09-d00bf83f4557,...,,,,,,,,,,
2,AGO,Angola,1247422.0,"MULTIPOLYGON (((11.8993 -17.2103, 11.8816 -17....",17.578022,-12.338271,https://live.staticflickr.com/3787/13698381215...,1.0,0.0,174ce788-4f67-4ae0-922f-d2ddac87f8c3,...,22.130209,,,,,,,,,
3,AIA,Anguilla,83.30331,"MULTIPOLYGON (((-63.0685 18.2368, -63.054 18.2...",-63.054023,18.214919,https://live.staticflickr.com/8063/8194570372_...,1.0,0.0,9f5f24d8-8b21-49a8-8f55-90b47cf63e7b,...,5.632259,4.68,0.28,99.52,25.0,1494.0,,,1519.0,
4,ALA,Åland,1506.261,"MULTIPOLYGON (((20.1734 60.2873, 20.1828 60.28...",19.9677,60.241295,https://p1.pxfuel.com/preview/294/670/561/alan...,1.0,0.0,2b45351b-a335-490e-914e-7748d4f41f66,...,,,,,,,,,,


In [53]:
df.columns

Index(['GID_0', 'NAME_0', 'Area_Country', 'geometry', 'x', 'y', 'jpg_url',
       'has_priority', 'has_raisg', 'GlobalID', 'max_highlited_sp',
       'continent', 'GNI_PPP', 'sentence', 'Global_SPI_ter', 'hm_ter',
       'hm_no_ter', 'hm_vh_ter', 'Pop2020', 'SPI_ter', 'prop_protected_ter',
       'protection_needed_ter', 'amphibians', 'birds', 'mammals', 'reptiles',
       'endemic_amphibians', 'endemic_birds', 'endemic_mammals',
       'endemic_reptiles', 'nspecies_ter', 'total_endemic_ter',
       'filter_similar_ter', 'Marine', 'Pop2020_EEZ', 'hm_no_mar', 'hm_mar',
       'hm_vh_mar', 'SPI_mar', 'prop_protected_mar', 'protection_needed_mar',
       'mammals_mar', 'fishes_mar', 'endemic_mammals_mar',
       'endemic_fishes_mar', 'nspecies_mar', 'total_endemic_mar'],
      dtype='object')

In [55]:
# How many countries have marine data?
len(df[~df['nspecies_mar'].isnull()]) # 181

181

In [56]:
# How many countries do not have marine data?
len(df[df['nspecies_mar'].isnull()]) # 73

73

In [57]:
# How many countries do not have eez?
len(df[df['Marine']=='False'])

52

In [58]:
# How many countries have eez without species?
len(df[(df['Marine']== 'True') & (df['nspecies_mar'].isnull())])

21

In [162]:
l1 = df[(df['Marine']== 'True') & (df['nspecies_mar'].isnull())].GID_0 # countries that have eez without species

In [160]:
l1

2                 Angola
16            Azerbaijan
19                 Benin
24               Bahrain
34     Wallis and Futuna
64         Côte d'Ivoire
75      Christmas Island
80              Djibouti
87               Eritrea
102                Ghana
103            Gibraltar
134               Jordan
136           Kazakhstan
147                Libya
168           Montenegro
189                Nauru
204            Palestina
222              Somalia
234                Syria
237                 Togo
241         Turkmenistan
Name: NAME_0, dtype: object

In [60]:
# How many countries have eez without spi?
len(df[(df['Marine']== 'True') & (df['SPI_mar'].isnull())])

20

In [61]:
l2 = df[(df['Marine']== 'True') & (df['SPI_mar'].isnull())].NAME_0 # countries that have eez without spi

In [64]:
list(set(l1).difference(l2)) # Jordan has spi data but not species data

['Jordan']

### Add Area of eez

In [65]:
eez.head()

Unnamed: 0,GID_0,Area_EEZ,ORIG_FID,geometry
0,ABW,29970.299588,1,POINT (-69.67341 13.74138)
1,AGO,495859.762742,2,POINT (11.03537 -11.70110)
2,AIA,90157.964205,3,POINT (-62.54329 20.01803)
3,ALB,12165.548773,4,POINT (19.11300 40.92708)
4,ARE,57838.146798,5,POINT (54.03058 25.05891)


In [66]:
len(eez)

202

In [67]:
eez2 = eez[['GID_0', 'Area_EEZ']]
eez2.head()

Unnamed: 0,GID_0,Area_EEZ
0,ABW,29970.299588
1,AGO,495859.762742
2,AIA,90157.964205
3,ALB,12165.548773
4,ARE,57838.146798


In [68]:
df = pd.merge(left = df, right = eez2, left_on = "GID_0", right_on = "GID_0", how = "left")
df.columns

Index(['GID_0', 'NAME_0', 'Area_Country', 'geometry', 'x', 'y', 'jpg_url',
       'has_priority', 'has_raisg', 'GlobalID', 'max_highlited_sp',
       'continent', 'GNI_PPP', 'sentence', 'Global_SPI_ter', 'hm_ter',
       'hm_no_ter', 'hm_vh_ter', 'Pop2020', 'SPI_ter', 'prop_protected_ter',
       'protection_needed_ter', 'amphibians', 'birds', 'mammals', 'reptiles',
       'endemic_amphibians', 'endemic_birds', 'endemic_mammals',
       'endemic_reptiles', 'nspecies_ter', 'total_endemic_ter',
       'filter_similar_ter', 'Marine', 'Pop2020_EEZ', 'hm_no_mar', 'hm_mar',
       'hm_vh_mar', 'SPI_mar', 'prop_protected_mar', 'protection_needed_mar',
       'mammals_mar', 'fishes_mar', 'endemic_mammals_mar',
       'endemic_fishes_mar', 'nspecies_mar', 'total_endemic_mar', 'Area_EEZ'],
      dtype='object')

In [69]:
df.head(2)

Unnamed: 0,GID_0,NAME_0,Area_Country,geometry,x,y,jpg_url,has_priority,has_raisg,GlobalID,...,SPI_mar,prop_protected_mar,protection_needed_mar,mammals_mar,fishes_mar,endemic_mammals_mar,endemic_fishes_mar,nspecies_mar,total_endemic_mar,Area_EEZ
0,ABW,Aruba,181.938403,"POLYGON ((-69.9782 12.4699, -69.97790000000001...",-69.970245,12.509136,https://live.staticflickr.com/1952/31416683438...,1.0,0.0,fe9f6eb0-f4f8-4f29-875a-5cbb3219e4e5,...,0.0,0.0,99.8,19.0,1466.0,,,1485.0,,29970.299588
1,AFG,Afghanistan,643857.477165,"POLYGON ((68.5385 31.7546, 68.58199999999999 3...",66.029601,33.828432,https://p1.pxfuel.com/preview/967/12/53/afghan...,1.0,0.0,193ba976-0e5a-4cf6-9b09-d00bf83f4557,...,,,,,,,,,,


### Add fake global SPI marine

In [70]:
df['Global_SPI_mar']= np.where(df['Marine']=='True', '45', 'NaN')
df.head(2)

Unnamed: 0,GID_0,NAME_0,Area_Country,geometry,x,y,jpg_url,has_priority,has_raisg,GlobalID,...,prop_protected_mar,protection_needed_mar,mammals_mar,fishes_mar,endemic_mammals_mar,endemic_fishes_mar,nspecies_mar,total_endemic_mar,Area_EEZ,Global_SPI_mar
0,ABW,Aruba,181.938403,"POLYGON ((-69.9782 12.4699, -69.97790000000001...",-69.970245,12.509136,https://live.staticflickr.com/1952/31416683438...,1.0,0.0,fe9f6eb0-f4f8-4f29-875a-5cbb3219e4e5,...,0.0,99.8,19.0,1466.0,,,1485.0,,29970.299588,45.0
1,AFG,Afghanistan,643857.477165,"POLYGON ((68.5385 31.7546, 68.58199999999999 3...",66.029601,33.828432,https://p1.pxfuel.com/preview/967/12/53/afghan...,1.0,0.0,193ba976-0e5a-4cf6-9b09-d00bf83f4557,...,,,,,,,,,,


---------------------------------------------------------------------------------------------------------------------------------------------
## Process to create the challenges tab (create array with similar filters)
### Create matrix to identify countries with shared stewardship to create the stewardship filter
This code is more efficient than that described in the notebook "shared_stewardship", which was used during the first iteration of the NRC

In [63]:
mar.head()

Unnamed: 0,speciesgroup,species,countryname,iso_ter1,percentprotected,NSPS,stewardship
0,marine fishes,Abalistes filamentosus,Australia,AUS,25-50%,75-100,22
1,marine fishes,Abalistes filamentosus,Australia,CCK,0-25%,0-25,22
2,marine fishes,Abalistes filamentosus,Australia,NFK,75-100%,75-100,22
3,marine fishes,Abalistes filamentosus,East Timor,TLS,0-25%,0-25,22
4,marine fishes,Abalistes filamentosus,Fiji,FJI,0-25%,50-75,22


In [71]:
# Create a copy with only the species name and the iso3
mar2 = mar[['iso_ter1','species']].copy()
mar2.head(5)

Unnamed: 0,iso_ter1,species
0,AUS,Abalistes filamentosus
1,CCK,Abalistes filamentosus
2,NFK,Abalistes filamentosus
3,TLS,Abalistes filamentosus
4,FJI,Abalistes filamentosus


In [165]:
%%time
# Create a matrix that has, for each country, the number of shared species with each of the other countries
m = mar2.merge(mar2, on='species') # perform a self-merge based on the species
mat = pd.crosstab(m.iso_ter1_x, m.iso_ter1_y) # perform crosstabulation operation
mat.reset_index(inplace=True)
mat= mat.rename(columns = {'iso_ter1_x':'index'})
mat.head(5)

CPU times: user 2.89 s, sys: 1.2 s, total: 4.08 s
Wall time: 4.33 s


iso_ter1_y,index,ABW,AIA,ALB,ARE,ARG,ASM,ATA,ATF,ATG,...,USA,VCT,VEN,VGB,VIR,VNM,VUT,WSM,YEM,ZAF
0,ABW,1485,1327,61,133,187,257,3,329,1328,...,1435,1380,1485,1280,1329,300,325,279,116,441
1,AIA,1327,1519,58,118,204,300,4,339,1503,...,1462,1374,1492,1441,1498,273,352,319,111,460
2,ALB,61,58,347,31,38,24,0,49,58,...,78,59,61,54,57,36,28,27,23,99
3,ARE,133,118,31,1142,50,327,0,905,121,...,485,132,133,103,123,823,522,435,1023,816
4,ARG,187,204,38,50,671,136,32,308,203,...,393,217,248,182,205,93,175,162,33,386


In [73]:
mat.shape 

(182, 183)

In [51]:
# Save local copy
# mat.to_csv('/Users/sofia/Documents/HE_Data/NRC/NRC_Marine/stewardship_matrix.csv',index=False)

### Get shared stewardship countries
Using the stewardship matrix. 

In [74]:
mat.columns.values

array(['index', 'ABW', 'AIA', 'ALB', 'ARE', 'ARG', 'ASM', 'ATA', 'ATF',
       'ATG', 'AUS', 'BEL', 'BES', 'BGD', 'BGR', 'BHS', 'BIH', 'BLM',
       'BLZ', 'BMU', 'BRA', 'BRB', 'BRN', 'CAN', 'CCK', 'CHL', 'CHN',
       'CMR', 'COD', 'COG', 'COK', 'COL', 'COM', 'CPV', 'CRI', 'CUB',
       'CUW', 'CYM', 'CYP', 'DEU', 'DMA', 'DNK', 'DOM', 'DZA', 'ECU',
       'EGY', 'ESH', 'ESP', 'EST', 'FIN', 'FJI', 'FLK', 'FRA', 'FRO',
       'FSM', 'GAB', 'GBR', 'GEO', 'GGY', 'GIN', 'GLP', 'GMB', 'GNB',
       'GNQ', 'GRC', 'GRD', 'GRL', 'GTM', 'GUF', 'GUM', 'GUY', 'HMD',
       'HND', 'HRV', 'HTI', 'IDN', 'IND', 'IRL', 'IRN', 'IRQ', 'ISL',
       'ISR', 'ITA', 'JAM', 'JEY', 'JOR', 'JPN', 'KEN', 'KHM', 'KIR',
       'KNA', 'KOR', 'KWT', 'LBN', 'LBR', 'LCA', 'LKA', 'LTU', 'LVA',
       'MAF', 'MAR', 'MCO', 'MDG', 'MDV', 'MEX', 'MHL', 'MLT', 'MMR',
       'MNP', 'MOZ', 'MRT', 'MSR', 'MTQ', 'MUS', 'MYS', 'MYT', 'NAM',
       'NCL', 'NFK', 'NGA', 'NIC', 'NIU', 'NLD', 'NOR', 'NZL', 'OMN',
       'PAK', 'PAN

In [67]:
mat.columns.values[1:183]

array(['ABW', 'AIA', 'ALB', 'ARE', 'ARG', 'ASM', 'ATA', 'ATF', 'ATG',
       'AUS', 'BEL', 'BES', 'BGD', 'BGR', 'BHS', 'BIH', 'BLM', 'BLZ',
       'BMU', 'BRA', 'BRB', 'BRN', 'CAN', 'CCK', 'CHL', 'CHN', 'CMR',
       'COD', 'COG', 'COK', 'COL', 'COM', 'CPV', 'CRI', 'CUB', 'CUW',
       'CYM', 'CYP', 'DEU', 'DMA', 'DNK', 'DOM', 'DZA', 'ECU', 'EGY',
       'ESH', 'ESP', 'EST', 'FIN', 'FJI', 'FLK', 'FRA', 'FRO', 'FSM',
       'GAB', 'GBR', 'GEO', 'GGY', 'GIN', 'GLP', 'GMB', 'GNB', 'GNQ',
       'GRC', 'GRD', 'GRL', 'GTM', 'GUF', 'GUM', 'GUY', 'HMD', 'HND',
       'HRV', 'HTI', 'IDN', 'IND', 'IRL', 'IRN', 'IRQ', 'ISL', 'ISR',
       'ITA', 'JAM', 'JEY', 'JOR', 'JPN', 'KEN', 'KHM', 'KIR', 'KNA',
       'KOR', 'KWT', 'LBN', 'LBR', 'LCA', 'LKA', 'LTU', 'LVA', 'MAF',
       'MAR', 'MCO', 'MDG', 'MDV', 'MEX', 'MHL', 'MLT', 'MMR', 'MNP',
       'MOZ', 'MRT', 'MSR', 'MTQ', 'MUS', 'MYS', 'MYT', 'NAM', 'NCL',
       'NFK', 'NGA', 'NIC', 'NIU', 'NLD', 'NOR', 'NZL', 'OMN', 'PAK',
       'PAN', 'PCN',

In [166]:
# Get only the values (skip index)
df_mat = mat[mat.columns.values[1:183]]
df_mat.head(5)

iso_ter1_y,ABW,AIA,ALB,ARE,ARG,ASM,ATA,ATF,ATG,AUS,...,USA,VCT,VEN,VGB,VIR,VNM,VUT,WSM,YEM,ZAF
0,1485,1327,61,133,187,257,3,329,1328,400,...,1435,1380,1485,1280,1329,300,325,279,116,441
1,1327,1519,58,118,204,300,4,339,1503,425,...,1462,1374,1492,1441,1498,273,352,319,111,460
2,61,58,347,31,38,24,0,49,58,48,...,78,59,61,54,57,36,28,27,23,99
3,133,118,31,1142,50,327,0,905,121,843,...,485,132,133,103,123,823,522,435,1023,816
4,187,204,38,50,671,136,32,308,203,396,...,393,217,248,182,205,93,175,162,33,386


In [167]:
# set index using countries
df_mat = df_mat.set_index(mat['index'].values) 
df_mat.head(5)

iso_ter1_y,ABW,AIA,ALB,ARE,ARG,ASM,ATA,ATF,ATG,AUS,...,USA,VCT,VEN,VGB,VIR,VNM,VUT,WSM,YEM,ZAF
ABW,1485,1327,61,133,187,257,3,329,1328,400,...,1435,1380,1485,1280,1329,300,325,279,116,441
AIA,1327,1519,58,118,204,300,4,339,1503,425,...,1462,1374,1492,1441,1498,273,352,319,111,460
ALB,61,58,347,31,38,24,0,49,58,48,...,78,59,61,54,57,36,28,27,23,99
ARE,133,118,31,1142,50,327,0,905,121,843,...,485,132,133,103,123,823,522,435,1023,816
ARG,187,204,38,50,671,136,32,308,203,396,...,393,217,248,182,205,93,175,162,33,386


In [168]:
df_mat.columns = mat['index'].values
df_mat.head(5)

Unnamed: 0,ABW,AIA,ALB,ARE,ARG,ASM,ATA,ATF,ATG,AUS,...,USA,VCT,VEN,VGB,VIR,VNM,VUT,WSM,YEM,ZAF
ABW,1485,1327,61,133,187,257,3,329,1328,400,...,1435,1380,1485,1280,1329,300,325,279,116,441
AIA,1327,1519,58,118,204,300,4,339,1503,425,...,1462,1374,1492,1441,1498,273,352,319,111,460
ALB,61,58,347,31,38,24,0,49,58,48,...,78,59,61,54,57,36,28,27,23,99
ARE,133,118,31,1142,50,327,0,905,121,843,...,485,132,133,103,123,823,522,435,1023,816
ARG,187,204,38,50,671,136,32,308,203,396,...,393,217,248,182,205,93,175,162,33,386


In [169]:
# Now it has the same shape
df_mat.shape

(182, 182)

In [170]:
# Create stewardship dictionary: for each country identify the 10 countries that share more species with it
df_sort = df_mat.copy()
steward_dict = dict.fromkeys(df_sort.columns.values)
for key in steward_dict:
    df_sort = df_sort.sort_values(by = [key], ascending=False)
    sub = df_sort[key][1:11] # to skip the same country
    vals = sub.index.values.tolist()
    vals.append(key)
    
    steward_dict[key] = json.dumps(vals)

In [171]:
# Convert stewardship dictionary into dataframe
steward_df = pd.DataFrame(steward_dict.items(), columns = ["GID_0","filter_steward"])
steward_df.head(5)

Unnamed: 0,GID_0,filter_steward
0,ABW,"[""VEN"", ""COL"", ""CUW"", ""DOM"", ""BES"", ""USA"", ""BH..."
1,AIA,"[""DOM"", ""BLM"", ""GLP"", ""PRI"", ""ATG"", ""BES"", ""MA..."
2,ALB,"[""GRC"", ""ITA"", ""ESP"", ""HRV"", ""DZA"", ""FRA"", ""TU..."
3,ARE,"[""IRN"", ""SAU"", ""OMN"", ""PAK"", ""SYC"", ""YEM"", ""MD..."
4,ARG,"[""CHL"", ""BRA"", ""NZL"", ""AUS"", ""USA"", ""ZAF"", ""NF..."


In [172]:
steward_df.shape

(182, 2)

### Get nearest EEZ
Similar to what was done in the Terrestrial_NRC notebook for the countries' centroids: the idea here is to find, for each of the displayed EEZ, the closest ones using the distance between their centroids. 

Note that the original EEZ layer often has several EEZ associated with the same GID_0 (being GID_0 the iso_ter1 or, when this is missing, the iso_sov1). This leads to problems when trying to match the EEZ and the country polygons currently displayed in the NRC. For this reason, using ArcGIS Pro, we first dissolved the EEZ layer according to the GID_0 field, so that all polygons with same GID_0 code were considered as 1. Then, the centroids were calculated using the "Feature to Point tool". The centroids were not forced to fall within the feature since most are made of several polygons and forcing the centroid to fall just in one of them can lead to weird situation such as the centroid of the Spanish EEZ falling in the Canary Islands. For this reason, the "inside" option of the tool was disabled and the resulting centroids are located at the center of all the polygons conforming each of those EEZ, even if thar means that they fall, somethimes, outside the corresponding EEZ (in the ocean or even in a different EEZ)

In [82]:
from math import radians
import pandas as pd
import numpy as np
from sklearn.metrics import DistanceMetric

In [176]:
eez.head()

Unnamed: 0,GID_0,Area_EEZ,ORIG_FID,geometry
0,ABW,29970.299588,1,POINT (-69.67341 13.74138)
1,AGO,495859.762742,2,POINT (11.03537 -11.70110)
2,AIA,90157.964205,3,POINT (-62.54329 20.01803)
3,ALB,12165.548773,4,POINT (19.11300 40.92708)
4,ARE,57838.146798,5,POINT (54.03058 25.05891)


In [177]:
len(eez)

202

In [178]:
eez['Longitude'] = eez.geometry.x
eez['Latitude'] = eez.geometry.y

In [179]:
eez.head()

Unnamed: 0,GID_0,Area_EEZ,ORIG_FID,geometry,Longitude,Latitude
0,ABW,29970.299588,1,POINT (-69.67341 13.74138),-69.673412,13.74138
1,AGO,495859.762742,2,POINT (11.03537 -11.70110),11.035371,-11.701098
2,AIA,90157.964205,3,POINT (-62.54329 20.01803),-62.543285,20.01803
3,ALB,12165.548773,4,POINT (19.11300 40.92708),19.113001,40.927084
4,ARE,57838.146798,5,POINT (54.03058 25.05891),54.03058,25.058911


In [180]:
df_coord = pd.DataFrame(data = eez[['GID_0']])
len(df_coord)

202

In [181]:
df_coord.head()

Unnamed: 0,GID_0
0,ABW
1,AGO
2,AIA
3,ALB
4,ARE


In [182]:
# Get the coordinates of each EEZ centroid
y_list = []
x_list = []
for index, i in eez.iterrows():
    y_list.append(i['Latitude'])
    x_list.append(i['Longitude'])
    
df_coord['x'] = x_list
df_coord['y'] = y_list
df_coord.head(5)

Unnamed: 0,GID_0,x,y
0,ABW,-69.673412,13.74138
1,AGO,11.035371,-11.701098
2,AIA,-62.543285,20.01803
3,ALB,19.113001,40.927084
4,ARE,54.03058,25.058911


In [183]:
# Convert the coordinates to radians
df_coord['lat'] = np.radians(df_coord['y'])
df_coord['lon'] = np.radians(df_coord['x'])
df_coord.head(5)

Unnamed: 0,GID_0,x,y,lat,lon
0,ABW,-69.673412,13.74138,0.239832,-1.21603
1,AGO,11.035371,-11.701098,-0.204223,0.192604
2,AIA,-62.543285,20.01803,0.349381,-1.091586
3,ALB,19.113001,40.927084,0.714312,0.333585
4,ARE,54.03058,25.058911,0.437361,0.943012


In [184]:
# Get distance metric and use it to calculate the distance between coordinates of each country
dist = DistanceMetric.get_metric('haversine')
dist_df = pd.DataFrame(dist.pairwise(df_coord[['lat','lon']].to_numpy())*6373,  columns=df_coord.GID_0.unique(), index=df_coord.GID_0.unique())
dist_df.head(5)

Unnamed: 0,ABW,AGO,AIA,ALB,ARE,ARG,ASM,ATA,ATF,ATG,...,VCT,VEN,VGB,VIR,VNM,VUT,WLF,WSM,YEM,ZAF
ABW,0.0,9337.734978,1030.808723,8914.511641,12547.816812,6807.636739,11363.979416,10402.096103,14419.463293,1119.079832,...,865.55441,419.428711,921.11543,641.667561,17196.124674,13781.883918,11735.482146,11743.613019,12950.292114,11709.958023
AGO,9337.734978,0.0,8788.020528,5912.482037,6200.347364,7791.008092,17178.766459,6778.021008,5947.82512,8548.075782,...,8486.363636,8918.319824,8996.091257,8943.236001,11097.523707,16020.016864,17264.212124,17209.599342,5289.556005,3447.906848
AIA,1030.808723,8788.020528,0.0,7885.881036,11527.545191,7464.512951,12230.8902,10872.544121,14304.121273,251.553043,...,768.373044,880.534088,208.905758,396.759468,16376.658731,14630.528192,12582.531107,12595.968453,11985.848882,11508.89188
ALB,8914.511641,5912.482037,7885.881036,0.0,3670.470318,12645.491889,16907.327294,12690.409407,10176.126307,7809.772415,...,8290.467684,8669.939067,8022.311156,8273.171738,9138.47689,16035.353774,16685.064151,16738.656936,4467.695397,8856.152641
ARE,12547.816812,6200.347364,11527.545191,3670.470318,0.0,13972.846658,15354.787082,11798.337743,7500.825686,11431.579298,...,11862.212594,12270.933759,11674.109276,11906.979081,5928.339267,13212.30832,14938.473077,14961.932367,1369.32308,7497.564528


In [185]:
dist_df.shape

(202, 202)

In [186]:
# Sort the table for each point. 0 values correspond to the same country, so take the 1:11 and add the row names to a dictionary to have the names of the 10 closest EEZ. 
dist_df_sort = dist_df.copy()
neighbour_dict = dict.fromkeys(dist_df_sort.columns.values)
for key in neighbour_dict:
    dist_df_sort = dist_df_sort.sort_values(by = [key]) # sort countries from closest to farthest
    sub = dist_df_sort[key][1:11] # keep the 10 closest ones but skip the same country (0 distance)
    vals = sub.index.values.tolist() # take the values
    neighbour_dict[key] = json.dumps(vals) # include them in dictionary

In [187]:
# Convert neighboring dictionary into dataframe
neigh_df = pd.DataFrame(neighbour_dict.items(), columns = ["GID_0","filter_neigh"])
neigh_df.head(5)

Unnamed: 0,GID_0,filter_neigh
0,ABW,"[""CUW"", ""BES"", ""VEN"", ""PRI"", ""DOM"", ""HTI"", ""VI..."
1,AGO,"[""COD"", ""COG"", ""GAB"", ""GNQ"", ""NAM"", ""STP"", ""CM..."
2,AIA,"[""VGB"", ""MAF"", ""BLM"", ""SXM"", ""ATG"", ""KNA"", ""VI..."
3,ALB,"[""MNE"", ""BIH"", ""HRV"", ""ITA"", ""GRC"", ""SVN"", ""ML..."
4,ARE,"[""QAT"", ""BHR"", ""IRN"", ""KWT"", ""IRQ"", ""OMN"", ""PA..."


In [188]:
neigh_df[neigh_df['GID_0']=='USA'].values

array([['USA',
        '["CAN", "MEX", "BLZ", "CUB", "GTM", "BHS", "CYM", "HND", "UMI", "SLV"]']],
      dtype=object)

In [189]:
neigh_df.shape

(202, 2)

In [190]:
# See in which countries the steward and neighbour dictionaries differ
list1= list(neigh_df['GID_0'])
list2=list(steward_df['GID_0'])
list3 = list(set(list1).difference(list2))
list3

['CXR',
 'TGO',
 'TKM',
 'PSE',
 'KAZ',
 'DJI',
 'GHA',
 'BEN',
 'ERI',
 'AGO',
 'LBY',
 'WLF',
 'SYR',
 'BHR',
 'NRU',
 'GIB',
 'CIV',
 'SOM',
 'MNE',
 'AZE']

In [191]:
# Merge these two dataframes together
df_dict = pd.merge(left = neigh_df, right = steward_df, left_on = "GID_0", right_on = "GID_0", how = "left")
df_dict.head(5)

Unnamed: 0,GID_0,filter_neigh,filter_steward
0,ABW,"[""CUW"", ""BES"", ""VEN"", ""PRI"", ""DOM"", ""HTI"", ""VI...","[""VEN"", ""COL"", ""CUW"", ""DOM"", ""BES"", ""USA"", ""BH..."
1,AGO,"[""COD"", ""COG"", ""GAB"", ""GNQ"", ""NAM"", ""STP"", ""CM...",
2,AIA,"[""VGB"", ""MAF"", ""BLM"", ""SXM"", ""ATG"", ""KNA"", ""VI...","[""DOM"", ""BLM"", ""GLP"", ""PRI"", ""ATG"", ""BES"", ""MA..."
3,ALB,"[""MNE"", ""BIH"", ""HRV"", ""ITA"", ""GRC"", ""SVN"", ""ML...","[""GRC"", ""ITA"", ""ESP"", ""HRV"", ""DZA"", ""FRA"", ""TU..."
4,ARE,"[""QAT"", ""BHR"", ""IRN"", ""KWT"", ""IRQ"", ""OMN"", ""PA...","[""IRN"", ""SAU"", ""OMN"", ""PAK"", ""SYC"", ""YEM"", ""MD..."


In [192]:
df_dict.tail()

Unnamed: 0,GID_0,filter_neigh,filter_steward
197,VUT,"[""NCL"", ""SLB"", ""NFK"", ""NRU"", ""TON"", ""WSM"", ""WL...","[""AUS"", ""PNG"", ""FJI"", ""PHL"", ""NCL"", ""MHL"", ""ID..."
198,WLF,"[""WSM"", ""ASM"", ""TKL"", ""NIU"", ""TON"", ""COK"", ""VU...",
199,WSM,"[""WLF"", ""ASM"", ""TKL"", ""NIU"", ""TON"", ""COK"", ""VU...","[""UMI"", ""AUS"", ""COK"", ""KIR"", ""USA"", ""NCL"", ""FJ..."
200,YEM,"[""SOM"", ""DJI"", ""OMN"", ""ERI"", ""ARE"", ""QAT"", ""BH...","[""YEM"", ""IRN"", ""OMN"", ""PAK"", ""SYC"", ""MDG"", ""TZ..."
201,ZAF,"[""MOZ"", ""NAM"", ""MDG"", ""ATF"", ""REU"", ""MYT"", ""CO...","[""MOZ"", ""ATF"", ""MDG"", ""TZA"", ""SYC"", ""AUS"", ""MU..."


In [193]:
df_dict.shape

(202, 3)

### Get below and above countries for each field in challenges

In [194]:
df.columns

Index(['GID_0', 'NAME_0', 'Area_Country', 'geometry', 'x', 'y', 'jpg_url',
       'has_priority', 'has_raisg', 'GlobalID', 'max_highlited_sp',
       'continent', 'GNI_PPP', 'sentence', 'Global_SPI_ter', 'hm_ter',
       'hm_no_ter', 'hm_vh_ter', 'Pop2020', 'SPI_ter', 'prop_protected_ter',
       'protection_needed_ter', 'amphibians', 'birds', 'mammals', 'reptiles',
       'endemic_amphibians', 'endemic_birds', 'endemic_mammals',
       'endemic_reptiles', 'nspecies_ter', 'total_endemic_ter',
       'filter_similar_ter', 'Marine', 'Pop2020_EEZ', 'hm_no_mar', 'hm_mar',
       'hm_vh_mar', 'SPI_mar', 'prop_protected_mar', 'protection_needed_mar',
       'mammals_mar', 'fishes_mar', 'endemic_mammals_mar',
       'endemic_fishes_mar', 'nspecies_mar', 'total_endemic_mar', 'Area_EEZ',
       'Global_SPI_mar'],
      dtype='object')

In [195]:
df.head()

Unnamed: 0,GID_0,NAME_0,Area_Country,geometry,x,y,jpg_url,has_priority,has_raisg,GlobalID,...,prop_protected_mar,protection_needed_mar,mammals_mar,fishes_mar,endemic_mammals_mar,endemic_fishes_mar,nspecies_mar,total_endemic_mar,Area_EEZ,Global_SPI_mar
0,ABW,Aruba,181.9384,"POLYGON ((-69.9782 12.4699, -69.97790000000001...",-69.970245,12.509136,https://live.staticflickr.com/1952/31416683438...,1.0,0.0,fe9f6eb0-f4f8-4f29-875a-5cbb3219e4e5,...,0.0,99.8,19.0,1466.0,,,1485.0,,29970.299588,45.0
1,AFG,Afghanistan,643857.5,"POLYGON ((68.5385 31.7546, 68.58199999999999 3...",66.029601,33.828432,https://p1.pxfuel.com/preview/967/12/53/afghan...,1.0,0.0,193ba976-0e5a-4cf6-9b09-d00bf83f4557,...,,,,,,,,,,
2,AGO,Angola,1247422.0,"MULTIPOLYGON (((11.8993 -17.2103, 11.8816 -17....",17.578022,-12.338271,https://live.staticflickr.com/3787/13698381215...,1.0,0.0,174ce788-4f67-4ae0-922f-d2ddac87f8c3,...,,,,,,,,,495859.762742,45.0
3,AIA,Anguilla,83.30331,"MULTIPOLYGON (((-63.0685 18.2368, -63.054 18.2...",-63.054023,18.214919,https://live.staticflickr.com/8063/8194570372_...,1.0,0.0,9f5f24d8-8b21-49a8-8f55-90b47cf63e7b,...,0.28,99.52,25.0,1494.0,,,1519.0,,90157.964205,45.0
4,ALA,Åland,1506.261,"MULTIPOLYGON (((20.1734 60.2873, 20.1828 60.28...",19.9677,60.241295,https://p1.pxfuel.com/preview/294/670/561/alan...,1.0,0.0,2b45351b-a335-490e-914e-7748d4f41f66,...,,,,,,,,,,


In [196]:
fields = ['GID_0', 'NAME_0', 'Area_EEZ', 'Pop2020_EEZ', 'prop_protected_mar', 'hm_vh_mar', 'protection_needed_mar', 'total_endemic_mar', 'nspecies_mar', 'SPI_mar', "continent"]

##### correct % of protection needed still pending from MOL

In [197]:
df_fields = df[fields].copy()
df_fields.head(2)

Unnamed: 0,GID_0,NAME_0,Area_EEZ,Pop2020_EEZ,prop_protected_mar,hm_vh_mar,protection_needed_mar,total_endemic_mar,nspecies_mar,SPI_mar,continent
0,ABW,Aruba,29970.299588,1547.605201,0.0,21.69525,99.8,,1485.0,0.0,North America
1,AFG,Afghanistan,,,,,,,,,Asia


In [198]:
filter_fields = ['Area_EEZ','Pop2020_EEZ', 'hm_vh_mar', 'prop_protected_mar', 'protection_needed_mar', 'total_endemic_mar','nspecies_mar', 'SPI_mar']

In [199]:
# Get dictionaries for the other fields
df_sort = df.copy()
nber_index = 5
max_index = len(df_sort.index) - 1

new_fields = []
for field in filter_fields:
    df_sort = df_sort.sort_values(by = [field]).reset_index(drop=True)
    collapse_list = []
    for index, i in df_sort.iterrows():
        country_gid = df_sort.GID_0[index]
        above_index = index - nber_index
        below_index = index + nber_index + 1
        if above_index < 0:
            below_index = nber_index * 2 
            above_index = 0
        if below_index > max_index:
            above_index = max_index - (nber_index * 2)
            below_index = max_index

        sub_pd = df_sort.GID_0[above_index:below_index]
        val_list = sub_pd.values.tolist()
        collapse_list.append(json.dumps(val_list))

        #val_list_rem = val_list.remove(country_gid)
    filter_field = f"filter_{field}"
    new_fields.append(filter_field)
    df_sort[filter_field] = collapse_list

In [200]:
df_sort.head(2)

Unnamed: 0,GID_0,NAME_0,Area_Country,geometry,x,y,jpg_url,has_priority,has_raisg,GlobalID,...,Area_EEZ,Global_SPI_mar,filter_Area_EEZ,filter_Pop2020_EEZ,filter_hm_vh_mar,filter_prop_protected_mar,filter_protection_needed_mar,filter_total_endemic_mar,filter_nspecies_mar,filter_SPI_mar
0,SLE,Sierra Leone,72601.119939,"MULTIPOLYGON (((-13.3035 9.039899999999999, -1...",-11.781517,8.565445,https://upload.wikimedia.org/wikipedia/commons...,1.0,0.0,445d666a-9836-4775-8700-bd10ce82ac8c,...,160578.587795,45,"[""GUF"", ""SUR"", ""UKR"", ""GUY"", ""SWE"", ""SLE"", ""UR...","[""ARG"", ""DEU"", ""TZA"", ""YEM"", ""SLB"", ""SLE"", ""CU...","[""ERI"", ""GRD"", ""SLV"", ""GHA"", ""HND"", ""SLE"", ""VI...","[""IND"", ""NGA"", ""YEM"", ""LBR"", ""TTO"", ""SLE"", ""CP...","[""IND"", ""NGA"", ""YEM"", ""LBR"", ""TTO"", ""SLE"", ""CP...","[""IND"", ""NGA"", ""YEM"", ""LBR"", ""TTO"", ""SLE"", ""CP...","[""GUF"", ""EGY"", ""MRT"", ""GIN"", ""ESH"", ""SLE"", ""RU...","[""SLE"", ""TTO"", ""PRK"", ""BMU"", ""NGA"", ""ISR"", ""AB..."
1,TTO,Trinidad and Tobago,5159.134673,"MULTIPOLYGON (((-61.4974 10.6394, -61.4985 10....",-61.296458,10.423903,https://upload.wikimedia.org/wikipedia/commons...,1.0,0.0,190d0b55-9c6e-494c-a2f7-9d24eb931410,...,76574.299151,45,"[""DEU"", ""ARE"", ""TKM"", ""NLD"", ""MYT"", ""TTO"", ""TL...","[""PSE"", ""GNB"", ""GEO"", ""PYF"", ""ISL"", ""TTO"", ""TL...","[""SLE"", ""VIR"", ""MEX"", ""COG"", ""JAM"", ""TTO"", ""SD...","[""TKL"", ""IND"", ""NGA"", ""YEM"", ""LBR"", ""TTO"", ""SL...","[""TKL"", ""IND"", ""NGA"", ""YEM"", ""LBR"", ""TTO"", ""SL...","[""TKL"", ""IND"", ""NGA"", ""YEM"", ""LBR"", ""TTO"", ""SL...","[""NIU"", ""MAR"", ""SHN"", ""GBR"", ""CPV"", ""TTO"", ""BM...","[""SLE"", ""TTO"", ""PRK"", ""BMU"", ""NGA"", ""ISR"", ""AB..."


In [201]:
len(df_sort)

254

In [202]:
df_sort = df_sort[df_sort['Marine']=='True']
len(df_sort)

202

### Get countries from same continent

In [203]:
df_sort.continent.unique()

array(['Africa', 'North America', 'Asia', 'Europe', 'South America',
       'Oceania', 'Antarctica', nan], dtype=object)

In [204]:
continent_dict = {}

In [205]:
for continent in df_sort.continent.unique():
    countries = df_sort.loc[df_sort['continent'] == continent].GID_0
    continent_dict[continent] = countries.tolist()

In [206]:
# Create same continent dictionary
same_continent_dict = dict.fromkeys(df_sort.GID_0.unique())
for key in same_continent_dict:
    continent_name = df_sort.loc[df_sort['GID_0'] == key, "continent"].to_list()[0]
    vals = continent_dict[continent_name]
    same_continent_dict[key] = json.dumps(vals)

In [207]:
# Convert to dataframe
continent_df = pd.DataFrame(same_continent_dict.items(), columns = ["GID_0","filter_continent"])
continent_df.head()

Unnamed: 0,GID_0,filter_continent
0,SLE,"[""SLE"", ""NGA"", ""ISR"", ""COD"", ""REU"", ""STP"", ""CP..."
1,TTO,"[""TTO"", ""BMU"", ""ABW"", ""VGB"", ""GRD"", ""SPM"", ""LC..."
2,PRK,"[""PRK"", ""IND"", ""IRQ"", ""QAT"", ""SGP"", ""YEM"", ""BR..."
3,BMU,"[""TTO"", ""BMU"", ""ABW"", ""VGB"", ""GRD"", ""SPM"", ""LC..."
4,NGA,"[""SLE"", ""NGA"", ""ISR"", ""COD"", ""REU"", ""STP"", ""CP..."


In [208]:
continent_df.shape

(202, 2)

In [209]:
# Merge the 3 filters we have so far into one dataframe
df_dict = pd.merge(left = continent_df, 
                   right = df_dict, left_on = "GID_0", right_on = "GID_0", how = "left")

In [210]:
df_dict.shape

(202, 4)

In [216]:
df_dict.tail(21)

Unnamed: 0,GID_0,filter_continent,filter_neigh,filter_steward
181,MYT,"[""SLE"", ""NGA"", ""ISR"", ""COD"", ""REU"", ""STP"", ""CP...","[""COM"", ""TZA"", ""MDG"", ""SYC"", ""MOZ"", ""KEN"", ""RE...","[""MDG"", ""SYC"", ""COM"", ""MUS"", ""MOZ"", ""TZA"", ""MD..."
182,WLF,"[""PYF"", ""TKL"", ""WSM"", ""VUT"", ""FSM"", ""TON"", ""CC...","[""WSM"", ""ASM"", ""TKL"", ""NIU"", ""TON"", ""COK"", ""VU...",
183,CXR,"[""PRK"", ""IND"", ""IRQ"", ""QAT"", ""SGP"", ""YEM"", ""BR...","[""CCK"", ""FJI"", ""SGP"", ""IDN"", ""MYS"", ""BRN"", ""TL...",
184,KAZ,"[""PRK"", ""IND"", ""IRQ"", ""QAT"", ""SGP"", ""YEM"", ""BR...","[""AZE"", ""TKM"", ""GEO"", ""UKR"", ""TUR"", ""IRQ"", ""IR...",
185,NRU,"[""PYF"", ""TKL"", ""WSM"", ""VUT"", ""FSM"", ""TON"", ""CC...","[""SLB"", ""MHL"", ""PNG"", ""VUT"", ""FSM"", ""NCL"", ""TK...",
186,SOM,"[""SLE"", ""NGA"", ""ISR"", ""COD"", ""REU"", ""STP"", ""CP...","[""YEM"", ""DJI"", ""KEN"", ""SYC"", ""ERI"", ""TZA"", ""OM...",
187,AZE,"[""PRK"", ""IND"", ""IRQ"", ""QAT"", ""SGP"", ""YEM"", ""BR...","[""TKM"", ""KAZ"", ""GEO"", ""IRQ"", ""IRN"", ""KWT"", ""SY...",
188,TKM,"[""PRK"", ""IND"", ""IRQ"", ""QAT"", ""SGP"", ""YEM"", ""BR...","[""AZE"", ""KAZ"", ""IRN"", ""GEO"", ""IRQ"", ""KWT"", ""BH...",
189,CIV,"[""SLE"", ""NGA"", ""ISR"", ""COD"", ""REU"", ""STP"", ""CP...","[""GHA"", ""LBR"", ""TGO"", ""BEN"", ""SLE"", ""NGA"", ""GI...",
190,AGO,"[""SLE"", ""NGA"", ""ISR"", ""COD"", ""REU"", ""STP"", ""CP...","[""COD"", ""COG"", ""GAB"", ""GNQ"", ""NAM"", ""STP"", ""CM...",


In [217]:
df_dict[df_dict['filter_continent'].isnull()]

Unnamed: 0,GID_0,filter_continent,filter_neigh,filter_steward


### Create the `filter_similar_marine` field with all the filters together

In [218]:
new_fields

['filter_Area_EEZ',
 'filter_Pop2020_EEZ',
 'filter_hm_vh_mar',
 'filter_prop_protected_mar',
 'filter_protection_needed_mar',
 'filter_total_endemic_mar',
 'filter_nspecies_mar',
 'filter_SPI_mar']

In [219]:
type(new_fields)

list

In [220]:
new_fields[0]

'filter_Area_EEZ'

In [221]:
new_fields.append("filter_neigh")
new_fields.append("filter_steward")
new_fields.append("filter_continent")
new_fields

['filter_Area_EEZ',
 'filter_Pop2020_EEZ',
 'filter_hm_vh_mar',
 'filter_prop_protected_mar',
 'filter_protection_needed_mar',
 'filter_total_endemic_mar',
 'filter_nspecies_mar',
 'filter_SPI_mar',
 'filter_neigh',
 'filter_steward',
 'filter_continent']

In [222]:
df_sort.shape

(202, 57)

In [223]:
df_sort.head(1)

Unnamed: 0,GID_0,NAME_0,Area_Country,geometry,x,y,jpg_url,has_priority,has_raisg,GlobalID,...,Area_EEZ,Global_SPI_mar,filter_Area_EEZ,filter_Pop2020_EEZ,filter_hm_vh_mar,filter_prop_protected_mar,filter_protection_needed_mar,filter_total_endemic_mar,filter_nspecies_mar,filter_SPI_mar
0,SLE,Sierra Leone,72601.119939,"MULTIPOLYGON (((-13.3035 9.039899999999999, -1...",-11.781517,8.565445,https://upload.wikimedia.org/wikipedia/commons...,1.0,0.0,445d666a-9836-4775-8700-bd10ce82ac8c,...,160578.587795,45,"[""GUF"", ""SUR"", ""UKR"", ""GUY"", ""SWE"", ""SLE"", ""UR...","[""ARG"", ""DEU"", ""TZA"", ""YEM"", ""SLB"", ""SLE"", ""CU...","[""ERI"", ""GRD"", ""SLV"", ""GHA"", ""HND"", ""SLE"", ""VI...","[""IND"", ""NGA"", ""YEM"", ""LBR"", ""TTO"", ""SLE"", ""CP...","[""IND"", ""NGA"", ""YEM"", ""LBR"", ""TTO"", ""SLE"", ""CP...","[""IND"", ""NGA"", ""YEM"", ""LBR"", ""TTO"", ""SLE"", ""CP...","[""GUF"", ""EGY"", ""MRT"", ""GIN"", ""ESH"", ""SLE"", ""RU...","[""SLE"", ""TTO"", ""PRK"", ""BMU"", ""NGA"", ""ISR"", ""AB..."


In [224]:
df_sort= df_sort.rename(columns={'filter_Area_EEZ': 'filter_Area_Country', 'filter_Pop2020_EEZ': 'filter_Pop2020'})

In [225]:
df_sort.head(1)

Unnamed: 0,GID_0,NAME_0,Area_Country,geometry,x,y,jpg_url,has_priority,has_raisg,GlobalID,...,Area_EEZ,Global_SPI_mar,filter_Area_Country,filter_Pop2020,filter_hm_vh_mar,filter_prop_protected_mar,filter_protection_needed_mar,filter_total_endemic_mar,filter_nspecies_mar,filter_SPI_mar
0,SLE,Sierra Leone,72601.119939,"MULTIPOLYGON (((-13.3035 9.039899999999999, -1...",-11.781517,8.565445,https://upload.wikimedia.org/wikipedia/commons...,1.0,0.0,445d666a-9836-4775-8700-bd10ce82ac8c,...,160578.587795,45,"[""GUF"", ""SUR"", ""UKR"", ""GUY"", ""SWE"", ""SLE"", ""UR...","[""ARG"", ""DEU"", ""TZA"", ""YEM"", ""SLB"", ""SLE"", ""CU...","[""ERI"", ""GRD"", ""SLV"", ""GHA"", ""HND"", ""SLE"", ""VI...","[""IND"", ""NGA"", ""YEM"", ""LBR"", ""TTO"", ""SLE"", ""CP...","[""IND"", ""NGA"", ""YEM"", ""LBR"", ""TTO"", ""SLE"", ""CP...","[""IND"", ""NGA"", ""YEM"", ""LBR"", ""TTO"", ""SLE"", ""CP...","[""GUF"", ""EGY"", ""MRT"", ""GIN"", ""ESH"", ""SLE"", ""RU...","[""SLE"", ""TTO"", ""PRK"", ""BMU"", ""NGA"", ""ISR"", ""AB..."


In [226]:
new_fields = ['filter_Area_Country',
 'filter_Pop2020',
 'filter_hm_vh_mar',
 'filter_prop_protected_mar',
 'filter_protection_needed_mar',
 'filter_total_endemic_mar',
 'filter_nspecies_mar',
 'filter_SPI_mar',
 'filter_neigh',
 'filter_steward',
 'filter_continent']
new_fields

['filter_Area_Country',
 'filter_Pop2020',
 'filter_hm_vh_mar',
 'filter_prop_protected_mar',
 'filter_protection_needed_mar',
 'filter_total_endemic_mar',
 'filter_nspecies_mar',
 'filter_SPI_mar',
 'filter_neigh',
 'filter_steward',
 'filter_continent']

In [227]:
df_filter = pd.merge(left = df_sort, right = df_dict, left_on = "GID_0", right_on = "GID_0", how = "left")
df_filter.head(2)

Unnamed: 0,GID_0,NAME_0,Area_Country,geometry,x,y,jpg_url,has_priority,has_raisg,GlobalID,...,filter_Pop2020,filter_hm_vh_mar,filter_prop_protected_mar,filter_protection_needed_mar,filter_total_endemic_mar,filter_nspecies_mar,filter_SPI_mar,filter_continent,filter_neigh,filter_steward
0,SLE,Sierra Leone,72601.119939,"MULTIPOLYGON (((-13.3035 9.039899999999999, -1...",-11.781517,8.565445,https://upload.wikimedia.org/wikipedia/commons...,1.0,0.0,445d666a-9836-4775-8700-bd10ce82ac8c,...,"[""ARG"", ""DEU"", ""TZA"", ""YEM"", ""SLB"", ""SLE"", ""CU...","[""ERI"", ""GRD"", ""SLV"", ""GHA"", ""HND"", ""SLE"", ""VI...","[""IND"", ""NGA"", ""YEM"", ""LBR"", ""TTO"", ""SLE"", ""CP...","[""IND"", ""NGA"", ""YEM"", ""LBR"", ""TTO"", ""SLE"", ""CP...","[""IND"", ""NGA"", ""YEM"", ""LBR"", ""TTO"", ""SLE"", ""CP...","[""GUF"", ""EGY"", ""MRT"", ""GIN"", ""ESH"", ""SLE"", ""RU...","[""SLE"", ""TTO"", ""PRK"", ""BMU"", ""NGA"", ""ISR"", ""AB...","[""SLE"", ""NGA"", ""ISR"", ""COD"", ""REU"", ""STP"", ""CP...","[""GIN"", ""LBR"", ""GNB"", ""SEN"", ""GMB"", ""CIV"", ""MR...","[""SLE"", ""GNQ"", ""GAB"", ""GNB"", ""SEN"", ""GIN"", ""ST..."
1,TTO,Trinidad and Tobago,5159.134673,"MULTIPOLYGON (((-61.4974 10.6394, -61.4985 10....",-61.296458,10.423903,https://upload.wikimedia.org/wikipedia/commons...,1.0,0.0,190d0b55-9c6e-494c-a2f7-9d24eb931410,...,"[""PSE"", ""GNB"", ""GEO"", ""PYF"", ""ISL"", ""TTO"", ""TL...","[""SLE"", ""VIR"", ""MEX"", ""COG"", ""JAM"", ""TTO"", ""SD...","[""TKL"", ""IND"", ""NGA"", ""YEM"", ""LBR"", ""TTO"", ""SL...","[""TKL"", ""IND"", ""NGA"", ""YEM"", ""LBR"", ""TTO"", ""SL...","[""TKL"", ""IND"", ""NGA"", ""YEM"", ""LBR"", ""TTO"", ""SL...","[""NIU"", ""MAR"", ""SHN"", ""GBR"", ""CPV"", ""TTO"", ""BM...","[""SLE"", ""TTO"", ""PRK"", ""BMU"", ""NGA"", ""ISR"", ""AB...","[""TTO"", ""BMU"", ""ABW"", ""VGB"", ""GRD"", ""SPM"", ""LC...","[""GRD"", ""VCT"", ""BRB"", ""LCA"", ""GUY"", ""MTQ"", ""DM...","[""VEN"", ""COL"", ""GRD"", ""VCT"", ""PAN"", ""NIC"", ""CU..."


In [228]:
df_filter.shape

(202, 60)

In [239]:
df_filter['filter_steward']= np.where(df_filter['filter_steward'].isnull(), df_filter['filter_nspecies_mar'], df_filter['filter_steward'])
df_filter[df_filter['filter_steward'].isnull()]

Unnamed: 0,GID_0,NAME_0,Area_Country,geometry,x,y,jpg_url,has_priority,has_raisg,GlobalID,...,filter_Pop2020,filter_hm_vh_mar,filter_prop_protected_mar,filter_protection_needed_mar,filter_total_endemic_mar,filter_nspecies_mar,filter_SPI_mar,filter_continent,filter_neigh,filter_steward


In [240]:
df_filter[df_filter['GID_0']=='GIB']

Unnamed: 0,GID_0,NAME_0,Area_Country,geometry,x,y,jpg_url,has_priority,has_raisg,GlobalID,...,filter_Pop2020,filter_hm_vh_mar,filter_prop_protected_mar,filter_protection_needed_mar,filter_total_endemic_mar,filter_nspecies_mar,filter_SPI_mar,filter_continent,filter_neigh,filter_steward
200,GIB,Gibraltar,7.207784,"POLYGON ((-5.3535 36.1572, -5.339 36.1547, -5....",-5.348887,36.136745,https://live.staticflickr.com/828/40960392564_...,1.0,0.0,50f28b21-1658-425a-85f6-9ab8deaec909,...,"[""VIR"", ""WSM"", ""JEY"", ""MNE"", ""LCA"", ""GIB"", ""BL...","[""ISR"", ""GGY"", ""BEL"", ""SVN"", ""IRQ"", ""GIB"", ""MN...","[""TGO"", ""DJI"", ""BHR"", ""SYR"", ""PSE"", ""GIB"", ""MN...","[""TGO"", ""DJI"", ""BHR"", ""SYR"", ""PSE"", ""GIB"", ""MN...","[""TGO"", ""DJI"", ""BHR"", ""SYR"", ""PSE"", ""GIB"", ""MN...","[""TGO"", ""DJI"", ""BHR"", ""SYR"", ""PSE"", ""GIB"", ""MN...","[""TGO"", ""DJI"", ""BHR"", ""SYR"", ""PSE"", ""GIB"", ""MN...","[""MCO"", ""FRO"", ""SVN"", ""ISL"", ""ALB"", ""NOR"", ""IR...","[""ESP"", ""MAR"", ""DZA"", ""MCO"", ""TUN"", ""JEY"", ""PR...","[""TGO"", ""DJI"", ""BHR"", ""SYR"", ""PSE"", ""GIB"", ""MN..."


In [241]:
similar_list = []
for index, i in df_filter.iterrows():
    filter_dict = i[new_fields].to_dict()   
    vals = json.dumps(filter_dict).replace('NaN','"NaN"').replace('"[', '[').replace(']"', ']').replace('\\', '')
    similar_list.append(vals)
    #similar_list.append(json.loads(json.dumps(filter_dict)))    
df_filter['filter_similar_mar'] = similar_list
df_filter.head(2)

Unnamed: 0,GID_0,NAME_0,Area_Country,geometry,x,y,jpg_url,has_priority,has_raisg,GlobalID,...,filter_hm_vh_mar,filter_prop_protected_mar,filter_protection_needed_mar,filter_total_endemic_mar,filter_nspecies_mar,filter_SPI_mar,filter_continent,filter_neigh,filter_steward,filter_similar_mar
0,SLE,Sierra Leone,72601.119939,"MULTIPOLYGON (((-13.3035 9.039899999999999, -1...",-11.781517,8.565445,https://upload.wikimedia.org/wikipedia/commons...,1.0,0.0,445d666a-9836-4775-8700-bd10ce82ac8c,...,"[""ERI"", ""GRD"", ""SLV"", ""GHA"", ""HND"", ""SLE"", ""VI...","[""IND"", ""NGA"", ""YEM"", ""LBR"", ""TTO"", ""SLE"", ""CP...","[""IND"", ""NGA"", ""YEM"", ""LBR"", ""TTO"", ""SLE"", ""CP...","[""IND"", ""NGA"", ""YEM"", ""LBR"", ""TTO"", ""SLE"", ""CP...","[""GUF"", ""EGY"", ""MRT"", ""GIN"", ""ESH"", ""SLE"", ""RU...","[""SLE"", ""TTO"", ""PRK"", ""BMU"", ""NGA"", ""ISR"", ""AB...","[""SLE"", ""NGA"", ""ISR"", ""COD"", ""REU"", ""STP"", ""CP...","[""GIN"", ""LBR"", ""GNB"", ""SEN"", ""GMB"", ""CIV"", ""MR...","[""SLE"", ""GNQ"", ""GAB"", ""GNB"", ""SEN"", ""GIN"", ""ST...","{""filter_Area_Country"": [""GUF"", ""SUR"", ""UKR"", ..."
1,TTO,Trinidad and Tobago,5159.134673,"MULTIPOLYGON (((-61.4974 10.6394, -61.4985 10....",-61.296458,10.423903,https://upload.wikimedia.org/wikipedia/commons...,1.0,0.0,190d0b55-9c6e-494c-a2f7-9d24eb931410,...,"[""SLE"", ""VIR"", ""MEX"", ""COG"", ""JAM"", ""TTO"", ""SD...","[""TKL"", ""IND"", ""NGA"", ""YEM"", ""LBR"", ""TTO"", ""SL...","[""TKL"", ""IND"", ""NGA"", ""YEM"", ""LBR"", ""TTO"", ""SL...","[""TKL"", ""IND"", ""NGA"", ""YEM"", ""LBR"", ""TTO"", ""SL...","[""NIU"", ""MAR"", ""SHN"", ""GBR"", ""CPV"", ""TTO"", ""BM...","[""SLE"", ""TTO"", ""PRK"", ""BMU"", ""NGA"", ""ISR"", ""AB...","[""TTO"", ""BMU"", ""ABW"", ""VGB"", ""GRD"", ""SPM"", ""LC...","[""GRD"", ""VCT"", ""BRB"", ""LCA"", ""GUY"", ""MTQ"", ""DM...","[""VEN"", ""COL"", ""GRD"", ""VCT"", ""PAN"", ""NIC"", ""CU...","{""filter_Area_Country"": [""DEU"", ""ARE"", ""TKM"", ..."


In [242]:
df_merge = df_filter[["GID_0", "filter_similar_mar"]]

In [243]:
df_merge.shape

(202, 2)

In [244]:
df_merge.head(2)

Unnamed: 0,GID_0,filter_similar_mar
0,SLE,"{""filter_Area_Country"": [""GUF"", ""SUR"", ""UKR"", ..."
1,TTO,"{""filter_Area_Country"": [""DEU"", ""ARE"", ""TKM"", ..."


In [245]:
df.columns

Index(['GID_0', 'NAME_0', 'Area_Country', 'geometry', 'x', 'y', 'jpg_url',
       'has_priority', 'has_raisg', 'GlobalID', 'max_highlited_sp',
       'continent', 'GNI_PPP', 'sentence', 'Global_SPI_ter', 'hm_ter',
       'hm_no_ter', 'hm_vh_ter', 'Pop2020', 'SPI_ter', 'prop_protected_ter',
       'protection_needed_ter', 'amphibians', 'birds', 'mammals', 'reptiles',
       'endemic_amphibians', 'endemic_birds', 'endemic_mammals',
       'endemic_reptiles', 'nspecies_ter', 'total_endemic_ter',
       'filter_similar_ter', 'Marine', 'Pop2020_EEZ', 'hm_no_mar', 'hm_mar',
       'hm_vh_mar', 'SPI_mar', 'prop_protected_mar', 'protection_needed_mar',
       'mammals_mar', 'fishes_mar', 'endemic_mammals_mar',
       'endemic_fishes_mar', 'nspecies_mar', 'total_endemic_mar', 'Area_EEZ',
       'Global_SPI_mar'],
      dtype='object')

In [246]:
df = pd.merge(left = df, right = df_merge, left_on = "GID_0", right_on = "GID_0", how = "left")
df.head(1)

Unnamed: 0,GID_0,NAME_0,Area_Country,geometry,x,y,jpg_url,has_priority,has_raisg,GlobalID,...,protection_needed_mar,mammals_mar,fishes_mar,endemic_mammals_mar,endemic_fishes_mar,nspecies_mar,total_endemic_mar,Area_EEZ,Global_SPI_mar,filter_similar_mar
0,ABW,Aruba,181.938403,"POLYGON ((-69.9782 12.4699, -69.97790000000001...",-69.970245,12.509136,https://live.staticflickr.com/1952/31416683438...,1.0,0.0,fe9f6eb0-f4f8-4f29-875a-5cbb3219e4e5,...,99.8,19.0,1466.0,,,1485.0,,29970.299588,45,"{""filter_Area_Country"": [""CUW"", ""GRD"", ""LVA"", ..."


In [257]:
len(df[df['Pop2020_EEZ'].isnull()])

52

In [259]:
df = df.drop(columns='geometry')
df.head()

Unnamed: 0,GID_0,NAME_0,Area_Country,x,y,jpg_url,has_priority,has_raisg,GlobalID,max_highlited_sp,...,protection_needed_mar,mammals_mar,fishes_mar,endemic_mammals_mar,endemic_fishes_mar,nspecies_mar,total_endemic_mar,Area_EEZ,Global_SPI_mar,filter_similar_mar
0,ABW,Aruba,181.9384,-69.970245,12.509136,https://live.staticflickr.com/1952/31416683438...,1.0,0.0,fe9f6eb0-f4f8-4f29-875a-5cbb3219e4e5,4.0,...,99.8,19.0,1466.0,,,1485.0,,29970.299588,45.0,"{""filter_Area_Country"": [""CUW"", ""GRD"", ""LVA"", ..."
1,AFG,Afghanistan,643857.5,66.029601,33.828432,https://p1.pxfuel.com/preview/967/12/53/afghan...,1.0,0.0,193ba976-0e5a-4cf6-9b09-d00bf83f4557,5.0,...,,,,,,,,,,
2,AGO,Angola,1247422.0,17.578022,-12.338271,https://live.staticflickr.com/3787/13698381215...,1.0,0.0,174ce788-4f67-4ae0-922f-d2ddac87f8c3,24.0,...,,,,,,,,495859.762742,45.0,"{""filter_Area_Country"": [""NFK"", ""BMU"", ""CCK"", ..."
3,AIA,Anguilla,83.30331,-63.054023,18.214919,https://live.staticflickr.com/8063/8194570372_...,1.0,0.0,9f5f24d8-8b21-49a8-8f55-90b47cf63e7b,2.0,...,99.52,25.0,1494.0,,,1519.0,,90157.964205,45.0,"{""filter_Area_Country"": [""ERI"", ""AZE"", ""FIN"", ..."
4,ALA,Åland,1506.261,19.9677,60.241295,https://p1.pxfuel.com/preview/294/670/561/alan...,1.0,0.0,2b45351b-a335-490e-914e-7748d4f41f66,1.0,...,,,,,,,,,,


In [265]:
df.head()

Unnamed: 0,GID_0,NAME_0,Area_Country,x,y,jpg_url,has_priority,has_raisg,GlobalID,max_highlited_sp,...,protection_needed_mar,mammals_mar,fishes_mar,endemic_mammals_mar,endemic_fishes_mar,nspecies_mar,total_endemic_mar,Area_EEZ,Global_SPI_mar,filter_similar_mar
0,ABW,Aruba,181.9384,-69.970245,12.509136,https://live.staticflickr.com/1952/31416683438...,1.0,0.0,fe9f6eb0-f4f8-4f29-875a-5cbb3219e4e5,4.0,...,99.8,19.0,1466.0,,,1485.0,,29970.299588,45.0,"{""filter_Area_Country"": [""CUW"", ""GRD"", ""LVA"", ..."
1,AFG,Afghanistan,643857.5,66.029601,33.828432,https://p1.pxfuel.com/preview/967/12/53/afghan...,1.0,0.0,193ba976-0e5a-4cf6-9b09-d00bf83f4557,5.0,...,,,,,,,,,,
2,AGO,Angola,1247422.0,17.578022,-12.338271,https://live.staticflickr.com/3787/13698381215...,1.0,0.0,174ce788-4f67-4ae0-922f-d2ddac87f8c3,24.0,...,,,,,,,,495859.762742,45.0,"{""filter_Area_Country"": [""NFK"", ""BMU"", ""CCK"", ..."
3,AIA,Anguilla,83.30331,-63.054023,18.214919,https://live.staticflickr.com/8063/8194570372_...,1.0,0.0,9f5f24d8-8b21-49a8-8f55-90b47cf63e7b,2.0,...,99.52,25.0,1494.0,,,1519.0,,90157.964205,45.0,"{""filter_Area_Country"": [""ERI"", ""AZE"", ""FIN"", ..."
4,ALA,Åland,1506.261,19.9677,60.241295,https://p1.pxfuel.com/preview/294/670/561/alan...,1.0,0.0,2b45351b-a335-490e-914e-7748d4f41f66,1.0,...,,,,,,,,,,


In [266]:
df.to_csv(f'{path}/NRC_All_20220504.csv')