# Precalculated data for gadm0 (including SPS) - February 2023
In this notebook, we use the biodiversity and contextual data generated in ArcPro to create tables containing precalculated data for gadm0. We include global SPS values (stored in species lookup tables) and create SPS values specific to the AOI (SPS_aoi) using biodiversity data for the protected areas of each country.
## Setup
### Library import

In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
import arcgis
from arcgis.gis import GIS
import json
import pandas as pd
from arcgis.features import FeatureLayerCollection
import requests as re
from copy import deepcopy
from itertools import repeat
import functools

### Utils

**getHTfromId**

In [2]:
def getHTfromId(item_id):
    """Fetch the first hosted table of an ArcGIS Online item as a DataFrame.

    Uses the module-level `gis` connection, so the login cell must have been
    run before this function is called.

    Parameters
    ----------
    item_id : str
        Id of the ArcGIS Online item that holds the table.

    Returns
    -------
    DataFrame
        All rows of the item's first table (`item.tables[0]`), as returned by
        `query().sdf`.

    Raises
    ------
    ValueError
        If no item with `item_id` is found through the current connection.
    """
    item = gis.content.get(item_id)
    if item is None:
        # Fail with a readable message instead of an AttributeError on `None.tables`
        raise ValueError(f"No ArcGIS Online item found for id {item_id!r}")
    flayer = item.tables[0]
    sdf = flayer.query().sdf
    return sdf

**format_df**

In [3]:
def format_df(path, file_name, lookups_id):
    """Load a per-country species table and enrich it with lookup information.

    Parameters
    ----------
    path : str
        Folder that contains the CSV.
    file_name : str
        Name of the CSV inside `path`; it must provide `SliceNumber` and `SUM`.
    lookups_id : str
        ArcGIS Online id of the species lookup table (read with getHTfromId).

    Returns
    -------
    DataFrame
        The CSV rows plus `range_area_km2`, `SPS` and `conservation_target`
        from the lookup table, and `per_global`: `SUM` as a percentage of
        `range_area_km2`, rounded to 2 decimals and capped at 100.

    Notes
    -----
    The share of the AOI area (`per_aoi`) is not computed, as it is currently
    not needed on the platform.
    """
    species = pd.read_csv(f'{path}/{file_name}')

    # Attach the lookup attributes to every species row
    lookup_cols = ['SliceNumber', 'range_area_km2', 'SPS', 'conservation_target']
    species = species.merge(getHTfromId(lookups_id)[lookup_cols], how='left', on='SliceNumber')

    # Species area inside the country relative to its global range, in percent
    species['per_global'] = (species['SUM'] / species['range_area_km2'] * 100).round(2)
    over_full = species['per_global'] > 100
    species.loc[over_full, 'per_global'] = 100  # presence cannot exceed 100%

    return species

### Connect to ArcGIS API

In [4]:
# Parse KEY=VALUE pairs from the local .env file into the `env` dict.
env_path = ".env"
env = {}
with open(env_path) as f:
    for line in f:
        line = line.strip()
        # Blank lines, comments and lines without "=" carry no setting
        if not line or line.startswith("#") or "=" not in line:
            continue
        # Split on the first "=" only, so values may themselves contain "="
        env_key, _, env_value = line.partition("=")
        env[env_key.strip()] = env_value.strip()

In [5]:
# Pull the ArcGIS Online credentials out of the parsed .env mapping
# (a missing key raises KeyError here, before any login attempt).
aol_password = env['ARCGIS_SOFIA_PASS']
aol_username = env['ARCGIS_SOFIA_USER']

In [6]:
# Open the ArcGIS Online connection; `gis` is the module-level object that getHTfromId() uses.
# NOTE(review): the recorded output warns that the keyring backend cannot store passwords for
# persistent profiles - confirm whether `profile="eowilson"` is still wanted.
gis = GIS("https://eowilson.maps.arcgis.com", aol_username, aol_password, profile = "eowilson")

Keyring backend being used (keyring.backends.OS_X.Keyring (priority: 5)) either failed to install or is not recommended by the keyring project (i.e. it is not secure). This means you can not use stored passwords through GIS's persistent profiles. Note that extra system-wide steps must be taken on a Linux machine to use the python keyring module securely. Read more about this at the keyring API doc (http://bit.ly/2EWDP7B) and the ArcGIS API for Python doc (http://bit.ly/2CK2wG8).


### Set paths

In [7]:
# Folders for the input tables and for the generated outputs.
# NOTE(review): absolute paths tied to one machine - adjust before running elsewhere.
path_in = '/Users/sofia/Documents/HE_Data/Precalculated/gadm0/Inputs'
path_out = '/Users/sofia/Documents/HE_Data/Precalculated/gadm0/Outputs'

## Prepare data:
### Get country boundaries
The dataset used here corresponds to gadm version 3.6. 

In [8]:
# Read the gadm0 (version 3.6) country layer from the inputs folder and preview it
gadm0= gpd.read_file(f'{path_in}/gadm36_level0_original_simple.shp')
gadm0.head()

Unnamed: 0,GID_0,NAME_0,AREA_KM2,MOL_ID,InPoly_FID,SimPgnFlag,MaxSimpTol,MinSimpTol,geometry
0,ABW,Aruba,181.9384,1,1,0,0.005,0.005,"POLYGON ((-69.97820 12.46986, -70.02847 12.503..."
1,AFG,Afghanistan,643857.5,2,2,0,0.005,0.005,"POLYGON ((68.52644 31.75435, 68.53852 31.75457..."
2,AGO,Angola,1247422.0,3,3,0,0.005,0.005,"MULTIPOLYGON (((11.73347 -16.67255, 11.74014 -..."
3,AIA,Anguilla,83.30331,4,4,0,0.005,0.005,"MULTIPOLYGON (((-63.42375 18.58903, -63.42847 ..."
4,ALA,Åland,1506.261,5,5,0,0.005,0.005,"MULTIPOLYGON (((21.32195 59.74986, 21.32472 59..."


In [9]:
# Working table: only the identifier, name, area and geometry columns.
# `.copy()` keeps later edits to dff from touching gadm0.
keep_cols = ['GID_0', 'NAME_0', 'MOL_ID', 'AREA_KM2', 'geometry']
dff = gadm0[keep_cols].copy()
dff.head()

Unnamed: 0,GID_0,NAME_0,MOL_ID,AREA_KM2,geometry
0,ABW,Aruba,1,181.9384,"POLYGON ((-69.97820 12.46986, -70.02847 12.503..."
1,AFG,Afghanistan,2,643857.5,"POLYGON ((68.52644 31.75435, 68.53852 31.75457..."
2,AGO,Angola,3,1247422.0,"MULTIPOLYGON (((11.73347 -16.67255, 11.74014 -..."
3,AIA,Anguilla,4,83.30331,"MULTIPOLYGON (((-63.42375 18.58903, -63.42847 ..."
4,ALA,Åland,5,1506.261,"MULTIPOLYGON (((21.32195 59.74986, 21.32472 59..."


### Update country names with newer version of gadm 
Since some countries have changed their names in recent years, we use the names from a newer version of GADM (4.0 in this case).

In [10]:
# Open the newer gadm version (4.0) to get the updated country names.
# NOTE(review): absolute path outside `path_in` - adjust before running elsewhere.
gadm40 = gpd.read_file('/Users/sofia/Documents/HE_Data/gadm/gadm404-shp/gadm404.shp')

In [11]:
# Reduce gadm40 to one row per country code (first name found for each GID_0)
# and rename the two fields to GID and NAME so they do not clash with dff's columns.
gadm40_GID = (
    gadm40[['GID_0', 'NAME_0']]
    .groupby('GID_0')
    .first()
    .reset_index()
    .rename(columns={'GID_0': 'GID', 'NAME_0': 'NAME'})
)
gadm40_GID

Unnamed: 0,GID,NAME
0,ABW,Aruba
1,AFG,Afghanistan
2,AGO,Angola
3,AIA,Anguilla
4,ALA,Åland
...,...,...
258,Z09,Sang
259,ZAF,South Africa
260,ZMB,Zambia
261,ZNC,Northern Cyprus


In [12]:
# Merge the GID and NAME columns from gadm4.0 into the gadm3.6 dataset.
# Left join: countries missing from gadm4.0 keep their row and get NaN in GID/NAME.
dff = pd.merge(
    left=dff,
    right=gadm40_GID,
    left_on='GID_0',
    right_on='GID',
    how='left',
)
dff.head()

Unnamed: 0,GID_0,NAME_0,MOL_ID,AREA_KM2,geometry,GID,NAME
0,ABW,Aruba,1,181.9384,"POLYGON ((-69.97820 12.46986, -70.02847 12.503...",ABW,Aruba
1,AFG,Afghanistan,2,643857.5,"POLYGON ((68.52644 31.75435, 68.53852 31.75457...",AFG,Afghanistan
2,AGO,Angola,3,1247422.0,"MULTIPOLYGON (((11.73347 -16.67255, 11.74014 -...",AGO,Angola
3,AIA,Anguilla,4,83.30331,"MULTIPOLYGON (((-63.42375 18.58903, -63.42847 ...",AIA,Anguilla
4,ALA,Åland,5,1506.261,"MULTIPOLYGON (((21.32195 59.74986, 21.32472 59...",ALA,Åland


In [13]:
# List the countries whose gadm36 name (NAME_0) differs from the gadm40 name (NAME);
# rows with no gadm40 match show up too, because NaN never equals the old name.
dff2 = dff.loc[dff['NAME_0'].ne(dff['NAME'])]
dff2[['NAME_0', 'NAME']]

Unnamed: 0,NAME_0,NAME
47,Republic of Congo,Republic of the Congo
51,Cape Verde,Cabo Verde
58,Czech Republic,Czechia
96,Hong Kong,
135,Macao,
144,Macedonia,North Macedonia
184,Palestina,Palestine
187,Reunion,Réunion
196,Saint Helena,"Saint Helena, Ascension and Tristan da Cunha"
248,Northern Cyprus,


In [14]:
# Give each country with a NaN name in gadm40 the name it had in gadm36.
# Assign the result back to the column: an in-place fillna on `dff.NAME` acts on a
# temporary Series and does not update dff when pandas Copy-on-Write is active.
dff['NAME'] = dff['NAME'].fillna(dff['NAME_0'])

In [15]:
# Same check again after the fill: only genuine renames between gadm36 and gadm40 should remain
dff2 = dff.loc[dff['NAME_0'].ne(dff['NAME'])]
dff2[['NAME_0', 'NAME']]

Unnamed: 0,NAME_0,NAME
47,Republic of Congo,Republic of the Congo
51,Cape Verde,Cabo Verde
58,Czech Republic,Czechia
144,Macedonia,North Macedonia
184,Palestina,Palestine
187,Reunion,Réunion
196,Saint Helena,"Saint Helena, Ascension and Tristan da Cunha"


In [16]:
# Overwrite NAME_0 with the gadm4.0 names, then discard the helper columns from the merge
dff['NAME_0'] = dff['NAME']
dff = dff.drop(columns=['NAME', 'GID'])

In [17]:
# Spot check: the row with code CZE should now be named Czechia
dff.loc[dff['GID_0'] == 'CZE', 'NAME_0']

58    Czechia
Name: NAME_0, dtype: object

In [18]:
# Preview the table after the name update (helper columns NAME and GID are gone)
dff.head()

Unnamed: 0,GID_0,NAME_0,MOL_ID,AREA_KM2,geometry
0,ABW,Aruba,1,181.9384,"POLYGON ((-69.97820 12.46986, -70.02847 12.503..."
1,AFG,Afghanistan,2,643857.5,"POLYGON ((68.52644 31.75435, 68.53852 31.75457..."
2,AGO,Angola,3,1247422.0,"MULTIPOLYGON (((11.73347 -16.67255, 11.74014 -..."
3,AIA,Anguilla,4,83.30331,"MULTIPOLYGON (((-63.42375 18.58903, -63.42847 ..."
4,ALA,Åland,5,1506.261,"MULTIPOLYGON (((21.32195 59.74986, 21.32472 59..."


### Add alternative names to countries
Some countries have alternative names, that is, they are also known by former names or abbreviations. Here, we add a column containing other possible names for a small selection of countries.

In [19]:
# Create a new, empty field for the alternative names.
# The column is created with object dtype because it will hold strings: writing
# strings into a float NaN column is a deprecated incompatible-dtype assignment.
NaN = np.nan
dff["alternative_names"] = pd.Series(NaN, index=dff.index, dtype=object)
dff.head()

Unnamed: 0,GID_0,NAME_0,MOL_ID,AREA_KM2,geometry,alternative_names
0,ABW,Aruba,1,181.9384,"POLYGON ((-69.97820 12.46986, -70.02847 12.503...",
1,AFG,Afghanistan,2,643857.5,"POLYGON ((68.52644 31.75435, 68.53852 31.75457...",
2,AGO,Angola,3,1247422.0,"MULTIPOLYGON (((11.73347 -16.67255, 11.74014 -...",
3,AIA,Anguilla,4,83.30331,"MULTIPOLYGON (((-63.42375 18.58903, -63.42847 ...",
4,ALA,Åland,5,1506.261,"MULTIPOLYGON (((21.32195 59.74986, 21.32472 59...",


In [20]:
# Alternative names for countries that are also known by an abbreviation or a different name.
# Keys are the NAME_0 values currently in dff; values are stored as a single comma-separated string.
alternative_names = {
    "United Kingdom": "UK",
    "United States": "US, United States of America, USA",
    "Swaziland": "Eswatini",
    "Timor-Leste": "East Timor",
    "North Macedonia": "Republic of Macedonia",
    "Czechia": "Czech Republic",
    "Côte d'Ivoire": "Ivory Coast",
    "Turkey": "Türkiye",
    "Myanmar": "Burma",
    "Cabo Verde": "Cape Verde",
}
for country, aliases in alternative_names.items():
    dff.loc[dff.NAME_0 == country, "alternative_names"] = aliases

In [21]:
# Spot check: the United States row should now carry its alternative names
dff[dff.NAME_0=='United States']

Unnamed: 0,GID_0,NAME_0,MOL_ID,AREA_KM2,geometry,alternative_names
234,USA,United States,235,9472976.0,"MULTIPOLYGON (((-154.99611 19.33694, -155.0713...","US, United States of America, USA"


### Add country names translated to Spanish, Portuguese and French

In [22]:
# Translated country names (Spanish, French, Portuguese), keyed by GID_0.
# The CSV also carries a saved index column ('Unnamed: 0'), which is dropped after the merge.
translations_csv = f'{path_in}/countrynames_translated.csv'
names = pd.read_csv(translations_csv)
names

Unnamed: 0.1,Unnamed: 0,GID_0,NAME_0_es,NAME_0_fr,NAME_0_pt
0,0,ABW,Aruba,Aruba,Aruba
1,1,AFG,Afganistán,Afghanistan,Afeganistão
2,2,AGO,Angola,Angola,Angola
3,3,AIA,Anguila,Anguilla,Anguila
4,4,ALA,Åland,Åland,Ilhas Åland
...,...,...,...,...,...
250,250,XSP,Islas Spratly,Îles Spratleys,Ilhas Spratly
251,251,YEM,Yemen,Yémen,Iémen
252,252,ZAF,Sudáfrica,Afrique du Sud,África do Sul
253,253,ZMB,Zambia,Zambie,Zâmbia


In [23]:
# Attach the translated names to every country; both tables share the GID_0 key
dff = pd.merge(dff, names, on='GID_0', how='left')
dff.head()

Unnamed: 0.1,GID_0,NAME_0,MOL_ID,AREA_KM2,geometry,alternative_names,Unnamed: 0,NAME_0_es,NAME_0_fr,NAME_0_pt
0,ABW,Aruba,1,181.9384,"POLYGON ((-69.97820 12.46986, -70.02847 12.503...",,0,Aruba,Aruba,Aruba
1,AFG,Afghanistan,2,643857.5,"POLYGON ((68.52644 31.75435, 68.53852 31.75457...",,1,Afganistán,Afghanistan,Afeganistão
2,AGO,Angola,3,1247422.0,"MULTIPOLYGON (((11.73347 -16.67255, 11.74014 -...",,2,Angola,Angola,Angola
3,AIA,Anguilla,4,83.30331,"MULTIPOLYGON (((-63.42375 18.58903, -63.42847 ...",,3,Anguila,Anguilla,Anguila
4,ALA,Åland,5,1506.261,"MULTIPOLYGON (((21.32195 59.74986, 21.32472 59...",,4,Åland,Åland,Ilhas Åland


In [24]:
# Remove the index column that came along with the translations CSV
dff = dff.drop(columns=['Unnamed: 0'])
dff.head()

Unnamed: 0,GID_0,NAME_0,MOL_ID,AREA_KM2,geometry,alternative_names,NAME_0_es,NAME_0_fr,NAME_0_pt
0,ABW,Aruba,1,181.9384,"POLYGON ((-69.97820 12.46986, -70.02847 12.503...",,Aruba,Aruba,Aruba
1,AFG,Afghanistan,2,643857.5,"POLYGON ((68.52644 31.75435, 68.53852 31.75457...",,Afganistán,Afghanistan,Afeganistão
2,AGO,Angola,3,1247422.0,"MULTIPOLYGON (((11.73347 -16.67255, 11.74014 -...",,Angola,Angola,Angola
3,AIA,Anguilla,4,83.30331,"MULTIPOLYGON (((-63.42375 18.58903, -63.42847 ...",,Anguila,Anguilla,Anguila
4,ALA,Åland,5,1506.261,"MULTIPOLYGON (((21.32195 59.74986, 21.32472 59...",,Åland,Åland,Ilhas Åland


## Calculate biodiversity data for countries
### Format biodiversity data

In [25]:
# Ids of the species lookup tables in ArcGIS Online, one per taxon
lookups = {
    'amphibians': 'de2309ec6aa64223a8bea682c0200d34',
    'birds': 'b5f5c8d693b74abd9b0d236915d8e739',
    'mammals': '1d3b50e3b8544730ae0e2a80f00b4119',
    'reptiles': 'bc6de8b9b8df4fffb6aa4208f4bf1467',
}

# Load the per-country species tables generated in ArcPro (gadm0_<taxon>.csv)
# and enrich each one with its lookup table
amphibians, birds, mammals, reptiles = (
    format_df(path_in, f'gadm0_{taxon}.csv', lookups[taxon])
    for taxon in ('amphibians', 'birds', 'mammals', 'reptiles')
)

In [26]:
# The SPS taken from the lookup table is the global value; name it accordingly
amphibians = amphibians.rename({'SPS': 'SPS_global'}, axis='columns')
amphibians.head(1)

Unnamed: 0,OBJECTID,MOL_ID,SliceNumber,FREQUENCY,SUM,range_area_km2,SPS_global,conservation_target,per_global
0,1,1,1819,1,151.0,24981,65,50,0.6


In [27]:
# The SPS taken from the lookup table is the global value; name it accordingly
birds = birds.rename({'SPS': 'SPS_global'}, axis='columns')
birds.head(1)

Unnamed: 0,OBJECTID,MOL_ID,SliceNumber,FREQUENCY,SUM,range_area_km2,SPS_global,conservation_target,per_global
0,1,1,142,1,36.0,2656221,100,15,0.0


In [28]:
# The SPS taken from the lookup table is the global value; name it accordingly
mammals = mammals.rename({'SPS': 'SPS_global'}, axis='columns')
mammals.head(1)

Unnamed: 0,OBJECTID,MOL_ID,SliceNumber,FREQUENCY,SUM,range_area_km2,SPS_global,conservation_target,per_global
0,1,1,329,1,55.0,1173243,100,15,0.0


In [29]:
# The SPS taken from the lookup table is the global value; name it accordingly
reptiles = reptiles.rename({'SPS': 'SPS_global'}, axis='columns')
reptiles.head(1)

Unnamed: 0,OBJECTID,MOL_ID,SliceNumber,FREQUENCY,SUM,range_area_km2,SPS_global,conservation_target,per_global
0,1,1,353,1,164.0,1441180,100,15,0.01


### Calculate SPS_aoi

In [30]:
# To calculate the SPS_AOI we need to know the species found on the WDPAs
# (calculations done in Pro: AOI_Summaries_Precalculations.aprx)
def _read_wdpa(file_name, sum_column):
    """Read one WDPA species table as integers and rename its sum column to SUM_PA."""
    table = pd.read_csv(f'{path_in}/WDPA_countries/{file_name}').astype(int)
    return table.rename(columns={sum_column: 'SUM_PA'})


# The sum column is named differently in each export
wdpa_amph = _read_wdpa('Amphibians_wdpa.csv', 'SUM_amphibians')
wdpa_bird = _read_wdpa('Birds_wdpa.csv', 'SUM_birds')
wdpa_mamm = _read_wdpa('Mammals_wdpa.csv', 'SUM_presence')
wdpa_rept = _read_wdpa('Reptiles_wdpa.csv', 'SUM_reptiles')

In [31]:
# Preview the WDPA amphibian table (the sum column is already renamed to SUM_PA)
wdpa_amph.head(1)

Unnamed: 0,OID_,MOL_ID,SliceNumber,FREQUENCY,SUM_PA,COUNTRY_ID
0,1,2,3318,1,2,171


In [32]:
# Aggregate data by country: add up SUM_PA for each species (SliceNumber) across the
# different WDPA (MOL_ID) that belong to the same country (COUNTRY_ID)
group_keys = ['COUNTRY_ID', 'SliceNumber']
wdpa_amph2, wdpa_bird2, wdpa_mamm2, wdpa_rept2 = (
    table.groupby(group_keys)['SUM_PA'].sum().reset_index()
    for table in (wdpa_amph, wdpa_bird, wdpa_mamm, wdpa_rept)
)

In [33]:
# Add the protected-area sums to the master tables with all species per country.
# In the species tables MOL_ID identifies the country, so it is matched against COUNTRY_ID.
# Species with no protected-area record get SUM_PA = 0 (fillna(0) applies to every column).
amphibians2, birds2, mammals2, reptiles2 = (
    species.merge(
        protected,
        how='left',
        left_on=['MOL_ID', 'SliceNumber'],
        right_on=['COUNTRY_ID', 'SliceNumber'],
    )
    .fillna(0)
    .drop(columns='COUNTRY_ID')
    for species, protected in (
        (amphibians, wdpa_amph2),
        (birds, wdpa_bird2),
        (mammals, wdpa_mamm2),
        (reptiles, wdpa_rept2),
    )
)

In [34]:
# Calculate SPS_aoi: percentage of the species' area in the country that lies inside
# protected areas (SUM_PA / SUM), expressed relative to the conservation target and
# truncated to an integer
for taxon_df in (amphibians2, birds2, mammals2, reptiles2):
    protected_pct = (taxon_df['SUM_PA'] / taxon_df['SUM']) * 100
    taxon_df['SPS_aoi'] = ((protected_pct / taxon_df['conservation_target']) * 100).astype(int)

In [35]:
# Inspect the amphibian table with SUM_PA and the (still uncapped) SPS_aoi
amphibians2

Unnamed: 0,OBJECTID,MOL_ID,SliceNumber,FREQUENCY,SUM,range_area_km2,SPS_global,conservation_target,per_global,SUM_PA,SPS_aoi
0,1,1,1819,1,151.0,24981,65,50,0.60,37.0,49
1,2,1,4583,1,95.0,509216,100,15,0.02,26.0,182
2,3,1,5543,1,152.0,10573946,100,15,0.00,39.0,171
3,4,2,32,4,317.0,317,0,100,100.00,0.0,0
4,5,2,950,43,44105.0,275425,12,15,16.01,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...
12766,12767,255,6042,35,4313.0,865504,100,15,0.50,3015.0,466
12767,12768,255,6116,2,17.0,17,58,100,100.00,8.0,47
12768,12769,255,6148,42,347136.0,4276570,100,15,8.12,145522.0,279
12769,12770,255,6151,42,367211.0,5331630,100,15,6.89,171713.0,311


In [36]:
# Limit SPS_aoi to 100: values below 100 are kept, everything else becomes 100.
# The capped Series is assigned back to the column; an in-place `where` on
# `df['SPS_aoi']` acts on a temporary Series and leaves the frame unchanged when
# pandas Copy-on-Write is active.
for taxon_df in (amphibians2, birds2, mammals2, reptiles2):
    taxon_df['SPS_aoi'] = taxon_df['SPS_aoi'].where(taxon_df['SPS_aoi'] < 100, 100)

### Create table with biodiversity data for countries

In [37]:
# Format biodiversity data as a string: one JSON array of species records per country (MOL_ID),
# stored in a column named after the taxon
json_cols = ['SliceNumber', 'per_global', 'SPS_global', 'SPS_aoi']
amphibians_bio, birds_bio, mammals_bio, reptiles_bio = (
    species.groupby('MOL_ID')[json_cols]
    .apply(lambda rows: rows.to_json(orient='records'))
    .to_frame(taxon)
    .reset_index()
    for taxon, species in (
        ('amphibians', amphibians2),
        ('birds', birds2),
        ('mammals', mammals2),
        ('reptiles', reptiles2),
    )
)


In [38]:
# Attach the four JSON columns to the country table; countries without records for a taxon get NaN
for taxon_bio in (amphibians_bio, birds_bio, mammals_bio, reptiles_bio):
    dff = pd.merge(dff, taxon_bio, how='left', on='MOL_ID')
dff.head()

Unnamed: 0,GID_0,NAME_0,MOL_ID,AREA_KM2,geometry,alternative_names,NAME_0_es,NAME_0_fr,NAME_0_pt,amphibians,birds,mammals,reptiles
0,ABW,Aruba,1,181.9384,"POLYGON ((-69.97820 12.46986, -70.02847 12.503...",,Aruba,Aruba,Aruba,"[{""SliceNumber"":1819,""per_global"":0.6,""SPS_glo...","[{""SliceNumber"":142,""per_global"":0.0,""SPS_glob...","[{""SliceNumber"":329,""per_global"":0.0,""SPS_glob...","[{""SliceNumber"":353,""per_global"":0.01,""SPS_glo..."
1,AFG,Afghanistan,2,643857.5,"POLYGON ((68.52644 31.75435, 68.53852 31.75457...",,Afganistán,Afghanistan,Afeganistão,"[{""SliceNumber"":32,""per_global"":100.0,""SPS_glo...","[{""SliceNumber"":26,""per_global"":1.44,""SPS_glob...","[{""SliceNumber"":167,""per_global"":11.15,""SPS_gl...","[{""SliceNumber"":1,""per_global"":28.71,""SPS_glob..."
2,AGO,Angola,3,1247422.0,"MULTIPOLYGON (((11.73347 -16.67255, 11.74014 -...",,Angola,Angola,Angola,"[{""SliceNumber"":2,""per_global"":0.59,""SPS_globa...","[{""SliceNumber"":26,""per_global"":4.67,""SPS_glob...","[{""SliceNumber"":28,""per_global"":2.22,""SPS_glob...","[{""SliceNumber"":40,""per_global"":26.68,""SPS_glo..."
3,AIA,Anguilla,4,83.30331,"MULTIPOLYGON (((-63.42375 18.58903, -63.42847 ...",,Anguila,Anguilla,Anguila,"[{""SliceNumber"":1819,""per_global"":0.23,""SPS_gl...","[{""SliceNumber"":142,""per_global"":0.0,""SPS_glob...","[{""SliceNumber"":329,""per_global"":0.0,""SPS_glob...","[{""SliceNumber"":316,""per_global"":25.32,""SPS_gl..."
4,ALA,Åland,5,1506.261,"MULTIPOLYGON (((21.32195 59.74986, 21.32472 59...",,Åland,Åland,Ilhas Åland,"[{""SliceNumber"":963,""per_global"":0.01,""SPS_glo...","[{""SliceNumber"":116,""per_global"":0.01,""SPS_glo...","[{""SliceNumber"":129,""per_global"":0.01,""SPS_glo...","[{""SliceNumber"":2500,""per_global"":0.01,""SPS_gl..."


In [39]:
# Spot check: full amphibians JSON string for the country with MOL_ID 1
dff.loc[dff['MOL_ID']==1,'amphibians'].values[0]

'[{"SliceNumber":1819,"per_global":0.6,"SPS_global":65,"SPS_aoi":49},{"SliceNumber":4583,"per_global":0.02,"SPS_global":100,"SPS_aoi":100},{"SliceNumber":5543,"per_global":0.0,"SPS_global":100,"SPS_aoi":100}]'

### Add nspecies

In [40]:
# Get the raw per-country species tables for all taxa.
# Read from `path_in` (the same input folder) instead of repeating the absolute path.
a = pd.read_csv(f'{path_in}/gadm0_amphibians.csv')
b = pd.read_csv(f'{path_in}/gadm0_birds.csv')
m = pd.read_csv(f'{path_in}/gadm0_mammals.csv')
r = pd.read_csv(f'{path_in}/gadm0_reptiles.csv')

In [41]:
# Count the number of species rows (SliceNumber) per country for each group
a_count, b_count, m_count, r_count = (
    table.groupby('MOL_ID')['SliceNumber'].count().astype(int)
    for table in (a, b, m, r)
)

In [42]:
# Combine the four per-taxon counts into one table indexed by MOL_ID; a country that is
# missing from a taxon gets NaN there, which is turned into 0 before casting back to int
cols = ['amph_nspecies', 'bird_nspecies', 'mamm_nspecies', 'rept_nspecies']
frame = dict(zip(cols, (a_count, b_count, m_count, r_count)))
df = pd.DataFrame(frame).reset_index()
df[cols] = df[cols].fillna(0).astype('int')

# Total number of species per country
df['nspecies'] = (
    df['amph_nspecies']
    + df['bird_nspecies']
    + df['mamm_nspecies']
    + df['rept_nspecies']
)
df

Unnamed: 0,MOL_ID,amph_nspecies,bird_nspecies,mamm_nspecies,rept_nspecies,nspecies
0,1,3,82,48,30,163
1,2,8,316,140,134,598
2,3,128,891,322,335,1676
3,4,2,96,6,10,114
4,5,5,88,26,2,121
...,...,...,...,...,...,...
250,251,0,36,0,3,39
251,252,6,218,71,129,424
252,253,117,706,271,399,1493
253,254,93,695,275,218,1281


In [43]:
# Add the species counts to the country table (left join on MOL_ID) and display the result
dff_nspecies = dff.merge(df, how='left', on = 'MOL_ID')
dff_nspecies

Unnamed: 0,GID_0,NAME_0,MOL_ID,AREA_KM2,geometry,alternative_names,NAME_0_es,NAME_0_fr,NAME_0_pt,amphibians,birds,mammals,reptiles,amph_nspecies,bird_nspecies,mamm_nspecies,rept_nspecies,nspecies
0,ABW,Aruba,1,1.819384e+02,"POLYGON ((-69.97820 12.46986, -70.02847 12.503...",,Aruba,Aruba,Aruba,"[{""SliceNumber"":1819,""per_global"":0.6,""SPS_glo...","[{""SliceNumber"":142,""per_global"":0.0,""SPS_glob...","[{""SliceNumber"":329,""per_global"":0.0,""SPS_glob...","[{""SliceNumber"":353,""per_global"":0.01,""SPS_glo...",3,82,48,30,163
1,AFG,Afghanistan,2,6.438575e+05,"POLYGON ((68.52644 31.75435, 68.53852 31.75457...",,Afganistán,Afghanistan,Afeganistão,"[{""SliceNumber"":32,""per_global"":100.0,""SPS_glo...","[{""SliceNumber"":26,""per_global"":1.44,""SPS_glob...","[{""SliceNumber"":167,""per_global"":11.15,""SPS_gl...","[{""SliceNumber"":1,""per_global"":28.71,""SPS_glob...",8,316,140,134,598
2,AGO,Angola,3,1.247422e+06,"MULTIPOLYGON (((11.73347 -16.67255, 11.74014 -...",,Angola,Angola,Angola,"[{""SliceNumber"":2,""per_global"":0.59,""SPS_globa...","[{""SliceNumber"":26,""per_global"":4.67,""SPS_glob...","[{""SliceNumber"":28,""per_global"":2.22,""SPS_glob...","[{""SliceNumber"":40,""per_global"":26.68,""SPS_glo...",128,891,322,335,1676
3,AIA,Anguilla,4,8.330331e+01,"MULTIPOLYGON (((-63.42375 18.58903, -63.42847 ...",,Anguila,Anguilla,Anguila,"[{""SliceNumber"":1819,""per_global"":0.23,""SPS_gl...","[{""SliceNumber"":142,""per_global"":0.0,""SPS_glob...","[{""SliceNumber"":329,""per_global"":0.0,""SPS_glob...","[{""SliceNumber"":316,""per_global"":25.32,""SPS_gl...",2,96,6,10,114
4,ALA,Åland,5,1.506261e+03,"MULTIPOLYGON (((21.32195 59.74986, 21.32472 59...",,Åland,Åland,Ilhas Åland,"[{""SliceNumber"":963,""per_global"":0.01,""SPS_glo...","[{""SliceNumber"":116,""per_global"":0.01,""SPS_glo...","[{""SliceNumber"":129,""per_global"":0.01,""SPS_glo...","[{""SliceNumber"":2500,""per_global"":0.01,""SPS_gl...",5,88,26,2,121
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
250,XSP,Spratly Islands,251,1.874750e+00,"MULTIPOLYGON (((114.35472 11.45139, 114.35167 ...",,Islas Spratly,Îles Spratleys,Ilhas Spratly,,"[{""SliceNumber"":141,""per_global"":0.0,""SPS_glob...",,"[{""SliceNumber"":3883,""per_global"":0.0,""SPS_glo...",0,36,0,3,39
251,YEM,Yemen,252,4.522035e+05,"MULTIPOLYGON (((53.26931 12.12875, 53.31011 12...",,Yemen,Yémen,Iémen,"[{""SliceNumber"":974,""per_global"":74.01,""SPS_gl...","[{""SliceNumber"":26,""per_global"":0.26,""SPS_glob...","[{""SliceNumber"":33,""per_global"":14.12,""SPS_glo...","[{""SliceNumber"":9,""per_global"":2.12,""SPS_globa...",6,218,71,129,424
252,ZAF,South Africa,253,1.219710e+06,"MULTIPOLYGON (((19.42097 -34.67903, 19.42208 -...",,Sudáfrica,Afrique du Sud,África do Sul,"[{""SliceNumber"":33,""per_global"":43.92,""SPS_glo...","[{""SliceNumber"":26,""per_global"":3.15,""SPS_glob...","[{""SliceNumber"":28,""per_global"":3.91,""SPS_glob...","[{""SliceNumber"":40,""per_global"":0.95,""SPS_glob...",117,706,271,399,1493
253,ZMB,Zambia,254,7.504919e+05,"POLYGON ((25.88753 -17.98407, 25.88284 -17.984...",,Zambia,Zambie,Zâmbia,"[{""SliceNumber"":34,""per_global"":36.77,""SPS_glo...","[{""SliceNumber"":26,""per_global"":3.52,""SPS_glob...","[{""SliceNumber"":28,""per_global"":10.72,""SPS_glo...","[{""SliceNumber"":40,""per_global"":11.6,""SPS_glob...",93,695,275,218,1281


### Save table with biodiversity data

In [44]:
# Write the biodiversity-only table (species JSON + counts) to the outputs folder
biodiversity_csv = f'{path_out}/gadm0_precalculated_SPS_biodiversity_only.csv'
dff_nspecies.to_csv(biodiversity_csv)

---
## Add contextual data
The contextual data here comes in part from the previous precalculated tables (2021), because the original datasets are not available.

In [51]:
# Get contextual data (downloaded from the previous precalculated tables,
# as we don't have the original datasets)
ctx = pd.read_csv(f'{path_in}/contextual_precalculations_2021.csv')
ctx = ctx.drop(columns='Unnamed: 0')  # saved index column, not data
ctx.head(1)

Unnamed: 0,MOL_ID,percentage_protected,population_sum,majority_land_cover_climate_regime,land_cover_majority,climate_regime_majority,percent_irrigated,percent_rainfed,percent_rangeland,percent_urban,country_size
0,1,20.609302,115656.129532,335.0,Shrubland,Tropical Dry,,24.66,,6.39,5


In [52]:
# List the contextual columns available before renaming
ctx.columns

Index(['MOL_ID', 'percentage_protected', 'population_sum',
       'majority_land_cover_climate_regime', 'land_cover_majority',
       'climate_regime_majority', 'percent_irrigated', 'percent_rainfed',
       'percent_rangeland', 'percent_urban', 'country_size'],
      dtype='object')

In [56]:
# Relabel the four land-use percentage columns to the human-pressure categories.
# NOTE(review): the old and new labels do not describe the same thing (e.g. rainfed -> urban,
# urban -> agriculture) and the output file is called "fakecontextual" - presumably
# placeholder values; confirm before using them as real data.
pressure_labels = {
    'percent_irrigated': 'percent_energy',
    'percent_rainfed': 'percent_urban',
    'percent_rangeland': 'percent_transportation',
    'percent_urban': 'percent_agriculture',
}
ctx2 = ctx.rename(columns=pressure_labels).fillna(0)

# Whatever is left of 100% after the four categories is assigned to human intrusion
ctx2['percent_human_intrusion'] = (
    100
    - ctx2['percent_energy']
    - ctx2['percent_urban']
    - ctx2['percent_transportation']
    - ctx2['percent_agriculture']
)
ctx2.head(5)

Unnamed: 0,MOL_ID,percentage_protected,population_sum,majority_land_cover_climate_regime,land_cover_majority,climate_regime_majority,percent_energy,percent_urban,percent_transportation,percent_agriculture,country_size,percent_human_intrusion
0,1,20.609302,115656.1,335.0,Shrubland,Tropical Dry,0.0,24.66,0.0,6.39,5,68.95
1,2,3.785337,30389700.0,142.0,Grassland,Cool Temperate Dry,6.48,18.49,62.15,0.07,3,12.81
2,3,6.679349,36074950.0,268.0,Forest,Sub Tropical Moist,0.07,4.19,67.89,0.08,2,27.77
3,4,2.88,13601.96,393.0,Grassland,Tropical Moist,0.0,0.0,0.0,0.0,5,100.0
4,5,1.449518,29158.96,111.0,Forest,Cool Temperate Moist,0.0,50.27,0.0,0.0,5,49.73


### Merge contextual data to previous dataframe

In [58]:
# Attach the contextual columns to the country table (left join on MOL_ID).
# NOTE(review): this merges `dff`, not `dff_nspecies`, so the species-count columns are not
# part of the result - confirm that this is intended.
# NOTE(review): `dff2` was used earlier for the name-difference check and is overwritten here.
dff2 = pd.merge(dff, ctx2, how='left', on = 'MOL_ID')
dff2.head()

Unnamed: 0,GID_0,NAME_0,MOL_ID,AREA_KM2,geometry,alternative_names,NAME_0_es,NAME_0_fr,NAME_0_pt,amphibians,...,population_sum,majority_land_cover_climate_regime,land_cover_majority,climate_regime_majority,percent_energy,percent_urban,percent_transportation,percent_agriculture,country_size,percent_human_intrusion
0,ABW,Aruba,1,181.9384,"POLYGON ((-69.97820 12.46986, -70.02847 12.503...",,Aruba,Aruba,Aruba,"[{""SliceNumber"":1819,""per_global"":0.6,""SPS_glo...",...,115656.1,335.0,Shrubland,Tropical Dry,0.0,24.66,0.0,6.39,5,68.95
1,AFG,Afghanistan,2,643857.5,"POLYGON ((68.52644 31.75435, 68.53852 31.75457...",,Afganistán,Afghanistan,Afeganistão,"[{""SliceNumber"":32,""per_global"":100.0,""SPS_glo...",...,30389700.0,142.0,Grassland,Cool Temperate Dry,6.48,18.49,62.15,0.07,3,12.81
2,AGO,Angola,3,1247422.0,"MULTIPOLYGON (((11.73347 -16.67255, 11.74014 -...",,Angola,Angola,Angola,"[{""SliceNumber"":2,""per_global"":0.59,""SPS_globa...",...,36074950.0,268.0,Forest,Sub Tropical Moist,0.07,4.19,67.89,0.08,2,27.77
3,AIA,Anguilla,4,83.30331,"MULTIPOLYGON (((-63.42375 18.58903, -63.42847 ...",,Anguila,Anguilla,Anguila,"[{""SliceNumber"":1819,""per_global"":0.23,""SPS_gl...",...,13601.96,393.0,Grassland,Tropical Moist,0.0,0.0,0.0,0.0,5,100.0
4,ALA,Åland,5,1506.261,"MULTIPOLYGON (((21.32195 59.74986, 21.32472 59...",,Åland,Åland,Ilhas Åland,"[{""SliceNumber"":963,""per_global"":0.01,""SPS_glo...",...,29158.96,111.0,Forest,Cool Temperate Moist,0.0,50.27,0.0,0.0,5,49.73


In [59]:
# Write the country table with contextual data as GeoJSON.
# Uses `path_out` (the same outputs folder) instead of repeating the absolute path.
dff2.to_file(f'{path_out}/gadm0_fakecontextual.geojson', driver='GeoJSON')


In [60]:
# List the columns of the table that was written out
dff2.columns

Index(['GID_0', 'NAME_0', 'MOL_ID', 'AREA_KM2', 'geometry',
       'alternative_names', 'NAME_0_es', 'NAME_0_fr', 'NAME_0_pt',
       'amphibians', 'birds', 'mammals', 'reptiles', 'percentage_protected',
       'population_sum', 'majority_land_cover_climate_regime',
       'land_cover_majority', 'climate_regime_majority', 'percent_energy',
       'percent_urban', 'percent_transportation', 'percent_agriculture',
       'country_size', 'percent_human_intrusion'],
      dtype='object')

In [61]:
# Spot check: protected percentage for the row with index label 0
dff2.loc[0, 'percentage_protected']

20.6093023255814