# Precalculated data for gadm0
## Setup
### Library import
We import all the required Python libraries

In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
import arcgis
from arcgis.gis import GIS
import json
import pandas as pd
from arcgis.features import FeatureLayerCollection
import requests as re
from copy import deepcopy
from itertools import repeat
import functools

### Utils

**getHTfromId**

In [2]:
def getHTfromId(item_id):
    """Download the first table of an ArcGIS Online item as a DataFrame.

    Relies on the module-level ``gis`` connection, so the connection cell
    must have been executed before this function is called.

    Parameters
    ----------
    item_id : str
        Id of the ArcGIS Online item to fetch.

    Returns
    -------
    The ``sdf`` attribute of an unfiltered ``query()`` on the item's first
    table.

    Raises
    ------
    ValueError
        If no item is found for ``item_id`` or the item exposes no tables.
    """
    item = gis.content.get(item_id)
    # Fail with an explicit message instead of an opaque AttributeError on None
    if item is None:
        raise ValueError(f"No ArcGIS item found for id {item_id!r}")
    tables = item.tables
    if not tables:
        raise ValueError(f"ArcGIS item {item_id!r} does not contain any table")
    return tables[0].query().sdf

**format_df**

In [3]:
def format_df(path, file_name, lookups_id, aoi_areas=None):
    """Read a per-country species table and add the percentage columns.

    Parameters
    ----------
    path : str
        Folder containing the CSV file.
    file_name : str
        Name of the CSV file; it must provide the columns ``SliceNumber``,
        ``SUM`` and ``MOL_ID``.
    lookups_id : str
        ArcGIS Online item id of the lookup table, fetched with
        ``getHTfromId``. Its ``range_area_km2``, ``SPS`` and
        ``conservation_target`` columns are joined on ``SliceNumber``.
    aoi_areas : DataFrame, optional
        Table with ``MOL_ID`` and ``AREA_KM2`` columns. Defaults to the
        module-level ``gadm0`` table, which keeps existing calls working.

    Returns
    -------
    DataFrame
        The CSV rows plus the lookup columns, ``per_global``
        (``SUM`` / ``range_area_km2`` in percent), ``AREA_KM2`` and
        ``per_aoi`` (``SUM`` / ``AREA_KM2`` in percent). Both percentages are
        rounded to two decimals and capped at 100.
    """
    df = pd.read_csv(f'{path}/{file_name}')

    ### Get information from lookup tables:
    lookup = getHTfromId(lookups_id)
    lookup_cols = ['SliceNumber', 'range_area_km2', 'SPS', 'conservation_target']
    df = df.merge(lookup[lookup_cols], how='left', on='SliceNumber')

    ### Get species area against global species range (max presence 100%):
    df['per_global'] = (df['SUM'] / df['range_area_km2'] * 100).round(2).clip(upper=100)

    ### Get species area against aoi area (max presence 100%):
    if aoi_areas is None:
        aoi_areas = gadm0
    # Explicit key: do not let pandas guess the join columns from shared names.
    # The inner join drops rows whose MOL_ID is missing from the area table.
    df = df.merge(aoi_areas[['MOL_ID', 'AREA_KM2']], how='inner', on='MOL_ID')
    df['per_aoi'] = (df['SUM'] / df['AREA_KM2'] * 100).round(2).clip(upper=100)

    return df

## Connect to ArcGIS API

In [4]:
# Parse the KEY=VALUE pairs of the local .env file into the `env` dict.
env_path = ".env"
env = {}
with open(env_path) as f:
    for line in f:
        line = line.rstrip("\r\n")
        # Skip blank lines, comments and anything that is not an assignment
        if not line.strip() or line.lstrip().startswith("#") or "=" not in line:
            continue
        # Split on the first "=" only, so values may themselves contain "="
        env_key, _, env_value = line.partition("=")
        env[env_key.strip()] = env_value

In [5]:
# ArcGIS Online credentials, taken from the parsed .env entries
aol_password, aol_username = env['ARCGIS_SOFIA_PASS'], env['ARCGIS_SOFIA_USER']

In [6]:
# Open the ArcGIS Online session used by the lookup-table helper
portal_url = "https://eowilson.maps.arcgis.com"
gis = GIS(portal_url, aol_username, aol_password, profile="eowilson")

Keyring backend being used (keyring.backends.OS_X.Keyring (priority: 5)) either failed to install or is not recommended by the keyring project (i.e. it is not secure). This means you can not use stored passwords through GIS's persistent profiles. Note that extra system-wide steps must be taken on a Linux machine to use the python keyring module securely. Read more about this at the keyring API doc (http://bit.ly/2EWDP7B) and the ArcGIS API for Python doc (http://bit.ly/2CK2wG8).


## Read data

In [7]:
# Input and output folders share the same base directory
base_dir = '/Users/sofia/Documents/HE_Data/Precalculated/gadm0'
path_in = f'{base_dir}/Inputs'
path_out = f'{base_dir}/Outputs'

### Get country boundaries: gadm0

In [8]:
# gadm0 table: country boundaries read from the shapefile in the input folder
gadm0_shapefile = f'{path_in}/gadm36_level0_original_simple.shp'
gadm0 = gpd.read_file(gadm0_shapefile)

In [9]:
# Preview the first rows of the country boundaries table
gadm0.head()

Unnamed: 0,GID_0,NAME_0,AREA_KM2,MOL_ID,InPoly_FID,SimPgnFlag,MaxSimpTol,MinSimpTol,geometry
0,ABW,Aruba,181.9384,1,1,0,0.005,0.005,"POLYGON ((-69.97820 12.46986, -70.02847 12.503..."
1,AFG,Afghanistan,643857.5,2,2,0,0.005,0.005,"POLYGON ((68.52644 31.75435, 68.53852 31.75457..."
2,AGO,Angola,1247422.0,3,3,0,0.005,0.005,"MULTIPOLYGON (((11.73347 -16.67255, 11.74014 -..."
3,AIA,Anguilla,83.30331,4,4,0,0.005,0.005,"MULTIPOLYGON (((-63.42375 18.58903, -63.42847 ..."
4,ALA,Åland,1506.261,5,5,0,0.005,0.005,"MULTIPOLYGON (((21.32195 59.74986, 21.32472 59..."


In [9]:
# Independent copy of the country table limited to the columns used downstream
country_cols = ['GID_0', 'NAME_0', 'MOL_ID', 'AREA_KM2', 'geometry']
dff = gadm0[country_cols].copy()
dff.head()

Unnamed: 0,GID_0,NAME_0,MOL_ID,AREA_KM2,geometry
0,ABW,Aruba,1,181.9384,"POLYGON ((-69.97820 12.46986, -70.02847 12.503..."
1,AFG,Afghanistan,2,643857.5,"POLYGON ((68.52644 31.75435, 68.53852 31.75457..."
2,AGO,Angola,3,1247422.0,"MULTIPOLYGON (((11.73347 -16.67255, 11.74014 -..."
3,AIA,Anguilla,4,83.30331,"MULTIPOLYGON (((-63.42375 18.58903, -63.42847 ..."
4,ALA,Åland,5,1506.261,"MULTIPOLYGON (((21.32195 59.74986, 21.32472 59..."


### Get biodiversity data for countries

In [10]:
### Ids of lookup tables for each taxa in ArcGIS online
lookups = dict(
    amphibians='de2309ec6aa64223a8bea682c0200d34',
    birds='b5f5c8d693b74abd9b0d236915d8e739',
    mammals='1d3b50e3b8544730ae0e2a80f00b4119',
    reptiles='bc6de8b9b8df4fffb6aa4208f4bf1467',
)

# Get data for all taxa: each one reads `gadm0_<taxon>.csv` from the input
# folder and joins the matching lookup table.
amphibians, birds, mammals, reptiles = (
    format_df(path_in, f'gadm0_{taxon}.csv', lookups[taxon])
    for taxon in ('amphibians', 'birds', 'mammals', 'reptiles')
)

In [11]:
# Label the SPS column as the global score
amphibians = amphibians.rename({'SPS': 'SPS_global'}, axis='columns')
amphibians.head(1)

Unnamed: 0,OBJECTID,MOL_ID,SliceNumber,FREQUENCY,SUM,range_area_km2,SPS_global,conservation_target,per_global,AREA_KM2,per_aoi
0,1,1,1819,1,151.0,24981,65,50,0.6,181.938403,83.0


In [12]:
# Label the SPS column as the global score
birds = birds.rename({'SPS': 'SPS_global'}, axis='columns')
birds.head(1)

Unnamed: 0,OBJECTID,MOL_ID,SliceNumber,FREQUENCY,SUM,range_area_km2,SPS_global,conservation_target,per_global,AREA_KM2,per_aoi
0,1,1,142,1,36.0,2656221,100,15,0.0,181.938403,19.79


In [13]:
# Label the SPS column as the global score
mammals = mammals.rename({'SPS': 'SPS_global'}, axis='columns')
mammals.head(1)

Unnamed: 0,OBJECTID,MOL_ID,SliceNumber,FREQUENCY,SUM,range_area_km2,SPS_global,conservation_target,per_global,AREA_KM2,per_aoi
0,1,1,329,1,55.0,1173243,100,15,0.0,181.938403,30.23


In [14]:
# Label the SPS column as the global score
reptiles = reptiles.rename({'SPS': 'SPS_global'}, axis='columns')
reptiles.head(1)

Unnamed: 0,OBJECTID,MOL_ID,SliceNumber,FREQUENCY,SUM,range_area_km2,SPS_global,conservation_target,per_global,AREA_KM2,per_aoi
0,1,1,353,1,164.0,1441180,100,15,0.01,181.938403,90.14


#### Calculate SPS_aoi

In [15]:
def _read_wdpa(file_name, sum_column):
    """Read one WDPA_countries CSV as integers and rename its sum column to SUM_PA."""
    table = pd.read_csv(f'{path_in}/WDPA_countries/{file_name}')
    return table.astype(int).rename(columns={sum_column: 'SUM_PA'})


# NOTE(review): the mammals file uses 'SUM_presence' while the other taxa use
# 'SUM_<taxon>' -- presumably that is the real column name; confirm against the CSV.
wdpa_amph = _read_wdpa('Amphibians_wdpa.csv', 'SUM_amphibians')
wdpa_bird = _read_wdpa('Birds_wdpa.csv', 'SUM_birds')
wdpa_mamm = _read_wdpa('Mammals_wdpa.csv', 'SUM_presence')
wdpa_rept = _read_wdpa('Reptiles_wdpa.csv', 'SUM_reptiles')

In [16]:
# Show the first row of the amphibians WDPA table to check its columns
wdpa_amph.head(1)

Unnamed: 0,OID_,MOL_ID,SliceNumber,FREQUENCY,SUM_PA,COUNTRY_ID
0,1,2,3318,1,2,171


In [18]:
# Aggregate data by country: add up SUM_PA of each species (SliceNumber) over
# the different WDPA (MOL_ID) that belong to the same country (COUNTRY_ID)
country_species = ['COUNTRY_ID', 'SliceNumber']
wdpa_amph2 = wdpa_amph.groupby(country_species)[['SUM_PA']].sum().reset_index()
wdpa_bird2 = wdpa_bird.groupby(country_species)[['SUM_PA']].sum().reset_index()
wdpa_mamm2 = wdpa_mamm.groupby(country_species)[['SUM_PA']].sum().reset_index()
wdpa_rept2 = wdpa_rept.groupby(country_species)[['SUM_PA']].sum().reset_index()

In [19]:
def _add_protected_area(species, protected):
    """Left-join SUM_PA onto a species table, matching MOL_ID with COUNTRY_ID.

    Every missing value of the merged table is replaced by 0 (this covers all
    columns, not only SUM_PA) and the helper COUNTRY_ID column is removed.
    """
    merged = species.merge(
        protected,
        how='left',
        left_on=['MOL_ID', 'SliceNumber'],
        right_on=['COUNTRY_ID', 'SliceNumber'],
    )
    return merged.fillna(0).drop(columns='COUNTRY_ID')


# In species table add information for species in WDPA (SUM_PA)
amphibians2 = _add_protected_area(amphibians, wdpa_amph2)
birds2 = _add_protected_area(birds, wdpa_bird2)
mammals2 = _add_protected_area(mammals, wdpa_mamm2)
reptiles2 = _add_protected_area(reptiles, wdpa_rept2)

In [21]:
def _sps_aoi(species):
    """Return SPS_aoi: protected share of SUM (in %) relative to the conservation target (in %).

    The result is truncated to int.
    NOTE(review): assumes SUM and conservation_target are never 0, otherwise the
    ratio is not finite before the integer cast -- confirm.
    """
    protected_pct = species['SUM_PA'] / species['SUM'] * 100
    return (protected_pct / species['conservation_target'] * 100).astype(int)


# Calculate SPS_aoi
amphibians2['SPS_aoi'] = _sps_aoi(amphibians2)
birds2['SPS_aoi'] = _sps_aoi(birds2)
mammals2['SPS_aoi'] = _sps_aoi(mammals2)
reptiles2['SPS_aoi'] = _sps_aoi(reptiles2)

In [27]:
# Display the amphibians table to inspect the new SUM_PA and SPS_aoi columns
amphibians2

Unnamed: 0,OBJECTID,MOL_ID,SliceNumber,FREQUENCY,SUM,range_area_km2,SPS_global,conservation_target,per_global,AREA_KM2,per_aoi,SUM_PA,SPS_aoi
0,1,1,1819,1,151.0,24981,65,50,0.60,181.938403,83.00,37.0,49
1,2,1,4583,1,95.0,509216,100,15,0.02,181.938403,52.22,26.0,182
2,3,1,5543,1,152.0,10573946,100,15,0.00,181.938403,83.54,39.0,171
3,4,2,32,4,317.0,317,0,100,100.00,643857.477165,0.05,0.0,0
4,5,2,950,43,44105.0,275425,12,15,16.01,643857.477165,6.85,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
12766,12767,255,6042,35,4313.0,865504,100,15,0.50,390680.723213,1.10,3015.0,466
12767,12768,255,6116,2,17.0,17,58,100,100.00,390680.723213,0.00,8.0,47
12768,12769,255,6148,42,347136.0,4276570,100,15,8.12,390680.723213,88.85,145522.0,279
12769,12770,255,6151,42,367211.0,5331630,100,15,6.89,390680.723213,93.99,171713.0,311


In [28]:
# Limit SPS over 100 to 100.
# The capped column is assigned back explicitly: calling an in-place method on
# `df['col']` is chained assignment, which recent pandas versions warn about
# and which leaves the parent frame unchanged under Copy-on-Write.
amphibians2['SPS_aoi'] = amphibians2['SPS_aoi'].clip(upper=100)
birds2['SPS_aoi'] = birds2['SPS_aoi'].clip(upper=100)
mammals2['SPS_aoi'] = mammals2['SPS_aoi'].clip(upper=100)
reptiles2['SPS_aoi'] = reptiles2['SPS_aoi'].clip(upper=100)

## Processing

In [34]:
# Format biodiversity data in a string: one JSON array of species records per country
bio_cols = ['SliceNumber', 'per_global', 'per_aoi', 'SPS_global', 'SPS_aoi']


def _species_json_by_country(species, taxon):
    """Return one row per MOL_ID with the species records serialised as JSON in column `taxon`."""
    per_country = species.groupby('MOL_ID')[bio_cols]
    as_json = per_country.apply(lambda rows: rows.to_json(orient='records'))
    return as_json.to_frame(taxon).reset_index()


amphibians_bio = _species_json_by_country(amphibians2, 'amphibians')
birds_bio = _species_json_by_country(birds2, 'birds')
mammals_bio = _species_json_by_country(mammals2, 'mammals')
reptiles_bio = _species_json_by_country(reptiles2, 'reptiles')


In [35]:
# Attach the JSON column of every taxon to the country table (left join on MOL_ID).
taxa_tables = [amphibians_bio, birds_bio, mammals_bio, reptiles_bio]
taxa_cols = ['amphibians', 'birds', 'mammals', 'reptiles']
dff = functools.reduce(
    lambda left, right: pd.merge(left, right, how='left', on='MOL_ID'),
    taxa_tables,
    # Drop taxa columns left by a previous run so that re-executing this cell
    # does not create duplicated `_x` / `_y` columns.
    dff.drop(columns=taxa_cols, errors='ignore'),
)
dff.head()

Unnamed: 0,GID_0,NAME_0,MOL_ID,AREA_KM2,geometry,amphibians,birds,mammals,reptiles
0,ABW,Aruba,1,181.9384,"POLYGON ((-69.97820 12.46986, -70.02847 12.503...","[{""SliceNumber"":1819,""per_global"":0.6,""per_aoi...","[{""SliceNumber"":142,""per_global"":0.0,""per_aoi""...","[{""SliceNumber"":329,""per_global"":0.0,""per_aoi""...","[{""SliceNumber"":353,""per_global"":0.01,""per_aoi..."
1,AFG,Afghanistan,2,643857.5,"POLYGON ((68.52644 31.75435, 68.53852 31.75457...","[{""SliceNumber"":32,""per_global"":100.0,""per_aoi...","[{""SliceNumber"":26,""per_global"":1.44,""per_aoi""...","[{""SliceNumber"":167,""per_global"":11.15,""per_ao...","[{""SliceNumber"":1,""per_global"":28.71,""per_aoi""..."
2,AGO,Angola,3,1247422.0,"MULTIPOLYGON (((11.73347 -16.67255, 11.74014 -...","[{""SliceNumber"":2,""per_global"":0.59,""per_aoi"":...","[{""SliceNumber"":26,""per_global"":4.67,""per_aoi""...","[{""SliceNumber"":28,""per_global"":2.22,""per_aoi""...","[{""SliceNumber"":40,""per_global"":26.68,""per_aoi..."
3,AIA,Anguilla,4,83.30331,"MULTIPOLYGON (((-63.42375 18.58903, -63.42847 ...","[{""SliceNumber"":1819,""per_global"":0.23,""per_ao...","[{""SliceNumber"":142,""per_global"":0.0,""per_aoi""...","[{""SliceNumber"":329,""per_global"":0.0,""per_aoi""...","[{""SliceNumber"":316,""per_global"":25.32,""per_ao..."
4,ALA,Åland,5,1506.261,"MULTIPOLYGON (((21.32195 59.74986, 21.32472 59...","[{""SliceNumber"":963,""per_global"":0.01,""per_aoi...","[{""SliceNumber"":116,""per_global"":0.01,""per_aoi...","[{""SliceNumber"":129,""per_global"":0.01,""per_aoi...","[{""SliceNumber"":2500,""per_global"":0.01,""per_ao..."


In [36]:
# Inspect the JSON string stored in the `birds` column for the country with MOL_ID 1
dff.loc[dff['MOL_ID']==1,'birds'].values[0]

'[{"SliceNumber":142,"per_global":0.0,"per_aoi":19.79,"SPS_global":100,"SPS_aoi":100},{"SliceNumber":463,"per_global":0.0,"per_aoi":29.68,"SPS_global":90,"SPS_aoi":100},{"SliceNumber":466,"per_global":0.0,"per_aoi":4.95,"SPS_global":100,"SPS_aoi":100},{"SliceNumber":538,"per_global":0.0,"per_aoi":3.3,"SPS_global":0,"SPS_aoi":0},{"SliceNumber":539,"per_global":0.0,"per_aoi":1.65,"SPS_global":3,"SPS_aoi":0},{"SliceNumber":905,"per_global":0.0,"per_aoi":86.29,"SPS_global":56,"SPS_aoi":100},{"SliceNumber":970,"per_global":0.0,"per_aoi":19.79,"SPS_global":93,"SPS_aoi":100},{"SliceNumber":1099,"per_global":0.0,"per_aoi":1.1,"SPS_global":82,"SPS_aoi":0},{"SliceNumber":1179,"per_global":0.0,"per_aoi":13.74,"SPS_global":100,"SPS_aoi":100},{"SliceNumber":1302,"per_global":0.0,"per_aoi":8.79,"SPS_global":44,"SPS_aoi":0},{"SliceNumber":1306,"per_global":0.0,"per_aoi":31.88,"SPS_global":64,"SPS_aoi":100},{"SliceNumber":1309,"per_global":0.01,"per_aoi":38.47,"SPS_global":48,"SPS_aoi":100},{"SliceNum

**Save table**

In [22]:
# Write the biodiversity-only table to the output folder
biodiversity_csv = f'{path_out}/gadm0_precalculated_biodiversity_only.csv'
dff.to_csv(biodiversity_csv)

### Get contextual data from old table

In [24]:
# Previously generated table that holds the contextual columns
ctx_geojson = f'{path_in}/all_precalc_gadm0.geojson'
ctx = gpd.read_file(ctx_geojson)
ctx.head(1)

Unnamed: 0,GID_0,NAME_0,MOL_ID,AREA_KM2,birds,percentage_protected,population_sum,majority_land_cover_climate_regime,land_cover_majority,climate_regime_majority,country_size,percent_irrigated,percent_rainfed,percent_rangeland,percent_urban,geometry
0,ABW,Aruba,1,181.938403,"[ { ""SliceNumber"": 142.0, ""percentage_presence...",20.609302,115656.129532,335.0,Shrubland,Tropical Dry,5,,24.66,,6.39,"POLYGON ((-69.97820 12.46986, -70.02847 12.503..."


In [28]:
# Keep the join key (MOL_ID) and the contextual columns only
ctx_cols = [
    'MOL_ID',
    'percentage_protected',
    'population_sum',
    'majority_land_cover_climate_regime',
    'land_cover_majority',
    'climate_regime_majority',
    'percent_irrigated',
    'percent_rainfed',
    'percent_rangeland',
    'percent_urban',
    'country_size',
]
ctx2 = ctx[ctx_cols].copy()

In [29]:
# Preview the first rows of the selected contextual columns
ctx2.head()

Unnamed: 0,MOL_ID,percentage_protected,population_sum,majority_land_cover_climate_regime,land_cover_majority,climate_regime_majority,percent_irrigated,percent_rainfed,percent_rangeland,percent_urban,country_size
0,1,20.609302,115656.1,335.0,Shrubland,Tropical Dry,,24.66,,6.39,5
1,2,3.785337,30389700.0,142.0,Grassland,Cool Temperate Dry,6.48,18.49,62.15,0.07,3
2,3,6.679349,36074950.0,268.0,Forest,Sub Tropical Moist,0.07,4.19,67.89,0.08,2
3,4,2.88,13601.96,393.0,Grassland,Tropical Moist,,,,,5
4,5,1.449518,29158.96,111.0,Forest,Cool Temperate Moist,,50.27,,,5


### Merge contextual data to the country (gadm0) dataframe

In [30]:
# Left join: every country row is kept and gains the contextual columns
dff2 = dff.merge(ctx2, how='left', on='MOL_ID')
dff2.head()

Unnamed: 0,GID_0,NAME_0,MOL_ID,AREA_KM2,geometry,amphibians,birds,mammals,reptiles,percentage_protected,population_sum,majority_land_cover_climate_regime,land_cover_majority,climate_regime_majority,percent_irrigated,percent_rainfed,percent_rangeland,percent_urban,country_size
0,ABW,Aruba,1,181.9384,"POLYGON ((-69.97820 12.46986, -70.02847 12.503...","[{""SliceNumber"":1819,""per_global"":0.6,""per_aoi...","[{""SliceNumber"":142,""per_global"":0.0,""per_aoi""...","[{""SliceNumber"":329,""per_global"":0.0,""per_aoi""...","[{""SliceNumber"":353,""per_global"":0.01,""per_aoi...",20.609302,115656.1,335.0,Shrubland,Tropical Dry,,24.66,,6.39,5
1,AFG,Afghanistan,2,643857.5,"POLYGON ((68.52644 31.75435, 68.53852 31.75457...","[{""SliceNumber"":32,""per_global"":100.0,""per_aoi...","[{""SliceNumber"":26,""per_global"":1.44,""per_aoi""...","[{""SliceNumber"":167,""per_global"":11.15,""per_ao...","[{""SliceNumber"":1,""per_global"":28.71,""per_aoi""...",3.785337,30389700.0,142.0,Grassland,Cool Temperate Dry,6.48,18.49,62.15,0.07,3
2,AGO,Angola,3,1247422.0,"MULTIPOLYGON (((11.73347 -16.67255, 11.74014 -...","[{""SliceNumber"":2,""per_global"":0.59,""per_aoi"":...","[{""SliceNumber"":26,""per_global"":4.67,""per_aoi""...","[{""SliceNumber"":28,""per_global"":2.22,""per_aoi""...","[{""SliceNumber"":40,""per_global"":26.68,""per_aoi...",6.679349,36074950.0,268.0,Forest,Sub Tropical Moist,0.07,4.19,67.89,0.08,2
3,AIA,Anguilla,4,83.30331,"MULTIPOLYGON (((-63.42375 18.58903, -63.42847 ...","[{""SliceNumber"":1819,""per_global"":0.23,""per_ao...","[{""SliceNumber"":142,""per_global"":0.0,""per_aoi""...","[{""SliceNumber"":329,""per_global"":0.0,""per_aoi""...","[{""SliceNumber"":316,""per_global"":25.32,""per_ao...",2.88,13601.96,393.0,Grassland,Tropical Moist,,,,,5
4,ALA,Åland,5,1506.261,"MULTIPOLYGON (((21.32195 59.74986, 21.32472 59...","[{""SliceNumber"":963,""per_global"":0.01,""per_aoi...","[{""SliceNumber"":116,""per_global"":0.01,""per_aoi...","[{""SliceNumber"":129,""per_global"":0.01,""per_aoi...","[{""SliceNumber"":2500,""per_global"":0.01,""per_ao...",1.449518,29158.96,111.0,Forest,Cool Temperate Moist,,50.27,,,5


In [31]:
# Save the final table as GeoJSON in the configured output folder.
# The path is built from `path_out` instead of repeating the absolute path.
dff2.to_file(f'{path_out}/gadm0_precalculated_20220224.geojson', driver='GeoJSON')


In [None]:
# NOTE(review): the path ends with '/', so it looks like a folder rather than a
# CSV file, and this cell has never been executed. TODO: point it at the
# intended file or remove the cell.
gadm1= pd.read_csv('/Users/sofia/Documents/HE_Data/Precalculated/gadm1/')