# Precalculated data for gadm0
## Setup
### Library import
We import all the required Python libraries

In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
import arcgis
from arcgis.gis import GIS
import json
import pandas as pd
from arcgis.features import FeatureLayerCollection
import requests as re
from copy import deepcopy
from itertools import repeat
import functools

### Utils

**getHTfromId**

In [2]:
def getHTfromId(item_id):
    """Fetch the first table of an ArcGIS Online item as a DataFrame.

    Uses the module-level ``gis`` connection, so that connection must be
    created before this function is called.

    Parameters
    ----------
    item_id : str
        Id of the ArcGIS Online item that hosts the table.

    Returns
    -------
    pandas.DataFrame
        The result of querying all rows of the item's first table (``.sdf``).

    Raises
    ------
    ValueError
        If the item cannot be retrieved or it exposes no tables.
    """
    item = gis.content.get(item_id)
    # A missing or inaccessible item would otherwise fail later with an
    # opaque "'NoneType' object has no attribute 'tables'".
    if item is None:
        raise ValueError(f'ArcGIS item {item_id!r} was not found or is not accessible')

    tables = item.tables
    if not tables:
        raise ValueError(f'ArcGIS item {item_id!r} does not expose any table')

    # Query with no filter to pull every row of the first table.
    return tables[0].query().sdf

**format_df**

In [4]:
def format_df(path, file_name, lookups_id):
    """Load one taxon's per-area species table and add percentage columns.

    Reads ``{path}/{file_name}`` as CSV, joins the lookup table returned by
    ``getHTfromId(lookups_id)`` on ``SliceNumber`` and the module-level
    ``gadm0`` table on ``MOL_ID``, then derives two columns, both rounded to
    two decimals and capped at 100:

    * ``per_global`` = ``SUM / range_area_km2 * 100``
    * ``per_aoi``    = ``SUM / AREA_KM2 * 100``

    Parameters
    ----------
    path : str
        Folder containing the CSV.
    file_name : str
        Name of the CSV file; it must provide ``MOL_ID``, ``SliceNumber``
        and ``SUM`` columns.
    lookups_id : str
        ArcGIS Online item id of the lookup table providing
        ``SliceNumber`` and ``range_area_km2``.

    Returns
    -------
    pandas.DataFrame
        The CSV rows that have a matching ``MOL_ID`` in ``gadm0``, with
        ``range_area_km2``, ``per_global``, ``AREA_KM2`` and ``per_aoi``
        added.
    """
    df = pd.read_csv(f'{path}/{file_name}')

    ### Get species area against global species range:
    lookup = getHTfromId(lookups_id)
    df = df.merge(lookup[['SliceNumber', 'range_area_km2']], how='left', on='SliceNumber')
    df['per_global'] = (df['SUM'] / df['range_area_km2'] * 100).round(2)
    df.loc[df['per_global'] > 100, 'per_global'] = 100  ### make max presence 100%

    ### Get species area against aoi area:
    # Join explicitly on MOL_ID: without `on`, pandas joins on every column
    # the two frames share, so an unexpected common column would silently
    # change the result. `how='inner'` is the pandas default, spelled out.
    df = df.merge(gadm0[['MOL_ID', 'AREA_KM2']], how='inner', on='MOL_ID')
    df['per_aoi'] = (df['SUM'] / df['AREA_KM2'] * 100).round(2)
    df.loc[df['per_aoi'] > 100, 'per_aoi'] = 100  ### make max presence 100%

    return df

## Connect to ArcGIS API

In [5]:
# Parse KEY=VALUE pairs from the local .env file into the `env` dict.
env_path = ".env"
env = {}
with open(env_path) as f:
    for line_number, line in enumerate(f, start=1):
        # Blank lines and "#" comments carry no setting; skip them instead
        # of failing on the unpacking below.
        if not line.strip() or line.lstrip().startswith("#"):
            continue
        # Split on the first "=" only, so values that themselves contain "="
        # (common in passwords and tokens) are kept intact.
        env_key, separator, _val = line.partition("=")
        if not separator:
            # Report the line number only; the content may be a secret.
            raise ValueError(f"{env_path}: line {line_number} is not in KEY=VALUE form")
        # Drop the trailing newline; the rest of the value is kept as written.
        env[env_key] = _val.rstrip("\n")

In [6]:
# Pull the ArcGIS Online credentials out of the parsed .env mapping.
# A KeyError here means the corresponding key is missing from the .env file.
aol_password = env['ARCGIS_GRETA_PASS']
aol_username = env['ARCGIS_GRETA_USER']

In [7]:
# Open the ArcGIS Online connection used by getHTfromId.
# NOTE(review): `profile` presumably asks the API to persist these credentials
# under the name "eowilson"; the keyring warning this cell prints suggests that
# persistence may not work on this machine. Confirm.
gis = GIS("https://eowilson.maps.arcgis.com", aol_username, aol_password, profile = "eowilson")

Keyring backend being used (keyring.backends.OS_X.Keyring (priority: 5)) either failed to install or is not recommended by the keyring project (i.e. it is not secure). This means you can not use stored passwords through GIS's persistent profiles. Note that extra system-wide steps must be taken on a Linux machine to use the python keyring module securely. Read more about this at the keyring API doc (http://bit.ly/2EWDP7B) and the ArcGIS API for Python doc (http://bit.ly/2CK2wG8).


## Read data

In [8]:
# Machine-specific root of the gadm0 precalculated data; change it here only.
gadm0_dir = '/Users/sofia/Documents/HE_Data/Precalculated/gadm0'
# Input files are read from Inputs/ and results are written to Outputs/.
path_in = f'{gadm0_dir}/Inputs'
path_out = f'{gadm0_dir}/Outputs'

### gadm0

In [9]:
# wdpa table
gadm0= gpd.read_file(f'{path_in}/gadm36_level0_original_simple.shp')
# gadm0.rename(columns={'AREA_KM':'AREA_KM2'},inplace=True)

In [10]:
# Preview the first rows of the gadm0 table to check the available columns.
gadm0.head()

Unnamed: 0,GID_0,NAME_0,AREA_KM2,MOL_ID,InPoly_FID,SimPgnFlag,MaxSimpTol,MinSimpTol,geometry
0,ABW,Aruba,181.9384,1,1,0,0.005,0.005,"POLYGON ((-69.97820 12.46986, -70.02847 12.503..."
1,AFG,Afghanistan,643857.5,2,2,0,0.005,0.005,"POLYGON ((68.52644 31.75435, 68.53852 31.75457..."
2,AGO,Angola,1247422.0,3,3,0,0.005,0.005,"MULTIPOLYGON (((11.73347 -16.67255, 11.74014 -..."
3,AIA,Anguilla,83.30331,4,4,0,0.005,0.005,"MULTIPOLYGON (((-63.42375 18.58903, -63.42847 ..."
4,ALA,Åland,1506.261,5,5,0,0.005,0.005,"MULTIPOLYGON (((21.32195 59.74986, 21.32472 59..."


In [11]:
# Start the output table from the identifying columns, the area and the geometry
# of gadm0; copy so later merges never touch the source table.
base_columns = ['GID_0', 'NAME_0', 'MOL_ID', 'AREA_KM2', 'geometry']
dff = gadm0[base_columns].copy()
dff.head()

Unnamed: 0,GID_0,NAME_0,MOL_ID,AREA_KM2,geometry
0,ABW,Aruba,1,181.9384,"POLYGON ((-69.97820 12.46986, -70.02847 12.503..."
1,AFG,Afghanistan,2,643857.5,"POLYGON ((68.52644 31.75435, 68.53852 31.75457..."
2,AGO,Angola,3,1247422.0,"MULTIPOLYGON (((11.73347 -16.67255, 11.74014 -..."
3,AIA,Anguilla,4,83.30331,"MULTIPOLYGON (((-63.42375 18.58903, -63.42847 ..."
4,ALA,Åland,5,1506.261,"MULTIPOLYGON (((21.32195 59.74986, 21.32472 59..."


### gadm0's biodiversity data

In [12]:
### Ids of lookup tables for each taxa in ArcGIS online

# ArcGIS Online item ids of the lookup table for each taxon.
lookups = {
    'amphibians': 'c221a727e12048b2a6ec8e762bc5f478',
    'birds': 'bcb31fd9091446a0af3cfdaed334a8da',
    'mammals': '212a3dd4665845deb5d2adf5b597aae0',
    'reptiles': '5b606a03b3fc431e8d4b9191c88bc2b9',
}

# Build the formatted table of every taxon; each input file is named
# gadm0_<taxon>.csv and is paired with the lookup of the same taxon.
amphibians, birds, mammals, reptiles = (
    format_df(path_in, f'gadm0_{taxon}.csv', lookups[taxon])
    for taxon in ('amphibians', 'birds', 'mammals', 'reptiles')
)

In [13]:
# Spot-check the first formatted amphibians row.
amphibians.head(1)

Unnamed: 0,OBJECTID,MOL_ID,SliceNumber,FREQUENCY,SUM,range_area_km2,per_global,AREA_KM2,per_aoi
0,1,1,1819,1,151.0,24981,0.6,181.938403,83.0


In [14]:
# Spot-check the first formatted birds row.
birds.head(1)

Unnamed: 0,OBJECTID,MOL_ID,SliceNumber,FREQUENCY,SUM,range_area_km2,per_global,AREA_KM2,per_aoi
0,1,1,142,1,36.0,2656221,0.0,181.938403,19.79


In [15]:
# Spot-check the first formatted mammals row.
mammals.head(1)

Unnamed: 0,OBJECTID,MOL_ID,SliceNumber,FREQUENCY,SUM,range_area_km2,per_global,AREA_KM2,per_aoi
0,1,1,329,1,55.0,1173243,0.0,181.938403,30.23


In [16]:
# Spot-check the first formatted reptiles row.
reptiles.head(1)

Unnamed: 0,OBJECTID,MOL_ID,SliceNumber,FREQUENCY,SUM,range_area_km2,per_global,AREA_KM2,per_aoi
0,1,1,353,1,164.0,1441180,0.01,181.938403,90.14


## Processing

In [17]:
def _records_json_by_mol_id(taxon_df, column_name):
    """Collapse a formatted taxon table to one JSON string per MOL_ID.

    For every MOL_ID, the ``SliceNumber``, ``per_global`` and ``per_aoi``
    values of its rows are serialised as a JSON list of records.

    Parameters
    ----------
    taxon_df : pandas.DataFrame
        Table holding ``MOL_ID``, ``SliceNumber``, ``per_global`` and
        ``per_aoi`` columns.
    column_name : str
        Name given to the column that holds the JSON strings.

    Returns
    -------
    pandas.DataFrame
        Two columns: ``MOL_ID`` and ``column_name``.
    """
    return (
        taxon_df
        .groupby('MOL_ID')[['SliceNumber', 'per_global', 'per_aoi']]
        .apply(lambda group: group.to_json(orient='records'))
        .to_frame(column_name)
        .reset_index()
    )


# Each name is rebound from the row-level table to its per-MOL_ID JSON summary,
# so this cell can only run once after the tables have been built.
amphibians = _records_json_by_mol_id(amphibians, 'amphibians')
birds = _records_json_by_mol_id(birds, 'birds')
mammals = _records_json_by_mol_id(mammals, 'mammals')
reptiles = _records_json_by_mol_id(reptiles, 'reptiles')


In [18]:
# Check the per-MOL_ID JSON column produced for amphibians.
amphibians.head(1)

Unnamed: 0,MOL_ID,amphibians
0,1,"[{""SliceNumber"":1819,""per_global"":0.6,""per_aoi..."


In [19]:
# Attach the JSON column of every taxon to the base table, one left join
# after another on MOL_ID, so rows without data for a taxon are kept.
dff = functools.reduce(
    lambda merged, taxon_table: pd.merge(merged, taxon_table, how='left', on='MOL_ID'),
    [amphibians, birds, mammals, reptiles],
    dff,
)
dff.head()

Unnamed: 0,GID_0,NAME_0,MOL_ID,AREA_KM2,geometry,amphibians,birds,mammals,reptiles
0,ABW,Aruba,1,181.9384,"POLYGON ((-69.97820 12.46986, -70.02847 12.503...","[{""SliceNumber"":1819,""per_global"":0.6,""per_aoi...","[{""SliceNumber"":142,""per_global"":0.0,""per_aoi""...","[{""SliceNumber"":329,""per_global"":0.0,""per_aoi""...","[{""SliceNumber"":353,""per_global"":0.01,""per_aoi..."
1,AFG,Afghanistan,2,643857.5,"POLYGON ((68.52644 31.75435, 68.53852 31.75457...","[{""SliceNumber"":32,""per_global"":100.0,""per_aoi...","[{""SliceNumber"":26,""per_global"":1.44,""per_aoi""...","[{""SliceNumber"":167,""per_global"":11.15,""per_ao...","[{""SliceNumber"":1,""per_global"":28.71,""per_aoi""..."
2,AGO,Angola,3,1247422.0,"MULTIPOLYGON (((11.73347 -16.67255, 11.74014 -...","[{""SliceNumber"":2,""per_global"":0.59,""per_aoi"":...","[{""SliceNumber"":26,""per_global"":4.67,""per_aoi""...","[{""SliceNumber"":28,""per_global"":2.22,""per_aoi""...","[{""SliceNumber"":40,""per_global"":26.68,""per_aoi..."
3,AIA,Anguilla,4,83.30331,"MULTIPOLYGON (((-63.42375 18.58903, -63.42847 ...","[{""SliceNumber"":1819,""per_global"":0.23,""per_ao...","[{""SliceNumber"":142,""per_global"":0.0,""per_aoi""...","[{""SliceNumber"":329,""per_global"":0.0,""per_aoi""...","[{""SliceNumber"":316,""per_global"":25.32,""per_ao..."
4,ALA,Åland,5,1506.261,"MULTIPOLYGON (((21.32195 59.74986, 21.32472 59...","[{""SliceNumber"":963,""per_global"":0.01,""per_aoi...","[{""SliceNumber"":116,""per_global"":0.01,""per_aoi...","[{""SliceNumber"":129,""per_global"":0.01,""per_aoi...","[{""SliceNumber"":2500,""per_global"":0.01,""per_ao..."


In [21]:
# Inspect the complete birds JSON string stored for MOL_ID 1.
dff.loc[dff['MOL_ID']==1,'birds'].values[0]

'[{"SliceNumber":142,"per_global":0.0,"per_aoi":19.79},{"SliceNumber":463,"per_global":0.0,"per_aoi":29.68},{"SliceNumber":466,"per_global":0.0,"per_aoi":4.95},{"SliceNumber":538,"per_global":0.0,"per_aoi":3.3},{"SliceNumber":539,"per_global":0.0,"per_aoi":1.65},{"SliceNumber":905,"per_global":0.0,"per_aoi":86.29},{"SliceNumber":970,"per_global":0.0,"per_aoi":19.79},{"SliceNumber":1099,"per_global":0.0,"per_aoi":1.1},{"SliceNumber":1179,"per_global":0.0,"per_aoi":13.74},{"SliceNumber":1302,"per_global":0.0,"per_aoi":8.79},{"SliceNumber":1306,"per_global":0.0,"per_aoi":31.88},{"SliceNumber":1309,"per_global":0.01,"per_aoi":38.47},{"SliceNumber":1310,"per_global":0.0,"per_aoi":39.57},{"SliceNumber":1312,"per_global":0.0,"per_aoi":68.15},{"SliceNumber":1314,"per_global":0.0,"per_aoi":40.12},{"SliceNumber":1449,"per_global":0.02,"per_aoi":66.51},{"SliceNumber":1599,"per_global":0.0,"per_aoi":1.1},{"SliceNumber":1680,"per_global":0.0,"per_aoi":16.49},{"SliceNumber":1774,"per_global":0.0,"pe

**Save table**

In [22]:
# Write the biodiversity-only table (index included) to the outputs folder.
biodiversity_csv = f'{path_out}/gadm0_precalculated_biodiversity_only.csv'
dff.to_csv(biodiversity_csv)

### Get contextual data from old table

In [24]:
# Load the earlier precalculated gadm0 table from the inputs folder; it is the
# source of the contextual columns selected in the next step.
ctx = gpd.read_file(f'{path_in}/all_precalc_gadm0.geojson')
ctx.head(1)

Unnamed: 0,GID_0,NAME_0,MOL_ID,AREA_KM2,birds,percentage_protected,population_sum,majority_land_cover_climate_regime,land_cover_majority,climate_regime_majority,country_size,percent_irrigated,percent_rainfed,percent_rangeland,percent_urban,geometry
0,ABW,Aruba,1,181.938403,"[ { ""SliceNumber"": 142.0, ""percentage_presence...",20.609302,115656.129532,335.0,Shrubland,Tropical Dry,5,,24.66,,6.39,"POLYGON ((-69.97820 12.46986, -70.02847 12.503..."


In [28]:
# Keep only the join key and the contextual attributes of the old table;
# its geometry and species columns are left behind.
context_columns = [
    'MOL_ID',
    'percentage_protected',
    'population_sum',
    'majority_land_cover_climate_regime',
    'land_cover_majority',
    'climate_regime_majority',
    'percent_irrigated',
    'percent_rainfed',
    'percent_rangeland',
    'percent_urban',
    'country_size',
]
ctx2 = ctx[context_columns].copy()

In [29]:
# Preview the selected contextual columns.
ctx2.head()

Unnamed: 0,MOL_ID,percentage_protected,population_sum,majority_land_cover_climate_regime,land_cover_majority,climate_regime_majority,percent_irrigated,percent_rainfed,percent_rangeland,percent_urban,country_size
0,1,20.609302,115656.1,335.0,Shrubland,Tropical Dry,,24.66,,6.39,5
1,2,3.785337,30389700.0,142.0,Grassland,Cool Temperate Dry,6.48,18.49,62.15,0.07,3
2,3,6.679349,36074950.0,268.0,Forest,Sub Tropical Moist,0.07,4.19,67.89,0.08,2
3,4,2.88,13601.96,393.0,Grassland,Tropical Moist,,,,,5
4,5,1.449518,29158.96,111.0,Forest,Cool Temperate Moist,,50.27,,,5


### Merge contextual data to gadm0 dataframe

In [30]:
# Left-join the contextual columns onto the biodiversity table by MOL_ID,
# so every row of dff is kept even when it has no contextual data.
dff2 = pd.merge(dff, ctx2, how='left', on = 'MOL_ID')
dff2.head()

Unnamed: 0,GID_0,NAME_0,MOL_ID,AREA_KM2,geometry,amphibians,birds,mammals,reptiles,percentage_protected,population_sum,majority_land_cover_climate_regime,land_cover_majority,climate_regime_majority,percent_irrigated,percent_rainfed,percent_rangeland,percent_urban,country_size
0,ABW,Aruba,1,181.9384,"POLYGON ((-69.97820 12.46986, -70.02847 12.503...","[{""SliceNumber"":1819,""per_global"":0.6,""per_aoi...","[{""SliceNumber"":142,""per_global"":0.0,""per_aoi""...","[{""SliceNumber"":329,""per_global"":0.0,""per_aoi""...","[{""SliceNumber"":353,""per_global"":0.01,""per_aoi...",20.609302,115656.1,335.0,Shrubland,Tropical Dry,,24.66,,6.39,5
1,AFG,Afghanistan,2,643857.5,"POLYGON ((68.52644 31.75435, 68.53852 31.75457...","[{""SliceNumber"":32,""per_global"":100.0,""per_aoi...","[{""SliceNumber"":26,""per_global"":1.44,""per_aoi""...","[{""SliceNumber"":167,""per_global"":11.15,""per_ao...","[{""SliceNumber"":1,""per_global"":28.71,""per_aoi""...",3.785337,30389700.0,142.0,Grassland,Cool Temperate Dry,6.48,18.49,62.15,0.07,3
2,AGO,Angola,3,1247422.0,"MULTIPOLYGON (((11.73347 -16.67255, 11.74014 -...","[{""SliceNumber"":2,""per_global"":0.59,""per_aoi"":...","[{""SliceNumber"":26,""per_global"":4.67,""per_aoi""...","[{""SliceNumber"":28,""per_global"":2.22,""per_aoi""...","[{""SliceNumber"":40,""per_global"":26.68,""per_aoi...",6.679349,36074950.0,268.0,Forest,Sub Tropical Moist,0.07,4.19,67.89,0.08,2
3,AIA,Anguilla,4,83.30331,"MULTIPOLYGON (((-63.42375 18.58903, -63.42847 ...","[{""SliceNumber"":1819,""per_global"":0.23,""per_ao...","[{""SliceNumber"":142,""per_global"":0.0,""per_aoi""...","[{""SliceNumber"":329,""per_global"":0.0,""per_aoi""...","[{""SliceNumber"":316,""per_global"":25.32,""per_ao...",2.88,13601.96,393.0,Grassland,Tropical Moist,,,,,5
4,ALA,Åland,5,1506.261,"MULTIPOLYGON (((21.32195 59.74986, 21.32472 59...","[{""SliceNumber"":963,""per_global"":0.01,""per_aoi...","[{""SliceNumber"":116,""per_global"":0.01,""per_aoi...","[{""SliceNumber"":129,""per_global"":0.01,""per_aoi...","[{""SliceNumber"":2500,""per_global"":0.01,""per_ao...",1.449518,29158.96,111.0,Forest,Cool Temperate Moist,,50.27,,,5


In [31]:
# Write the final table as GeoJSON. Reuse path_out rather than repeating the
# absolute outputs folder, so the location is configured in a single place.
dff2.to_file(f'{path_out}/gadm0_precalculated_20220224.geojson', driver='GeoJSON')


In [None]:
# NOTE(review): this cell was never executed and looks unfinished. The path ends
# with "/" (a folder, not a CSV file), so read_csv is expected to fail; confirm
# the intended file name or remove the cell.
gadm1= pd.read_csv('/Users/sofia/Documents/HE_Data/Precalculated/gadm1/')