# ES50 Index (Hurlbert's Index)

In [1]:
try:
    from pyobis import occurrences
except:
    !pip install -q pyobis
    from pyobis import occurrences
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import math
import numpy as np
from scipy.special import loggamma

[0m

## data

In [2]:
# Study area: a WKT polygon over the Arabian Sea (west of India).
arabian_sea_wkt = "POLYGON ((58.3301 19.0935, 69.8145 19.0381, 69.8145 9.5161, 58.6230 9.6316, 58.3301 19.0935))"
query = occurrences.search(geometry=arabian_sea_wkt)

In [3]:
# Show the OBIS API URL that corresponds to this query.
query.api_url

'https://api.obis.org/v3/occurrence?geometry=POLYGON+%28%2858.3301+19.0935%2C+69.8145+19.0381%2C+69.8145+9.5161%2C+58.6230+9.6316%2C+58.3301+19.0935%29%29&offset=0&mof=False'

In [4]:
# Show the OBIS mapper URL for the same geometry.
query.mapper_url

'https://mapper.obis.org/?geometry=POLYGON+%28%2858.3301+19.0935%2C+69.8145+19.0381%2C+69.8145+9.5161%2C+58.6230+9.6316%2C+58.3301+19.0935%29%29&offset=0&mof=False'

In [5]:
# Fetch the occurrence records for the query; the cell displays the resulting table.
query.execute()

Fetching: [████████████████████████████████████████████████████████████████████████████████████████████████████] 64210/64210
Fetched 64210 records.


Unnamed: 0,country,brackish,date_year,scientificNameID,year,scientificName,absence,dropped,aphiaID,decimalLatitude,...,identificationID,locationRemarks,verbatimSRS,georeferenceVerificationStatus,previousIdentifications,georeferencedBy,minimumElevationInMeters,maximumElevationInMeters,georeferenceProtocol,islandGroup
0,SOVIET UNION,True,1980.0,urn:lsid:marinespecies.org:taxname:101,1980,Gastropoda,False,False,101,11.000000,...,,,,,,,,,,
1,UNITED STATES,,1995.0,urn:lsid:marinespecies.org:taxname:345515,1995,Prochlorococcus,False,False,345515,17.199600,...,,,,,,,,,,
2,,True,,urn:lsid:marinespecies.org:taxname:1137,,Cumacea,False,False,1137,12.030000,...,,,,,,,,,,
3,SOVIET UNION,,1970.0,urn:lsid:marinespecies.org:taxname:534090,1970,Fiona,False,False,138007,10.100000,...,,,,,,,,,,
4,,True,1995.0,urn:lsid:marinespecies.org:taxname:393148,1995,Rhizobiales,False,False,393148,16.050667,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64205,UNITED STATES,,1995.0,urn:lsid:marinespecies.org:taxname:345515,1995,Prochlorococcus,False,False,345515,17.686200,...,,,,,,,,,,
64206,,,1997.0,urn:lsid:marinespecies.org:taxname:418106,1997,Globigerinella calida,False,False,418106,16.272300,...,,,,,,,,,,
64207,SOVIET UNION,True,1980.0,urn:lsid:marinespecies.org:taxname:1078,1980,Ostracoda,False,False,1078,17.000000,...,,,,,,,,,,
64208,,,1995.0,urn:lsid:marinespecies.org:taxname:113455,1995,Globigerinoides tenellus,False,False,418107,14.443400,...,,,,,,,,,,


In [6]:
# Bind the fetched occurrence table to `df` (the next section rebinds `df`
# to a species-level subset of the same data).
df = query.data

## generating the ES50 index

In [7]:
# Keep only records that carry a species name, restricted to the columns
# needed for the index, and renumber the rows from 0.
df = (
    query.data
    .dropna(subset=["species"])
    .loc[:, ["decimalLongitude", "decimalLatitude", "species", "id"]]
    .reset_index(drop=True)
)
df

Unnamed: 0,decimalLongitude,decimalLatitude,species,id
0,68.338333,18.317222,Climacodium frauenfeldianum,0006a61f-5217-4840-b0bf-a2028aa5a199
1,60.266700,16.216500,Hoeglundina elegans,000a58e8-06b6-43c5-8576-50d5f23c2a25
2,65.000300,10.034000,Globigerinoides ruber,000b4ba4-4b7b-4261-b411-dd5b67b69cae
3,64.996000,10.013600,Globorotalia eastropacia,000c3d98-5f5d-467e-9c54-8b2641bcae75
4,61.558333,15.340000,Chauliodus pammelas,000d83d2-7ab7-4a56-9e9f-a60ef1e523ef
...,...,...,...,...
25863,65.333300,11.000000,Pareucalanus attenuatus,fff16c01-1df8-4e74-a0f6-4d689ef908c7
25864,64.850000,10.016700,Acrocalanus gibber,fff2acee-c356-488d-804d-d80f294f40e3
25865,60.000000,17.000000,Clausocalanus arcuicornis,fffa34eb-ae61-4d6f-a675-bf98648a6ae6
25866,60.263300,16.272300,Globigerinella calida,fffe0dbc-7064-41b0-9722-fc9e64b48182


In [8]:
# Round the coordinates to 3 decimals so that nearby records share the same
# (longitude, latitude) key; species and id are carried over unchanged.
_df = df[["decimalLongitude", "decimalLatitude", "species", "id"]].round(
    {"decimalLongitude": 3, "decimalLatitude": 3}
)
_df

Unnamed: 0,decimalLongitude,decimalLatitude,species,id
0,68.338,18.317,Climacodium frauenfeldianum,0006a61f-5217-4840-b0bf-a2028aa5a199
1,60.267,16.216,Hoeglundina elegans,000a58e8-06b6-43c5-8576-50d5f23c2a25
2,65.000,10.034,Globigerinoides ruber,000b4ba4-4b7b-4261-b411-dd5b67b69cae
3,64.996,10.014,Globorotalia eastropacia,000c3d98-5f5d-467e-9c54-8b2641bcae75
4,61.558,15.340,Chauliodus pammelas,000d83d2-7ab7-4a56-9e9f-a60ef1e523ef
...,...,...,...,...
25863,65.333,11.000,Pareucalanus attenuatus,fff16c01-1df8-4e74-a0f6-4d689ef908c7
25864,64.850,10.017,Acrocalanus gibber,fff2acee-c356-488d-804d-d80f294f40e3
25865,60.000,17.000,Clausocalanus arcuicornis,fffa34eb-ae61-4d6f-a675-bf98648a6ae6
25866,60.263,16.272,Globigerinella calida,fffe0dbc-7064-41b0-9722-fc9e64b48182


In [9]:
# n  : number of records at each rounded (longitude, latitude) location
# ni : number of records of each species at that location
_records_per_cell = _df.groupby(["decimalLongitude", "decimalLatitude"]).id.count()
_records_per_species = _df.groupby(["decimalLongitude", "decimalLatitude", "species"]).id.count()

# One row per (location, species): records of all *other* species, and the location total.
es_df = (_records_per_cell - _records_per_species).to_frame(name="n-ni")
es_df["n"] = _records_per_cell
es_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,n-ni,n
decimalLongitude,decimalLatitude,species,Unnamed: 3_level_1,Unnamed: 4_level_1
58.360,18.66,Grampus griseus,0,1
58.380,18.77,Aidanosagitta neglecta,43,44
58.380,18.77,Aidanosagitta regularis,43,44
58.380,18.77,Alacia alata,42,44
58.380,18.77,Conchoecetta giesbrechti,42,44
...,...,...,...,...
69.733,16.90,Orbulina universa,10,11
69.733,16.90,Pulleniatina obliquiloculata,10,11
69.733,16.90,Siriella gracilis,9,11
69.733,16.90,Trilobatus sacculifer,10,11


In [10]:
# Per-species term of ES50: 1 - C(n - ni, 50) / C(n, 50), evaluated through
# log-gamma for numerical stability. It is only computed for rows where
# n - ni >= 50; all other rows are left as NaN by index alignment.
_has_50_others = es_df["n-ni"] >= 50
_others = es_df.loc[_has_50_others, "n-ni"]
_total = es_df.loc[_has_50_others, "n"]
es_df["esi"] = 1 - np.exp(
    (_others + 1).apply(loggamma)
    + (_total - 50 + 1).apply(loggamma)
    - (_others - 50 + 1).apply(loggamma)
    - (_total + 1).apply(loggamma)
)

In [11]:
# When a location has at least 50 records but fewer than 50 of them belong to
# other species (n - ni < 50), every 50-record subsample must contain this
# species, so its contribution is exactly 1. Testing only n == 50 would leave
# such species as NaN (counted as 0) at locations with more than 50 records.
es_df.loc[(es_df["n"] >= 50) & (es_df["n-ni"] < 50), "esi"] = 1

In [12]:
# Sum the per-species terms within each location to obtain its ES50 value
# (NaN terms are skipped by the sum).
es_df.groupby(level=["decimalLongitude", "decimalLatitude"])["esi"].sum().reset_index()

Unnamed: 0,decimalLongitude,decimalLatitude,esi
0,58.360,18.660,0.000000
1,58.380,18.770,0.000000
2,58.394,17.310,18.857728
3,58.399,18.028,0.000000
4,58.421,18.099,0.000000
...,...,...,...
823,69.708,18.995,0.000000
824,69.730,16.900,0.000000
825,69.733,16.867,0.000000
826,69.733,16.900,0.000000


## composing everything into a function

In [13]:
def es50(data, precision, esn=50):
    """Compute Hurlbert's expected species count (ES50 by default) per location.

    Records are grouped by their coordinates rounded to ``precision`` decimals.
    For each location with ``n`` records, of which ``ni`` belong to species
    ``i``, the index is ``sum_i (1 - C(n - ni, esn) / C(n, esn))``: the expected
    number of distinct species in a random subsample of ``esn`` records.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``decimalLongitude``, ``decimalLatitude``,
        ``species`` and ``id``. Rows without a species are ignored.
    precision : int
        Number of decimals the coordinates are rounded to before grouping.
    esn : int, default 50
        Subsample size of the index.

    Returns
    -------
    pandas.DataFrame
        One row per rounded location with the columns ``decimalLongitude``,
        ``decimalLatitude`` and ``esi`` (the index value). Locations with
        fewer than ``esn`` records get 0.0, because the index is not defined
        for them.
    """
    cell_keys = ["decimalLongitude", "decimalLatitude"]

    records = data.dropna(subset=["species"])[cell_keys + ["species", "id"]].round(
        {"decimalLongitude": precision, "decimalLatitude": precision}
    )

    # One row per (location, species) with ni, then the location total n.
    es_df = records.groupby(cell_keys + ["species"]).id.count().rename("ni").reset_index()
    es_df["n"] = es_df.groupby(cell_keys)["ni"].transform("sum")

    n = es_df["n"].to_numpy(dtype=float)
    others = n - es_df["ni"].to_numpy(dtype=float)  # n - ni

    esi = np.zeros(len(es_df))

    # General case: enough records of other species to fill a subsample without
    # species i. C(n - ni, esn) / C(n, esn) is evaluated through log-gamma.
    general = others >= esn
    esi[general] = 1 - np.exp(
        loggamma(others[general] + 1)
        + loggamma(n[general] - esn + 1)
        - loggamma(others[general] - esn + 1)
        - loggamma(n[general] + 1)
    )

    # The location has at least esn records but fewer than esn of them are of
    # other species: every subsample contains species i, so its term is exactly 1.
    # (Checking only n == esn would drop dominant species at larger locations.)
    esi[(n >= esn) & ~general] = 1.0

    es_df["esi"] = esi
    return es_df.groupby(cell_keys, as_index=False)["esi"].sum()

In [14]:
# Run the function on the full query result, rounding coordinates to 3 decimals.
es50(query.data, 3)

Unnamed: 0,decimalLongitude,decimalLatitude,esi
0,58.360,18.660,0.000000
1,58.380,18.770,0.000000
2,58.394,17.310,18.857728
3,58.399,18.028,0.000000
4,58.421,18.099,0.000000
...,...,...,...
823,69.708,18.995,0.000000
824,69.730,16.900,0.000000
825,69.733,16.867,0.000000
826,69.733,16.900,0.000000
