# Setup

In [1]:
import json
import os
import sys

sys.path.append('..')

import pandas as pd
import numpy as np

from utils import read_csv_non_utf

In [2]:
# Load the shared project configuration, stored one directory above this notebook
with open('../config.json', 'r') as f:
    config = json.load(f)

# Top-level path components shared by every dataset
gdrive_fp = config['gdrive_path']
LIFE_fp = config['LIFE_folder']
dataset_fp = config['datasets_path']
datasets_root = os.path.join(gdrive_fp, LIFE_fp, dataset_fp)

# Per-dataset path entries
indiv_paths = config['indiv_data_paths']

# EltonTraits: one base folder holding separate bird and mammal files
elton_cfg = indiv_paths['elton_traits']
elton_traits_base = elton_cfg['base_path']
elton_traits_birds = elton_cfg['birds']
elton_traits_mammals = elton_cfg['mammals']

traits_bird_path = os.path.join(datasets_root, elton_traits_base, elton_traits_birds)
traits_mam_path = os.path.join(datasets_root, elton_traits_base, elton_traits_mammals)

# Hunting-effects data (Benitez-Lopez et al. 2019)
benitez_lopez2019 = indiv_paths['benitez_lopez2019']
ben_lop_path = os.path.join(datasets_root, benitez_lopez2019)

In [3]:
# Load the Benitez-Lopez et al. (2019) hunting dataset into a DataFrame
# (read_csv_non_utf is the project helper imported from utils)
ben_lop2019 = read_csv_non_utf(ben_lop_path)

# Preview the first five rows
ben_lop2019.head(5)

Unnamed: 0,Reference,Study,Order,Family,Species,ratio,RR,X,Y,Region,Country,BM,Diet,DistKm,Reserve,TravTime,LivestockBio,Stunting,PopDens,Literacy
0,"Laurance et al., 2006",1,Cetartiodactyla,Bovidae,"Cephalophus callipygus, C. dorsalis, C. leucog...",0.377193,-0.974594,9.839,-1.916,Africa,Gabon,17.07,Herb,0.05,No,755.8,39.25948,22.0,0.86,81.8
1,"Laurance et al., 2006",1,Proboscidea,Elephantidae,Loxodonta africana,0.86569,-0.144053,9.839,-1.916,Africa,Gabon,3940.03,Herb,0.05,No,755.8,39.25948,22.0,0.86,81.8
2,"Laurance et al., 2006",1,Cetartiodactyla,Bovidae,"Cephalophus callipygus, C. dorsalis, C. leucog...",0.833333,-0.182139,9.839,-1.916,Africa,Gabon,17.07,Herb,0.3,No,755.8,39.25948,22.0,0.86,81.8
3,"Laurance et al., 2006",1,Proboscidea,Elephantidae,Loxodonta africana,0.900862,-0.104234,9.839,-1.916,Africa,Gabon,3940.03,Herb,0.3,No,755.8,39.25948,22.0,0.86,81.8
4,"Laurance et al., 2006",1,Cetartiodactyla,Bovidae,"Cephalophus callipygus, C. dorsalis, C. leucog...",0.95614,-0.044691,9.839,-1.916,Africa,Gabon,17.07,Herb,0.6,No,755.8,39.25948,22.0,0.86,81.8


In [4]:
def _read_elton_traits(path):
    """Read a tab-delimited EltonTraits file and drop rows in which every column is missing.

    The source files end with a run of completely empty rows; those are removed here.
    """
    traits = read_csv_non_utf(path, delimiter = '\t')
    return traits.dropna(axis = 'index', how = 'all')


# Reading in EltonTraits data for both taxonomic groups
bird_traits = _read_elton_traits(traits_bird_path)
mammal_traits = _read_elton_traits(traits_mam_path)

mammal_traits.head()

Unnamed: 0,MSW3_ID,Scientific,MSWFamilyLatin,Diet-Inv,Diet-Vend,Diet-Vect,Diet-Vfish,Diet-Vunk,Diet-Scav,Diet-Fruit,...,ForStrat-Certainty,ForStrat-Comment,Activity-Nocturnal,Activity-Crepuscular,Activity-Diurnal,Activity-Source,Activity-Certainty,BodyMass-Value,BodyMass-Source,BodyMass-SpecLevel
0,1.0,Tachyglossus aculeatus,Tachyglossidae,100.0,0.0,0.0,0.0,0.0,0.0,0.0,...,A,,1.0,1.0,0.0,Ref_1,ABC,3025.0,Ref_117,1.0
1,2.0,Zaglossus attenboroughi,Tachyglossidae,100.0,0.0,0.0,0.0,0.0,0.0,0.0,...,A,,1.0,0.0,0.0,Ref_1,ABC,8532.39,"Ref_2, Ref_3",0.0
2,3.0,Zaglossus bartoni,Tachyglossidae,100.0,0.0,0.0,0.0,0.0,0.0,0.0,...,A,,1.0,0.0,0.0,Ref_1,ABC,7180.0,Ref_131,1.0
3,4.0,Zaglossus bruijni,Tachyglossidae,100.0,0.0,0.0,0.0,0.0,0.0,0.0,...,A,,1.0,0.0,0.0,Ref_1,ABC,10139.5,Ref_117,1.0
4,5.0,Ornithorhynchus anatinus,Ornithorhynchidae,80.0,0.0,0.0,20.0,0.0,0.0,0.0,...,A,,1.0,1.0,1.0,Ref_1,ABC,1484.25,Ref_117,1.0


# Extracting body mass values for Benitez-Lopez et al. (2019)

A sizable number of species in our dataset do not appear in EltonTraits under the same name:
- some are misspelled in Benitez-Lopez (e.g., "Cebus capuchinus" should be "Cebus capucinus")
- some are listed in EltonTraits under an outdated name (e.g., "Sapajus apella" appears there as "Cebus apella")
- similarly, some Benitez-Lopez entries use outdated names (e.g., "Procolobus pennantii" should be "Piliocolobus pennantii")
- some species are only in EltonTraits under their domesticated form (e.g., "Bos frontalis" is the domesticated version of "Bos gaurus")

In [5]:
# For now, work only with rows that name a single, fully identified species.
# A row is dropped when its `Species` entry
#   - lists several taxa (contains a comma, " and " or " or "), or
#   - is only resolved to genus level ("Genus sp", "Genus sp.", "Genus spp.", "Genus species").
# Each entry of `exclude_str` is a regular expression. The last one replaces the plain
# substring ' sp', which would also throw away valid binomials whose epithet merely
# starts with "sp" (e.g. "Sciurus spadiceus").
exclude_str = [',', ' and ', ' or ', r' (?:spp?|species)(?![A-Za-z])']

multi_species = pd.Series(False, index = ben_lop2019.index)
for pattern in exclude_str:
    # na = False keeps the mask strictly boolean if a species name is missing
    multi_species = multi_species | ben_lop2019['Species'].str.contains(pattern, regex = True, na = False)

# NOTE: despite its name, `exclude` is True for the rows that are KEPT
# (the name is retained so that later cells relying on it keep working).
exclude = ~multi_species

# Keep only the columns needed downstream; .copy() detaches the subset from ben_lop2019
ben_lop_sub = ben_lop2019.loc[exclude, ['Reference', 'Species', 'ratio', 'X', 'Y']].copy(deep = True)

# EltonTraits body mass, with columns renamed so that `Species` can act as the join key
body_mass_mammals = (
    mammal_traits[['Scientific', 'BodyMass-Value']]
    .copy(deep = True)
    .rename(columns = {'Scientific' : 'Species', 'BodyMass-Value' : 'BodyMass'})
)

In [6]:
# Boolean array: True where the species name of a hunting record has an exact match
# in the EltonTraits mammal table. `isin` does one hashed lookup instead of rebuilding
# a Python list and scanning it for every row.
in_dataset = ben_lop_sub['Species'].isin(body_mass_mammals['Species']).to_numpy()

# Species without a match, with the number of hunting records affected by each
ben_lop_sub.loc[~in_dataset, 'Species'].value_counts()

Species
Sapajus apella              54
Cebus capuchinus            27
Procolobus pennantii        17
Bos gaurus                  14
Cephalophus harveyi         14
Procolobus gordonorum       11
Cebuella pygmaea            11
Sapajus cay                  9
Cephalophus nigrifons        7
Tragelaphus oryx             6
Mazama nemorivaga            6
Pseudalopex culpaeus         6
Caracal aurata               4
Smutsia gigantea             2
Rhynchocyon udzungwensis     2
Dendrohyrax validus          2
Pliocolobus badius           2
Marmosa demerarae            2
Bassaricyon medius           2
Muntiacus vaginalis          2
Name: count, dtype: int64

In [7]:
# Attach body mass to every hunting record by species name. The left join keeps all
# rows of ben_lop_sub; species without an EltonTraits match end up with a missing BodyMass.
merged_dfs = ben_lop_sub.merge(body_mass_mammals, on = 'Species', how = 'left')
merged_dfs.head()

Unnamed: 0,Reference,Species,ratio,X,Y,BodyMass
0,"Laurance et al., 2006",Loxodonta africana,0.86569,9.839,-1.916,3940034.28
1,"Laurance et al., 2006",Loxodonta africana,0.900862,9.839,-1.916,3940034.28
2,"Laurance et al., 2006",Loxodonta africana,1.099138,9.839,-1.916,3940034.28
3,"Laurance et al., 2006",Loxodonta africana,1.146552,9.839,-1.916,3940034.28
4,"Laurance et al., 2006",Loxodonta africana,1.0,9.839,-1.916,3940034.28
