In [53]:
import requests
import matplotlib
import pandas as pd
from urllib.parse import urlencode
import json

pd.options.display.max_rows  = 1000
pd.options.display.max_columns = 1000

In [54]:
def getOrderFamily(taxon_id):
    url = 'https://api.inaturalist.org/v1/taxa/{}'.format(taxon_id)
    response = requests.get(url)
    j = json.loads(response.text)       
    
    order = ''
    family = ''
    for ancestor in j['results'][0]['ancestors']:
        if ancestor['rank'] == 'order':
            order = ancestor['name']
        if ancestor['rank'] == 'family':
            family = ancestor['name']
    return '{}: {}'.format(order, family)

#getOrderFamily(358893)

In [55]:
# Helpful site: https://api.inaturalist.org/v1/docs/#!/Observations/get_observations
# look for the taxon ids and place ids in the urls on the web site.

q = {
    #'project_id': 'insects-of-micronesia',
    'd1': '2019-08-01',
    'd2': '2019-12-31',
    'user_login': 'thomascamacho',
    'per_page': 200,
}

url = "https://api.inaturalist.org/v1/observations?"

df = pd.DataFrame()

page = 1
while True:
    q['page'] = page
    r = requests.get(url, params = q)
    
    results = r.json()["results"]
    df = pd.concat([df, pd.DataFrame(r.json()["results"])])

    if r.json()["results"] == []:
        print("no more results, stopping")
        break
    if page > 999:
        break
        
    if page == 1:
        num_pages = int(r.json()["total_results"] / r.json()["per_page"])+1
        print(r.json()["total_results"])
        print(num_pages)
        print("Results:", r.json()["total_results"], ", ", num_pages, " pages total")
    print("Retrieving page ", page)
    page += 1

23
1
Results: 23 ,  1  pages total
Retrieving page  1
no more results, stopping


In [56]:
# The 'user' column contains dicts.
# The following expands these dicts into columns and then drops the original column

df = pd.concat([df.drop(['user'], axis=1), df['user'].apply(pd.Series)], axis=1)

# The 'taxon' column contains dicts.
# The following expands these dicts into columns and then drops the original column

df = pd.concat([df.drop(['taxon'], axis=1), df['taxon'].apply(pd.Series)], axis=1)

# Unfortunately, we end up with multiple columns with identical names
# The following renamer class fixes this.

class renamer():
    
    def __init__(self):
        self.d = dict()

    def __call__(self, x):
        if x not in self.d:
            self.d[x] = 0
            return x
        else:
            self.d[x] += 1
        return "%s_%d" % (x, self.d[x])

df = df.rename(columns=renamer())

In [59]:
# lookup Order and Family

df['order_family'] = df['id_2'].apply(getOrderFamily)

0      Lepidoptera: Nymphalidae
1                            : 
2                            : 
3     Coleoptera: Coccinellidae
4      Coleoptera: Scarabaeidae
5      Coleoptera: Scarabaeidae
6         Odonata: Libellulidae
7          Blattodea: Blattidae
8         Odonata: Libellulidae
9     Orthoptera: Tettigoniidae
10        Hemiptera: Rhopalidae
11                 Orthoptera: 
12                 Orthoptera: 
13    Orthoptera: Tettigoniidae
14        Hymenoptera: Vespidae
15                Hymenoptera: 
16     Lepidoptera: Nymphalidae
17     Lepidoptera: Hesperiidae
18     Lepidoptera: Nymphalidae
19    Lepidoptera: Papilionidae
20        Lepidoptera: Pieridae
21     Lepidoptera: Nymphalidae
22                Hymenoptera: 
Name: id_2, dtype: object

In [57]:
print(df.dtypes)

annotations                           object
cached_votes_total                     int64
captive                                 bool
comments                              object
comments_count                         int64
community_taxon_id                   float64
context_geoprivacy                    object
context_taxon_geoprivacy              object
context_user_geoprivacy               object
created_at                            object
created_at_details                    object
created_time_zone                     object
description                           object
faves                                 object
faves_count                            int64
flags                                 object
geojson                               object
geoprivacy                            object
id                                     int64
id_please                               bool
ident_taxon_ids                       object
identifications                       object
identifica

In [58]:
df[['id', 'id_2', 'name', 'name_1', 'observed_on', 'location', 'place_guess', 'order_family']]

Unnamed: 0,id,id_2,name,name_1,observed_on,location,place_guess
0,36090342,358893,Thomas Camacho,Euploea eunice,2019-11-26,"13.4588805298,144.7514570649","Guam, Hagatna, GU, US"
1,36048664,47792,Thomas Camacho,Odonata,2019-11-25,"13.4306019588,144.8007993958","University of Guam, Mangilao, GU, US"
2,35909225,47208,Thomas Camacho,Coleoptera,2019-11-21,"13.4305741596,144.8007145043","University of Guam, Mangilao, GU, US"
3,35909199,319910,Thomas Camacho,Cheilomenes sexmaculata,2019-11-18,"13.4608354743,144.7540747228","Guam, Sinajana, GU, US"
4,35909177,320058,Thomas Camacho,Oryctes rhinoceros,2019-11-21,"13.4305565811,144.8006473084","University of Guam, Mangilao, GU, US"
5,35909169,91347,Thomas Camacho,Protaetia,2019-11-21,"13.4305570409,144.800650176","University of Guam, Mangilao, GU, US"
6,35909154,51556,Thomas Camacho,Sympetrum,2019-11-21,"13.4305654426,144.8006106028","University of Guam, Mangilao, GU, US"
7,35909151,82231,Thomas Camacho,Periplaneta americana,2019-11-21,"13.4305554516,144.8006536617","University of Guam, Mangilao, GU, US"
8,35909147,108344,Thomas Camacho,Pantala flavescens,2019-11-21,"13.430557195,144.8006120647","University of Guam, Mangilao, GU, US"
9,35909137,347888,Thomas Camacho,Euconocephalus nasutus,2019-11-21,"13.4305461427,144.8006431959","University of Guam, Mangilao, GU, US"
