In [28]:
import requests
import pandas as pd

In [29]:
def get_inat_obs(q):
    """
    Helpful site: https://api.inaturalist.org/v1/docs/#!/Observations/get_observations
    look for the taxon ids and place ids in the urls on the web site.
    
    q is a dict like this:

    q = {
        #'project_id': 'insects-of-micronesia',
        'd1': '2019-08-01',
        'd2': '2019-12-31',
        'user_login': 'thomascamacho',
        'per_page': 200,
    }
    """
    
    url = "https://api.inaturalist.org/v1/observations?"

    df = pd.DataFrame()

    page = 1
    while True:
        q['page'] = page
        r = requests.get(url, params = q)

        results = r.json()["results"]
        df = pd.concat([df, pd.DataFrame(r.json()["results"])])

        if r.json()["results"] == []:
            print("no more results, stopping")
            break
        if page > 999:
            break

        if page == 1:
            num_pages = int(r.json()["total_results"] / r.json()["per_page"])+1
            print(r.json()["total_results"])
            print(num_pages)
            print("Results:", r.json()["total_results"], ", ", num_pages, " pages total")
        print("Retrieving page ", page)
        page += 1
    return df

"""
df = get_inat_obs({
        #'project_id': 'insects-of-micronesia',
        'd1': '2019-08-01',
        'd2': '2019-12-31',
        'user_login': 'thomascamacho',
        'per_page': 200,})
"""

"\ndf = get_inat_obs({\n        #'project_id': 'insects-of-micronesia',\n        'd1': '2019-08-01',\n        'd2': '2019-12-31',\n        'user_login': 'thomascamacho',\n        'per_page': 200,})\n"

In [30]:
df = get_inat_obs({'project_id': 'insects-of-micronesia', 'd1': '2018-06-15', 'per_page': 200,})
obs_ids = set() # A set for obs ids

1068
6
Results: 1068 ,  6  pages total
Retrieving page  1
Retrieving page  2
Retrieving page  3
Retrieving page  4
Retrieving page  5
Retrieving page  6
no more results, stopping


In [31]:
# Search 'description' field for the string 'new'

for i, r in df.iterrows():     
    desc = r['description']   
    if desc and ('new island record' in desc.lower()):
        print(r['id'], r['species_guess'])
        print(desc)
        obs_ids.add(r['id'])
        print('-----')

36285968 Megymenum affine
Three specimens from a cucumber farm collected by Leonard Sigrah.

Appears to be a new island record for Kosrae.

This appears to be the "Truk stink bug" listed in <a href="https://aubreymoore.github.io/crop-pest-list/list.html#Cucumber">Nafus 1997. An Insect Survey of the Federate States of Micronesia and Palau</a>. Also listed in <a href="https://books.google.com/books?id=VD0rAAAAYAAJ">Bryan 1949. Economic Insects of Micronesia: Report of the Insect Control Committee for Micronesia, 1947-1948.
-----
31326484 Chelonus formosanus
Collected by Tedi Mary at Country Club of the Pacific, Guam.

Numerous above turf on golf course. This is a nuisance pest flying into faces of golfers.

Body length 5 mm.

Looks like <i>Chelonus formosanus</i>. See http://www.nbair.res.in/Featured_insects/Chelonus-formosanus.php

Possibly a new island record as a fortuitously introduced biocontrol agent. This is an egg-larval parasitoid of <i>Spodoptera</i>.

-----
293332

In [32]:
# Search comments for the string 'new'

for i, r in df.iterrows():
    if r['comments_count'] > 0:
        for comment in r['comments']:
            body = comment['body']
            if 'new island record' in body.lower():
                print(r['id'], r['species_guess'])
                print(body)
                obs_ids.add(r['id'])
                print('-----')

36470788 Gulf Fritillary
new island record for Tinian
-----
35845152 Gulf Fritillary
new island record for Saipan
-----
32572967 Canegrub
Probably a new island record for Guam.
-----
18166461 Gulf Fritillary
This is a new island record for Guam.
-----


In [33]:
obs_ids = sorted(obs_ids)

In [39]:
# Generate bibtex items.

template = """    
@online{{ x,
  title = {{ {} }},
  author = {{ iNaturalist }},
  date = {{ {} }},
  url = {{ {} }},
}}"""

bibtex = ''
for obs_id in obs_ids:
    df = get_inat_obs({'id': obs_id})
    for i, r in df.iterrows():
        title = 'iNaturalist observation {}: {}'.format(r['id'], r['species_guess']) 
        date = r['observed_on']
        url = 'https://www.inaturalist.org/observations/{}'.format(r['id'])

        bibtex_item = template.format(title, date, url)        
        bibtex += bibtex_item
print(bibtex)

1
1
Results: 1 ,  1  pages total
Retrieving page  1
no more results, stopping
1
1
Results: 1 ,  1  pages total
Retrieving page  1
no more results, stopping
1
1
Results: 1 ,  1  pages total
Retrieving page  1
no more results, stopping
1
1
Results: 1 ,  1  pages total
Retrieving page  1
no more results, stopping
1
1
Results: 1 ,  1  pages total
Retrieving page  1
no more results, stopping
1
1
Results: 1 ,  1  pages total
Retrieving page  1
no more results, stopping
1
1
Results: 1 ,  1  pages total
Retrieving page  1
no more results, stopping
1
1
Results: 1 ,  1  pages total
Retrieving page  1
no more results, stopping
1
1
Results: 1 ,  1  pages total
Retrieving page  1
no more results, stopping
1
1
Results: 1 ,  1  pages total
Retrieving page  1
no more results, stopping
1
1
Results: 1 ,  1  pages total
Retrieving page  1
no more results, stopping
    
@online{ x,
  title = { iNaturalist observation 13466275: Citripestis eutraphera },
  author = { iNaturalist },
  date = { 2018-06-15 },


In [38]:
obs_ids

[13466275,
 15067449,
 15747194,
 16734728,
 18166461,
 29333274,
 31326484,
 32572967,
 35845152,
 36285968,
 36470788]

In [35]:
for column in df.columns:
    print(column)

annotations
cached_votes_total
captive
comments
comments_count
community_taxon_id
context_geoprivacy
context_taxon_geoprivacy
context_user_geoprivacy
created_at
created_at_details
created_time_zone
description
faves
faves_count
flags
geojson
geoprivacy
id
id_please
ident_taxon_ids
identifications
identifications_count
identifications_most_agree
identifications_most_disagree
identifications_some_agree
license_code
location
map_scale
mappable
non_owner_ids
num_identification_agreements
num_identification_disagreements
oauth_application_id
obscured
observation_photos
observed_on
observed_on_details
observed_on_string
observed_time_zone
ofvs
out_of_range
outlinks
owners_identification_from_vision
photos
place_guess
place_ids
positional_accuracy
preferences
project_ids
project_ids_with_curator_id
project_ids_without_curator_id
project_observations
public_positional_accuracy
quality_grade
quality_metrics
reviewed_by
site_id
sounds
spam
species_guess
tags
taxon
taxon_geoprivacy
time_observed_

In [36]:
df = get_inat_obs({'id': 3584515})

1
1
Results: 1 ,  1  pages total
Retrieving page  1
no more results, stopping


In [37]:
df

Unnamed: 0,annotations,cached_votes_total,captive,comments,comments_count,community_taxon_id,context_geoprivacy,context_taxon_geoprivacy,context_user_geoprivacy,created_at,...,tags,taxon,taxon_geoprivacy,time_observed_at,time_zone_offset,updated_at,uri,user,uuid,votes
0,[],0,False,[],0,79383,,,,2016-07-01T21:16:01-05:00,...,[],"{'threatened': False, 'introduced': False, 'ra...",,2016-07-01T08:00:00-05:00,-06:00,2016-07-01T21:49:07-05:00,http://www.inaturalist.org/observations/3584515,"{'roles': ['curator'], 'login_autocomplete': '...",7b9da3f9-d2e4-4db6-88ce-c0f6583d87aa,[]
