In [42]:
import requests
import matplotlib
import pandas as pd
from urllib.parse import urlencode
import json
import time

# Let pandas render up to 1000 rows and 1000 columns before truncating output.
pd.set_option('display.max_rows', 1000)
pd.set_option('display.max_columns', 1000)

In [43]:
def getOrderFamily(taxon_id, delay=1):
    """
    Look up the order and family names of an iNaturalist taxon.

    Sends one GET request to https://api.inaturalist.org/v1/taxa/<taxon_id>
    and scans the 'ancestors' list of the first result for entries whose
    'rank' is 'order' or 'family'.

    Parameters:
        taxon_id: anything int() can convert (int, numeric string, float).
        delay: seconds to sleep before sending the request, so repeated
            calls (e.g. via DataFrame.apply) do not hammer the iNat server.

    Returns a string:
        '<order>: <family>'          on success (either part may be empty
                                     if that rank is not among the ancestors)
        'Cannot get taxon_id'        if taxon_id cannot be converted to int
                                     (e.g. None, NaN, non-numeric text)
        'response.status:<code>'     if the server answers with a non-200 code
    """
    try:
        taxon_id = int(taxon_id)
    except (TypeError, ValueError, OverflowError):
        # int() raises only these for bad input; anything else is a real bug
        # and should not be silently swallowed.
        return 'Cannot get taxon_id'

    print('taxon_id: {}'.format(taxon_id))
    time.sleep(delay)
    url = 'https://api.inaturalist.org/v1/taxa/{}'.format(taxon_id)
    response = requests.get(url)

    if response.status_code != 200:
        print('response.status_code: {}'.format(response.status_code))
        # The Response object exposes 'status_code'; it has no 'status' attribute.
        return 'response.status:{}'.format(response.status_code)

    j = json.loads(response.text)

    order = ''
    family = ''
    results = j['results'] if 'results' in j else []
    if results:
        # An empty result list or a missing 'ancestors' key yields ': '
        # instead of raising.
        for ancestor in results[0].get('ancestors') or []:
            rank = ancestor.get('rank')
            if rank == 'order':
                order = ancestor['name']
            elif rank == 'family':
                family = ancestor['name']
    return '{}: {}'.format(order, family)

#getOrderFamily(358893)

In [44]:
def make_strings_unique(strings):
    """
    Append numeric suffixes to repeated entries so every string in the list is distinct.

    Example:
        ['a','b','c','a','x','a','b']  ->  ['a', 'b', 'c', 'a_1', 'x', 'a_2', 'b_1']

    The list is modified in place and the same list object is returned.

    Typical use is de-duplicating pandas column names:
        df.columns = make_strings_unique(df.columns.values.tolist())
    """
    total = len(strings)
    for anchor in range(total):
        # Suffix counter restarts for each anchor string.
        suffix = 1
        for later in range(anchor + 1, total):
            if strings[later] != strings[anchor]:
                continue
            # A later duplicate of the anchor: rename it right away so that
            # subsequent passes compare against the renamed value.
            strings[later] = '{}_{}'.format(strings[later], suffix)
            suffix += 1
    return strings
    
#make_strings_unique(['a','b','c','a','x','a','b'])

In [45]:
def get_inat_obs(q):
    """
    Download all iNaturalist observations matching a query, one page at a time.

    Helpful site: https://api.inaturalist.org/v1/docs/#!/Observations/get_observations
    Look for the taxon ids and place ids in the urls on the web site.

    q is a dict of query parameters like this:

    q = {
        #'project_id': 'insects-of-micronesia',
        'd1': '2019-08-01',
        'd2': '2019-12-31',
        'user_login': 'thomascamacho',
        'per_page': 200,
    }

    q itself is not modified; the 'page' parameter is added to a private copy.

    Pages are requested starting at 1 until a page comes back with an empty
    'results' list, or until page 1000 has been fetched (safety cap).

    Returns a pandas DataFrame with one row per observation (empty DataFrame if
    nothing matched). Rows keep their per-page index, so index values repeat
    across pages.

    Raises requests.HTTPError if the server answers with an error status.
    """

    url = "https://api.inaturalist.org/v1/observations?"

    # Work on a copy so the caller's dict does not silently gain a 'page' key.
    params = dict(q)

    # Collect one frame per page and concatenate once at the end instead of
    # re-copying the accumulated frame on every iteration.
    frames = []

    page = 1
    while True:
        params['page'] = page
        r = requests.get(url, params=params)
        r.raise_for_status()

        # Parse the body once per page.
        payload = r.json()
        results = payload["results"]

        if results == []:
            print("no more results, stopping")
            break
        frames.append(pd.DataFrame(results))

        if page > 999:
            break

        if page == 1:
            total_results = payload["total_results"]
            # Ceiling division: an exact multiple of per_page must not count an extra page.
            num_pages = -(-total_results // payload["per_page"])
            print(total_results)
            print(num_pages)
            print("Results:", total_results, ", ", num_pages, " pages total")
        print("Retrieving page ", page)
        page += 1

    if not frames:
        return pd.DataFrame()
    return pd.concat(frames)

"""
df = get_inat_obs({
        #'project_id': 'insects-of-micronesia',
        'd1': '2019-08-01',
        'd2': '2019-12-31',
        'user_login': 'thomascamacho',
        'per_page': 200,})
"""

"\ndf = get_inat_obs({\n        #'project_id': 'insects-of-micronesia',\n        'd1': '2019-08-01',\n        'd2': '2019-12-31',\n        'user_login': 'thomascamacho',\n        'per_page': 200,})\n"

## Get a list of users who added observations to the Insects of Micronesia project between 2019-08-01 and 2019-12-31

In [46]:
# Pull every observation in the project that falls inside the date window.
df = get_inat_obs(
    dict(
        project_id='insects-of-micronesia',
        d1='2019-08-01',
        d2='2019-12-31',
        per_page=200,
    )
)

650
4
Results: 650 ,  4  pages total
Retrieving page  1
Retrieving page  2
Retrieving page  3
Retrieving page  4
no more results, stopping


In [47]:
# Each entry of the 'user' column is a dict.
# Spread its keys out into separate columns and swap them in for the original column.

df = pd.concat(
    [
        df.drop(columns=['user']),
        df['user'].apply(pd.Series),
    ],
    axis=1,
)

In [48]:
# Number of observations per login (a Series indexed by login).
df_obs_counts = df.groupby(by='login').size()
df_obs_counts

login
ajilai                     27
aregee1                    26
arruii_                    28
aubreymoore                 8
chuukese_swag              18
darren126                  21
entomologist_of_mystery    78
eungel6                    25
florentino1                30
hoplog                     22
ippeio                     34
jakemanuel                  5
jayciasoulmai              21
jeffreycentino             49
jessihannah                28
jomasantos                 32
leeroysablan                2
lord_of_diptera            32
lovenny                    31
maddy95                     8
potassiumiodinesodium      35
rosarioc1640                1
stormo10g                  26
thomascamacho              30
vanhalen_671               32
ziabc                       1
dtype: int64

# Create a catalog for each user

In [49]:
def generate_catalog_for_user(user_login, date1, date2):
    """
    Write an HTML catalog of one user's iNaturalist observations.

    Steps:
      1. Fetch the user's observations with get_inat_obs, passing date1 and
         date2 as the 'd1' and 'd2' query parameters.
      2. Expand the dict-valued 'user' and 'taxon' columns into plain columns
         and de-duplicate the resulting column names with make_strings_unique.
      3. Look up order and family for each row via getOrderFamily
         (one web request per row).
      4. Flag each observation as in / not in the Insects of Micronesia project.
      5. Save the selected columns to '<user_login>.htm' in the current directory.

    Parameters:
        user_login: iNaturalist login name; also used as the output file stem.
        date1, date2: date strings such as '2019-08-01'.

    Returns None. If the query yields no observations, a message is printed
    and no file is written.
    """
    df = get_inat_obs({'d1': date1,
                       'd2': date2,
                       'user_login': user_login,
                       'per_page': 200,})

    if df.empty:
        # Nothing to expand; the column operations below would raise KeyError.
        print('No observations found for {}; no catalog written'.format(user_login))
        return

    # The 'user' column contains dicts.
    # The following expands these dicts into columns and then drops the original column

    df = pd.concat([df.drop(['user'], axis=1), df['user'].apply(pd.Series)], axis=1)

    # The 'taxon' column contains dicts.
    # The following expands these dicts into columns and then drops the original column

    df = pd.concat([df.drop(['taxon'], axis=1), df['taxon'].apply(pd.Series)], axis=1)

    # Unfortunately, we end up with multiple columns with identical names
    # The following line fixes this.

    df.columns = make_strings_unique(df.columns.values.tolist())

    # lookup Order and Family
    # NOTE(review): 'id_2' is presumably the taxon id (third 'id' column after
    # the observation and user ids) -- confirm against the API response layout.

    df['order_family'] = df['id_2'].apply(getOrderFamily)

    def iom_label(project_observations):
        """
        KLUDGE: finding the string '1627' anywhere in the row's
        project_observations value is taken as a sign that the observation is
        included in the Insects of Micronesia project. This is a heuristic,
        not an absolute indicator.
        """
        # Inspect THIS row's value; looking at the first row of the frame
        # would give every observation the same label.
        if project_observations and ('1627' in str(project_observations)):
            return 'in Insects of Micronesia project'
        else:
            return  'NOT IN Insects of Micronesia project'

    df['in_IOM'] = df['project_observations'].apply(iom_label)

    df_out = df[['id', 'id_2', 'name', 'name_1', 'observed_on', 'location', 'place_guess', 'order_family', 'in_IOM']]
    df_out.to_html('{}.htm'.format(user_login))
    
#generate_catalog_for_user('aubreymoore', '2019-08-01', '2019-12-31')

In [50]:
# MAIN

# Build one HTML catalog per login found in df_obs_counts.
# The end date must be in YYYY-MM-DD form; the earlier '201912-31' was malformed.
for user_id in df_obs_counts.index:
    print(user_id)
    generate_catalog_for_user(user_id, '2019-08-01', '2019-12-31')
    print()

ajilai
28
1
Results: 28 ,  1  pages total
Retrieving page  1
no more results, stopping
taxon_id: 53225
taxon_id: 47651
taxon_id: 47201
taxon_id: 82177
taxon_id: 184884
taxon_id: 49150
taxon_id: 225604
taxon_id: 47822
taxon_id: 53248
taxon_id: 47157
taxon_id: 47157
taxon_id: 47158
taxon_id: 48128
taxon_id: 184884
taxon_id: 132335
taxon_id: 118903
taxon_id: 318793
taxon_id: 320058
taxon_id: 243700
taxon_id: 121850
taxon_id: 338693
taxon_id: 51615
taxon_id: 328037
taxon_id: 47927
taxon_id: 358893
taxon_id: 51588
taxon_id: 119344
taxon_id: 123519

aregee1
32
1
Results: 32 ,  1  pages total
Retrieving page  1
no more results, stopping
taxon_id: 52756
taxon_id: 54328
taxon_id: 69143
taxon_id: 82231
taxon_id: 53248
taxon_id: 143043
taxon_id: 553417
taxon_id: 48128
taxon_id: 125816
taxon_id: 121821
taxon_id: 320058
taxon_id: 245840
taxon_id: 52585
taxon_id: 51276
taxon_id: 53225
taxon_id: 132335
taxon_id: 52044
taxon_id: 49150
taxon_id: 47649
taxon_id: 448836
taxon_id: 120489
taxon_id: 52747
t

taxon_id: 52747
taxon_id: 52031
taxon_id: 82231
taxon_id: 113503
taxon_id: 51588
taxon_id: 88304
taxon_id: 49150
taxon_id: 51614
taxon_id: 125349
taxon_id: 125833
taxon_id: 120489
taxon_id: 201638
taxon_id: 53248
taxon_id: 52402
taxon_id: 132449
taxon_id: 357479
taxon_id: 82231
taxon_id: 47651
taxon_id: 893823
taxon_id: 52756
taxon_id: 113503
taxon_id: 52747
taxon_id: 49150
taxon_id: 244223
taxon_id: 124467
taxon_id: 47649
taxon_id: 523283
taxon_id: 328037
taxon_id: 328037
taxon_id: 213322
taxon_id: 121183
taxon_id: 51588
taxon_id: 68139
taxon_id: 51614
taxon_id: 125349
taxon_id: 125833
taxon_id: 47219

jessihannah
35
1
Results: 35 ,  1  pages total
Retrieving page  1
no more results, stopping
taxon_id: 125816
taxon_id: 47157
taxon_id: 47822
taxon_id: 47208
taxon_id: 49470
taxon_id: 83201
taxon_id: 47792
taxon_id: 81769
taxon_id: 328037
taxon_id: 118903
taxon_id: 141725
taxon_id: 67373
taxon_id: 347934
taxon_id: 154552
taxon_id: 199595
taxon_id: 122321
taxon_id: 343248
taxon_id: 343248