In [None]:
import requests
import matplotlib
import pandas as pd
from urllib.parse import urlencode
import json
import time
import os

# Let pandas display large frames in full inside this notebook:
# up to 1000 rows, 1000 columns and 1000 characters per cell.
pd.set_option('display.max_rows', 1000)
pd.set_option('display.max_columns', 1000)
pd.set_option('display.max_colwidth', 1000)

In [None]:
def getOrderFamily(taxon_id):
    """
    Looks up the order and family names of a taxon via the iNaturalist API.

    One GET request is sent to https://api.inaturalist.org/v1/taxa/<taxon_id>
    after a one second pause, so repeated calls do not flood the server.

    taxon_id: any value int() accepts, e.g. 47208 or '47208'.

    Returns a tuple (order, family) on success. An element is '' when the taxon
    has no ancestor of that rank and is not of that rank itself, or when the
    response contains no results.

    On failure a single string is returned instead of a tuple:
        'Cannot get taxon_id'      taxon_id could not be converted to an integer
        'response.status: <code>'  the server answered with a non-200 status
    Callers that unpack the result must check for a string first.
    """
    delay = 1 # Delay in seconds before sending request to iNat server
    order = ''
    family = ''

    try:
        taxon_id = int(taxon_id)
    except (TypeError, ValueError, OverflowError):
        return 'Cannot get taxon_id'

    print('taxon_id: {}'.format(taxon_id))
    time.sleep(delay)
    url = 'https://api.inaturalist.org/v1/taxa/{}'.format(taxon_id)
    response = requests.get(url)

    if response.status_code != 200:
        print('response.status_code: {}'.format(response.status_code))
        # A requests Response exposes 'status_code'; it has no 'status' attribute.
        return 'response.status: {}'.format(response.status_code)

    j = json.loads(response.text)
    results = j['results'] if 'results' in j else []

    # A missing or empty result list means there is nothing to inspect.
    if not results:
        return order, family

    taxon = results[0]

    for ancestor in taxon.get('ancestors') or []:
        if ancestor.get('rank') == 'order':
            order = ancestor['name']
        if ancestor.get('rank') == 'family':
            family = ancestor['name']

    # The taxon's own name wins over its ancestors when the taxon itself
    # is an order or a family.
    rank = taxon.get('rank')
    if rank == 'order':
        order = taxon['name']
    if rank == 'family':
        family = taxon['name']

    return order, family

#getOrderFamily(47208)

In [None]:
def make_strings_unique(strings):
    """
    Takes a list of strings as input and adds suffixes to make each string unique.
    Input: ['a','b','c','a','x','a','b']
    Returns: ['a', 'b', 'c', 'a_1', 'x', 'a_2', 'b_1']

    The first occurrence of a string is kept unchanged; later occurrences get
    '_1', '_2', ... appended. A suffix number is skipped when the suffixed name
    is already taken, so the result never contains duplicates:
    Input: ['a_1','a','a']
    Returns: ['a_1', 'a', 'a_2']

    A new list is returned; the input is not modified.

    This function is useful in ensuring unique column names for a pandas data frame:
    df.columns = make_strings_unique(df.columns.values.tolist())
    """
    taken = set()       # every name already placed in the result
    next_suffix = {}    # original string -> next suffix number to try
    unique = []

    for s in strings:
        candidate = s
        if candidate in taken:
            d = next_suffix.get(s, 1)
            candidate = '{}_{}'.format(s, d)
            # Skip suffixes that would clash with a name already in use.
            while candidate in taken:
                d += 1
                candidate = '{}_{}'.format(s, d)
            next_suffix[s] = d + 1
        taken.add(candidate)
        unique.append(candidate)

    return unique
    
#make_strings_unique(['a','b','c','a','x','a','b'])

In [None]:
def get_inat_obs(q):
    """
    Downloads all iNaturalist observations matching a query, page by page.

    Helpful site: https://api.inaturalist.org/v1/docs/#!/Observations/get_observations
    look for the taxon ids and place ids in the urls on the web site.

    q is a dict like this:

    q = {
        #'project_id': 'insects-of-micronesia',
        'd1': '2019-08-01',
        'd2': '2019-12-31',
        'user_login': 'thomascamacho',
        'per_page': 200,
    }

    q is not modified; the page number is added to a private copy.

    Pages are requested until one comes back empty, or until 1000 pages have
    been fetched.

    Returns a pandas DataFrame with one row per observation and a fresh
    0..n-1 index, or an empty DataFrame when nothing matches.

    Raises requests.HTTPError when the server answers with an error status.
    """
    url = "https://api.inaturalist.org/v1/observations?"
    max_pages = 1000

    params = dict(q)    # copy, so the caller's dict never gains a 'page' key
    page_frames = []    # one DataFrame per page, concatenated once at the end

    page = 1
    while True:
        params['page'] = page
        r = requests.get(url, params=params)
        r.raise_for_status()
        payload = r.json()  # parse the body once per page
        results = payload["results"]

        if not results:
            print("no more results, stopping")
            break
        page_frames.append(pd.DataFrame(results))

        if page >= max_pages:
            break

        if page == 1:
            total_results = payload["total_results"]
            # Ceiling division: an exact multiple of per_page needs no extra page.
            num_pages = -(-total_results // payload["per_page"])
            print("Results:", total_results, ", ", num_pages, " pages total")
        print("Retrieving page ", page)
        page += 1

    if not page_frames:
        return pd.DataFrame()
    return pd.concat(page_frames, ignore_index=True)

"""
df = get_inat_obs({
        #'project_id': 'insects-of-micronesia',
        'd1': '2019-08-01',
        'd2': '2019-12-31',
        'user_login': 'thomascamacho',
        'per_page': 200,})
"""

## Get a list of users who added observations to the Insects of Micronesia project between 2019-08-01 and 2019-12-31

In [None]:
def list_project_users(project, date1, date2):
    """
    Returns a pandas series containing a list of iNat user names and number of obs submitted to a project between
    date1 and date2 inclusive.

    The series is indexed by user login and holds the observation count per
    login. An empty series is returned when the project has no observations
    in the date range.

    To iterate over this series, use:
        for user_id, count in obs_counts.items():
    """
    df = get_inat_obs({'project_id': project,
                       'd1': date1,
                       'd2': date2,
                       'per_page': 200,})

    # Nothing downloaded: there is no 'user' column to read logins from.
    if df.empty or 'user' not in df.columns:
        return pd.Series(dtype='int64', index=pd.Index([], name='login'))

    # The 'user' column contains dicts; only the login is needed for counting,
    # so it is extracted directly instead of expanding every dict into columns.
    logins = df['user'].apply(
        lambda user: user.get('login') if isinstance(user, dict) else None)

    df_obs_counts = pd.DataFrame({'login': logins}).groupby('login').size()
    return df_obs_counts

#obs_counts = list_project_users('insects-of-micronesia', '2019-08-01', '2019-12-31')


# Create a catalog for each user

In [None]:
def generate_catalog_for_user(user_login, date1, date2):
    """
    Builds an HTML catalog of one user's observations made between date1 and date2.

    The observations are downloaded with get_inat_obs, the nested 'user',
    'taxon' and 'project_observations' columns are expanded, order and family
    are looked up with lookup_order_family, and a selection of columns is
    written to '<user_login>/catalog.htm' (the directory is created if needed).

    Returns the full working DataFrame (not just the columns written to HTML).
    When the user has no observations in the date range, the empty DataFrame
    is returned and no file is written.
    """
    import html  # local import: only needed for escaping text in the HTML output

    iom_project_id = 1627  # project_id of insects-of-micronesia

    df = get_inat_obs({'d1': date1,
                       'd2': date2,
                       'user_login': user_login,
                       'per_page': 200,})

    if df.empty:
        print('no observations found for {}'.format(user_login))
        return df

    # The 'user', 'taxon' and 'project_observations' columns contain nested data.
    # Each one is expanded into columns of its own and the original column is dropped.
    for nested in ('user', 'taxon', 'project_observations'):
        df = pd.concat([df.drop([nested], axis=1), df[nested].apply(pd.Series)], axis=1)

    # The expansion leaves several columns with identical names; suffixes make
    # them unique. From the way they are used below, 'id_2' and 'name_1' are
    # presumably the taxon's id and name - this depends on which keys the API returns.
    df.columns = make_strings_unique(df.columns.values.tolist())

    # Look up order and family for every taxon id
    df = pd.concat([df, df.id_2.apply(lambda s: pd.Series(lookup_order_family(s)))], axis=1)

    # Boolean column: is the record in the insects-of-micronesia project?
    df['in_IOM'] = df.project_ids.apply(lambda x: iom_project_id in x)

    # 'location' is a 'latitude,longitude' string; anything unparsable becomes ''.
    def coordinate(location, position):
        try:
            return '{:.6f}'.format(float(location.split(',')[position]))
        except (AttributeError, IndexError, TypeError, ValueError):
            return ''

    df['latitude'] = df.location.apply(lambda x: coordinate(x, 0))
    df['longitude'] = df.location.apply(lambda x: coordinate(x, 1))

    # Links to the observation and taxon pages
    df['obs_link'] = df['id'].apply(
        lambda x: '<a href="https://www.inaturalist.org/observations/{}">observation</a>'.format(x))
    df['tax_link'] = df.id_2.apply(
        lambda x: '<a href="https://www.inaturalist.org/taxa/{}">taxon</a>'.format(x))

    # Output catalog as an HTML document in a directory created for user
    df_out = df[['id', 'obs_link', 'id_2', 'tax_link', 'name', 'name_1', 'order', 'family', 'observed_on',
                 'place_guess', 'latitude', 'longitude','in_IOM']].copy()

    # to_html runs with escape=False so the link columns stay clickable; the
    # free-text columns come from the API and are escaped here instead.
    for text_column in ('name', 'name_1', 'order', 'family', 'observed_on', 'place_guess'):
        df_out[text_column] = df_out[text_column].apply(
            lambda value: html.escape(value) if isinstance(value, str) else value)

    os.makedirs(user_login, exist_ok=True)

    df_out.to_html('{}/catalog.htm'.format(user_login), escape=False, table_id=user_login)
    return df

# Example call, kept commented out like the other examples in this notebook.
# It cannot run in this cell on a fresh kernel: lookup_order_family and the ofd
# cache are only defined in later cells. The end date was also malformed
# ('201912-31'); it is corrected here.
#df = generate_catalog_for_user('jakemanuel', '2019-08-01', '2019-12-31')

In [None]:
def save_order_family_dict(filepath, ofd):
    """
    Writes the order/family lookup dict to filepath as JSON, overwriting any
    existing file, and returns the dict that was passed in.

    Expected format of ofd: {'954232': {'order': 'Coleoptera', 'family': 'Callirhipidae'}}
    """
    with open(filepath, "w") as out_file:
        out_file.write(json.dumps(ofd))
    return ofd

In [None]:
def lookup_order_family(tid):
    """
    Returns a dict in the form of: {'family': 'Scarabaeidae', 'order': 'Coleoptera' }

    tid is a taxon id (number or numeric string). Results are cached in the
    module-level dict ofd, keyed by the id as a string; getOrderFamily is only
    called for ids that are not cached yet.

    {'order': '', 'family': ''} is returned when tid is missing (NaN/None) or
    when getOrderFamily reports a failure. Failures are not cached, so the id
    is looked up again next time.
    """
    if pd.isna(tid):
        return {'order': '', 'family': ''}
    tid = str(int(tid)) #Ensure that tid is a string representation of an integer
    if tid not in ofd:
        result = getOrderFamily(tid)
        # getOrderFamily returns a plain string (an error message) on failure
        # and an (order, family) tuple on success.
        if isinstance(result, str):
            print('lookup failed for taxon {}: {}'.format(tid, result))
            return {'order': '', 'family': ''}
        order, family = result
        ofd[tid] = {'order': order, 'family': family}
    return ofd[tid]

#lookup_order_family('48201')

In [None]:
def load_order_family_dict(filepath):
    """
    Reads the order/family lookup dict from the JSON file at filepath and returns it.

    When the file does not exist yet, it is first created with
    save_order_family_dict, holding a single starter entry.
    """
    starter_entries = {'954232': {'order': 'Coleoptera', 'family': 'Callirhipidae'}}

    # If the file does not exist, create it.
    file_is_missing = not os.path.isfile(filepath)
    if file_is_missing:
        save_order_family_dict(filepath, starter_entries)

    with open(filepath, "r") as in_file:
        return json.load(in_file)

#load_order_family_dict('order_family_dict.json')

In [None]:
# MAIN
# Builds a catalog for every user who added observations to the
# insects-of-micronesia project in the date range below.

START_DATE = '2019-08-01'
END_DATE = '2019-12-31'  # was passed as the malformed '201912-31' to generate_catalog_for_user

ofd_filepath = 'order_family_dict.json'
ofd = load_order_family_dict(ofd_filepath)

obs_counts = list_project_users('insects-of-micronesia', START_DATE, END_DATE)
try:
    for user_id in obs_counts.index:
        print(user_id)
        generate_catalog_for_user(user_id, START_DATE, END_DATE)
        print()
finally:
    # Save the order/family cache even when a catalog fails part-way, so the
    # taxa already looked up do not have to be requested again.
    save_order_family_dict(ofd_filepath, ofd)

print('FINIS')

# TESTING 1,2,3