In [14]:
from datetime import datetime

import json
import pandas as pd
import requests

In [15]:
BASE_URL = 'https://api.inaturalist.org/v1/observations'

OREGON_PLACE_ID = 10
WASHINGTON_PLACE_ID = 46

CAUDATA_TAXON_ID = 26718

In [16]:
headers = [
    'inaturalist_id', 'observed_date', 'observed_month', 'observed_hour', 'observed_week', 
    'observed_year', 'observed_day', 'species_guess', 'identifications_most_disagree', 'place_ids', 
    'location', 'endemic', 'native', 'introduced', 'threatened', 
    'name', 'rank', 'taxon_id', 'wikipedia_url', 'preferred_common_name'
]

In [17]:


def query_get_observations(place_id, taxon_id=CAUDATA_TAXON_ID):
    
    salamander_dict = {k: [] for k in headers}
    
    params = {
    'taxon_id': CAUDATA_TAXON_ID,
    'place_id': place_id,
    'captive': 'false',
    'per_page': 100
    }
    
    request = requests.get(BASE_URL, params=params)
    request_json = request.json()
    
    print(request.status_code)

    total_results = request_json['total_results']
    results_per_page = request_json['per_page']
    num_pages = int(total_results / results_per_page) + 1
    print(f'{num_pages=}')
    
    params['page'] = 0 # add a starting page to the params dict
    num_pages = 1 # remove this 
    for n in range(num_pages):

        params['page'] += 1
        print(params['page'])

        request = requests.get(BASE_URL, params=params)
        print(request.url)
        print(request.json().keys())
        request_json = request.json()['results']
        

        for r in request_json:
            salamander_dict['inaturalist_id'].append(r.get('id'))

            observed_on_details = r.get('observed_on_details', {})
            # there are times when the 'observed_on_details' key is present, but with a None value
            if observed_on_details is None: 
                observed_on_details = {}

            salamander_dict['observed_date'].append(observed_on_details.get('date'))
            salamander_dict['observed_month'].append(observed_on_details.get('month'))
            salamander_dict['observed_hour'].append(observed_on_details.get('hour'))
            salamander_dict['observed_week'].append(observed_on_details.get('week'))
            salamander_dict['observed_year'].append(observed_on_details.get('year'))
            salamander_dict['observed_day'].append(observed_on_details.get('day'))

            salamander_dict['species_guess'].append(r.get('species_guess'))
            salamander_dict['identifications_most_disagree'].append(r.get('identifications_most_disagree'))
            salamander_dict['place_ids'].append(r.get('place_ids'))
            salamander_dict['location'].append(r.get('location'))

            taxon = r.get('taxon', {})
            salamander_dict['endemic'].append(taxon.get('endemic'))
            salamander_dict['native'].append(taxon.get('native'))
            salamander_dict['introduced'].append(taxon.get('introduced'))
            salamander_dict['threatened'].append(taxon.get('threatened'))
            salamander_dict['name'].append(taxon.get('name'))
            salamander_dict['rank'].append(taxon.get('rank'))
            salamander_dict['taxon_id'].append(taxon.get('id'))
            salamander_dict['wikipedia_url'].append(taxon.get('wikipedia_url'))
            salamander_dict['preferred_common_name'].append(taxon.get('preferred_common_name'))

    return salamander_dict

# or_data = query_get_observations(place_id=10)    
wa_data = query_get_observations(place_id=46)

200
num_pages=64
1
https://api.inaturalist.org/v1/observations?taxon_id=26718&place_id=46&captive=false&per_page=100&page=1
dict_keys(['total_results', 'page', 'per_page', 'results'])


In [17]:
wa_df = pd.DataFrame(wa_data)
wa_df['observed_date'] = pd.to_datetime(wa_df['observed_date'])
wa_df.dtypes

inaturalist_id                            int64
observed_date                    datetime64[ns]
observed_month                          float64
observed_hour                           float64
observed_week                           float64
observed_year                           float64
observed_day                            float64
species_guess                            object
identifications_most_disagree              bool
place_ids                                object
location                                 object
endemic                                    bool
native                                     bool
introduced                                 bool
threatened                                 bool
name                                     object
rank                                     object
taxon_id                                  int64
wikipedia_url                            object
preferred_common_name                    object
dtype: object

# Response Notes:

* 'id' = # the iNaturalist ID
* 'observed_on_details' =  # a dict with the date, week, month, hour, year, and day
* 'location' = 'lat, lon',
* 'geojson' = # a dict with the coordinates and the type of observation, e.g. {'coordinates': [lng, lat], 'type': 'type'} 

* d1 - Must be observed on or after this date
* d2 - Must be observed on or before this date

# Documentation 
https://api.inaturalist.org/v1/docs/#/

## OBSERVATIONS
Given zero to many of following parameters, returns observations matching the search criteria. 
The large size of the observations index prevents us from supporting the page parameter when retrieving records from large result sets. 
If you need to retrieve large numbers of records, use the per_page and id_above or id_below parameters instead.
https://api.inaturalist.org/v1/docs/#!/Observations/get_observations

## TAXA
Given an ID, or an array of IDs in comma-delimited format, returns corresponding taxa. A maximum of 30 results will be returned
Returns an object with metadata and a results array of taxa
https://api.inaturalist.org/v1/taxa/26718

## PLACES
Given an ID, or an array of IDs in comma-delimited format, returns corresponding places. A maximum of 500 results will be returned
Returns an object with metadata and an results array of places
https://api.inaturalist.org/v1/places/6712,6883,9853,27591,57637



In [10]:
from db_connection import ENGINE
import pandas as pd


In [11]:
max_observed_date = '''
    SELECT 
         MAX(observed_date) as max_observed_date
    FROM washington_oregon_salamanders
    '''

In [12]:
try:
    df = pd.read_sql(
            max_observed_date,
            con=ENGINE,
            index_col=None
        )
    date_after = df['max_observed_date'][0].strftime('%Y-%m-%d')
    
except ProgrammingError as pe:
    # the table has not been created, so we're starting from "scratch"
    date_after = None

Exception Caught: (psycopg2.errors.UndefinedTable) relation "washington_oregon_salamanders" does not exist
LINE 4:     FROM washington_oregon_salamanders
                 ^

[SQL: 
    SELECT 
         MAX(observed_date) as max_observed_date
    FROM washington_oregon_salamanders
    ]
(Background on this error at: https://sqlalche.me/e/14/f405)


In [9]:
e

NameError: name 'e' is not defined

In [8]:
df

Unnamed: 0,max_observed_date
0,2022-12-28


In [17]:
df['max_observed_date'][0].strftime('%Y-%m-%d')

'2022-12-28'