# Insect Pin Labels from iNaturalist Project Data
 
This IPython notebook provides code for generating PDF pages of small labels for pinned insect specimens from data stored in an iNaturalist project.

It was originally developed for my University of Guam entomology students who use iNaturalist to catalog their insect collections.

## Step 1. Download iNaturalist Project Data as a CSV

All observations associated with an iNaturalist project are downloaded as a CSV file. 

This can be done manually by visiting an iNaturalist project page and pressing the CSV link. On the form that appears, select only the fields marked `True` in the **selected_checkboxes** dict below and deselect the rest. Place the downloaded **\*.csv.zip file** in the same directory as this IPython notebook, then skip to **Step 2**.

Alternatively, you can run the following code, which uses **selenium** and the **Firefox browser** to automate the download.

In [1]:
from selenium import webdriver
from selenium.webdriver.support.ui import Select
import getpass
from time import sleep
import time
import os
import glob

In [2]:
# Get iNat user name, password, and project name.

def default_input( message, defaultVal ):
    """Prompt on the console and return the reply, falling back to a default.

    Args:
        message: Prompt text shown to the user.
        defaultVal: Value returned when the user just presses Enter. When it
            is falsy (e.g. '' or None) no default is offered and the reply is
            returned as typed, even if it is empty.

    Returns:
        The text typed by the user, or ``defaultVal`` when the reply is empty
        and a default was supplied.
    """
    # raw_input() only exists on Python 2; Python 3 provides the same
    # behaviour as input(). Resolve the name at call time so the notebook
    # runs under either interpreter.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    if defaultVal:
        return read_line( "%s [%s]:" % (message,defaultVal) ) or defaultVal
    else:
        return read_line( "%s " % (message) )
    
# Defaults offered at the prompts; pressing Enter accepts the value shown
# in square brackets.
default_inat_user_name = 'aubreymoore'
default_inat_project_name = 'ag-bi-345-f15'

# Ask, in this order, for the user name, the password and the project name.
# The password is read with getpass rather than default_input, so it has no
# default value. inat_project_name is later used to build the project URL
# and the name of the generated PDF file.
inat_user_name = default_input('iNaturalist user name: ', default_inat_user_name)
inat_password = getpass.getpass('iNaturalist password: ')
inat_project_name = default_input('iNaturalist project name: ', default_inat_project_name)

iNaturalist user name:  [aubreymoore]:
iNaturalist password: ········
iNaturalist project name:  [ag-bi-345-f15]:


In [3]:
def select_fields_to_download():
    """Tick or untick the column check boxes on the iNat CSV export form.

    Each key of ``selected_checkboxes`` is a column name on the export form;
    the value says whether that column should be included (True) or excluded
    (False) from the downloaded CSV. A check box is clicked only when its
    current state differs from the wanted one.

    Relies on the module-level Selenium ``driver`` already showing the
    export form; nothing is returned.
    """
    selected_checkboxes = {
        'id': True,
        'observed_on_string': False,
        'observed_on': True,
        'time_observed_at': False,
        'time_zone': False,
        'out_of_range': False,
        'user_login': True,
        'created_at': False,
        'updated_at': False,
        'quality_grade': False,
        'license': False,
        'url': False,
        'image_url': False,
        'tag_list': False,
        'description': True,
        'id_please': False,
        'num_identification_agreements': False,
        'num_identification_disagreements': False,
        'captive_cultivated': False,
        'oauth_application_id': False,
        'place_guess': True,
        'latitude': True,
        'longitude': True,
        'positional_accuracy': True,
        'private_latitude': False,
        'private_longitude': False,
        'private_positional_accuracy': False,
        'geoprivacy': False,
        'positioning_method': False,
        'positioning_device': False,
        'place_town_name': False,
        'place_county_name': False,
        'place_state_name': False,
        'place_country_name': False,
        'species_guess': False,
        'scientific_name': True,
        'common_name': True,
        'iconic_taxon_name': False,
        'taxon_id': True,
        'taxon_kingdom_name': False,
        'taxon_phylum_name': False,
        'taxon_subphylum_name': False,
        'taxon_superclass_name': False,
        'taxon_class_name': False,
        'taxon_subclass_name': False,
        'taxon_superorder_name': False,
        'taxon_order_name': True,
        'taxon_suborder_name': False,
        'taxon_superfamily_name': False,
        'taxon_family_name': True,
        'taxon_subfamily_name': False,
        'taxon_supertribe_name': False,
        'taxon_tribe_name': False,
        'taxon_subtribe_name': False,
        'taxon_genus_name': False,
        'taxon_genushybrid_name': False,
        'taxon_species_name': False,
        'taxon_hybrid_name': False,
        'taxon_subspecies_name': False,
        'taxon_variety_name': False,
        'taxon_form_name': False,
    }
    # dict.items() exists on both Python 2 and Python 3, whereas
    # dict.iteritems() was removed in Python 3.
    for key, value in selected_checkboxes.items():
        elem = driver.find_element_by_id("observations_export_flow_task_options_columns_" + key)
        # Clicking toggles the box, so only click when the state is wrong.
        if value != elem.is_selected():
            elem.click()

In [4]:
# Request CSV export for an iNat project

# Set the Firefox profile to automatically download zip files into the
# current working directory.
fp = webdriver.FirefoxProfile()
fp.set_preference("browser.download.folderList",2)
fp.set_preference("browser.download.manager.showWhenStarting",False)
fp.set_preference("browser.download.dir", os.getcwd())
fp.set_preference("browser.helperApps.neverAsk.saveToDisk", "application/zip")

# Open iNat in the browser and log in.
# HTTPS is used so the password typed into the form is not sent in clear text.
driver = webdriver.Firefox(firefox_profile=fp)
driver.get('https://www.inaturalist.org/login')
driver.maximize_window()
driver.find_element_by_name("user[email]").send_keys(inat_user_name)
driver.find_element_by_name("user[password]").send_keys(inat_password)
driver.find_element_by_name("commit").click()
# Fixed pause, presumably to let the post-login page finish loading.
sleep(10)

# Open the project page and click on the CSV export link.
# Same scheme as the login page so the logged-in session is reused.
driver.get('https://www.inaturalist.org/projects/{}'.format(inat_project_name))
driver.find_element_by_class_name("csvlink").click()
sleep(10)

# Select fields to be included in the CSV export and submit the export request
select_fields_to_download()
driver.find_element_by_name("commit").click()
# Announce the wait before it starts, not after it has finished.
print('Sleeping for 60 seconds.')
sleep(60)

# Check for successful export: click the first link in the 'success' table,
# then pause once more (presumably so the download can complete).
driver.find_element_by_xpath("id('success')/table/tbody/tr/td[1]/a").click()
sleep(10)

Sleeping for 60 seconds.


In [5]:
# Do some house cleaning. Delete all *.csv.zip files except for the 2 most recent ones.

# List the exports oldest-first (ordered by modification time) and show them.
files = sorted(glob.glob('*.csv.zip'), key=os.path.getmtime)
print(files)

# Everything except the last two entries is an older export: delete it.
for f in files[:-2]:
    os.remove(f)

# Scan the directory again and show what is left.
files = sorted(glob.glob('*.csv.zip'), key=os.path.getmtime)
print(files)

['observations-8066.csv.zip', 'observations-8067.csv.zip', 'observations-8068.csv.zip']
['observations-8067.csv.zip', 'observations-8068.csv.zip']


## Step 2. Generate PDF sheet(s) containing insect pin labels
If all goes right, a preview of the PDF file containing insect pin labels will be 
displayed at the end of this process.

In [6]:
# Get the filename of the most recent *.csv.zip file in the current working directory.

# Pick the *.csv.zip file with the newest modification time.
try:
    inat_project_data = max(glob.iglob('*.csv.zip'), key=os.path.getmtime)
except ValueError:
    # max() raises ValueError when there is nothing to compare, i.e. no
    # export is present. Re-raise with a message that says what to do.
    raise ValueError('No *.csv.zip file found in {}. '
                     'Complete Step 1 before running this cell.'.format(os.getcwd()))
print('Most recently downloaded iNat project data:')
print('    ' + inat_project_data)
print('    Last modified: ' + time.ctime(os.path.getmtime(inat_project_data)))

Most recently downloaded iNat project data:
    observations-8068.csv.zip
    Last modified: Sat Jan  2 10:06:28 2016


In [7]:
# Uncompress the CSV zip file and import into a pandas data frame

import pandas as pd
import zipfile

# The CSV inside the archive is looked up under the zip file's own name minus
# its trailing '.zip' (observations-8068.csv.zip -> observations-8068.csv).
# Only the suffix is removed, so a '.zip' elsewhere in the name is kept.
csv_name = inat_project_data
if csv_name.endswith('.zip'):
    csv_name = csv_name[:-len('.zip')]

# Context managers close both the archive and the member file handle once
# the data has been read.
with zipfile.ZipFile(inat_project_data) as zf:
    with zf.open(csv_name) as data:
        # Column 1 (0-based) is parsed as a date; in the export shown in
        # this notebook that column is observed_on.
        df = pd.read_csv(data, parse_dates=[1])

# Replace missing values with empty strings.
df.fillna('', inplace=True)
df.tail(3)

Unnamed: 0,id,observed_on,user_id,user_login,quality_grade,description,place_guess,latitude,longitude,positional_accuracy,scientific_name,common_name,taxon_id,taxon_order_name,taxon_family_name
352,2493373,2015-12-21,126404,juliaschwierking,needs_id,,"Naval Station Apra, Asan, GU, US",13.461106,144.730564,5,Hemiptera,"True Bugs, Cicadas, Hoppers, Aphids and Allies",47744,Hemiptera,
353,2495543,2015-12-22,7547,aubreymoore,needs_id,brought in by pco,Guam,13.444304,144.793731,29336,Chloropidae,grass flies,127593,Diptera,Chloropidae
354,2516176,2015-12-30,7547,aubreymoore,needs_id,,"Yigo, Guam",13.532259,144.87161,1,Psilogramma menephron,Privet Hawk Moth,121854,Lepidoptera,Sphingidae


In [8]:
# Generate insect pin labels

import labels
from reportlab.graphics import shapes
import datetime

# Sheet size: 8.5 x 11 multiplied by 25.4, i.e. inches converted to
# millimetres.
sheet_width = 8.5 * 25.4
sheet_height = 11 * 25.4
# Grid of labels on each sheet: 7 x 20 = 140 labels per page.
columns = 7
rows = 20
# Dimensions of a single label, passed positionally to labels.Specification
# right after the row count. NOTE(review): presumably label width and label
# height in the same unit as the sheet size -- confirm against the labels
# library documentation.
label_width = 30
label_size = 12
corner_radius = 1

# Layout description handed to labels.Sheet when the sheet is created.
specs = labels.Specification(sheet_width, sheet_height, columns, rows, label_width, label_size, 
                             corner_radius=corner_radius)

def filter_non_printable(str):
    """Return ``str`` with control characters removed, keeping tabs.

    A character is kept when its code point is above 31 or when it is the
    tab character (code point 9); every other character is dropped.
    """
    kept = []
    for ch in str:
        code_point = ord(ch)
        if code_point == 9 or code_point > 31:
            kept.append(ch)
    return ''.join(kept)

def label_text(row):
    """Build the list of text lines printed on one pin label.

    ``row`` is one observation; the attributes read from it are id,
    place_guess, latitude, longitude, positional_accuracy, description,
    observed_on, user_login, taxon_order_name, taxon_family_name and
    scientific_name.

    Returns a list of nine strings: five locality/collector lines, an empty
    separator line, and three determination lines.
    """
    lines = [
        'iNaturalist {}'.format(row.id),
        row.place_guess,
        'lat = {} lon = {} acc = {}m'.format(
            row.latitude, row.longitude, row.positional_accuracy),
        # Control characters are stripped from the free-text description.
        filter_non_printable(row.description),
    ]

    # Date formatted like '21 DEC 2015'; used on both the collector line and
    # the determination line.
    date_text = datetime.date.strftime(row.observed_on, "%d %b %Y").upper()

    lines.extend([
        'Coll. {} {}'.format(row.user_login, date_text),
        '',
        '{}: {}'.format(row.taxon_order_name, row.taxon_family_name),
        row.scientific_name,
        'Det. {} {}'.format(row.user_login, date_text),
    ])
    return lines

def draw_insect_label(label, width, height, obj):
    """Draw the text lines in ``obj`` onto ``label``, top to bottom.

    ``obj`` is an iterable of strings. Each string is added as a Helvetica
    3.5-point ``shapes.String`` at x = 2, starting at y = 30 and moving down
    by the font size for every following line. ``width`` and ``height`` are
    accepted but not used.
    """
    text_size = 3.5
    left_edge = 2
    baseline = 30
    for line in obj:
        text_shape = shapes.String(left_edge, baseline, line,
                                   fontName="Helvetica", fontSize=text_size)
        label.add(text_shape)
        # The next line sits one font size lower.
        baseline -= text_size

# Create the sheet from the layout in specs, passing draw_insect_label as
# the function that draws each label.
sheet = labels.Sheet(specs, draw_insect_label, border=True)
# Add one label per data frame row; label_text(row) supplies the list of
# text lines for that label.
for index, row in df.iterrows():
    sheet.add_label(label_text(row))
# The PDF is named after the iNaturalist project and written to the current
# working directory.
pdf_file_name = 'insect_pin_labels_for_{}.pdf'.format(inat_project_name)
sheet.save(pdf_file_name)
# Report how many labels and pages were produced.
print("{0:d} label(s) output on {1:d} page(s).".format(sheet.label_count, sheet.page_count))

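# Preview the PDF file in Evince. The leading '!' is IPython's shell escape, so the next line only works inside IPython/Jupyter. Replace 'evince' with your favorite PDF viewer.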
!evince {pdf_file_name}

355 label(s) output on 3 page(s).
