# SCAN/Symbiota Insect Pin Label Generator

This Jupyter notebook generates insect pin labels for a set of specimen records downloaded from the University of Guam Insect Collection online catalog stored on the SCAN Symbiota portal.

As an initial test, we download records and generate labels for insects collected by Benita Laird-Hopkins during her seed predation study.

## Steps

1. Download the records for which we want to make labels as a Darwin Core Archive (DwCA).
2. Save the DwCA as **DwCA.zip** and extract the specimen data as **occurrences.csv**.
3. For each specimen record in **occurrences.csv**, generate a collection label, an identification label, and a barcode label.
4. Save all the labels in **labels.pdf**.

![](labels_screenshot.png)

## Notes

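1. There is a problem with the barcodes: a checksum character is added by default (by the Code 39 encoder of the `barcode` library that renders the images, not by FPDF, which only places the finished images on the page).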

In [1]:
from fpdf import FPDF
import os
import barcode
import requests
import zipfile
import io
import pandas as pd
import time

In [2]:
# SCAN web service that returns specimen records as a Darwin Core Archive.
# Reference: https://github.com/Symbiota/Symbiota/blob/master/webservices/dwc/dwcapubhandler.php
rootURL = 'http://scan-bugs.org/portal/webservices/dwc/dwcapubhandler.php'

# Query string restricting the download to records collected by Benita Laird-Hopkins.
collector_query = '?cond=recordedby:Benita%20Laird-Hopkins'
downloadURL = rootURL + collector_query

# To download the whole UOG Insect Collection database instead, we would use:
# downloadURL = rootURL+'?collid=180'

## FUNCTIONS

In [3]:
def download_DwCA(downloadURL):
    """
    Download a Darwin Core Archive (DwCA) and unpack its occurrence table.

    The archive is fetched from ``downloadURL``, saved in the working
    directory as 'DwCA.zip', and its 'occurrences.csv' member is extracted
    into the working directory.

    Raises:
        requests.HTTPError: if the server answers with an error status, so
            that an error page is never saved or parsed as a zip file.
        zipfile.BadZipFile: if the response body is not a valid zip archive.
        KeyError: if the archive has no 'occurrences.csv' member.
    """
    response = requests.get(downloadURL)
    # Fail here, with a clear HTTP error, rather than later with a confusing
    # "not a zip file" error caused by an HTML error page.
    response.raise_for_status()

    # Keep a copy of the DwCA zip file on disk; not strictly necessary, but
    # it might be useful if something goes wrong further on.
    with open('DwCA.zip', 'wb') as f:
        f.write(response.content)

    # Extract occurrences.csv; the context manager closes the archive afterwards.
    with zipfile.ZipFile(io.BytesIO(response.content)) as archive:
        archive.extract('occurrences.csv')
    return

def make_dataframe():
    """
    Read 'occurrences.csv' from the working directory into a pandas dataframe.

    Every listed column is read as text (dtype 'object') except 'id' (int64)
    and the decimal coordinates. The coordinates are read as float64: float32
    cannot hold six decimal places of a longitude (144.123456 would become
    144.123459), and format_coordinates() prints six decimals on the labels.

    Returns:
        pandas.DataFrame with one row per specimen record.
    """
    column_types = {
        'accessRights': 'object',
        'associatedTaxa': 'object',
        'basisOfRecord': 'object',
        'catalogNumber': 'object',
        'class': 'object',
        'collectionCode': 'object',
        'collectionID': 'object',
        'coordinateUncertaintyInMeters': 'object',
        'country': 'object',
        'county': 'object',
        'dataGeneralizations': 'object',
        'dateIdentified': 'object',
        'day': 'object',
        'decimalLatitude': 'float64',
        'decimalLongitude': 'float64',
        'disposition': 'object',
        'dynamicProperties': 'object',
        'endDayOfYear': 'object',
        'establishmentMeans': 'object',
        'eventDate': 'object',
        'family': 'object',
        'fieldNumber': 'object',
        'genus': 'object',
        'geodeticDatum': 'object',
        'georeferenceProtocol': 'object',
        'georeferenceRemarks': 'object',
        'georeferenceSources': 'object',
        'georeferenceVerificationStatus': 'object',
        'georeferencedBy': 'object',
        'habitat': 'object',
        'id': 'int64',
        'identificationQualifier': 'object',
        'identificationReferences': 'object',
        'identificationRemarks': 'object',
        'identifiedBy': 'object',
        'individualCount': 'object',
        'informationWithheld': 'object',
        'infraspecificEpithet': 'object',
        'institutionCode': 'object',
        'kingdom': 'object',
        'language': 'object',
        'lifeStage': 'object',
        'locality': 'object',
        'locationRemarks': 'object',
        'maximumDepthInMeters': 'object',
        'maximumElevationInMeters': 'object',
        'minimumDepthInMeters': 'object',
        'minimumElevationInMeters': 'object',
        'modified': 'object',
        'month': 'object',
        'municipality': 'object',
        'occurrenceID': 'object',
        'occurrenceRemarks': 'object',
        'order': 'object',
        'otherCatalogNumbers': 'object',
        'ownerInstitutionCode': 'object',
        'phylum': 'object',
        'preparations': 'object',
        'recordEnteredBy': 'object',
        'recordId': 'object',
        'recordNumber': 'object',
        'recordedBy': 'object',
        'references': 'object',
        'reproductiveCondition': 'object',
        'rights': 'object',
        'rightsHolder': 'object',
        'samplingEffort': 'object',
        'samplingProtocol': 'object',
        'scientificName': 'object',
        'scientificNameAuthorship': 'object',
        'sex': 'object',
        'specificEpithet': 'object',
        'startDayOfYear': 'object',
        'stateProvince': 'object',
        'taxonID': 'object',
        'taxonRank': 'object',
        'taxonRemarks': 'object',
        'typeStatus': 'object',
        'verbatimCoordinates': 'object',
        'verbatimDepth': 'object',
        'verbatimElevation': 'object',
        'verbatimEventDate': 'object',
        'year': 'object',
    }

    # Optional read_csv arguments that were useful while experimenting:
    #   low_memory=False, nrows=100,
    #   na_filter=False  (NaNs will be represented as '')
    df = pd.read_csv(
        'occurrences.csv',
        encoding='utf-8',
        dtype=column_types,
    )
    return df

def format_date(date_str):
    """
    Format a date string in the form '2018-05-01' as '01MAY2018'.

    Returns '' when ``date_str`` cannot be formatted: it is not a string
    (for example None, or a NaN standing in for a missing value), it has
    fewer than three '-'-separated parts, or its month part is not one of
    '01'..'12'.
    """
    month_string = {'01': 'JAN', '02': 'FEB', '03': 'MAR', '04': 'APR', '05': 'MAY', '06': 'JUN',
                    '07': 'JUL', '08': 'AUG', '09': 'SEP', '10': 'OCT', '11': 'NOV', '12': 'DEC'}
    try:
        dsplit = date_str.split('-')
        return '{}{}{}'.format(dsplit[2], month_string[dsplit[1]], dsplit[0])
    except (AttributeError, IndexError, KeyError, TypeError):
        # Only the expected "not a usable date" failures are swallowed; a bare
        # except would also hide KeyboardInterrupt and genuine programming errors.
        return ''

# format_date('2018-05-01')   

def make_collection_label(row):
    """
    Build the lines of the collection label for one specimen record.

    Returns a list with four entries, in print order: the catalog number,
    the formatted coordinates, 'ex <associatedTaxa>', and
    'coll. <recordedBy> <formatted eventDate>'.
    """
    return [
        row.catalogNumber,
        format_coordinates(row.decimalLatitude, row.decimalLongitude),
        'ex {}'.format(row.associatedTaxa),
        'coll. {} {}'.format(row.recordedBy, format_date(row.eventDate)),
    ]

def make_id_label(row):
    """
    Build the lines of the identification label for one specimen record.

    Returns a list with four entries, in print order: the catalog number,
    'ORDER: FAMILY' in upper case, the scientific name, and
    'det. <identifiedBy> <formatted dateIdentified>'.
    print_label() italicizes the third entry (index 2) of an 'id' label, so the
    scientific name has to stay in that position.
    """
    return [
        row.catalogNumber,
        '{}: {}'.format(row.order.upper(), row.family.upper()),
        '{}'.format(row.scientificName),
        'det. {} {}'.format(row.identifiedBy, format_date(row.dateIdentified)),
    ]

def make_barcode_label(row, add_checksum=True):
    """
    Render the record's catalog number as a Code 39 barcode image file.

    Args:
        row: specimen record; only ``row.catalogNumber`` is read. It is used
            both as the encoded value and as the base name of the image file.
        add_checksum: forwarded to the Code 39 encoder. The default (True)
            keeps the encoder's default behaviour of adding a checksum
            character to the encoded value; pass False to encode the catalog
            number exactly as written.

    Returns:
        The file name returned by the barcode object's ``save()``.
    """
    # Called without a code, barcode.get() returns the barcode class itself,
    # which lets us pass encoder options to its constructor.
    code39 = barcode.get('code39')
    bc = code39(row.catalogNumber,
                writer=barcode.writer.ImageWriter(),
                add_checksum=add_checksum)
    filename = bc.save(row.catalogNumber)

    # Do not hand the file name to the caller until the image exists on disk.
    while not os.path.exists(filename):
        print('waiting')
        time.sleep(0.1)
    return filename

def print_label(label, label_type):
    """
    Draw one label in the next free cell of the label grid.

    label_type can be 'collection', 'id', or 'barcode'. For 'barcode',
    ``label`` is handed to pdf.image() as the image to draw; otherwise
    ``label`` is a list of text lines, and the third line (index 2) of an
    'id' label is set in italics.

    Uses the module-level ``pdf`` object and layout settings, and updates the
    global column, row, and label counters ``c``, ``r`` and ``il``.
    """
    global c, r, il

    # Start a new page, with a header line, every labels_per_page labels.
    if il % labels_per_page == 0:
        pdf.add_page()
        pdf.set_font_size(10)
        pdf.text(lm, tm/2, 'Moore Beta Insect Pin Label Generator v2018.04.29')
        pdf.set_font_size(label_font_size)
        c, r = 0, 0

    # Top-left corner of this label's cell, and its outline.
    x = c*w + lm
    y = r*h + tm
    pdf.rect(x, y, w, h)

    if label_type == 'barcode':
        pdf.image(label, x+lpad, y+tpad, w-2*lpad, h-2*tpad)
    else:
        text_x = x + lpad
        first_line_y = y + tpad
        for i, line in enumerate(label):
            is_scientific_name = label_type == 'id' and i == 2
            if is_scientific_name:
                pdf.set_font('Helvetica', 'I')   # Italicize scientific name
            pdf.text(text_x, first_line_y + i*spc, line)
            if is_scientific_name:
                pdf.set_font('Helvetica', '')

    # Advance to the next cell, wrapping to the start of the next row.
    il += 1
    c += 1
    if c == cols:
        c = 0
        r += 1
    return

def format_coordinates(lat, lon):
    """
    Format decimal-degree coordinates, e.g. '14.123457°N 144.123457°E'.

    A negative latitude is printed as its negation with the suffix '°S', and
    a negative longitude as its negation with the suffix '°W'; other values
    get '°N' and '°E'. Both numbers are printed with six decimal places.
    """
    lat_value, lat_suffix = (-lat, '°S') if lat < 0 else (lat, '°N')
    lon_value, lon_suffix = (-lon, '°W') if lon < 0 else (lon, '°E')
    return '{:.6f}{} {:.6f}{}'.format(lat_value, lat_suffix, lon_value, lon_suffix)

# format_coordinates(14.123456789, 144.123456789)

## MAIN

In [4]:
# Download specimen records from the UOG Insect Collection catalog as a Darwin Core Archive
# (this writes DwCA.zip and occurrences.csv to the working directory),
# then load occurrences.csv into a pandas data frame
download_DwCA(downloadURL)
df = make_dataframe()

In [5]:
# Label layout parameters (all lengths are in inches)
tm = 0.5                  # top margin
lm = 0.5                  # left margin
rows, cols = 20, 6        # rows and columns of labels on each page
labels_per_page = rows * cols
w, h = 1.0, 0.5           # width and height of a label
tpad = 0.075              # space above text within a label
lpad = 0.05               # space to left of text within a label
spc = 0.075               # line spacing within a label
label_font_size = 4

# Create a PDF document
pdf = FPDF(orientation='P', unit='in', format='letter')
pdf.set_font('Helvetica', '', label_font_size)

# Column, row, and label number counters;
# these are accessed as globals in print_label()
c = r = il = 0

# Each record gets three labels, printed in this order: collection, id, barcode
label_makers = (
    (make_collection_label, 'collection'),
    (make_id_label, 'id'),
    (make_barcode_label, 'barcode'),
)
for record_index, row in df.iterrows():
    for make_label, label_type in label_makers:
        label = make_label(row)
        print_label(label, label_type)

# Save the PDF in labels.pdf and view this file in Evince
pdf.output('labels.pdf', 'F')
viewer_status = os.system('evince labels.pdf')