# Read files summarising field work and update database
These Excel workbooks were imported on February 2022.

The scripts documented here have been created to:

- Read data from spreadsheets with field-work data
- Create records for data import into the database
- Insert or update records in the database


## Set-up
Load libraries 

In [1]:
import openpyxl
from pathlib import Path
import os
from datetime import datetime
from configparser import ConfigParser
import psycopg2
from psycopg2.extras import DictCursor
from psycopg2.extensions import AsIs
#import postgis

Define path to workbooks

In [2]:
repodir = Path("../../") 
inputdir = repodir / "data" / "field-form"

 ### DB connection parameters and helper functions
 
 Database credentials are stored in a database.ini file

In [3]:
filename = repodir / 'secrets' / 'database.ini'
section = 'aws-lght-sl'

# create a parser
parser = ConfigParser()
# read config file
parser.read(filename)

# get section, default to postgresql
db = {}
if parser.has_section(section):
    params = parser.items(section)
    for param in params:
        db[param[0]] = param[1]
else:
    raise Exception('Section {0} not found in the {1} file'.format(section, filename))

params = db

Get updated vocabularies from database

In [4]:
# connect to the PostgreSQL server
print('Connecting to the PostgreSQL database...')
conn = psycopg2.connect(**params)
cur = conn.cursor()
#valid_organ=('Epicormic', 'Apical', 'Lignotuber', 'Basal','Tuber','Tussock','Short rhizome', 'Long rhizome or root sucker', 'Stolon', 'None', 'Other')
#valid_seedbank=('Soil-persistent', 'Transient', 'Canopy','Non-canopy','Other')

cur.execute("SELECT enumlabel FROM pg_enum e LEFT JOIN pg_type t ON e.enumtypid=t.oid where typname='resprout_organ_vocabulary';")
valid_organ_list = cur.fetchall()
organ_vocab = [item for t in valid_organ_list for item in t]

cur.execute("SELECT enumlabel FROM pg_enum e LEFT JOIN pg_type t ON e.enumtypid=t.oid where typname='seedbank_vocabulary';")
valid_seedbank_list = cur.fetchall()
seedbank_vocab = [item for t in valid_seedbank_list for item in t]

cur.close()
        
if conn is not None:
    conn.close()
    print('Database connection closed.')

Connecting to the PostgreSQL database...
Database connection closed.


Define a function to batch process insert or update queries:

In [5]:
def batch_upsert(params,table,records,keycol,idx, execute=False,useconn=None):
    if useconn is None:
        # connect to the PostgreSQL server
        print('Connecting to the PostgreSQL database...')
        conn = psycopg2.connect(**params)
    else:
        conn = useconn
    cur = conn.cursor()
    #postgis.register(cur)
    updated_rows=0

    for record in records:
        if len(record.keys())>len(keycol):
            if 'geom' in record.keys():
                the_geom=record['geom']
                record['geom']='GEOMSTR'
            if idx is not None:
                qrystr = "INSERT INTO %s (%s) values %s ON CONFLICT ON CONSTRAINT %s DO UPDATE SET %s"
                upd=list()
                for k in record.keys():
                    if k not in keycol:
                        upd.append("{col}=EXCLUDED.{col}".format(col=k))
                qry = cur.mogrify(qrystr, (AsIs(table),
                                AsIs(','.join(record.keys())),
                                tuple(record.values()),
                                AsIs(idx),
                                AsIs(','.join(upd))
                               ))
            else:
                qrystr = "INSERT INTO %s (%s) values %s ON CONFLICT DO NOTHING"
                qry = cur.mogrify(qrystr, (AsIs(table),
                                AsIs(','.join(record.keys())),
                                tuple(record.values())
                               ))

            if 'geom' in record.keys():
                qry=qry.decode('utf-8')
                qry=qry.replace("'GEOMSTR'",the_geom)
                record['geom']=the_geom

            if execute:
                cur.execute(qry)
                if cur.rowcount > 0:
                    updated_rows = updated_rows + cur.rowcount
            else:
                print(qry)
            
    conn.commit()        
    cur.close()
    print("%s rows updated" % (updated_rows))
        
    if useconn is None and conn is not None:
        conn.close()
        print('Database connection closed.')

        

Just a test with random data, use `execute=False` to print the query:

In [6]:
record={'site_label':'test','geom':"ST_GeomFromText('POINT(1 2)', 4326)"}
batch_upsert(params,"form.field_site",(record,),keycol=('site_label',), idx='field_site_pkey1',execute=False)

Connecting to the PostgreSQL database...
INSERT INTO form.field_site (site_label,geom) values ('test', ST_GeomFromText('POINT(1 2)', 4326)) ON CONFLICT ON CONSTRAINT field_site_pkey1 DO UPDATE SET geom=EXCLUDED.geom
0 rows updated
Database connection closed.


In [7]:
batch_upsert(params,"form.field_site",(record,),keycol=('site_label',), idx=None,execute=False)

Connecting to the PostgreSQL database...
INSERT INTO form.field_site (site_label,geom) values ('test', ST_GeomFromText('POINT(1 2)', 4326)) ON CONFLICT DO NOTHING
0 rows updated
Database connection closed.


## Read workbooks
Each spreadsheet has a slightly different structure, so these scripts have to be adapted for each case.

### List of workbooks/spreadsheets in directory

In [8]:
os.listdir(inputdir)

['~$Fire response quadrat survey Newnes Nov2020_DK_revised IDs+AllNovData.xlsm',
 'SthnNSWRF_data_bionet2.xlsx',
 '~$UNSW_VegFireResponse_DataEntry_Yatteyattah all +DK +Milton.xlsx',
 'UNSWFireVegResponse_UplandBasalt_AlexThomsen+DK.xlsx',
 '~$UNSW_VegFireResponse_KNP AlpAsh.xlsx',
 'UNSW_VegFireResponse_RMK_reformat_Sep2021a.xlsx',
 'UNSW_VegFireResponse_DataEntry_Yatteyattah all +DK +Milton.xlsx',
 '~$UNSW_VegFireResponse_RMK_reformat_Sep2021a.xlsx',
 'UNSW_VegFireResponse_KNP AlpAsh.xlsx',
 'UNSW_VegFireResponse_AlpineBogs_reformat_Sep2021.xlsx',
 'RobertsonRF_data_bionet2.xlsx',
 'Fire response quadrat survey Newnes Nov2020_DK_revised IDs+AllNovData.xlsm']

In [9]:
valid_files = ['SthnNSWRF_data_bionet2.xlsx',
               'UNSWFireVegResponse_UplandBasalt_AlexThomsen+DK.xlsx',
               'UNSW_VegFireResponse_RMK_reformat_Sep2021a.xlsx',
               'UNSW_VegFireResponse_DataEntry_Yatteyattah all +DK +Milton.xlsx',
               'UNSW_VegFireResponse_KNP AlpAsh.xlsx',
               'UNSW_VegFireResponse_AlpineBogs_reformat_Sep2021.xlsx',
               'RobertsonRF_data_bionet2.xlsx',
               'Fire response quadrat survey Newnes Nov2020_DK_revised IDs+AllNovData.xlsm']

Here we create an index of worksheets and column headers for each file

In [19]:
wbindex=dict()
for workbook_name in valid_files:
    inputfile=inputdir / workbook_name
    # using data_only=True to get the calculated cell values
    wb = openpyxl.load_workbook(inputfile,data_only=True)
    wbindex[workbook_name]=dict()
    for ws in wb.worksheets:
        wbindex[workbook_name][ws._WorkbookChild__title]=[list(),list()]
        for k in range(1,ws.max_column):
            wbindex[workbook_name][ws._WorkbookChild__title][0].append(ws.cell(row=1,column=k).value)
            wbindex[workbook_name][ws._WorkbookChild__title][1].append(ws.cell(row=2,column=k).value)
        

### Functions to read records and upload to database

#### Functions to read records in a workbook

We need a wrapping function to apply a lower level function (`create_record_function`) to all rows in a `worksheet` of the selected `workbook` using a dictionary `col_dictionary`, we add a `**kwargs` to pass additional arguments to the lower level function:

In [22]:
def import_records_from_workbook(filepath, workbook, worksheet, col_dictionary, create_record_function, **kwargs):
    wb = openpyxl.load_workbook(filepath / workbook, data_only=True)
    ws=wb[worksheet]
    row_count = ws.max_row
    records=list()
    for k in range(2,row_count):
        item=ws[k]
        record=create_record_function(item,col_dictionary,**kwargs)
        if record is not None:
            if type(record)==list:
                records.extend(record)
            elif type(record)==dict:
                records.append(record)
    return records

#### Create field sample records

This is a lower level function that will create a field sample record from an `item` (a row in the spreadsheet), using the dictionary or "switch" in `sw`:

In [23]:
def create_field_sample_record(item,sw):
    visit_id=item[sw['visit_id']].value
    visit_date = item[sw['date']].value 
    if visit_id is not None and visit_id not in ('Site Number'):
        record={'visit_id': visit_id, 'replicate_nr': item[sw['replicate_nr']].value, 'sample_nr': item[sw['sample_nr']].value}
        if isinstance(visit_date,datetime):
            record['visit_date'] = visit_date.date()
        return(record)

#### Create quadrat sample records

This is a lower level function that will create a quadrat sample record from an `item` (a row in the spreadsheet), using the dictionary or "switch" in `sw`. It uses a lookup table to fill information about the visit, and the vocabularies for seedbank and regenerative organ to translate raw values of these variables:

In [24]:
def create_quadrat_sample_record(item,sw,lookup,valid_seedbank,valid_organ):
    species=item[sw['species']].value
    spcode=item[sw['spcode']].value
    visit_date = item[sw['date']].value 
    visit_id =  item[sw['visit_id']].value
    if species is not None:
        record={'visit_id': visit_id, 'sample_nr': item[sw['sample_nr']].value,
                'species': species}
        comms=list()
        if 'workbook' in sw.keys():
            comms.append("Imported from workbook %s using python script" % sw['workbook'])
        if 'worksheet' in sw.keys():
            comms.append("Imported from spreadsheet %s" % sw['worksheet'])
    
        if isinstance(visit_date,datetime):
            record['visit_date'] = visit_date.date()
        else:
            if 'replicate_nr' in sw.keys():
                replicate_nr = item[sw['replicate_nr']]
                p=filter(lambda n: n['visit_id'] == visit_id and  n['replicate_nr'] == replicate_nr, lookup)
                found=list(p)
                if len(found)==1 and 'visit_date' in found[0].keys():
                    visit_date=found[0]['visit_date']
                    if isinstance(visit_date,datetime):
                        record['visit_date'] = visit_date.date()
                        comms.append("visit date not provided, matched by replicate nr %s" % replicate_nr)
                        
        if (isinstance(spcode, str) and spcode.isnumeric()) or isinstance(spcode,int):
            record['species_code']=spcode
         
        for k in ('species_notes', 'resprout_organ', 'seedbank', 'adults_unburnt','resprouts_live', 'resprouts_died', 'resprouts_kill', 'resprouts_reproductive',
                  'recruits_live', 'recruits_reproductive', 'recruits_died','notes'):
            if k in sw.keys():
                vals=item[sw[k]].value
                if vals is not None and vals not in ('na','NA'):
                    if k == 'resprout_organ':
                        if vals in valid_organ:
                            record[k]=vals
                        elif vals.capitalize() in valid_organ:
                            record[k]=vals.capitalize()
                        else:
                            comms.append("resprout organ written as %s" % vals)
                    elif k == 'seedbank':
                        if vals in valid_seedbank:
                            record[k]=vals
                        elif vals.capitalize() in valid_seedbank:
                            record[k]=vals.capitalize()
                        else:
                            comms.append("seedbank written as %s" % vals)
                    elif k == 'notes':
                        comms.append(vals)
                    elif k in ('adults_unburnt', 'resprouts_live', 'resprouts_died', 'resprouts_kill', 'resprouts_reproductive',
                  'recruits_live', 'recruits_reproductive', 'recruits_died'):
                        if isinstance(vals,int):
                            record[k]=vals   
                        else:
                            comms.append("%s written as %s" % (k,vals))
                    else:
                        record[k]=vals        
        
        if len(comms)>0:
            record["comments"]=comms 
        
        return(record)

#### Validate and update site/quadrat records to database

This function filters a list of `records` to find unique records and then validate them against the information in table `field_visit` (visit_id, visit_date and replicate_nr). Any valid but missing records are inserted in table `field_visit` and the samples are inserted in table `field_sample`.


In [25]:
def validate_and_update_site_records(records, useconn=None):
    if useconn is None:
        # connect to the PostgreSQL server
        print('Connecting to the PostgreSQL database...')
        conn = psycopg2.connect(**params)
    else:
        conn = useconn
    conn = psycopg2.connect(**params)

    cur = conn.cursor(cursor_factory=DictCursor)
    unique_records = list()
    sites = list()
    for record in records:
        if record not in unique_records:
            unique_records.append(record)
            if record['visit_id'] not in sites:
                sites.append(record['visit_id'])
    #alternative
    #from psycopg2 import sql
    #qry= sql.SQL('SELECT DISTINCT visit_id,visit_date,replicate_nr FROM form.field_visit WHERE visit_id IN ({}) ORDER by visit_id, visit_date;').format(
    #    sql.SQL(',').join(map(sql.Literal, sites))
    #)
    qryvisits= cur.mogrify('SELECT DISTINCT visit_id,visit_date,replicate_nr FROM form.field_visit WHERE visit_id IN %s ORDER by visit_id, visit_date;',(tuple(sites),))
    cur.execute(qryvisits)
    ##print(qry)
    visits = cur.fetchall()
    updated_rows=0
    for record in unique_records:
        if any(d['visit_id'] == record['visit_id'] for d in visits):
            if 'visit_date' in record.keys():
                p=filter(lambda n: n['visit_id'] == record['visit_id'] and  n['visit_date'] == record['visit_date'], visits)
                found=list(p)
                record['found']=len(found)
            elif 'replicate_nr' in record.keys():
                p=filter(lambda n: n['visit_id'] == record['visit_id'] and  n['replicate_nr'] == record['replicate_nr'], visits)
                found=list(p)
                print(found)
                record['found']=len(found)
                record['visit_date']=found[0][1]
            cur.execute('INSERT INTO form.field_visit(visit_id,visit_date) values %s ON CONFLICT DO NOTHING',
                        (tuple([record['visit_id'],record['visit_date']]),))
            if cur.rowcount > 0:
                updated_rows = updated_rows + cur.rowcount
            cur.execute('INSERT INTO form.field_samples(visit_id,visit_date,sample_nr) values %s ON CONFLICT DO NOTHING',
                        (tuple([record['visit_id'],record['visit_date'],record['sample_nr']]),))
            if cur.rowcount > 0:
                updated_rows = updated_rows + cur.rowcount        
        else:
            print("%s not found" % record['visit_id'])
            record['found']=0

    print("%s rows updated" % updated_rows)
    conn.commit()
    
    cur.execute(qryvisits)
    ##print(qry)
    updated_visits = cur.fetchall()

    cur.close()

    if useconn is None and conn is not None:
        conn.close()
        print('Database connection closed.')
    return(updated_visits)



#### Wrapping all steps together
The following function will the above functions `import_records_from_workbook`, `create_field_sample_record`, `validate_and_update_site_records`, and `create_quadrat_sample_record` to process data from a workbook into records that are then imported into the database using `batch_upsert`.

In [26]:
def read_and_import_species_data(filepath,workbook,worksheet,col_dictionary,valid_seedbank,valid_organ):
    quadrats = import_records_from_workbook(filepath, workbook, worksheet, col_dictionary,
                                       create_field_sample_record)
    valid_visits = validate_and_update_site_records(quadrats)
    
    records=import_records_from_workbook(filepath, workbook, worksheet, col_dictionary,
                                         create_quadrat_sample_record,
                                         lookup=valid_visits, valid_seedbank=valid_seedbank, valid_organ=valid_organ)
    valid_records=list()
    invalid_records=list()
    for record in records:
        p=filter(lambda n: n['visit_id'] == record['visit_id'] and  n['visit_date'] == record['visit_date'], valid_visits)
        found=list(p)
        if (len(found)==1):
            valid_records.append(record)
        else:
            invalid_records.append(record)

    print("%s valid records and %s invalid records" % (len(valid_records), len(invalid_records)))
    
    batch_upsert(params,table='form.quadrat_samples',records=valid_records,keycol=('visit_id','visit_date','sample_nr'),
             idx=None, execute=True)


## Processing data from all workbooks

In the following section, I proceed to iterate through all the workbooks, adjusting code for each case. 

Here is the list of available workbooks:

In [27]:
wbindex.keys()

dict_keys(['SthnNSWRF_data_bionet2.xlsx', 'UNSWFireVegResponse_UplandBasalt_AlexThomsen+DK.xlsx', 'UNSW_VegFireResponse_RMK_reformat_Sep2021a.xlsx', 'UNSW_VegFireResponse_DataEntry_Yatteyattah all +DK +Milton.xlsx', 'UNSW_VegFireResponse_KNP AlpAsh.xlsx', 'UNSW_VegFireResponse_AlpineBogs_reformat_Sep2021.xlsx', 'RobertsonRF_data_bionet2.xlsx', 'Fire response quadrat survey Newnes Nov2020_DK_revised IDs+AllNovData.xlsm'])

If we select one workbook, we can retrieve a list of column names that we will use in our column definitions for each function:

### Southern NSW Rainforest

- This validates 247 records out of 250
- UppClydeRF2,UppClydeRF3,UppClydeRF4 are not validated
- No information is added to UppClyde1

In [29]:
cols=wbindex['SthnNSWRF_data_bionet2.xlsx']['Floristics']
for k in range(1,len(cols[0])):
    print("%s :: %s // %s" % (k-1,cols[0][k-1],cols[1][k-1]))

0 :: Updated 14/10/2019 // Entry Order
1 :: None // Site Number
2 :: None // Replicate
3 :: First Date // Date of sighting (dd/mm/yyyy hh:mm:ss).
4 :: Last Date // If more than 1 day (dd/mm/yyyy hh:mm:ss).
5 :: Sub plot // SubplotID
6 :: Type // Fauna (FA) or flora (FL).
7 :: Species code // Species code can be assigned by OEH, or see the reference worksheet.
8 :: Common Name // None
9 :: Scientific Name // None
10 :: Cover score // See reference worksheet for definitions
11 :: Abundance score // CV18A See reference worksheet for definitions
12 :: Stratum // See reference worksheet for definitions
13 :: Growth form // See reference worksheet for definitions
14 :: Height min // Flora only; height (in metres)
15 :: Height max // Flora only; height (in metres)
16 :: % Cover actual // None
17 :: Recovery organ // None
18 :: Seedbank // None
19 :: None // Count of unburnt individuals
20 :: Abund actual // Count of resprouting individuals.
21 :: None //  # resprouted & died post-fire
22 :: N

In [30]:
filename='SthnNSWRF_data_bionet2.xlsx'
worksheet='Floristics'
col_dict={'visit_id':1, 'sample_nr':5, 'replicate_nr':2,'species':9, 'spcode':7, 'date':3, 'resprout_organ':17, 'seedbank':18,
          'adults_unburnt':19,'resprouts_live':20,'resprouts_died':21,'resprouts_kill':22,
          'resprouts_reproductive':23,'recruits_live':24, 'recruits_died':25, 'recruits_reproductive':26,
                 'notes':32,'workbook':filename,'worksheet':worksheet}

read_and_import_species_data(filepath=inputdir,
                             workbook=filename,
                             worksheet=worksheet,
                             col_dictionary=col_dict,
                             valid_seedbank=seedbank_vocab,
                             valid_organ=organ_vocab)


Connecting to the PostgreSQL database...
UppClydeRF2 not found
UppClydeRF3 not found
UppClydeRF4 not found
0 rows updated
Database connection closed.
247 valid records and 3 invalid records
Connecting to the PostgreSQL database...
247 rows updated
Database connection closed.


### RMK
Does not have visit_date or replicate nr. We will have to assume they all belong to replicate 1:

In [31]:
cols=wbindex['UNSW_VegFireResponse_RMK_reformat_Sep2021a.xlsx']['Floristics']
for k in range(1,len(cols[0])):
    print("%s :: %s // %s" % (k-1,cols[0][k-1],cols[1][k-1]))


0 :: None // Site
1 :: Species responses // Subquadrat #
2 :: None // Label
3 :: Type // Fauna (FA) or flora (FL).
4 :: Species code // Species code can be assigned by OEH, or see the reference worksheet.
5 :: Common Name // Common name
6 :: None // Species (edits in red)
7 :: None // CAPS #
8 :: None // resprout organ (epicormic,ligno, crown, basal, tuber,rhiz,stol)
9 :: None // seedbank type (canopy, soil, transient, other(not canopy)
10 :: None // # Live unburnt (no response to fire)
11 :: Adults // # resprouted & live
12 :: None //  # resprouted & died post-fire
13 :: None // # killed in fire
14 :: None // #  reproductive
15 :: Recruits // # live
16 :: None // # died post-fire
17 :: None // #  reproductive
18 :: None // notes


In [None]:
filename='UNSW_VegFireResponse_RMK_reformat_Sep2021a.xlsx'
# does not have visit_date or replicate nr, assuming 1?

### KNP Alpine Ash

In [36]:
cols=wbindex['UNSW_VegFireResponse_KNP AlpAsh.xlsx']['Floristics']
for k in range(1,len(cols)):
    print("%s :: %s" % (k-1,cols[k-1]))


0 :: Updated 14/10/2019
1 :: None
2 :: None
3 :: First Date
4 :: Last Date
5 :: Sub plot
6 :: Type
7 :: Species code
8 :: Common Name
9 :: Scientific Name
10 :: Cover score
11 :: Abundance score
12 :: Stratum
13 :: Growth form
14 :: Height min
15 :: Height max
16 :: % Cover actual
17 :: Recovery organ
18 :: Seedbank
19 :: 0
20 :: Abund actual
21 :: 0
22 :: None
23 :: Number reproductive
24 :: None
25 :: None
26 :: Number reproductive
27 :: Estimate Code
28 :: Source Code
29 :: Specimen Rego
30 :: Specimen Location
31 :: External Key


Another function to follow all steps for reading data and importing to database

In [49]:
worksheet='Floristics'
filename='UNSW_VegFireResponse_KNP AlpAsh.xlsx'
col_dict={'visit_id':1, 'replicate_nr':2, 'date':3,
          'sample_nr':5, 'spcode':7, 'species':9,   
          'resprout_organ':17, 'seedbank':18,
          'adults_unburnt':19,'resprouts_live':20,'resprouts_died':21,'resprouts_kill':22,
          'resprouts_reproductive':23,'recruits_live':24, 'recruits_died':25, 'recruits_reproductive':26,
                 'notes':32,'workbook':filename,'worksheet':worksheet}

read_and_import_species_data(filepath=inputdir,
                             workbook=filename,
                             worksheet=worksheet,
                             col_dictionary=col_dict,
                             valid_seedbank=seedbank_vocab,
                             valid_organ=organ_vocab)


Connecting to the PostgreSQL database...
AlpAsh_26 not found
AlpAsh_26 not found
AlpAsh_26 not found
AlpAsh_26 not found
0 rows updated
Database connection closed.
661 valid records and 109 invalid records
Connecting to the PostgreSQL database...
661 rows updated
Database connection closed.


In [54]:
filename='UNSWFireVegResponse_UplandBasalt_AlexThomsen+DK.xlsx'
worksheet='Floristics'
cols=wbindex[filename][worksheet]
for k in range(1,len(cols)):
    print("%s :: %s" % (k-1,cols[k-1]))

0 :: Updated 14/10/2019
1 :: Site Number
2 :: Replicate
3 :: First Date
4 :: Last Date
5 :: Sub plot
6 :: Type
7 :: Species code
8 :: Common Name
9 :: Scientific Name
10 :: Cover score
11 :: Abundance score
12 :: Stratum
13 :: Growth form
14 :: Height min
15 :: Height max
16 :: % Cover actual
17 :: Recovery organ
18 :: Seedbank
19 :: None
20 :: Abund actual
21 :: None
22 :: Number reproductive
23 :: None
24 :: None
25 :: None
26 :: Estimate Code
27 :: Source Code
28 :: Specimen Rego
29 :: Specimen Location
30 :: External Key


In [51]:

col_dict={'visit_id':1, 'replicate_nr':2, 'date':3,
          'sample_nr':5, 'spcode':7, 'species':9,   
          'resprout_organ':17, 'seedbank':18,
          'adults_unburnt':19,'resprouts_live':20,'resprouts_died':21,'resprouts_kill':22,
          'resprouts_reproductive':23,'recruits_live':24, 'recruits_died':25, 'recruits_reproductive':26,
                 'notes':32,'workbook':filename,'worksheet':worksheet}

records=import_records_from_workbook(filepath=inputdir,
                             workbook=filename,
                             worksheet=worksheet,
                             col_dictionary=col_dict,
                             create_record_function=create_quadrat_sample_record,
                             lookup=valid_visits,
                             valid_seedbank=seedbank_vocab,
                             valid_organ=organ_vocab)


In [53]:
records[10]


{'visit_id': 'CRC09B7UVH',
 'sample_nr': 1,
 'species': 'Polyscias sambucifolia',
 'visit_date': datetime.date(2021, 2, 3),
 'species_code': '1211',
 'seedbank': 'Soil-persistent',
 'adults_unburnt': 0,
 'resprouts_live': 0,
 'resprouts_kill': 0,
 'resprouts_reproductive': 34,
 'recruits_died': 0,
 'comments': ['Imported from workbook UNSWFireVegResponse_UplandBasalt_AlexThomsen+DK.xlsx using python script',
  'Imported from spreadsheet Floristics',
  'recruits_reproductive written as X']}

Connect to database and update list of visits, valid organs and seedbank vocabulary:

In [None]:
#count of fully scorched & resprouting individuals
#count of fully scorched & fire-killed individuals
#count of partially scorched & resprouting individuals
#count of partially scorched & fire-killed individuals


In [389]:
item = ws[k]

In [415]:
(inputdir / "field-form" / filename).name

'SthnNSWRF_data_bionet2.xlsx'

In [71]:
conn

<connection object at 0x12c31e500; dsn: 'user=dbmasteruser password=xxx dbname=dbfireveg host=ls-7fe027d971eb206bbd525d61db53cc81a8d11b34.ctgi8o2ak5qy.ap-southeast-2.rds.amazonaws.com port=5432', closed: 1>

1

In [73]:
dir(conn)

['DataError',
 'DatabaseError',
 'Error',
 'IntegrityError',
 'InterfaceError',
 'InternalError',
 'NotSupportedError',
 'OperationalError',
 'ProgrammingError',
 '__class__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__enter__',
 '__eq__',
 '__exit__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 'async',
 'async_',
 'autocommit',
 'binary_types',
 'cancel',
 'close',
 'closed',
 'commit',
 'cursor',
 'cursor_factory',
 'deferrable',
 'dsn',
 'encoding',
 'fileno',
 'get_backend_pid',
 'get_dsn_parameters',
 'get_native_connection',
 'get_parameter_status',
 'get_transaction_status',
 'info',
 'isexecuting',
 'isolation_level',
 'lobject',
 'notices',
 'notifies',
 'pgconn_ptr',
 'poll',
 'protocol_version',
 'readonly',
 'reset',
 'rollback',
 'server_version',
 'set_cli