# Read spreadsheets from David and update database
These spreadsheets were imported on February 2022, I need to update the database.
## load libraries and DB connection parameters

In [1]:
import openpyxl
from pathlib import Path
import os
from datetime import datetime
from configparser import ConfigParser
import psycopg2
from psycopg2.extensions import AsIs
import postgis

In [2]:
repodir = Path("../") 
inputdir = repodir / "data" / "field-form"

Database credentials are stored in a database.ini file

In [3]:
filename = repodir / 'secrets' / 'database.ini'
section = 'aws-lght-sl'

# create a parser
parser = ConfigParser()
# read config file
parser.read(filename)

# get section, default to postgresql
db = {}
if parser.has_section(section):
    params = parser.items(section)
    for param in params:
        db[param[0]] = param[1]
else:
    raise Exception('Section {0} not found in the {1} file'.format(section, filename))

params = db

Define a function to format update or insert ('upserts') queries conditional on key being present in a lookup table.

In [4]:
def format_upsert(table,records,keycol,lookup):
    qries = list()
    for record in records:
        if record[keycol].strip("'") in lookup:
            l = list()
            for k in record.keys():
                if k != keycol:
                    l.append("%s = %s" % (k,record[k]))
            cols = ", ".join(l)
            ins = "UPDATE {table} SET {cols} WHERE {keycol}={keyval};".format(table=table,cols=cols,keycol=keycol,keyval=record[keycol])
        else:
            cols = ", ".join(record.keys())
            vals = ", ".join(record.values())
            ins = "INSERT INTO {table} ({cols}) values ({vals}) ON CONFLICT DO NOTHING".format(table=table,cols=cols,vals=vals)
        qries.append(ins)
    return(qries)

Define a function to batch process insert or update queries:

In [161]:
def batch_upsert(params,table,records,keycol,idx, execute=False):
    # connect to the PostgreSQL server
    print('Connecting to the PostgreSQL database...')
    conn = psycopg2.connect(**params)
    cur = conn.cursor()
    postgis.register(cur)
    updated_rows=0
    
    for record in records:
        
        qrystr = "INSERT INTO %s (%s) values %s ON CONFLICT ON CONSTRAINT %s DO UPDATE SET %s"
        if 'geom' in record.keys():
            the_geom=record['geom']
            record['geom']='GEOMSTR'
        upd=list()
        for k in record.keys():
            if k not in keycol:
                upd.append("{col}=EXCLUDED.{col}".format(col=k))
                           
        qry = cur.mogrify(qrystr, (AsIs(table),
                            AsIs(','.join(record.keys())),
                            tuple(record.values()),
                            AsIs(idx),
                            AsIs(','.join(upd))
                           ))
      
        
        if 'geom' in record.keys():
            qry=qry.decode('utf-8')
            qry=qry.replace("'GEOMSTR'",the_geom)
            record['geom']=the_geom
        
        if execute:
            cur.execute(qry)
            if cur.rowcount > 0:
                updated_rows = updated_rows + cur.rowcount
        else:
            print(qry)
            
    conn.commit()        
    cur.close()
    print("%s rows updated" % (updated_rows))
        
    if conn is not None:
        conn.close()
        print('Database connection closed.')

        

In [162]:
record={'site_label':'test','geom':"ST_GeomFromText('POINT(1 2)', 4326)"}
batch_upsert(params,"form.field_site",(record,),keycol='site_label', idx='field_site_pkey1',execute=False)

Connecting to the PostgreSQL database...
INSERT INTO form.field_site (site_label,geom) values ('test', ST_GeomFromText('POINT(1 2)', 4326)) ON CONFLICT ON CONSTRAINT field_site_pkey1 DO UPDATE SET geom=EXCLUDED.geom
0 rows updated
Database connection closed.


## Read spreadsheets
Each spreadsheet has a slightly different structure, so these scripts have to be adapted for each case.
### List of spreadsheets in directory

In [70]:
os.listdir(inputdir)

['~$Fire response quadrat survey Newnes Nov2020_DK_revised IDs+AllNovData.xlsm',
 'SthnNSWRF_data_bionet2.xlsx',
 '~$UNSW_VegFireResponse_DataEntry_Yatteyattah all +DK +Milton.xlsx',
 '~$UNSWFireVegResponse_UplandBasalt_AlexThomsen+DK.xlsx',
 'UNSWFireVegResponse_UplandBasalt_AlexThomsen+DK.xlsx',
 'UNSW_VegFireResponse_RMK_reformat_Sep2021a.xlsx',
 'UNSW_VegFireResponse_DataEntry_Yatteyattah all +DK +Milton.xlsx',
 '~$UNSW_VegFireResponse_RMK_reformat_Sep2021a.xlsx',
 'UNSW_VegFireResponse_KNP AlpAsh.xlsx',
 'UNSW_VegFireResponse_AlpineBogs_reformat_Sep2021.xlsx',
 'RobertsonRF_data_bionet2.xlsx',
 'Fire response quadrat survey Newnes Nov2020_DK_revised IDs+AllNovData.xlsm']

In [71]:
valid_files = ['SthnNSWRF_data_bionet2.xlsx',
               'UNSWFireVegResponse_UplandBasalt_AlexThomsen+DK.xlsx',
               'UNSW_VegFireResponse_RMK_reformat_Sep2021a.xlsx',
               'UNSW_VegFireResponse_DataEntry_Yatteyattah all +DK +Milton.xlsx',
               'UNSW_VegFireResponse_KNP AlpAsh.xlsx',
               'UNSW_VegFireResponse_AlpineBogs_reformat_Sep2021.xlsx',
               'RobertsonRF_data_bionet2.xlsx',
               'Fire response quadrat survey Newnes Nov2020_DK_revised IDs+AllNovData.xlsm']

Here we create an index of worksheets and column headers for each file

In [72]:
wbindex=dict()
for workbook_name in valid_files:
    inputfile=inputdir / workbook_name
    # using data_only=True to get the calculated cell values
    wb = openpyxl.load_workbook(inputfile,data_only=True)
    wbindex[workbook_name]=dict()
    for ws in wb.worksheets:
        wbindex[workbook_name][ws._WorkbookChild__title]=list()
        for k in range(1,ws.max_column):
            wbindex[workbook_name][ws._WorkbookChild__title].append(ws.cell(row=1,column=k).value)
        

## List of sites and site visits

We will define a couple of functions to create list of records and a function to upload the records in the database.


### Insert into field_site table

This function will transform create an insert records from one row of the spreadsheet (`item`) using a column dictionary (`sw`). 

We need to consider:
- geom might be single or multiple points
- projection (SRID) is UTM GDA zone 55 or 56, latlong WGS84, or a different format
- elevation in m, or NULL 
- GPS uncertainty in meters, or NULL
- text description of GPS location, or NULL

In [97]:
def create_field_site_record(item,sw):
    site_label = item[sw['site_label']].value
    if site_label is not None and site_label != "Site":
        record={'site_label': site_label}
    
        for column in ('elevation','location_description', 'gps_uncertainty_m', 'gps_geom_description'):
            if column in sw.keys():
                val=item[sw[column]].value
                if val is not None and val not in ('na','NA'):
                    record[column] =  val


        if 'lons' in sw.keys():
            for xs in sw['lons']:
                xlon = item[lons].value
            for ys in sw['lats']:
                ylat = item[lats].value
            srid = 4326
            record['geom'] = "ST_GeomFromText('POINT({xlon} {ylat})', {srid})".format(xlon=xlon,ylat=ylat,srid=srid)

        if 'xs' in sw.keys():
            for xs in sw['xs']:
                xlon = item[xs].value
            for ys in sw['ys']:
                ylat = item[ys].value

            if 'fixed_utm_zone' in sw.keys():
                utm_zone=sw['fixed_utm_zone']
            else:
                utm_zone=item[sw['utm_zone']].value
            if  utm_zone == 56:
                srid = 28356
            elif utm_zone == 55:
                srid = 28355

            if srid is not None:
                 record['geom'] = "ST_GeomFromText('POINT({xlon} {ylat})', {srid})".format(xlon=xlon,ylat=ylat,srid=srid)

        return(record)

def import_records_from_workbook(filepath,workbook,worksheet,col_dictionary,create_record_function):
    wb = openpyxl.load_workbook(filepath / workbook, data_only=True)
    ws=wb[worksheet]
    row_count = ws.max_row
    records=list()
    for k in range(2,row_count):
        item=ws[k]
        record=create_record_function(item,col_dictionary)
        if record is not None:
            if type(record)==list:
                records.extend(record)
            elif type(record)==dict:
                records.append(record)
    return records
    

Test this function with one workbook:

In [98]:
worksheet='Site'
filename='UNSWFireVegResponse_UplandBasalt_AlexThomsen+DK.xlsx'
col_definitions={'site_label':0, 'location_description':10,'utm_zone':11, 'xs':(12,), 'ys':(13,),
                 'gps_uncertainty_m':14,
                 'gps_geom_description':17,
                 'elevation':38, 'visit_date':(2,4,5)}
survey="UplandBasalt"

records = import_records_from_workbook(inputdir,filename,worksheet,col_definitions,create_field_site_record) 

len(records)

28

In [99]:
records[1]

{'site_label': 'CRC13B7C',
 'elevation': 1276,
 'location_description': 'Mt Coricudgy, near towers',
 'gps_geom_description': '30 m transect for woody plants >2m tall, with two 5x5m subplots at either end (subplots 1 & 2) with 20x5m subplot in middle (subplot 3); non-woody spp and woodplants <2m tall counted in the two 5x5m subplots',
 'geom': "ST_GeomFromText('POINT(251947 6364871)', 28356)"}

### Insert into field_visits table

This function will create an insert record from one row of the spreadsheet (`item`) using a column dictionary (`sw`). 

We need to consider:
- iterate over multiple visit dates in different columns
- add survey name to the record
- text description of visit, or NULL
- observerlist to be split into multiple names (list or array)

In [100]:
def create_field_visit_record(item,sw):
    site_label = item[sw['site_label']].value
    records = list()
    for k in sw['visit_date']:
        visit_date = item[k].value
        if site_label is not None and site_label != "Site":
            if isinstance(visit_date, datetime):
                record = {'visit_id': site_label, 'visit_date': visit_date}
                if 'survey' in sw.keys():
                    record['survey_name'] = sw['survey']
                for column in ('visit_description', 'mainobserver', 'observerlist'):
                    if column in sw.keys():
                        val=item[sw[column]].value
                        if val is not None and val not in ('na','NA'):
                            if column=='observerlist':
                                val=val.split(',')
                            record[column] =  val
                records.append(record)
    return records

In [101]:
worksheet='Site'
filename='UNSWFireVegResponse_UplandBasalt_AlexThomsen+DK.xlsx'
col_definitions={'site_label':0, 'location_description':10,'utm_zone':11, 'xs':(12,), 'ys':(13,),
                 'gps_uncertainty_m':14,
                 'gps_geom_description':17,
                 'observerlist':3,
                 'elevation':38, 'visit_date':(2,4,5,6,7,8,9),
                 'survey':"UplandBasalt"}

records = import_records_from_workbook(inputdir,filename,worksheet,col_definitions,create_field_visit_record) 

len(records)

42

In [102]:
records[1]

{'visit_id': 'CRC13B7C',
 'visit_date': datetime.datetime(2021, 2, 4, 0, 0),
 'survey_name': 'UplandBasalt',
 'observerlist': ['Alexandria Thomsen', ' Charlotte Mills']}

### Run trough all workbooks

In [103]:
wbindex.keys()

dict_keys(['SthnNSWRF_data_bionet2.xlsx', 'UNSWFireVegResponse_UplandBasalt_AlexThomsen+DK.xlsx', 'UNSW_VegFireResponse_RMK_reformat_Sep2021a.xlsx', 'UNSW_VegFireResponse_DataEntry_Yatteyattah all +DK +Milton.xlsx', 'UNSW_VegFireResponse_KNP AlpAsh.xlsx', 'UNSW_VegFireResponse_AlpineBogs_reformat_Sep2021.xlsx', 'RobertsonRF_data_bionet2.xlsx', 'Fire response quadrat survey Newnes Nov2020_DK_revised IDs+AllNovData.xlsm'])

In [104]:
cols=wbindex['SthnNSWRF_data_bionet2.xlsx']['Site']
for k in range(1,len(cols)):
    print("%s :: %s" % (k-1,cols[k-1]))
               

0 :: Site
1 :: Replicate
2 :: Observers (comma sep if >1)
3 :: Date of samping
4 :: Survey Date Replicate 1
5 :: Survey Date Replicate 2
6 :: Survey Date Replicate 3
7 :: Survey Date Replicate 4
8 :: Survey Date Replicate 5
9 :: Survey Date Replicate 6
10 :: Location text
11 :: Zone
12 :: Easting
13 :: Northing
14 :: GPS Precision (m)
15 :: Latitude
16 :: Longitude
17 :: Layout & GPS marker position
18 :: 2nd ref point Zone
19 :: 2nd ref point Easting
20 :: 2nd ref point Northing
21 :: 2nd ref point Position of GPS
22 :: 3rd ref point Zone
23 :: 3rd ref point Easting
24 :: 3rd ref point Northing
25 :: 3rd ref point Position of GPS
26 :: 4th ref point Zone
27 :: 4th ref point Easting
28 :: 4th ref point Northing
29 :: 4th ref point Position of GPS
30 :: Total sample area (sq.m)
31 :: Subquadrat area (sq.m)
32 :: # subquadrats
33 :: Substrate
34 :: Notes
35 :: Slope
36 :: Aspect
37 :: Elevation
38 :: Disturbance notes
39 :: Cwth TEC
40 :: NSW TEC
41 :: variant
42 :: Vegetation formation


In [165]:
worksheet='Site'
filename='UNSWFireVegResponse_UplandBasalt_AlexThomsen+DK.xlsx'
col_definitions={'site_label':0, 'location_description':10,'utm_zone':11, 'xs':(12,), 'ys':(13,),
                 'gps_uncertainty_m':14,
                 'gps_geom_description':17,
                 'observerlist':3,
                 'elevation':38, 'visit_date':(2,4,5,6,7,8,9),
                 'survey':"UplandBasalt"}


records = import_records_from_workbook(inputdir,filename,worksheet,col_definitions,create_field_site_record) 
# function to create upsert queries with plain substitution to handle geom string
batch_upsert(params,"form.field_site",records,keycol=('site_label',), idx='field_site_pkey1',execute=True)

records = import_records_from_workbook(inputdir,filename,worksheet,col_definitions,create_field_visit_record) 
# this should work also without problem
batch_upsert(params,"form.field_visit",records,keycol=('visit_id','visit_date'), 
             idx='field_visit_pkey2',execute=True)



Connecting to the PostgreSQL database...
28 rows updated
Database connection closed.
Connecting to the PostgreSQL database...
42 rows updated
Database connection closed.


In [307]:
col_definitions={'site_label':0, 'utm_zone':11, 'x':12, 'y':13,'elevation':37, 'visit_date':range(3,9), 
                 'survey':"RMK"}
filename='UNSW_VegFireResponse_RMK_reformat_Sep2021a.xlsx'
insert_site_info_from_file(inputdir / "field-form" / filename,  worksheet, col_definitions, survey)


Connecting to the PostgreSQL database...
6 rows updated
Database connection closed.
Connecting to the PostgreSQL database...
16 rows updated
Database connection closed.


In [334]:
col_definitions={'site_label':0, 'utm_zone':11, 'x':12, 'y':13,'elevation':37, 'visit_date':range(3,9)}
filename='SthnNSWRF_data_bionet2.xlsx'
worksheet='Site'
survey="SthnNSWRF"
insert_site_info_from_file(inputdir / "field-form" / filename,  worksheet, col_definitions, survey)


Connecting to the PostgreSQL database...
0 rows updated
Database connection closed.
Connecting to the PostgreSQL database...
5 rows updated
Database connection closed.


In [308]:
filename='UNSW_VegFireResponse_KNP AlpAsh.xlsx'
survey="KNP AlpAsh"
insert_site_info_from_file(inputdir / "field-form" / filename,  worksheet, col_definitions, survey)


Connecting to the PostgreSQL database...
0 rows updated
Database connection closed.
Connecting to the PostgreSQL database...
9 rows updated
Database connection closed.


In [309]:
filename='UNSW_VegFireResponse_AlpineBogs_reformat_Sep2021.xlsx'
survey="Alpine Bogs"
insert_site_info_from_file(inputdir / "field-form" / filename,  worksheet, col_definitions, survey)


Connecting to the PostgreSQL database...
6 rows updated
Database connection closed.
Connecting to the PostgreSQL database...
6 rows updated
Database connection closed.


In [310]:
filename='RobertsonRF_data_bionet2.xlsx'
survey="Robertson RF"
insert_site_info_from_file(inputdir / "field-form" / filename,  worksheet, col_definitions, survey)


Connecting to the PostgreSQL database...
0 rows updated
Database connection closed.
Connecting to the PostgreSQL database...
3 rows updated
Database connection closed.


In [325]:
filename='Fire response quadrat survey Newnes Nov2020_DK_revised IDs+AllNovData.xlsm'
col_definitions={'site_label':0, 'fixed_utm_zone':56, 'x':1, 'y':2,'elevation':4, 'visit_date':(8,)}
survey="NEWNES"
insert_site_info_from_file(inputdir / "field-form" / filename,  worksheet, col_definitions, survey)


Connecting to the PostgreSQL database...
0 rows updated
Database connection closed.
Connecting to the PostgreSQL database...
54 rows updated
Database connection closed.


This does not work for the Yatteyattah workbook, need to reformat the data to make it work

In [336]:
filename='UNSW_VegFireResponse_DataEntry_Yatteyattah all +DK +Milton.xlsx'
worksheet='Site'
col_definitions={'site_label':0, 'utm_zone':11, 'x':12, 'y':13,'elevation':37, 'visit_date':range(3,9)}
survey="Yatteyattah"
insert_site_info_from_file(inputdir / "field-form" / filename,  worksheet, col_definitions, survey)


Connecting to the PostgreSQL database...
4 rows updated
Database connection closed.
Connecting to the PostgreSQL database...
2 rows updated
Database connection closed.


## Quadrat samples

First we will define some helper functions to prepare records.

In [422]:

def create_field_sample_record(item,sw):
    species=item[sw['species']].value
    visit_date = item[sw['date']].value 
    if species is not None and isinstance(visit_date,datetime) :
        record={'visit_id': item[sw['visit_id']].value, 'sample_nr': item[sw['sample_nr']].value,
                'visit_date' : visit_date.date()}
        return(record)

def create_quadrat_sample_record(item,sw,add_cols):
    species=item[sw['species']].value
    spcode=item[sw['spcode']].value
    visit_date = item[sw['date']].value 
    if species is not None and isinstance(visit_date,datetime) :
        record={'visit_id': item[sw['visit_id']].value, 'sample_nr': item[sw['sample_nr']].value,
                'visit_date' : visit_date.date(), 'species': species, 'comments': list()}
        comms=list()
        #if filename.name is not None:
        #    comms.append("Imported from workbook %s using python script" % filename.name)
        #if worksheet is not None:
        #    comms.append("Imported from spreadsheet %s" % worksheet)
            
        if (isinstance(spcode, str) and spcode.isnumeric()) or isinstance(spcode,int):
            record['species_code']=spcode
        for k in add_cols.keys():
            if item[add_cols[k]].value is not None and item[add_cols[k]].value not in ('na','NA'):
                record[k]=item[add_cols[k]].value
        ro=item[sw['resprout_organ']].value
        if ro is not None and ro not in ('na','NA'):
            if ro in valid_organ:
                record['resprout_organ']=ro
            elif ro.capitalize() in valid_organ:
                record['resprout_organ']=ro.capitalize()
            else:
                comms.append("resprout organ written as %s" % ro)
        sb=item[sw['seedbank']].value
        if sb is not None and sb not in ('na','NA'):
            if sb in valid_seedbank:
                record['seedbank']=sb
            elif sb.capitalize() in valid_seedbank:
                record['seedbank']=sb.capitalize()
            else:
                comms.append("seedbank written as %s" % sb)
        
        if len(comms)>0:
            record["comments"]=comms 
        return(record)

Another function to follow all steps for reading data and importing to database

In [423]:
def insert_species_info_from_file(params,filename, worksheet, col_dictionary, extra_col_dictionary):
    wb = openpyxl.load_workbook(filename, data_only=True)
    ws=wb[worksheet]
    row_min = 3
    row_count = ws.max_row

    print('Connecting to the PostgreSQL database...')
    conn = psycopg2.connect(**params)
    cur = conn.cursor()
    updated_rows=0

    insert_site = 'insert into form.field_site (site_label) values (%s) ON CONFLICT DO NOTHING'
    insert_visit = 'insert into form.field_visit (visit_id,visit_date,createdbysystemuserid,datecreated,updatedbysystemuserid,dateupdated) values (%s,%s,1,CURRENT_TIMESTAMP(0),1,CURRENT_TIMESTAMP(0)) ON CONFLICT DO NOTHING'
    insert_fs = 'insert into form.field_samples (%s) values %s ON CONFLICT DO NOTHING'
    insert_qs = 'insert into form.quadrat_samples (%s) values %s ON CONFLICT DO NOTHING'

    for k in range(row_min,row_count):
        item = ws[k]
        record_qs = create_quadrat_sample_record(item,col_dictionary,extra_col_dictionary) 
        if record_qs is not None:
            if (record_qs['visit_id'],record_qs['visit_date']) not in visits:
                if record_qs['visit_id'] not in sites:
                    #print(insert_site % record_qs['visit_id'])
                    cur.execute(insert_site,(record_qs['visit_id'],))
                    cur.execute(insert_visit,(record_qs['visit_id'],record_qs['visit_date']))
                record_fs = create_field_sample_record(item,col_definitions)
                cur.execute(insert_fs, (AsIs(','.join(record_fs.keys())), tuple(record_fs.values())))
                updated_rows = updated_rows+cur.rowcount
            cur.execute(insert_qs, (AsIs(','.join(record_qs.keys())), tuple(record_qs.values())))
            updated_rows = updated_rows+cur.rowcount

    conn.commit()        
    cur.close()
    print("%s lines scanned %s updated rows" % (row_max-row_min,updated_rows))      
    if conn is not None:
        conn.close()
        print('Database connection closed.')

Connect to database and update list of visits, valid organs and seedbank vocabulary:

In [382]:

# connect to the PostgreSQL server
print('Connecting to the PostgreSQL database...')
conn = psycopg2.connect(**params)
cur = conn.cursor()
cur.execute('SELECT DISTINCT visit_id,visit_date FROM form.field_visit ORDER by visit_id, visit_date')
visits = cur.fetchall()
sites = list()
for visit in visits:
    if visit[0] not in sites:
        sites.append(visit[0])
#valid_organ=('Epicormic', 'Apical', 'Lignotuber', 'Basal','Tuber','Tussock','Short rhizome', 'Long rhizome or root sucker', 'Stolon', 'None', 'Other')
#valid_seedbank=('Soil-persistent', 'Transient', 'Canopy','Non-canopy','Other')

cur.execute("SELECT enumlabel FROM pg_enum e LEFT JOIN pg_type t ON e.enumtypid=t.oid where typname='resprout_organ_vocabulary';")
valid_organ_list = cur.fetchall()
valid_organ = [item for t in valid_organ_list for item in t]

cur.execute("SELECT enumlabel FROM pg_enum e LEFT JOIN pg_type t ON e.enumtypid=t.oid where typname='seedbank_vocabulary';")
valid_seedbank_list = cur.fetchall()
valid_seedbank = [item for t in valid_seedbank_list for item in t]


cur.close()
        
if conn is not None:
    conn.close()
    print('Database connection closed.')

Connecting to the PostgreSQL database...
Database connection closed.


In [383]:
cols=wbindex['SthnNSWRF_data_bionet2.xlsx']['Floristics']
for k in range(1,len(cols)):
    print("%s :: %s" % (k-1,cols[k-1]))

0 :: Updated 14/10/2019
1 :: None
2 :: None
3 :: First Date
4 :: Last Date
5 :: Sub plot
6 :: Type
7 :: Species code
8 :: Common Name
9 :: Scientific Name
10 :: Cover score
11 :: Abundance score
12 :: Stratum
13 :: Growth form
14 :: Height min
15 :: Height max
16 :: % Cover actual
17 :: Recovery organ
18 :: Seedbank
19 :: None
20 :: Abund actual
21 :: None
22 :: None
23 :: Number reproductive
24 :: None
25 :: None
26 :: Number reproductive
27 :: Estimate Code
28 :: Source Code
29 :: Specimen Rego
30 :: Specimen Location
31 :: External Key


In [424]:
filename='SthnNSWRF_data_bionet2.xlsx'
worksheet='Floristics'
col_dict={'species':9, 'spcode':7, 'date':3, 'visit_id':1, 'sample_nr':5,'resprout_organ':17, 'seedbank':18}
extra_col_dict={'adults_unburnt':19,'resprouts_live':20,'resprouts_died':21,'resprouts_kill':22,
                 'resprouts_reproductive':23,'recruits_live':24, 'recruits_died':25, 'recruits_reproductive':26,
                 'species_notes':27}

insert_species_info_from_file(params=params, filename=(inputdir / "field-form" / filename), worksheet=worksheet,
                              col_dictionary = col_dict, extra_col_dictionary = extra_col_dict)

Connecting to the PostgreSQL database...


ForeignKeyViolation: insert or update on table "quadrat_samples" violates foreign key constraint "quadrat_samples_visit_id_visit_date_sample_nr_fkey1"
DETAIL:  Key (visit_id, visit_date, sample_nr)=(DeuaRF, 2021-11-30, 1) is not present in table "field_samples".


In [345]:
col_definitions={'species':5, 'spcode':6, 'date':4, 'visit_id':0, 'sample_nr':1,'resprout_organ':7, 'seedbank':8}

extra_col_definitions={'adults_unburnt':9,'resprouts_live':10,'resprouts_died':11,'resprouts_kill':12,
                 'resprouts_reproductive':13,'recruits_live':14, 'recruits_died':15, 'recruits_reproductive':16,
                 'species_notes':17}


workbook_name = "Fire response quadrat survey Newnes Nov2020_DK_revised IDs+AllNovData.xlsm"
spreadsheet_name = 'Floristics'
inputfile=inputdir / "field-form" / workbook_name

# using data_only=True to get the calculated cell values
# wb = openpyxl.load_workbook(inputfile,data_only=True)
ws = wb[spreadsheet_name]

item=ws[2364]
print(create_field_sample_record(item,col_definitions))
create_quadrat_sample_record(item,col_definitions,extra_col_definitions)

{'visit_id': 'BS2', 'sample_nr': 1, 'visit_date': datetime.date(2020, 11, 17)}


{'visit_id': 'BS2',
 'sample_nr': 1,
 'visit_date': datetime.date(2020, 11, 17),
 'species': 'Empodisma minus',
 'comments': ['Imported from workbook Fire response quadrat survey Newnes Nov2020_DK_revised IDs+AllNovData.xlsm using python script',
  'Imported from spreadsheet Floristics'],
 'species_code': 5532,
 'resprouts_live': 16,
 'resprouts_died': 0,
 'resprouts_reproductive': 0}

In [340]:
valid_seedbank

['Soil-persistent', 'Transient', 'Canopy', 'Non-canopy', 'Other']

In [119]:
#row_min = 3001

# connect to the PostgreSQL server
print('Connecting to the PostgreSQL database...')
conn = psycopg2.connect(**params)
cur = conn.cursor()

insert_fs = 'insert into form.field_samples (%s) values %s ON CONFLICT DO NOTHING'
insert_qs = 'insert into form.quadrat_samples (%s) values %s ON CONFLICT DO NOTHING'
updated_rows=0

for i in range(row_min, row_max):
    item = ws[i]
    record = create_quadrat_sample_record(item,col_definitions,extra_col_definitions)
    if record is not None:
        if (record['visit_id'],record['visit_date']) not in visits:
            record_fs = create_field_sample_record(item,col_definitions)
            cur.execute(insert_fs, (AsIs(','.join(record_fs.keys())), tuple(record_fs.values())))
            updated_rows = updated_rows+cur.rowcount
        cur.execute(insert_qs, (AsIs(','.join(record.keys())), tuple(record.values())))
        updated_rows = updated_rows+cur.rowcount

conn.commit()        
cur.close()
print("%s lines scanned %s updated rows" % (row_max-row_min,updated_rows))      
if conn is not None:
    conn.close()
    print('Database connection closed.')

Connecting to the PostgreSQL database...
5234 lines scanned 3149 updated rows
Database connection closed.


In [None]:
count of fully scorched & resprouting individuals
count of fully scorched & fire-killed individuals
count of partially scorched & resprouting individuals
count of partially scorched & fire-killed individuals


In [389]:
item = ws[k]

In [409]:
item

(<Cell 'Floristics'.A32>,
 <Cell 'Floristics'.B32>,
 <Cell 'Floristics'.C32>,
 <Cell 'Floristics'.D32>,
 <Cell 'Floristics'.E32>,
 <Cell 'Floristics'.F32>,
 <Cell 'Floristics'.G32>,
 <Cell 'Floristics'.H32>,
 <Cell 'Floristics'.I32>,
 <Cell 'Floristics'.J32>,
 <Cell 'Floristics'.K32>,
 <Cell 'Floristics'.L32>,
 <Cell 'Floristics'.M32>,
 <Cell 'Floristics'.N32>,
 <Cell 'Floristics'.O32>,
 <Cell 'Floristics'.P32>,
 <Cell 'Floristics'.Q32>,
 <Cell 'Floristics'.R32>,
 <Cell 'Floristics'.S32>,
 <Cell 'Floristics'.T32>,
 <Cell 'Floristics'.U32>,
 <Cell 'Floristics'.V32>,
 <Cell 'Floristics'.W32>,
 <Cell 'Floristics'.X32>,
 <Cell 'Floristics'.Y32>,
 <Cell 'Floristics'.Z32>,
 <Cell 'Floristics'.AA32>,
 <Cell 'Floristics'.AB32>,
 <Cell 'Floristics'.AC32>,
 <Cell 'Floristics'.AD32>,
 <Cell 'Floristics'.AE32>,
 <Cell 'Floristics'.AF32>)

In [415]:
(inputdir / "field-form" / filename).name

'SthnNSWRF_data_bionet2.xlsx'