# Read files summarising field work and update database
These Excel workbooks were imported on February 2022.

The scripts documented here have been created to:

- Read data from spreadsheets with field-work data
- Create records for data import into the database
- Insert or update records in the database

This jupyter notebook deals with the first step, which is importing field site and visit information. A second notebook deals with importing information from `quadrats`.

## Read workbooks
Each spreadsheet has a slightly different structure, so these scripts have to be adapted for each case.

### List of workbooks/spreadsheets in directory

## Set-up
Load libraries 

In [1]:
import openpyxl
from pathlib import Path
import os
from datetime import datetime
from configparser import ConfigParser
import psycopg2
from psycopg2.extensions import AsIs
#import postgis

Define path to workbooks

In [2]:
repodir = Path("../") 
inputdir = repodir / "data" / "field-form"

### DB connection parameters and helper functions
 
 Database credentials are stored in a database.ini file

In [3]:
filename = repodir / 'secrets' / 'database.ini'
section = 'aws-lght-sl'

# create a parser
parser = ConfigParser()
# read config file
parser.read(filename)

# get section, default to postgresql
db = {}
if parser.has_section(section):
    params = parser.items(section)
    for param in params:
        db[param[0]] = param[1]
else:
    raise Exception('Section {0} not found in the {1} file'.format(section, filename))

params = db

Check list of observer ids:

In [32]:
# connect to the PostgreSQL server
print('Connecting to the PostgreSQL database...')
conn = psycopg2.connect(**params)
cur = conn.cursor()

cur.execute("SELECT userkey,givennames,surname FROM form.observerid;")
observerid = cur.fetchall()
cur.close()
        
if conn is not None:
    conn.close()
    print('Database connection closed.')

Connecting to the PostgreSQL database...
Database connection closed.


In [33]:
observerid

[(7, 'David', 'Keith'),
 (9, 'D.', 'Benson'),
 (10, 'L.', 'Watts,'),
 (11, 'T.', 'Manson'),
 (12, 'Jackie', 'Miles'),
 (13, 'Robert', 'Kooyman'),
 (8, 'Alexandria', 'Thomsen'),
 (14, 'Jedda', 'Lemmen')]

Define a function to batch process insert or update queries:

In [4]:
def batch_upsert(params,table,records,keycol,idx, execute=False,useconn=None):
    if useconn is None:
        # connect to the PostgreSQL server
        print('Connecting to the PostgreSQL database...')
        conn = psycopg2.connect(**params)
    else:
        conn = useconn
    cur = conn.cursor()
    #postgis.register(cur)
    updated_rows=0

    for record in records:
        if len(record.keys())>len(keycol):
            if 'geom' in record.keys():
                the_geom=record['geom']
                record['geom']='GEOMSTR'
            if idx is not None:
                qrystr = "INSERT INTO %s (%s) values %s ON CONFLICT ON CONSTRAINT %s DO UPDATE SET %s"
                upd=list()
                for k in record.keys():
                    if k not in keycol:
                        upd.append("{col}=EXCLUDED.{col}".format(col=k))
                qry = cur.mogrify(qrystr, (AsIs(table),
                                AsIs(','.join(record.keys())),
                                tuple(record.values()),
                                AsIs(idx),
                                AsIs(','.join(upd))
                               ))
            else:
                qrystr = "INSERT INTO %s (%s) values %s ON CONFLICT DO NOTHING"
                qry = cur.mogrify(qrystr, (AsIs(table),
                                AsIs(','.join(record.keys())),
                                tuple(record.values())
                               ))

            if 'geom' in record.keys():
                qry=qry.decode('utf-8')
                qry=qry.replace("'GEOMSTR'",the_geom)
                record['geom']=the_geom

            if execute:
                cur.execute(qry)
                if cur.rowcount > 0:
                    updated_rows = updated_rows + cur.rowcount
            else:
                print(qry)
            
    conn.commit()        
    cur.close()
    print("%s rows updated" % (updated_rows))
        
    if useconn is None and conn is not None:
        conn.close()
        print('Database connection closed.')


Just a test with random data, use `execute=False` to print the query:

In [5]:
record={'site_label':'test','geom':"ST_GeomFromText('POINT(1 2)', 4326)"}
batch_upsert(params,"form.field_site",(record,),keycol=('site_label',), idx='field_site_pkey1',execute=False)

Connecting to the PostgreSQL database...
INSERT INTO form.field_site (site_label,geom) values ('test', ST_GeomFromText('POINT(1 2)', 4326)) ON CONFLICT ON CONSTRAINT field_site_pkey1 DO UPDATE SET geom=EXCLUDED.geom
0 rows updated
Database connection closed.


In [6]:
batch_upsert(params,"form.field_site",(record,),keycol=('site_label',), idx=None,execute=False)

Connecting to the PostgreSQL database...
INSERT INTO form.field_site (site_label,geom) values ('test', ST_GeomFromText('POINT(1 2)', 4326)) ON CONFLICT DO NOTHING
0 rows updated
Database connection closed.


## Functions to read records from workbooks
Each spreadsheet has a slightly different structure, so these scripts have to be adapted for each case.

### List of workbooks/spreadsheets in directory

In [7]:
os.listdir(inputdir)

['~$Fire response quadrat survey Newnes Nov2020_DK_revised IDs+AllNovData.xlsm',
 'SthnNSWRF_data_bionet2.xlsx',
 'UNSWFireVegResponse_UplandBasalt_AlexThomsen+DK.xlsx',
 'UNSW_VegFireResponse_RMK_reformat_Sep2021a.xlsx',
 'UNSW_VegFireResponse_DataEntry_Yatteyattah all +DK +Milton.xlsx',
 '~$UNSW_VegFireResponse_RMK_reformat_Sep2021a.xlsx',
 'UNSW_VegFireResponse_KNP AlpAsh.xlsx',
 'UNSW_VegFireResponse_AlpineBogs_reformat_Sep2021.xlsx',
 'RobertsonRF_data_bionet2.xlsx',
 'Fire response quadrat survey Newnes Nov2020_DK_revised IDs+AllNovData.xlsm']

In [8]:
valid_files = ['SthnNSWRF_data_bionet2.xlsx',
               'UNSWFireVegResponse_UplandBasalt_AlexThomsen+DK.xlsx',
               'UNSW_VegFireResponse_RMK_reformat_Sep2021a.xlsx',
               'UNSW_VegFireResponse_DataEntry_Yatteyattah all +DK +Milton.xlsx',
               'UNSW_VegFireResponse_KNP AlpAsh.xlsx',
               'UNSW_VegFireResponse_AlpineBogs_reformat_Sep2021.xlsx',
               'RobertsonRF_data_bionet2.xlsx',
               'Fire response quadrat survey Newnes Nov2020_DK_revised IDs+AllNovData.xlsm']

Here we create an index of worksheets and column headers for each file

In [9]:
wbindex=dict()
for workbook_name in valid_files:
    inputfile=inputdir / workbook_name
    # using data_only=True to get the calculated cell values
    wb = openpyxl.load_workbook(inputfile,data_only=True)
    wbindex[workbook_name]=dict()
    for ws in wb.worksheets:
        wbindex[workbook_name][ws._WorkbookChild__title]=list()
        for k in range(1,ws.max_column):
            wbindex[workbook_name][ws._WorkbookChild__title].append(ws.cell(row=1,column=k).value)
        

### Functions to read records in a spreadsheet
We need a wrapping function to apply a lower level function (`create_record_function`) to all rows in a `worksheet` of the selected `workbook` using a dictionary `col_dictionary`, we add a `**kwargs` to pass additional arguments to the lower level function:


In [10]:
def import_records_from_workbook(filepath,workbook,worksheet,col_dictionary,create_record_function,**kwargs):
    wb = openpyxl.load_workbook(filepath / workbook, data_only=True)
    ws=wb[worksheet]
    row_count = ws.max_row
    records=list()
    for k in range(2,row_count):
        item=ws[k]
        record=create_record_function(item,col_dictionary,**kwargs)
        if record is not None:
            if type(record)==list:
                records.extend(record)
            elif type(record)==dict:
                records.append(record)
    return records

#### Insert into field_site table

This function will transform create an insert records from one row of the spreadsheet (`item`) using a column dictionary (`sw`). 

We need to consider:
- geom might be single or multiple points
- projection (SRID) is UTM GDA zone 55 or 56, latlong WGS84, or a different format
- elevation in m, or NULL 
- GPS uncertainty in meters, or NULL
- text description of GPS location, or NULL

In [11]:
def create_field_site_record(item,sw):
    site_label = item[sw['site_label']].value
    if site_label is not None and site_label != "Site":
        record={'site_label': site_label}
    
        for column in ('elevation','location_description', 'gps_uncertainty_m', 'gps_geom_description'):
            if column in sw.keys():
                val=item[sw[column]].value
                if val is not None and val not in ('na','NA'):
                    record[column] =  val
    
        if 'lons' in sw.keys():
            for xs in sw['lons']:
                xlon = item[xs].value
            for ys in sw['lats']:
                ylat = item[ys].value
            srid = 4326


        if 'xs' in sw.keys():
            for xs in sw['xs']:
                xlon = item[xs].value
            for ys in sw['ys']:
                ylat = item[ys].value

            if 'fixed_utm_zone' in sw.keys():
                utm_zone=sw['fixed_utm_zone']
            else:
                utm_zone=item[sw['utm_zone']].value
            if  utm_zone == 56:
                srid = 28356
            elif utm_zone == 55:
                srid = 28355

   
        if srid is not None and xlon is not None and ylat is not None:
            record['geom'] = "ST_GeomFromText('POINT({xlon} {ylat})', {srid})".format(xlon=xlon,ylat=ylat,srid=srid)

        return(record)


    

Test this function with one workbook:

In [12]:
worksheet='Site'
filename='UNSWFireVegResponse_UplandBasalt_AlexThomsen+DK.xlsx'
col_definitions={'site_label':0, 'location_description':10,'utm_zone':11, 'xs':(12,), 'ys':(13,),
                 'gps_uncertainty_m':14,
                 'gps_geom_description':17,
                 'elevation':38, 'visit_date':(2,4,5)}
survey="UplandBasalt"

records = import_records_from_workbook(inputdir,filename,worksheet,col_definitions,create_field_site_record) 

len(records)

28

Check details from one record:

In [13]:
records[12]

{'site_label': 'MWL03',
 'location_description': 'Wynnes Rock Lookout Road',
 'gps_geom_description': '30 m transect for woody plants >2m tall, with two 5x5m subplots at either end (subplots 1 & 2) with 20x5m subplot in middle (subplot 3); non-woody spp and woodplants <2m tall counted in the two 5x5m subplots',
 'geom': "ST_GeomFromText('POINT(256134 6288811)', 28356)"}

#### Insert into field_visits table

This function will create an insert record from one row of the spreadsheet (`item`) using a column dictionary (`sw`). 

We need to consider:
- iterate over multiple visit dates in different columns
- add survey name to the record
- text description of visit, or NULL
- observerlist to be split into multiple names (list or array)

In [14]:
def create_field_visit_record(item,sw):
    site_label = item[sw['site_label']].value
    records = list()
    for k in sw['visit_date']:
        visit_date = item[k].value
        if site_label is not None and site_label != "Site":
            if isinstance(visit_date, datetime):
                record = {'visit_id': site_label, 'visit_date': visit_date}
                if 'survey' in sw.keys():
                    record['survey_name'] = sw['survey']
                for column in ('visit_description', 'mainobserver', 'observerlist','replicate_nr'):
                    if column in sw.keys():
                        val=item[sw[column]].value
                        if val is not None and val not in ('na','NA'):
                            if column=='observerlist':
                                val=val.split(',')
                            record[column] =  val
                records.append(record)
    return records

Test of the function with one workbook/worksheet:

In [15]:
worksheet='Site'
filename='UNSWFireVegResponse_UplandBasalt_AlexThomsen+DK.xlsx'
col_definitions={'site_label':0, 'location_description':10,'utm_zone':11, 'xs':(12,), 'ys':(13,),
                 'gps_uncertainty_m':14,
                 'gps_geom_description':17,
                 'observerlist':3,
                 'elevation':38, 'visit_date':(2,4,5,6,7,8,9),
                 'survey':"UplandBasalt"}

records = import_records_from_workbook(inputdir,filename,worksheet,col_definitions,create_field_visit_record) 

len(records)

42

In [16]:
records[21]

{'visit_id': 'MWL04',
 'visit_date': datetime.datetime(2000, 12, 14, 0, 0),
 'survey_name': 'UplandBasalt',
 'observerlist': ['Alexandria Thomsen', ' Stephan Wilson']}

### Import records to database
I create another function that will call the above functions to process data from a workbook into records that are then imported into the database.

This function passes the keyword arguments `**kwargs` to the next functions. This works, because the structure of both the `create_record_function`s is similar and we can define the column correspondence in the same dictionary as we will see in the examples below:


In [17]:
def run_imports(**kwargs):
    records = import_records_from_workbook(**kwargs,create_record_function=create_field_site_record) 
    # function to create upsert queries with plain substitution to handle geom string
    batch_upsert(params,"form.field_site",records,keycol=('site_label',), idx='field_site_pkey1',execute=True)
    
    records = import_records_from_workbook(**kwargs,create_record_function=create_field_visit_record) 
    # this should work also without problem
    batch_upsert(params,"form.field_visit",records,keycol=('visit_id','visit_date'), idx='field_visit_pkey2',execute=True)




## Processing data from all workbooks

In the following section, I proceed to iterate through all the workbooks, adjusting code for each case. 

Here is the list of available workbooks (again):

In [18]:
wbindex.keys()

dict_keys(['SthnNSWRF_data_bionet2.xlsx', 'UNSWFireVegResponse_UplandBasalt_AlexThomsen+DK.xlsx', 'UNSW_VegFireResponse_RMK_reformat_Sep2021a.xlsx', 'UNSW_VegFireResponse_DataEntry_Yatteyattah all +DK +Milton.xlsx', 'UNSW_VegFireResponse_KNP AlpAsh.xlsx', 'UNSW_VegFireResponse_AlpineBogs_reformat_Sep2021.xlsx', 'RobertsonRF_data_bionet2.xlsx', 'Fire response quadrat survey Newnes Nov2020_DK_revised IDs+AllNovData.xlsm'])

If we select one workbook, we can retrieve a list of column names that we will use in our column definitions for each function:

In [19]:
cols=wbindex['UNSW_VegFireResponse_DataEntry_Yatteyattah all +DK +Milton.xlsx']['Site']
for k in range(1,len(cols)):
    print("%s :: %s" % (k-1,cols[k-1]))
               

0 :: Site
1 :: Replicate
2 :: Observers (comma sep if >1)
3 :: Date of samping
4 :: Survey Date Replicate 1
5 :: Survey Date Replicate 2
6 :: Survey Date Replicate 3
7 :: Survey Date Replicate 4
8 :: Survey Date Replicate 5
9 :: Survey Date Replicate 6
10 :: Location text
11 :: Zone
12 :: Easting
13 :: Northing
14 :: GPS Precision (m)
15 :: Latitude
16 :: Longitude
17 :: Layout & GPS marker position
18 :: 2nd ref point Zone
19 :: 2nd ref point Easting
20 :: 2nd ref point Northing
21 :: 2nd ref point Position of GPS
22 :: 3rd ref point Zone
23 :: 3rd ref point Easting
24 :: 3rd ref point Northing
25 :: 3rd ref point Position of GPS
26 :: 4th ref point Zone
27 :: 4th ref point Easting
28 :: 4th ref point Northing
29 :: 4th ref point Position of GPS
30 :: Total sample area (sq.m)
31 :: Subquadrat area (sq.m)
32 :: # subquadrats
33 :: Substrate
34 :: Notes
35 :: Slope
36 :: Aspect
37 :: Elevation
38 :: Disturbance notes
39 :: Cwth TEC
40 :: NSW TEC
41 :: variant
42 :: Vegetation formation


This helps us to determine which column numbers corresponds to the field that we want to extract from the spreadsheet. 
Check the number of rows updated in each case, and compare the changes in the database.

### Upland / Basalt

- 28 sites:
    - all with location description and coordinates, 
    - elevation data for all but three.
- 42 visits:
    - all visited by Alexandria Thomsen
    - most recent visit in 2021
    - older visits including values from the 90's ???

In [20]:
run_imports(filepath=inputdir,
            workbook='UNSWFireVegResponse_UplandBasalt_AlexThomsen+DK.xlsx',
            worksheet='Site',
            col_dictionary={'site_label':0, 'location_description':10,'utm_zone':11, 'xs':(12,), 'ys':(13,),
                 'gps_uncertainty_m':14,
                 'gps_geom_description':17,
                 'observerlist':3,'replicate_nr':1,
                 'elevation':38, 'visit_date':(2,4,5,6,7,8,9),
                 'survey':"UplandBasalt"})

Connecting to the PostgreSQL database...
28 rows updated
Database connection closed.
Connecting to the PostgreSQL database...
42 rows updated
Database connection closed.


### RMK

- 16 sites:
    - all with location description and coordinates, 
    - all but one with elevation data
- 16 visits: 
    - each site visited once between September 2020 and August 2021
    - main observer is Robert Kooyman, except one by T. Manson


In [21]:
run_imports(filepath=inputdir,
            workbook='UNSW_VegFireResponse_RMK_reformat_Sep2021a.xlsx',
            worksheet='Site',
            col_dictionary={'site_label':0,'location_description':10, 
                            'utm_zone':11,'xs':(12,), 'ys':(13,), 'elevation':37, 
                            'gps_uncertainty_m':14, 'gps_geom_description':17,
                            'visit_date':range(3,9), 'replicate_nr':1,'observerlist':2,'survey':"RMK"})

Connecting to the PostgreSQL database...
16 rows updated
Database connection closed.
Connecting to the PostgreSQL database...
16 rows updated
Database connection closed.


### Southern NSW rain forest

- 5 sites, all with location description and coordinates, elevation data missing for four sites
- each site visited once, November/December 2021, main observer is David Keith

In [22]:
run_imports(filepath=inputdir,
            workbook='SthnNSWRF_data_bionet2.xlsx',
            worksheet='Site',
            col_dictionary={'site_label':0,'location_description':10, 'visit_date':range(3,9), 
                'lons':(16,), 'lats':(15,), 'elevation':37,
                 'gps_uncertainty_m':14,
                 'gps_geom_description':17,
                 'observerlist':2,'replicate_nr':1,
                 'survey':"SthnNSWRF"})

Connecting to the PostgreSQL database...
5 rows updated
Database connection closed.
Connecting to the PostgreSQL database...
5 rows updated
Database connection closed.


### KNP Alpine Ash

- 9 sites (but two shared with Robertson RF?), 
    - AlpAsh69 is missing site description, 
    - all with coordinates and elevation
- All sites visited once
    - all sites visited in April 2021
    - Main observer is Jackie Miles, except for AlpAsh_69 (missing)

In [23]:
run_imports(filepath=inputdir,
            workbook='UNSW_VegFireResponse_KNP AlpAsh.xlsx',
            worksheet='Site',
            col_dictionary={'site_label':0,'location_description':10, 'visit_date':range(3,9), 
               'utm_zone':11, 'xs':(12,), 'ys':(13,), 'elevation':37,
                 'gps_uncertainty_m':14,
                 'gps_geom_description':17,
                 'observerlist':2,'replicate_nr':1,
                 'survey':"KNP AlpAsh"})


Connecting to the PostgreSQL database...
9 rows updated
Database connection closed.
Connecting to the PostgreSQL database...
9 rows updated
Database connection closed.


### Alpine bogs

- Six sites, all with full information (description, elevation, coords)
- All sites visited once in 2021:
    - two sites by Jackie Miles in March
    - four sites by David Keith between October - December

In [24]:
run_imports(filepath=inputdir,
            workbook='UNSW_VegFireResponse_AlpineBogs_reformat_Sep2021.xlsx',
            worksheet='Site',
            col_dictionary={'site_label':0,'location_description':10, 'visit_date':range(3,9), 
               'utm_zone':11, 'xs':(12,), 'ys':(13,), 'elevation':37,
                 'gps_uncertainty_m':14,
                 'gps_geom_description':17,
                 'observerlist':2,'replicate_nr':1,
                 'survey':"Alpine Bogs"})


Connecting to the PostgreSQL database...
6 rows updated
Database connection closed.
Connecting to the PostgreSQL database...
6 rows updated
Database connection closed.


### Robertson RF 

- Two sites
    - both included in file for KNP AlpAsh? duplicated codes? same entries?
    - both with full information
- Three visits
    - both sites visited in January 2021
    - one site visited in August 2002 ?
    - three different main observers: David Keith, Robert Kooyman and T. Mason

In [25]:
run_imports(filepath=inputdir,
            workbook='RobertsonRF_data_bionet2.xlsx',
            worksheet='Site',
            col_dictionary={'site_label':0,'location_description':10, 'visit_date':range(3,9), 
               'utm_zone':11, 'xs':(12,), 'ys':(13,), 'elevation':37,
                 'gps_uncertainty_m':14,
                 'gps_geom_description':17,
                 'observerlist':2,'replicate_nr':1,
                 'survey':"Robertson RF"})

Connecting to the PostgreSQL database...
2 rows updated
Database connection closed.
Connecting to the PostgreSQL database...
3 rows updated
Database connection closed.


### Newness
This file has a different format, and includes many empty site records, had to tweak functions and troubleshoot a bit.
Much slower processing

- 20 sites
    - description missing for six sites
    - all with elevation and coordinates
- 54 visits (!)
    - each site visited two or three times 
    - visits between 2020 and 2021
    - observer information is missing or incomplete in most visits

In [26]:
filename='Fire response quadrat survey Newnes Nov2020_DK_revised IDs+AllNovData.xlsm'
col_definitions={'site_label':0, 'visit_date':(8,), 'fixed_utm_zone':56, 'xs':(1,), 'ys':(2,), 'elevation':4, 'survey':"NEWNES"}

run_imports(filepath=inputdir,
            workbook=filename,
            worksheet='Site',
            col_dictionary=col_definitions)


Connecting to the PostgreSQL database...
20 rows updated
Database connection closed.
Connecting to the PostgreSQL database...
54 rows updated
Database connection closed.


### Yatteyattah 
This workbook had no 'Site' worksheet, had to reformat the data from 'Sample' and add the worksheet to make it work, also changed the format in "date of sampling" column.

- 6 sites, all with full information 
- 6 visits
    - two sites visited in July 2020
    - four sites visited in February 2021
    - all visits by Jackie Miles

In [27]:
filename='UNSW_VegFireResponse_DataEntry_Yatteyattah all +DK +Milton.xlsx'
col_definitions={'site_label':0,'location_description':10, 'utm_zone':11,'elevation':37, 'visit_date':range(3,9), 
                'xs':(12,), 'ys':(13,),
                 'gps_uncertainty_m':14,
                 'gps_geom_description':17,
                 'observerlist':2,'replicate_nr':1,
                 'survey':"Yatteyattah"}
run_imports(filepath=inputdir,
            workbook=filename,
            worksheet='Site',
            col_dictionary=col_definitions)


Connecting to the PostgreSQL database...
6 rows updated
Database connection closed.
Connecting to the PostgreSQL database...
6 rows updated
Database connection closed.


### Sites without survey

Some sites / visits in the database with missing information:

Site | Date | samples | species
---|---|---|---
BER1 | 2020-02-06 | 1 | No data added
BER2 | 2020-02-25 | 1 | No data added
BS1 | 2020-06-03 | 20 | No data added
Duffy | 2020-01-14 | 1 | No data added
Ka1 | 2020-01-07 | 1 | No data added
Ka3 | 2020-01-09 | 1 | No data added
Ka3_b | 2020-01-08 | 1 | No data added
Ka4 | 2020-02-04 | 1 | No data added
Ka5 | 2020-02-05 | 2 | No data added
LC1 | 2020-01-15 | 1 | No data added
LC2 | 2020-02-14 | 1 | No data added
Madden1 | 2020-02-26 | 1 | No data added
R0Y005 | 2019-12-12 | 2 | No data added
ROY001 | 2019-10-25 | 2 | No data added
ROY002 | 2019-10-25 | 1 | No data added
ROY003 | 2019-12-05 | 1 | No data added
ROY004 | 2019-12-11 | 1 | No data added
SCCJB13 | 2020-12-07 | 4 | No data added
SCCJB37-Near | 2020-12-07 | 4 | No data added
UppClydeRF1 | 2021-11-29 | 4 | 27
UppClydeRF1 | 2021-12-01 | 1 | 1

### Fill main observer id

We run this after import in order to translate the list of observers into a integer value for the main observer

In [43]:

print('Connecting to the PostgreSQL database...')
conn = psycopg2.connect(**params)
cur = conn.cursor()

updated_rows=0
qrystr = "UPDATE form.field_visit set mainobserver=%s WHERE observerlist[1]='%s' AND mainobserver is NULL;"
for k in observerid:
    qry = qrystr % (k[0]," ".join(k[1:3]))
    print(qry)
    cur.execute(qry)
    if cur.rowcount > 0:
        updated_rows = updated_rows + cur.rowcount
        print("%s rows updated" % updated_rows)

cur.close()
conn.commit()
        
if conn is not None:
    conn.close()
    print('Database connection closed.')

Connecting to the PostgreSQL database...
UPDATE form.field_visit set mainobserver=7 WHERE observerlist[1]='David Keith' AND mainobserver is NULL;
10 rows updated
UPDATE form.field_visit set mainobserver=9 WHERE observerlist[1]='D. Benson' AND mainobserver is NULL;
UPDATE form.field_visit set mainobserver=10 WHERE observerlist[1]='L. Watts,' AND mainobserver is NULL;
UPDATE form.field_visit set mainobserver=11 WHERE observerlist[1]='T. Manson' AND mainobserver is NULL;
UPDATE form.field_visit set mainobserver=12 WHERE observerlist[1]='Jackie Miles' AND mainobserver is NULL;
24 rows updated
UPDATE form.field_visit set mainobserver=13 WHERE observerlist[1]='Robert Kooyman' AND mainobserver is NULL;
30 rows updated
UPDATE form.field_visit set mainobserver=8 WHERE observerlist[1]='Alexandria Thomsen' AND mainobserver is NULL;
71 rows updated
UPDATE form.field_visit set mainobserver=14 WHERE observerlist[1]='Jedda Lemmen' AND mainobserver is NULL;
72 rows updated
Database connection closed.


In [44]:
updated_rows

72