# Read files summarising field work and update database
These Excel workbooks were imported on February 2022.

The scripts documented here have been created to:

- Read data from spreadsheets with field-work data
- Create records for data import into the database
- Insert or update records in the database


## Set-up
Load libraries 

In [3]:
import openpyxl
from pathlib import Path
import os
from datetime import datetime
from configparser import ConfigParser
import psycopg2
from psycopg2.extras import DictCursor
from psycopg2.extensions import AsIs
import re
#import postgis
import pandas as pd
import copy

Define path to workbooks

In [4]:
repodir = Path("../../") 
inputdir = repodir / "data" / "field-form"

 ### DB connection parameters and helper functions
 
 Database credentials are stored in a database.ini file

In [5]:
filename = repodir / 'secrets' / 'database.ini'
section = 'aws-lght-sl'

# create a parser
parser = ConfigParser()
# read config file
parser.read(filename)

# get section, default to postgresql
db = {}
if parser.has_section(section):
    params = parser.items(section)
    for param in params:
        db[param[0]] = param[1]
else:
    raise Exception('Section {0} not found in the {1} file'.format(section, filename))

params = db

Define a function to batch process insert or update queries:

In [6]:
def batch_upsert(params,table,records,keycol,idx, execute=False,useconn=None):
    if useconn is None:
        # connect to the PostgreSQL server
        print('Connecting to the PostgreSQL database...')
        conn = psycopg2.connect(**params)
    else:
        conn = useconn
    cur = conn.cursor()
    #postgis.register(cur)
    updated_rows=0
    errors=dict()
    for record in records:
        if len(record.keys())>len(keycol):
            if 'geom' in record.keys():
                the_geom=record['geom']
                record['geom']='GEOMSTR'
            if idx is not None:
                qrystr = "INSERT INTO %s (%s) values %s ON CONFLICT ON CONSTRAINT %s DO UPDATE SET %s"
                upd=list()
                for k in record.keys():
                    if k not in keycol:
                        upd.append("{col}=EXCLUDED.{col}".format(col=k))
                qry = cur.mogrify(qrystr, (AsIs(table),
                                AsIs(','.join(record.keys())),
                                tuple(record.values()),
                                AsIs(idx),
                                AsIs(','.join(upd))
                               ))
            else:
                qrystr = "INSERT INTO %s (%s) values %s ON CONFLICT DO NOTHING"
                qry = cur.mogrify(qrystr, (AsIs(table),
                                AsIs(','.join(record.keys())),
                                tuple(record.values())
                               ))

            if 'geom' in record.keys():
                qry=qry.decode('utf-8')
                qry=qry.replace("'GEOMSTR'",the_geom)
                record['geom']=the_geom

            if execute:
               # try:
                cur.execute(qry)
               # except psycopg2.errors.CheckViolation as error:
               #     if type(error) in errors.keys():
               #         errors[type(error)]=errors[type(error)]+1
                #    else:
                #        errors[type(error)]=1
                #except psycopg2.errors.CheckViolation as error:
                #    print(error)
                if cur.rowcount > 0:
                    updated_rows = updated_rows + cur.rowcount
            else:
                print(qry)
            
    conn.commit()        
    cur.close()
    print("%s rows updated" % (updated_rows))
    print(errors)
    if useconn is None and conn is not None:
        conn.close()
        print('Database connection closed.')

        

Just a test with random data, use `execute=False` to print the query:

In [7]:
record={'site_label':'test','geom':"ST_GeomFromText('POINT(1 2)', 4326)"}
batch_upsert(params,"form.field_site",(record,),keycol=('site_label',), idx='field_site_pkey1',execute=False)

Connecting to the PostgreSQL database...
INSERT INTO form.field_site (site_label,geom) values ('test', ST_GeomFromText('POINT(1 2)', 4326)) ON CONFLICT ON CONSTRAINT field_site_pkey1 DO UPDATE SET geom=EXCLUDED.geom
0 rows updated
{}
Database connection closed.


In [8]:
batch_upsert(params,"form.field_site",(record,),keycol=('site_label',), idx=None,execute=False)

Connecting to the PostgreSQL database...
INSERT INTO form.field_site (site_label,geom) values ('test', ST_GeomFromText('POINT(1 2)', 4326)) ON CONFLICT DO NOTHING
0 rows updated
{}
Database connection closed.


### List of valid sites and visits

In [9]:
conn = psycopg2.connect(**params)
cur = conn.cursor(cursor_factory=DictCursor)
cur.execute("select distinct visit_id,visit_date FROM form.field_visit")
all_visits=cur.fetchall()
cur.close()
conn.close()

In [10]:
all_visits[1]['visit_date']

datetime.date(2020, 1, 15)

### Read valid vegetation classes from spreadsheet

## Read workbooks
Each spreadsheet has a slightly different structure, so these scripts have to be adapted for each case.

### List of workbooks/spreadsheets in directory

In [62]:
avail_files = os.listdir(inputdir)
#avail_files

In [63]:
valid_files = ['SthnNSWRF_data_bionet2.xlsx',
               'UNSWFireVegResponse_UplandBasalt_AlexThomsen+DK.xlsx',
               'UNSW_VegFireResponse_RMK_reformat_Sep2021a.xlsx',
               'UNSW_VegFireResponse_DataEntry_Yatteyattah all +DK +Milton_revisedfields_Mar2022.xlsx',
               'UNSW_VegFireResponse_KNP AlpAsh_firehistupdate.xlsx',
               'UNSW_VegFireResponse_AlpineBogs_reformat_Sep2021.xlsx',
               'RobertsonRF_data_bionet2.xlsx',
               'Fire response quadrat survey Newnes Nov2020_DK_revised IDs+AllNovData.xlsm']

In [64]:
for ff in valid_files:
    print(ff in avail_files)

True
True
True
True
True
True
True
True


Here we create an index of worksheets and column headers for each file

In [65]:
wbindex=dict()
for workbook_name in valid_files:
    inputfile=inputdir / workbook_name
    # using data_only=True to get the calculated cell values
    wb = openpyxl.load_workbook(inputfile,data_only=True)
    wbindex[workbook_name]=dict()
    for ws in wb.worksheets:
        wbindex[workbook_name][ws._WorkbookChild__title]=[list(),list()]
        for k in range(1,ws.max_column):
            wbindex[workbook_name][ws._WorkbookChild__title][0].append(ws.cell(row=1,column=k).value)
            wbindex[workbook_name][ws._WorkbookChild__title][1].append(ws.cell(row=2,column=k).value)
        

In [66]:
wbindex.keys()

dict_keys(['SthnNSWRF_data_bionet2.xlsx', 'UNSWFireVegResponse_UplandBasalt_AlexThomsen+DK.xlsx', 'UNSW_VegFireResponse_RMK_reformat_Sep2021a.xlsx', 'UNSW_VegFireResponse_DataEntry_Yatteyattah all +DK +Milton_revisedfields_Mar2022.xlsx', 'UNSW_VegFireResponse_KNP AlpAsh_firehistupdate.xlsx', 'UNSW_VegFireResponse_AlpineBogs_reformat_Sep2021.xlsx', 'RobertsonRF_data_bionet2.xlsx', 'Fire response quadrat survey Newnes Nov2020_DK_revised IDs+AllNovData.xlsm'])

In [67]:
wbindex['SthnNSWRF_data_bionet2.xlsx'].keys()

dict_keys(['Site', 'Fire', 'Structure', 'Floristics', 'Reference', 'Info', 'Sheet1'])

### Vegetation formation

I defined this function to read vegetation information from each worksheet.

In [17]:
def read_veg_classes(filepath,workbook,worksheet,col_definitions):
    wb = openpyxl.load_workbook(filepath / workbook,data_only=True)
    ws = wb[worksheet]
    records=list()
    for row in range(2,ws.max_row+1):
        visitid=ws.cell(row,col_definitions['visit_id']).value
        if visitid is not None and visitid != 'Site':
            visitdate=ws.cell(row,col_definitions['visit_date']).value
            if isinstance(visitdate,datetime):
                visitdate=visitdate.date()
            else:
                visitdate=datetime.strptime(visitdate, '%d/%m/%Y').date()
            vegclass=ws.cell(row,col_definitions['vegetation_class']).value
            vegformation=ws.cell(row,col_definitions['vegetation_formation']).value
            if vegclass=='Warm temperate rainforests':
                vegclass='Southern Warm Temperate Rainforests'
            if vegclass=='Littoral rainforest':
                vegclass='Littoral rainforests'
            if vegformation=='Rainforests':
                vegclass=vegclass.title()
            if vegformation in ('Blue Mountains Cool Wet Eucalypt Forest','Wet Sclerophyll Forests (Shrubby sub-formation)'):
                vegformation='Wet sclerophyll forests (Shrubby subformation)'
            if vegclass=='Southern Tableland Wet Sclerophyll Forests':
                vegformation='Wet sclerophyll forests (Grassy subformation)'
            if vegclass=='Montane wet sclerophyll forests':
                vegformation='Wet sclerophyll forests (Grassy subformation)'
                vegclass='Montane Wet Sclerophyll Forests'
            if vegclass=='Alpine bogs and fens':
                vegclass='Alpine Bogs and Fens'
            
            record={'visit_id': visitid,
            'visit_date': visitdate,
            'vegetation_formation':vegformation,
                'vegetation_class':vegclass}
            records.append(record)
    return records

#### Southern Rainforest

In [18]:
wbindex['SthnNSWRF_data_bionet2.xlsx']['Site'][0][40:]


['NSW TEC', 'variant', 'Vegetation formation', 'Vegegtation class']

In [18]:
filename='SthnNSWRF_data_bionet2.xlsx'
worksheet='Site'
records = read_veg_classes(inputdir,filename,worksheet,{'visit_id':1,'visit_date':4,'vegetation_formation':43,'vegetation_class':44})
records

[{'visit_id': 'UppClyde1',
  'visit_date': datetime.date(2021, 11, 29),
  'vegetation_formation': 'Rainforests',
  'vegetation_class': 'Southern Warm Temperate Rainforests'},
 {'visit_id': 'DeuaRF',
  'visit_date': datetime.date(2021, 11, 30),
  'vegetation_formation': 'Rainforests',
  'vegetation_class': 'Southern Warm Temperate Rainforests'},
 {'visit_id': 'MaxwellsCk',
  'visit_date': datetime.date(2021, 12, 2),
  'vegetation_formation': 'Rainforests',
  'vegetation_class': 'Southern Warm Temperate Rainforests'},
 {'visit_id': 'MongaPenanceLower',
  'visit_date': datetime.date(2021, 12, 3),
  'vegetation_formation': 'Rainforests',
  'vegetation_class': 'Cool Temperate Rainforests'},
 {'visit_id': 'MongaPenanceUpper',
  'visit_date': datetime.date(2021, 12, 3),
  'vegetation_formation': 'Rainforests',
  'vegetation_class': 'Cool Temperate Rainforests'}]

In [19]:
batch_upsert(params,"form.field_visit_veg_description",records,keycol=('visit_id','visit_date'), idx=None,execute=True)


Connecting to the PostgreSQL database...
0 rows updated
{}
Database connection closed.


#### Upland Basalt

In [20]:
filename='UNSWFireVegResponse_UplandBasalt_AlexThomsen+DK.xlsx'
worksheet='Site'

wbindex[filename][worksheet][0][0:8]


['Site',
 'Replicate',
 'Date of previous survey',
 'Observers (comma sep if >1)',
 'Date of sampling',
 'Prior Survey Date',
 None,
 None]

In [21]:
records = read_veg_classes(inputdir,filename,worksheet,{'visit_id':1,'visit_date':3,'vegetation_formation':44,'vegetation_class':45})


In [22]:
records[10]

{'visit_id': 'MWLB4B7Lb',
 'visit_date': datetime.date(2020, 12, 10),
 'vegetation_formation': 'Wet sclerophyll forests (Shrubby subformation)',
 'vegetation_class': 'Southern Escarpment Wet Sclerophyll Forests'}

In [23]:
batch_upsert(params,"form.field_visit_veg_description",records,keycol=('visit_id','visit_date'), idx='field_visit_veg_description_pkey',execute=True)


Connecting to the PostgreSQL database...
28 rows updated
{}
Database connection closed.


#### RMK

In [24]:
filename='UNSW_VegFireResponse_RMK_reformat_Sep2021a.xlsx'
wbindex[filename][worksheet][0][0:8]


['Site',
 'Replicate',
 'Observers (comma sep if >1)',
 'Date of samping',
 'Survey Date Replicate 1',
 'Survey Date Replicate 2',
 'Survey Date Replicate 3',
 'Survey Date Replicate 4']

In [25]:
records = read_veg_classes(inputdir,filename,worksheet,{'visit_id':1,'visit_date':4,'vegetation_formation':43,'vegetation_class':44})


In [26]:
len(records)

17

In [27]:
batch_upsert(params,"form.field_visit_veg_description",records,keycol=('visit_id','visit_date'), idx='field_visit_veg_description_pkey',execute=True)


Connecting to the PostgreSQL database...
17 rows updated
{}
Database connection closed.


#### Yatteyatah

In [28]:
filename='UNSW_VegFireResponse_DataEntry_Yatteyattah all +DK +Milton_revisedfields_Mar2022.xlsx'
worksheet='Site'

wbindex[filename][worksheet][0][40:48]


['NSW TEC', 'variant', 'Vegetation formation', 'Vegegtation class']

In [29]:
records = read_veg_classes(inputdir,filename,worksheet,{'visit_id':1,'visit_date':4,'vegetation_formation':43,'vegetation_class':44})
len(records)

7

In [30]:
batch_upsert(params,"form.field_visit_veg_description",records,keycol=('visit_id','visit_date'), idx=None,execute=True)


Connecting to the PostgreSQL database...
0 rows updated
{}
Database connection closed.


#### KNP Alp Ash

In [31]:
filename='UNSW_VegFireResponse_KNP AlpAsh_firehistupdate.xlsx'
worksheet='Site'

wbindex[filename][worksheet][0][40:48]


['NSW TEC',
 'variant',
 'Vegetation formation',
 'Vegegtation class',
 'NSW PCT',
 None,
 None,
 None]

In [32]:
records = read_veg_classes(inputdir,filename,worksheet,{'visit_id':1,'visit_date':4,'vegetation_formation':43,'vegetation_class':44})


In [33]:
len(records)

8

In [34]:
batch_upsert(params,"form.field_visit_veg_description",records,keycol=('visit_id','visit_date'), idx=None,execute=True)


Connecting to the PostgreSQL database...
0 rows updated
{}
Database connection closed.


#### Alpine bogs

In [35]:
filename='UNSW_VegFireResponse_AlpineBogs_reformat_Sep2021.xlsx'
worksheet='Site'

wbindex[filename][worksheet][0][40:48]

['NSW TEC',
 'variant',
 'Vegetation formation',
 'Vegegtation class',
 'NSW PCT',
 None,
 None,
 None]

In [36]:
records = read_veg_classes(inputdir,filename,worksheet,{'visit_id':1,'visit_date':4,'vegetation_formation':43,'vegetation_class':44})


In [37]:
len(records)

6

In [38]:
batch_upsert(params,"form.field_visit_veg_description",records,keycol=('visit_id','visit_date'), idx=None,execute=True)


Connecting to the PostgreSQL database...
0 rows updated
{}
Database connection closed.


#### Robertson RF

In [39]:
filename='RobertsonRF_data_bionet2.xlsx'
wbindex[filename][worksheet][0][40:48]


['NSW TEC', 'variant', 'Vegetation formation', 'Vegegtation class']

In [40]:
records = read_veg_classes(inputdir,filename,worksheet,{'visit_id':1,'visit_date':4,'vegetation_formation':43,'vegetation_class':44})
batch_upsert(params,"form.field_visit_veg_description",records,keycol=('visit_id','visit_date'), idx='field_visit_veg_description_pkey',execute=True)


Connecting to the PostgreSQL database...
2 rows updated
{}
Database connection closed.


#### Newnes
Vegetation information is not present in 'Site' worksheet. Not imported.

In [41]:
filename='Fire response quadrat survey Newnes Nov2020_DK_revised IDs+AllNovData.xlsm'
wbindex[filename][worksheet][0]


['Site',
 'Easting',
 'Northing',
 'Valley',
 'Elev',
 'Undermined',
 'Fire interval',
 'Census',
 'Date',
 'Scorch hgt',
 'Shb foliage scorch',
 "Shb foliage c'sume",
 'Herb foliage scorch',
 "Herb foliage c'sume",
 'Twig diam mean',
 'Twig diam se',
 'Peat depth burnt',
 'Peat extent burnt',
 'Peat fire index',
 'Postfire treehgt lower',
 'Postfire treehgt upper',
 'Postfire treehgt mode',
 'Postfire treecov',
 'Prefire shbhgt lower',
 'Prefire shbhgt upper',
 'Prefire shbhgt mode',
 'Prefire shbcov',
 'Postfire shbhgt lower',
 'Postfire shbhgt upper',
 'Postfire shbhgt mode',
 'Postfire shbcov',
 'Prefire hrbhgt lower',
 'Prefire hrbhgt upper',
 'Prefire hrbhgt mode',
 'Prefire hrbcov',
 'Postfire hrbhgt lower',
 'Postfire hrbhgt upper',
 'Postfire hrbhgt mode',
 'Postfire hrbcov',
 'Biomass A',
 'Biomass B',
 'Biomass C',
 'Biomass D',
 'Biomass E',
 'Mean dry (60C) biomass (g)',
 'CV biomass',
 'Mean biomass (g/m2)',
 'Native spp richness',
 'Sediment depth (mm) 1',
 'Sediment dep

### Vegetation structure

#### Function definitions

In [42]:
def read_veg_structure(filepath,workbook,worksheet,col_definitions):
    wb = openpyxl.load_workbook(filepath / workbook,data_only=True)
    ws = wb[worksheet]
    triplet=('best','lower','upper')
    records=list()
    for row in range(2,ws.max_row+1):
        visitid=ws.cell(row,col_definitions['visit_id']).value
        if visitid is not None and visitid != 'Site':
            visitdate=ws.cell(row,col_definitions['visit_date']).value
            if isinstance(visitdate,datetime):
                visitdate=visitdate.date()
            else:
                visitdate=datetime.strptime(visitdate, '%d/%m/%Y').date()
            record={'visit_id': visitid,
            'visit_date': visitdate}
            
            stage=ws.cell(row,col_definitions['stage']).value
            if stage is not None:
                record['comment']=['Stage: %s' % stage,]
            stratum=ws.cell(row,col_definitions['stratum']).value
            for var in ('height','cover','scorch'):
                if var in col_definitions.keys():
                    record1=copy.deepcopy(record)
                    record1['measured_var']='stratum %s %s' % (stratum,var)
                    for k in range(len(col_definitions[var])):
                        val=ws.cell(row,col_definitions[var][k]).value
                        if val is not None and val != 'NA':
                            if triplet[k]=='lower' and 'best' in record1.keys() and val > record1['best']:
                                record1['lower']=record1['best']
                                record1['comment'].append('lower bound given as %s but greater than best estimate' % val)
                            if triplet[k]=='upper' and 'best' in record1.keys() and val < record1['best']:
                                record1['comment'].append('upper bound given as %s but less than best estimate' % val)
                                record1['upper']=record1['best']
                            else:
                                record1[triplet[k]]=val
                    records.append(record1)
    return records


#### Southern NSW Rainforest

In [43]:
filename='SthnNSWRF_data_bionet2.xlsx'
worksheet='Structure'
wbindex[filename][worksheet][0]

['SiteNo',
 'Replicate',
 'Date',
 'Stage',
 'Stratum',
 'LowerHeight',
 'UpperHeight',
 'ModalHeight',
 'PercentCover',
 'Dominant1',
 'Dominant2']

In [44]:
col_def={'visit_id':1, 'visit_date':3, 'stage':4, 'stratum':5, 'height':(8,6,7),'cover':(9,)}

In [45]:
records=read_veg_structure(inputdir,filename,worksheet,col_def)

In [46]:
len(records)
records[36:38]

[{'visit_id': 'MaxwellsCk',
  'visit_date': datetime.date(2021, 12, 2),
  'comment': ['Stage: inferred prefire',
   'upper bound given as 20 but less than best estimate'],
  'measured_var': 'stratum T height',
  'best': 30,
  'lower': 12,
  'upper': 30},
 {'visit_id': 'MaxwellsCk',
  'visit_date': datetime.date(2021, 12, 2),
  'comment': ['Stage: inferred prefire'],
  'measured_var': 'stratum T cover',
  'best': 70}]

In [47]:
batch_upsert(params,"form.field_visit_vegetation_estimates",records,
             keycol=('visit_id','visit_date','measured_var'), 
             idx='field_visit_vegetation_estimates_pkey',execute=True)


Connecting to the PostgreSQL database...
72 rows updated
{}
Database connection closed.


#### Upland Basalt
Manual updates: 
- date for MWL15 changed from 11/11/2021 to 11/11/2020
- date for MWL11 changed from 18/11/2021 to 18/11/2020

Problems:
- date for MWL11b does not match, not sure which one is correct

In [48]:
filename='UNSWFireVegResponse_UplandBasalt_AlexThomsen+DK.xlsx'
wbindex[filename][worksheet][0]

['SiteNo',
 'Replicate',
 'Date',
 'Stage',
 'Stratum',
 'LowerHeight',
 'UpperHeight',
 'ModalHeight',
 'PercentCover',
 'Dominant1',
 'Dominant2']

In [49]:
col_def={'visit_id':1, 'visit_date':3, 'stage':4, 'stratum':5, 'height':(8,6,7),'cover':(9,)}
records=read_veg_structure(inputdir,filename,worksheet,col_def)
len(records)

186

In [50]:
valid_records=list()
for record in records:
    p=filter(lambda n: n['visit_id'] == record['visit_id'] and  n['visit_date'] == record['visit_date'], all_visits)
    found=list(p)
    if len(found) == 0:
        print(record)
    else:
        valid_records.append(record)

{'visit_id': 'MWL11b', 'visit_date': datetime.date(2021, 12, 2), 'comment': ['Stage: 1 year postfire'], 'measured_var': 'stratum T height', 'best': 25, 'lower': 20, 'upper': 25}
{'visit_id': 'MWL11b', 'visit_date': datetime.date(2021, 12, 2), 'comment': ['Stage: 1 year postfire'], 'measured_var': 'stratum T cover', 'best': 10}
{'visit_id': 'MWL11b', 'visit_date': datetime.date(2021, 12, 2), 'comment': ['Stage: 1 year postfire'], 'measured_var': 'stratum M1 height', 'best': 0.5, 'lower': 0.5, 'upper': 6}
{'visit_id': 'MWL11b', 'visit_date': datetime.date(2021, 12, 2), 'comment': ['Stage: 1 year postfire'], 'measured_var': 'stratum M1 cover', 'best': 2}
{'visit_id': 'MWL11b', 'visit_date': datetime.date(2021, 12, 2), 'comment': ['Stage: 1 year postfire'], 'measured_var': 'stratum L1 height', 'best': 1, 'lower': 0.1, 'upper': 1.5}
{'visit_id': 'MWL11b', 'visit_date': datetime.date(2021, 12, 2), 'comment': ['Stage: 1 year postfire'], 'measured_var': 'stratum L1 cover', 'best': 90}


In [51]:
batch_upsert(params,"form.field_visit_vegetation_estimates",
             valid_records,
             keycol=('visit_id','visit_date','measured_var'), 
             idx='field_visit_vegetation_estimates_pkey',execute=True)


Connecting to the PostgreSQL database...
180 rows updated
{}
Database connection closed.


#### NE NSW / SE Qld Rainforest

In [52]:
filename=valid_files[2]
print(filename)
wbindex[filename].keys()

UNSW_VegFireResponse_RMK_reformat_Sep2021a.xlsx


dict_keys(['Site', 'Fire', 'VegStructure', 'Floristics', 'pivot', 'Reference'])

In [53]:
worksheet='VegStructure'
wbindex[filename][worksheet][1]

['Site',
 'Replicate',
 'Prefire treehgt lower',
 'Prefire treehgt upper',
 'Prefire treehgt mode',
 'Prefire treecov',
 'Prefire shbhgt lower',
 'Prefire shbhgt upper',
 'Prefire shbhgt mode',
 'Prefire shbcov',
 'Prefire hrbhgt lower',
 'Prefire hrbhgt upper',
 'Prefire hrbhgt mode',
 'Prefire hrbcov',
 'Postfire treehgt lower',
 'Postfire treehgt upper',
 'Postfire treehgt mode',
 'Postfire treecov',
 'Postfire shbhgt lower',
 'Postfire shbhgt upper',
 'Postfire shbhgt mode',
 'Postfire shbcov',
 'Postfire hrbhgt lower',
 'Postfire hrbhgt upper',
 'Postfire hrbhgt mode']

#### Yatteyattah

In [54]:
valid_files[3]

'UNSW_VegFireResponse_DataEntry_Yatteyattah all +DK +Milton_revisedfields_Mar2022.xlsx'

In [55]:
filename='UNSW_VegFireResponse_DataEntry_Yatteyattah all +DK +Milton_revisedfields_Mar2022.xlsx'
wbindex[filename].keys()

dict_keys(['Sample', 'Site', 'Environment', 'Fire', 'VegStructure', 'Floristics', 'Reference'])

In [56]:
worksheet='VegStructure'
wbindex[filename][worksheet][1]

['Site',
 'Prefire treehgt lower',
 'Prefire treehgt upper',
 'Prefire treehgt mode',
 'Prefire treecov',
 'Prefire shbhgt lower',
 'Prefire shbhgt upper',
 'Prefire shbhgt mode',
 'Prefire shbcov',
 'Prefire hrbhgt lower',
 'Prefire hrbhgt upper',
 'Prefire hrbhgt mode',
 'Prefire hrbcov',
 'Postfire treehgt lower',
 'Postfire treehgt upper',
 'Postfire treehgt mode',
 'Postfire treecov',
 'Postfire shbhgt lower',
 'Postfire shbhgt upper',
 'Postfire shbhgt mode',
 'Postfire shbcov',
 'Postfire hrbhgt lower',
 'Postfire hrbhgt upper',
 'Postfire hrbhgt mode']

#### Alpine Ash

In [57]:
valid_files[4]

'UNSW_VegFireResponse_KNP AlpAsh_firehistupdate.xlsx'

In [60]:
filename=valid_files[4]
wbindex[filename].keys()
worksheet='Structure'

In [61]:
col_def={'visit_id':1, 'visit_date':3, 'stage':4, 'stratum':5, 'height':(8,6,7),'cover':(9,)}
records=read_veg_structure(inputdir,filename,worksheet,col_def)
len(records)

110

In [62]:
for record in records:
    print(record['measured_var'])

stratum T1 height
stratum T1 cover
stratum T2 height
stratum T2 cover
stratum M1 height
stratum M1 cover
stratum L1 height
stratum L1 cover
stratum T1 height
stratum T1 cover
stratum T2 height
stratum T2 cover
stratum M1 height
stratum M1 cover
stratum L1 height
stratum L1 cover
stratum T1 height
stratum T1 cover
stratum T2 height
stratum T2 cover
stratum M1 height
stratum M1 cover
stratum L1 height
stratum L1 cover
stratum T1 height
stratum T1 cover
stratum T2 height
stratum T2 cover
stratum M1 height
stratum M1 cover
stratum L1 height
stratum L1 cover
stratum T height
stratum T cover
stratum M1 height
stratum M1 cover
stratum L1 height
stratum L1 cover
stratum T height
stratum T cover
stratum M1 height
stratum M1 cover
stratum L1 height
stratum L1 cover
stratum T1 height
stratum T1 cover
stratum T2 height
stratum T2 cover
stratum M1 height
stratum M1 cover
stratum L1 height
stratum L1 cover
stratum T1 height
stratum T1 cover
stratum T2 height
stratum T2 cover
stratum M1 height
stratu

In [63]:
valid_records=list()
for record in records:
    p=filter(lambda n: n['visit_id'] == record['visit_id'] and  n['visit_date'] == record['visit_date'], all_visits)
    found=list(p)
    if len(found) == 0:
        print(record)
    else:
        valid_records.append(record)

{'visit_id': 'AlpAsh_69', 'visit_date': datetime.date(2021, 4, 14), 'comment': ['Stage: inferred prefire'], 'measured_var': 'stratum T1 height', 'best': 40, 'lower': 40, 'upper': 40}
{'visit_id': 'AlpAsh_69', 'visit_date': datetime.date(2021, 4, 14), 'comment': ['Stage: inferred prefire'], 'measured_var': 'stratum T1 cover', 'best': 3}


In [64]:
batch_upsert(params,"form.field_visit_vegetation_estimates",valid_records,
             keycol=('visit_id','visit_date','measured_var'), 
             idx='field_visit_vegetation_estimates_pkey',execute=True)


Connecting to the PostgreSQL database...


CheckViolation: new row for relation "field_visit_vegetation_estimates" violates check constraint "field_visit_vegetation_estimates_check"
DETAIL:  Failing row contains (AlpAsh_25, 2021-04-15, stratum M1 height, null, 1, 2, 4, {"Stage: inferred prefire"}).


#### Alpine Bogs

In [65]:
valid_files[5]
filename=valid_files[5]
wbindex[filename].keys()

dict_keys(['Site', 'Fire', 'VegStructure', 'Floristics', 'Reference'])

#### Robertson RF
Edited dates to match the visit dates (previous day)

In [66]:
filename=valid_files[6]
print(filename)
wbindex[filename].keys()
worksheet='Structure'
wbindex[filename][worksheet][0]

RobertsonRF_data_bionet2.xlsx


['SiteNo',
 'Replicate',
 'Date',
 'Stage',
 'Stratum',
 'LowerHeight',
 'UpperHeight',
 'ModalHeight',
 'PercentCover',
 'Dominant1',
 'Dominant2']

In [67]:
col_def={'visit_id':1, 'visit_date':3, 'stage':4, 'stratum':5, 'height':(8,6,7),'cover':(9,)}
records=read_veg_structure(inputdir,filename,worksheet,col_def)
len(records)

32

In [68]:
valid_records=list()
for record in records:
    p=filter(lambda n: n['visit_id'] == record['visit_id'] and  n['visit_date'] == record['visit_date'], all_visits)
    found=list(p)
    if len(found) == 0:
        print(record)
    else:
        valid_records.append(record)
print(len(valid_records))

32


In [69]:
batch_upsert(params,"form.field_visit_vegetation_estimates",
             records,
             keycol=('visit_id','visit_date','measured_var'), 
             idx='field_visit_vegetation_estimates_pkey',execute=True)


Connecting to the PostgreSQL database...
32 rows updated
{}
Database connection closed.


#### Newnes

In [70]:

newnes = pd.read_csv(inputdir / 'NewnesStruc.csv')
newnes['Stratum']

0       M1
1       M1
2      NaN
3      NaN
4       M1
      ... 
115     L1
116     L1
117     L1
118     L1
119     L1
Name: Stratum, Length: 120, dtype: object

#### Other changes

In [72]:
conn = psycopg2.connect(**params)
cur = conn.cursor(cursor_factory=DictCursor)

upd = """
UPDATE form.field_visit_vegetation_estimates  
SET units='m' 
WHERE measured_var::text LIKE '%height' AND units IS NULL;
"""
cur.execute(upd)

upd = """
UPDATE form.field_visit_vegetation_estimates  
SET units='%' 
WHERE measured_var::text LIKE '%cover' AND units is NULL;
"""
cur.execute(upd)

cur.close()
conn.commit()
conn.close()