In [54]:
import openpyxl
from pathlib import Path
import os
from datetime import datetime
from configparser import ConfigParser
import psycopg2
from psycopg2.extras import DictCursor
from psycopg2.extensions import AsIs
import pandas as pd
import numpy as np

In [133]:
# Locate the database credentials file relative to the notebook's working directory.
repodir = Path("../../")
filename = repodir / 'secrets' / 'database.ini'
section = 'aws-lght-sl'

parser = ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed; check it so a missing file is not misreported
# as a missing section.
if not parser.read(filename):
    raise FileNotFoundError('Configuration file {0} not found or unreadable'.format(filename))

if not parser.has_section(section):
    raise Exception('Section {0} not found in the {1} file'.format(section, filename))

# Key/value pairs of the chosen section; passed later to psycopg2.connect().
dbparams = dict(parser.items(section))

In [134]:

# Folder holding the data-entry spreadsheets, resolved relative to `repodir`.
inputdir = repodir / "data" / "input-form"

In [135]:
# List the files currently available in the input folder.
os.listdir(inputdir)

['Species traits_Blue table_RFW_ 20220505.xlsx',
 'fireveg-trait-input-spreadsheet_V3_DE_trial.xlsx']

## Read data entry form with pandas

In [87]:
# Path of the data-entry workbook; defined once so both sheets come from the same file.
inputfile = inputdir / 'Species traits_Blue table_RFW_ 20220505.xlsx'

# Passing a list of sheet names makes read_excel open the workbook once and
# return a dict of DataFrames keyed by sheet name.
sheets = pd.read_excel(inputfile, sheet_name=['Data entry', 'Contributor'])
newdata = sheets['Data entry']
contributor = sheets['Contributor']

In [107]:
# Number of rows read from the 'Contributor' sheet.
contributor.shape[0]

3

In [43]:
# Preview the first rows read from the 'Data entry' sheet.
newdata.head()

Unnamed: 0,Main source,Original sources,Original species name,Species code,Species name,Trait code,Trait name,Trait type,Raw value,Norm value,Best,Lower,Upper,Method of estimation,Notes
0,NSWFFRDv2.1,Auld 1987,Acacia suaveolens,3881,Acacia suaveolens,surv6,Seedbank half-life,numerical,hl 10.7 y,,10.7,,,,No vocabularies for MoE
1,NSWFFRDv2.1,Auld Keith Bradstock 2000,Conospermum taxifolium,5352,Conospermum taxifolium,surv6,Seedbank half-life,numerical,hl 2,,2.0,,,,No vocabularies for MoE
2,NSWFFRDv2.1,Auld Scott 1997,Darwinia biflora,4024,Darwinia biflora,surv6,Seedbank half-life,numerical,hl 0.9,,0.9,,,,No vocabularies for MoE
3,NSWFFRDv2.1,Auld Scott 1997,Grevillea caleyi,5365,Grevillea caleyi,surv6,Seedbank half-life,numerical,hl 7.6,,7.6,,,,No vocabularies for MoE
4,NSWFFRDv2.1,Auld Keith Bradstock 2000,Grevillea linearifolia,5381,Grevillea linearifolia,surv6,Seedbank half-life,numerical,hl 9-10,,,9.0,10.0,,No vocabularies for MoE


In [140]:
# Build one dict per spreadsheet row, grouped by trait code:
#   records[trait_code] -> list of {column_name: value} dicts
# Each dict's keys are later used as column names in the insert statement.

# The contributor names are identical for every row, so filter them once
# instead of re-reading the 'Contributor' sheet on each iteration.
has_contributor = contributor.shape[0] > 0
contribdata = []
if has_contributor:
    contribdata = [x for x in contributor['Your response'].values.tolist() if not pd.isnull(x)]

records = dict()
for row in newdata.to_dict(orient='records'):
    trait = row['Trait code']
    ttype = row['Trait type']
    record = dict()
    notes = list()

    if has_contributor:
        notes.append('Data entry by')
        notes.extend(contribdata)

    record['species'] = row['Species name']
    original_name = row['Original species name']
    # Keep the original name in the notes when it differs from the accepted one.
    # The value must come from `row` (the record has no such key), and an empty
    # cell (NaN) is skipped because NaN compares unequal to everything.
    if not pd.isnull(original_name) and row['Species name'] != original_name:
        notes.append('Original species name')
        notes.append(original_name)
    if not pd.isnull(row['Notes']):
        notes.append(row['Notes'])

    # Scalar columns: copied as-is under a snake_case key.
    for k in ('Main source', 'Species code',):
        if not pd.isnull(row[k]):
            record[k.lower().replace(' ', '_')] = row[k]
    # These columns are wrapped in a single-element list.
    for k in ('Original sources', 'Raw value'):
        if not pd.isnull(row[k]):
            record[k.lower().replace(' ', '_')] = [row[k]]

    # Only the value columns matching the trait type are transferred.
    if ttype == 'numerical':
        for k in ('Best', 'Lower', 'Upper'):
            if not pd.isnull(row[k]):
                record[k.lower()] = row[k]
    elif ttype == 'categorical':
        for k in ('Norm value',):
            if not pd.isnull(row[k]):
                record[k.lower().replace(' ', '_')] = row[k]

    if len(notes) > 0:
        record['original_notes'] = notes
    records.setdefault(trait, []).append(record)


In [141]:
# Show which trait codes were found in the spreadsheet.
print(records.keys())

# Inspect the records assembled for one trait.
records['repr3a']

dict_keys(['surv6', 'repr3', 'repr4', 'surv1', 'surv5', 'repr3a'])


[{'species': 'Acacia melanoxylon',
  'main_source': 'NSWFFRDv2.1',
  'species_code': 3824,
  'original_sources': ['Wark 1997'],
  'raw_value': ['Secondary juvenile period ->3<10'],
  'lower': 3.0,
  'upper': 10.0,
  'original_notes': ['Data entry by', 'Renee Woodward']},
 {'species': 'Lambertia formosa',
  'main_source': 'NSWFFRDv2.1',
  'species_code': 5440,
  'original_sources': ['Pyke 1983'],
  'raw_value': ['Secondary juvenile period -peak flowering at 2-3 y post-fire'],
  'best': 2.0,
  'upper': 3.0,
  'original_notes': ['Data entry by', 'Renee Woodward']}]

In [143]:
# Insert every prepared record into the table named after its trait code
# (schema `litrev`), skipping rows that conflict with existing ones.
print('Connecting to the PostgreSQL database...')
conn = psycopg2.connect(**dbparams)
affected_rows = 0

# NOTE(review): AsIs splices the trait code and the column names into the SQL
# text unescaped. They originate from the spreadsheet, so consider
# psycopg2.sql.Identifier if that input is not fully trusted.
insert_statement = 'insert into litrev.%s (%s) values %s ON CONFLICT DO NOTHING'

try:
    # The cursor context manager closes the cursor even if an insert fails.
    with conn.cursor() as cur:
        for trait, trait_records in records.items():
            print("total of %s records prepared for trait %s" % (len(trait_records), trait))
            for record in trait_records:
                cur.execute(
                    insert_statement,
                    (AsIs(trait), AsIs(','.join(record.keys())), tuple(record.values())),
                )
                # rowcount is 0 when ON CONFLICT DO NOTHING skipped the row.
                affected_rows = affected_rows + cur.rowcount
    # Commit only after all inserts succeeded; closing without commit discards them.
    conn.commit()
    print("total number of lines updated: %s" % affected_rows)
finally:
    # Always release the connection, including when an insert raised.
    conn.close()
    print('Database connection closed.')


Connecting to the PostgreSQL database...
total of 7 records prepared for trait surv6
total of 98 records prepared for trait repr3
total of 23 records prepared for trait repr4
total of 2 records prepared for trait surv1
total of 1 records prepared for trait surv5
total of 2 records prepared for trait repr3a
total number of lines updated: 133
Database connection closed.


## Alternative with openpyxl

In [6]:
# Open the same workbook with openpyxl to get access to its named tables.
inputfile=inputdir / 'Species traits_Blue table_RFW_ 20220505.xlsx'
# data_only=True asks openpyxl for stored cell values rather than formulas.
wb = openpyxl.load_workbook(inputfile,data_only=True)
# Display the worksheets contained in the workbook.
wb.worksheets

[<Worksheet "Instructions">,
 <Worksheet "Contributor">,
 <Worksheet "Data entry">,
 <Worksheet "References">,
 <Worksheet "Species list">,
 <Worksheet "Trait description">,
 <Worksheet "Vocabularies">,
 <Worksheet "Vocabularies for methods">]

### References table

In [9]:
# Select the "References" worksheet.
ws=wb["References"]

In [10]:
# Iterating ws.tables yields the names of the tables defined on this sheet.
for table in ws.tables:
    print(table)

References


In [12]:
# Fetch the table object by name.
table = ws.tables['References']

In [13]:
# Cells covered by the table's range, indexed below as tbl[row][column].
tbl=ws[table.ref]

In [14]:
# Print the first row of the table range (the column headers).
for cell in tbl[0]:
    print(cell.value)

Code
Full reference


In [15]:
# Print the last row of the table range.
for cell in tbl[len(tbl)-1]:
    print(cell.value)

Benson McDougall 2000
Benson, D. & McDougall, L. (2000). Ecology of Sydney plant species: Part 7b: Dicotyledon families Proteaceae to Rubiaceae. Cunninghamia 6(4) 1016-1202.


### Input table

In [17]:
# Switch to the "Data entry" worksheet.
ws=wb["Data entry"]

In [18]:
# Slicing a rectangular range returns a tuple of rows, each a tuple of Cell objects.
ws['A1':'B7']

((<Cell 'Data entry'.A1>, <Cell 'Data entry'.B1>),
 (<Cell 'Data entry'.A2>, <Cell 'Data entry'.B2>),
 (<Cell 'Data entry'.A3>, <Cell 'Data entry'.B3>),
 (<Cell 'Data entry'.A4>, <Cell 'Data entry'.B4>),
 (<Cell 'Data entry'.A5>, <Cell 'Data entry'.B5>),
 (<Cell 'Data entry'.A6>, <Cell 'Data entry'.B6>),
 (<Cell 'Data entry'.A7>, <Cell 'Data entry'.B7>))

In [20]:
# Iterating ws.tables yields the names of the tables defined on this sheet.
for table in ws.tables:
    print(table)

DataEntry


In [21]:
# Fetch the data-entry table object by name.
table = ws.tables['DataEntry']

In [22]:
# Iterating the table object yields (attribute, value) pairs such as name and ref.
for row in table:
    print(row)

('id', '3')
('name', 'DataEntry')
('displayName', 'DataEntry')
('ref', 'A1:O134')
('headerRowCount', '1')
('headerRowDxfId', '17')
('dataDxfId', '16')
('totalsRowDxfId', '15')


In [23]:
# Cell range covered by the table, in A1 notation.
table.ref

'A1:O134'

In [24]:
# Print every cell value in the table range, row by row (header row first).
for row in ws[table.ref]:
    for cell in row:
        print(cell.value)

Main source
Original sources
Original species name
Species code
Species name
Trait code
Trait name
Trait type
Raw value
Norm value
Best
Lower
Upper
Method of estimation
Notes
NSWFFRDv2.1
Auld 1987
Acacia suaveolens
3881
Acacia suaveolens
surv6
Seedbank half-life
numerical
hl 10.7 y
None
10.7
None
None
None
No vocabularies for MoE
NSWFFRDv2.1
Auld Keith Bradstock 2000
Conospermum taxifolium
5352
Conospermum taxifolium
surv6
Seedbank half-life
numerical
hl 2
None
2
None
None
None
No vocabularies for MoE
NSWFFRDv2.1
Auld Scott 1997
Darwinia biflora
4024
Darwinia biflora
surv6
Seedbank half-life
numerical
hl 0.9
None
0.9
None
None
None
No vocabularies for MoE
NSWFFRDv2.1
Auld Scott 1997
Grevillea caleyi
5365
Grevillea caleyi
surv6
Seedbank half-life
numerical
hl 7.6
None
7.6
None
None
None
No vocabularies for MoE
NSWFFRDv2.1
Auld Keith Bradstock 2000
Grevillea linearifolia
5381
Grevillea linearifolia
surv6
Seedbank half-life
numerical
hl 9-10
None
None
9
10
None
No vocabularies for MoE
NSW

In [46]:
# List the attributes available on the table object.
dir(table)

['__add__',
 '__attrs__',
 '__class__',
 '__copy__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__elements__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__lt__',
 '__module__',
 '__namespaced__',
 '__ne__',
 '__nested__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_initialise_columns',
 '_path',
 '_rel_id',
 '_rel_type',
 '_write',
 'autoFilter',
 'column_names',
 'comment',
 'connectionId',
 'dataCellStyle',
 'dataDxfId',
 'displayName',
 'extLst',
 'from_tree',
 'headerRowBorderDxfId',
 'headerRowCellStyle',
 'headerRowCount',
 'headerRowDxfId',
 'id',
 'idx_base',
 'insertRow',
 'insertRowShift',
 'mime_type',
 'name',
 'namespace',
 'path',
 'published',
 'ref',
 'sortState',
 'tableBorderDxfId',
 'tableColumns',
 'tableStyleInfo',
 'tableType',
 'tagname',
 'to_tree',
 'tota

In [47]:
# Column names stored in the table definition.
# NOTE: for several columns these differ from the header cell text printed from
# row 1 of the sheet (e.g. 'main_source' here vs 'Main source' in the cell).
table.column_names

['main_source',
 'original_sources',
 'original_species_name',
 'species_code',
 'species name',
 'Trait code',
 'Trait name',
 'Trait type',
 'raw_value',
 'norm_value',
 'Best',
 'Lower',
 'Upper',
 'method_of_estimation',
 'notes']

In [48]:
# Number of header rows declared for the table.
table.headerRowCount

1

In [75]:
# Turn each data row of the table into a dict keyed by the table's column
# names, then file it under its trait code: 'surv1' and 'surv4' get their own
# lists, every other trait goes to `records`.
tbl = ws[table.ref]
hdr = table.column_names
surv1 = []
surv4 = []
records = []
# Dispatch table: trait code -> destination list (anything else -> records).
destinations = {'surv1': surv1, 'surv4': surv4}
# tbl[0] is the header row, so the data starts at index 1.
for cells in tbl[1:]:
    record = {name: cells[position].value for position, name in enumerate(hdr)}
    destinations.get(record['Trait code'], records).append(record)

In [77]:
# Inspect the records collected for trait 'surv1'.
surv1

[{'main_source': 'MacKenzie Auld Keith Ooi 2021 ',
  'original_sources': None,
  'original_species_name': 'Boronia anemonifolia subsp. anemonifolia',
  'species_code': 12257,
  'species name': 'Boronia anemonifolia subsp. anemonifolia',
  'Trait code': 'surv1',
  'Trait name': 'Resprouting - full canopy scorch',
  'Trait type': 'categorical',
  'raw_value': None,
  'norm_value': 'Most',
  'Best': None,
  'Lower': None,
  'Upper': None,
  'method_of_estimation': 'Direct observation (many)',
  'notes': None},
 {'main_source': 'MacKenzie Auld Keith Ooi 2021 ',
  'original_sources': None,
  'original_species_name': 'Boronia floribunda',
  'species_code': 5740,
  'species name': 'Boronia floribunda',
  'Trait code': 'surv1',
  'Trait name': 'Resprouting - full canopy scorch',
  'Trait type': 'categorical',
  'raw_value': None,
  'norm_value': 'Most',
  'Best': None,
  'Lower': None,
  'Upper': None,
  'method_of_estimation': 'Direct observation (many)',
  'notes': None},
 {'main_source': 'M

In [74]:
# NOTE(review): the comparison operator was missing from this cell, which made
# it a syntax error; `==` matches the recorded False output (a list never
# equals a string). If a membership test was intended, use `'surv1' in hdr`.
hdr == 'surv1'

False