# Summary report in a workbook

We want to create a workbook with:

- Authoring information and instructions
- Summary table for species with links
- Trait codes and descriptions
- Vocabularies
- List of references


## Setup

### Import modules

In [1]:
# work with paths in operating system
from pathlib import Path
import os

# datetime support
import datetime

# work with xlsx workbooks
import openpyxl
from openpyxl import Workbook
from openpyxl.worksheet.table import Table, TableStyleInfo
from openpyxl.styles import Alignment, PatternFill, Border, Font # Side, Alignment, Protection,
from openpyxl.formatting import Rule
from openpyxl.styles.differential import DifferentialStyle
from openpyxl.worksheet.datavalidation import DataValidation

from openpyxl.utils.dataframe import dataframe_to_rows
from openpyxl.utils import get_column_letter

# For database connection
from configparser import ConfigParser
import psycopg2
from psycopg2.extras import DictCursor

# Pandas for calculations
import pandas as pd


### Define paths for input and output

In [2]:
# Repository root, relative to the folder this notebook is run from
repodir = Path("..", "..")
# Folder holding the report workbook; list it to confirm it is reachable
inputdir = repodir.joinpath("data", "output-report")
os.listdir(inputdir)

['.~lock.fireveg-trait-report-model.xlsx#', 'fireveg-trait-report-model.xlsx']

### Database connection

Function to parse connection parameters from a file

In [3]:
def read_dbparams(filename,section="postgresql"):
    """Read the options of one section of an INI file into a dictionary.

    Parameters
    ----------
    filename : str or path-like
        Location of the configuration file.
    section : str, optional
        Name of the section to read (defaults to "postgresql").

    Returns
    -------
    dict
        Option names of the section mapped to their string values.

    Raises
    ------
    FileNotFoundError
        If the configuration file could not be read.
    Exception
        If the file was read but does not contain the requested section.
    """
    parser = ConfigParser()
    # ConfigParser.read() silently skips files it cannot open and returns the
    # list of files it actually parsed; an empty list means nothing was read.
    # Without this check a missing file is reported as a missing section.
    if not parser.read(filename):
        raise FileNotFoundError('Configuration file {0} could not be read'.format(filename))

    if not parser.has_section(section):
        raise Exception('Section {0} not found in the {1} file'.format(section, filename))

    return dict(parser.items(section))


Reading the connection parameters for this session from the chosen section of the configuration file:

In [4]:
# Connection settings are kept outside the notebook, in the repository's secrets folder
filename = repodir.joinpath('secrets', 'database.ini')
dbparams = read_dbparams(filename, section='aws-lght-sl')

In [5]:
# Connect only when this session has no connection yet, or the existing one is closed,
# so that re-running the cell does not open a second connection.
if "conn" not in globals() or conn.closed!=0:
    print('Connecting to the PostgreSQL database...')
    conn = psycopg2.connect(**dbparams)
# Likewise, create a cursor unless an open one already exists
if "cur" not in globals() or cur.closed:
    cur = conn.cursor(cursor_factory=DictCursor)

Connecting to the PostgreSQL database...


## Create workbook

### Styles
Define styles to be used across the workbook

In [6]:
# Cell alignments: centred without wrapping, and top-left with wrapping for long text
cent_align = Alignment(horizontal='center', vertical='center', wrap_text=False)
wrap_align = Alignment(horizontal='left', vertical='top', wrap_text=True)

# Smaller font for cells holding long lists
fontSmall = Font(size="9")

# Tab colours (hex RGB) keyed by the role of the sheet
sheet_colors = dict(intro="1072BA", summary="5AFF5A", default="505050", addentry="20CA82")

# Per table kind: (built-in style name, emphasise first column, striped rows).
# Last-column emphasis and column stripes are switched off everywhere.
_table_style_specs = {
    "Instructions": ("TableStyleMedium9", True, True),
    "Contributor": ("TableStyleMedium18", True, False),
    "Lists": ("TableStyleMedium14", True, False),
    "Info": ("TableStyleMedium14", True, False),
    "Vocabularies": ("TableStyleMedium14", True, False),
    "Entry": ("TableStyleMedium18", False, False),
}
table_style = {
    kind: TableStyleInfo(name=style_name, showFirstColumn=first_column, showLastColumn=False,
                         showRowStripes=row_stripes, showColumnStripes=False)
    for kind, (style_name, first_column, row_stripes) in _table_style_specs.items()
}




In [7]:
# Start a new, empty workbook; the cells below fill it in sheet by sheet
wb = Workbook()

In [8]:
# Sheets to create. Each entry gives the sheet title, its column widths and the
# key of its tab colour in `sheet_colors`. A `colWidths` item is a pair
# (column letter or tuple of column letters, width). The entry flagged "active"
# re-uses the sheet that already exists in the new workbook.
# NOTE(review): the text written to the 'About' sheet mentions a 'Vocabularies'
# sheet, but none is listed here; confirm whether it is created elsewhere.
wsheets = (
    {"title": "About", "colWidths":[("A",90),("B",40)], "tabColor":"intro","active":True},
    {"title": "Summary", "colWidths":[("A",70),("B",10),(("C","D","E","F","G","H","I","J","K"),30),(("L","M","N",),25)], "tabColor":"summary"},
    {"title": "References", "colWidths":[("A",25),("B",80)], "tabColor":"addentry"},
    {"title": "Trait description", "colWidths":[("A",12),("B",30),("C",70)], "tabColor":"default"}
    )
for item in wsheets:
    if "active" in item:
        ws = wb.active
        ws.title = item['title']
    else:
        ws = wb.create_sheet(item['title'])
    for columns, width in item['colWidths']:
        # A bare string is one column. Iterating over it directly would split a
        # multi-letter column such as "AA" into separate single letters.
        if isinstance(columns, str):
            columns = (columns,)
        for letter in columns:
            ws.column_dimensions[letter].width = width
    ws.sheet_properties.tabColor = sheet_colors[item["tabColor"]]


In [9]:
ws = wb["About"]

# Title block: one entry per row in column A, starting at the top of the sheet
info = (
    "Fire Ecology Traits for Plants",
    "Version 1.00 (April 2022)",
    "This data export reflects the status of the database on the %s" % datetime.date.today().strftime('%d %b %Y'),
    "Developed by  José R. Ferrer-Paris and David Keith",
    "Centre for Ecosystem Science / University of New South Wales",
    "Please cite this work as:",
    "Ferrer-Paris, J. R. and Keith, D. A. (2022) Fire Ecology Traits for Plants: A database for fire research and management. Version 1.00. Centre for Ecosystem Science, University of New South Wales, Sydney, Australia.",
)
for k, row in enumerate(info, start=1):
    ws.cell(k, 1, value=row).alignment = wrap_align
# `k` always points at the next free row
k = len(info) + 1

ws["A1"].style = 'Title'
# Row 5 holds the affiliation line of `info`; turn it into a link
affiliation = ws["A5"]
affiliation.hyperlink = 'https://www.unsw.edu.au/research/ecosystem'
affiliation.style = 'Hyperlink'

supporters = (
    {'institution': "University of New South Wales", 'url': "https://www.unsw.edu.au/"},
    {'institution': "NSW Bushfire Research Hub", 'url': "https://www.bushfirehub.org/"},
    {'institution': "NESP Threatened Species Recovery Hub", 'url': "https://www.nespthreatenedspecies.edu.au/"},
    {'institution': "NSW Department of Planning & Environment", 'url': "https://www.planning.nsw.gov.au/"},
)

# Leave one empty row, then a heading followed by one linked row per supporter
heading_row = k + 1
ws.cell(heading_row, 1, value="This work has been supported by:")
for k, item in enumerate(supporters, start=heading_row + 1):
    cell = ws.cell(k, 1, value=item['institution'])
    cell.hyperlink = item['url']
    cell.style = "Hyperlink"
k = heading_row + 1 + len(supporters)

description = (
    "Taxonomic nomenclature following BioNET (data export from February 2022)",
    "Data in the report is summarised based on BioNET fields 'currentScientificName' and 'currentScientificNameCode'",
    "For general description of the traits, please refer to the 'Trait description' sheet",
    "Vocabularies for categorical traits are available in the 'Vocabularies' sheet",
    "For categorical traits the values in the 'Summary' sheet show the different values reported in the literature records separated by slashes.",
    "If more than one category has been reported, the values are ordered from higher to lower 'weight', categories receiving less than 10% weight are in round brackets, categories with less than 5% in square brackets",
    "The weight is calculated by multiplying the number of time a value is reported (nr. of records) with the weight given to each record (default to 1), and divided by the weight of all records for a given species.",
    "An asterisk (*) indicates a data entry error or uncertainty in the assignment of a category or value. ",
    "'Main sources' refer to references that were imported directly using automated scripts or manual entry.",
    "'Additional sources' refer to references given in the main references for a particular record, but not verified during data import.",
    "Some sheets are protected to avoid accidental changes, but they are not password protected. If you need to filter and reorder entries in the table, please unprotect the sheet first.",
)

# Two empty rows separate the supporters from the explanatory notes
first_row = k + 2
for offset, row in enumerate(description):
    ws.cell(first_row + offset, 1, value=row).alignment = wrap_align
k = first_row + len(description)

ws.protection.sheet = True

In [10]:
# Fetch the descriptive fields of every trait, ordered by trait code
cur.execute("SELECT code,name,description,value_type,life_stage,life_history_process,priority FROM litrev.trait_info ORDER BY code")
trait_info = cur.fetchall()

ws = wb["Trait description"]

# Introductory notes go in column C; the two empty strings add spacer rows
description = ("The following table gives a general description of the traits used in the 'Summary' sheet",
               "This sheet is protected to avoid accidental changes, but it is not password protected. If you need to filter and reorder entries in the table, please unprotect the sheet first.",
               "Vocabularies for categorical traits are available in the 'Vocabularies' sheet", "", "")
for k, row in enumerate(description, start=1):
    ws.cell(k, 3, value=row).alignment = wrap_align
# First row after the notes; used below as the top row of the table
k = len(description) + 1

ws.append(["Trait Code", "Trait Name", "Description", "Type", "Life stage", "Life history process", "Priority"])
for row in trait_info:
    ws.append(row)

# Wrap the description column from the table's top row down to the last row
last_row = ws.max_row
for j in range(k, last_row + 1):
    ws.cell(j, 3).alignment = wrap_align

tab = Table(displayName="TraitInformation", ref="A{}:G{}".format(k, last_row))
tab.tableStyleInfo = table_style["Info"]
ws.add_table(tab)
ws.protection.sheet = True

In [11]:
def summarise_values(x,w):
    """Summarise the categories reported for one species as a single string.

    Parameters
    ----------
    x : list
        Reported category for each record; entries may be None.
    w : list
        Weight of each record, in the same order as `x`.

    Returns
    -------
    str
        Categories separated by " / ", ordered from highest to lowest share
        of the total weight. A share above 0.1 is shown plain, a share above
        0.05 in round brackets and anything else in square brackets.
        An asterisk is appended when `x` contains None.
    """
    suffix = " * " if None in x else ""
    records = pd.concat({"value": pd.Series(x), "weight": pd.Series(w)}, axis=1)
    # The denominator is the weight of all records, including those without a value
    shares = records.groupby(by="value").sum() / records.weight.sum()
    shares = shares.sort_values(by="weight", ascending=[0])
    labels = []
    for category, share in shares["weight"].items():
        if share > 0.1:
            labels.append(category)
        elif share > 0.05:
            labels.append("(%s)" % category)
        else:
            labels.append("[%s]" % category)
    return (" / ".join(labels) + suffix).strip(" ")

The queries below rely on a custom aggregate function (`array_accum`) for [handling empty arrays in PostgreSQL](https://stackoverflow.com/questions/43472482/postgres-array-agg-throws-cannot-accumulate-empty-arrays-for-empty-arrays).

In [12]:
# Categorical traits: one row per current scientific name, with the reported
# values, record weights and sources aggregated into arrays.
# The {} placeholder receives the trait table name; the names come from the
# fixed list below, never from user input.
qry= """
SELECT "currentScientificName" as spp, "currentScientificNameCode" as sppcode,
    array_agg(species) as nspp,
    array_agg(norm_value::text) as val,array_agg(weight) as w,
    array_agg(main_source) as refs,
    array_accum(original_sources) as orefs
    
FROM litrev.{} 
LEFT JOIN species.caps
ON species_code="speciesCode_Synonym"
WHERE species ilike '%euca%' and "currentScientificName" is not NULL AND weight>0
GROUP BY spp,sppcode;
"""

# Start from scratch on every execution. Testing for "df" in globals() made the
# cell merge its columns onto the table left by a previous run, which produced
# duplicated '_x'/'_y' columns when the cell was executed twice.
df = None
for trait in ['surv1','surv4','repr2','rect2','disp1','germ1','germ8']:
    cur.execute(qry.format(trait))
    res = cur.fetchall()
    df1 = pd.DataFrame(res)
    col1="%s.txn" % trait
    col2="%s.v" % trait
    col3="%s.w" % trait
    col4="%s.mref" % trait
    col5="%s.oref" % trait

    # columns arrive numbered in SELECT order; give them trait-specific names
    df1=df1.rename(columns={0:"Species",1:"Code",2:col1,3:col2,4:col3,5:col4,6:col5})
    df1[trait]=df1.apply(lambda row : summarise_values(row[col2],row[col3]), axis = 1)
    if df is None:
        df = df1
    else:
        # outer merge keeps species that have records for only some traits
        df = pd.merge(df, df1, on = ["Species","Code"], how = "outer").sort_values(by="Species",ascending=[1])

In [13]:
# Preview the merged table of categorical traits
df

Unnamed: 0,Species,Code,surv1.txn,surv1.v,surv1.w,surv1.mref,surv1.oref,surv1,surv4.txn,surv4.v,...,germ1.w,germ1.mref,germ1.oref,germ1,germ8.txn,germ8.v,germ8.w,germ8.mref,germ8.oref,germ8
0,Caladenia catenata,6703,[Caladenia xantholeuca],[All],[1],[austraits-3.0.2],[White Sinclair Frood 2020],All,,,...,,,,,,,,,,
322,Corymbia eximia,9743,,,,,,,,,...,,,,,[Eucalyptus eximia],[ND],[1],[Ooi Myerscough Auld 2007],[],ND
323,Corymbia maculata,9692,,,,,,,,,...,,,,,[Eucalyptus maculata],[ND],[1],[Ooi Myerscough Auld 2007],[],ND
324,Corymbia trachyphloia,9739,,,,,,,,,...,,,,,[Eucalyptus trachyphloia],[ND],[1],[Ooi Myerscough Auld 2007],[],ND
1,Craspedia leucantha,1419,[Craspedia leucantha],[All],[1],[austraits-3.0.2],[White Sinclair Frood 2020],All,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
317,Melaleuca uncinata,4268,"[Melaleuca uncinata, Melaleuca uncinata, Melal...","[All, All, All, All, All, All, All, None, All,...","[1, 1, 1, 1, 1, 1, 1, 1, 10, 1]","[austraits-3.0.2, NSWFFRDv2.1, NSWFFRDv2.1, NS...",[Clarke Lawes Murphy Russell-Smith Nano Bradst...,All / (None),[Melaleuca uncinata],[Lignotuber],...,,,,,,,,,,
318,Muellerina eucalyptoides,3620,"[Muellerina eucalyptoides, Muellerina eucalypt...","[None, None]","[1, 1]","[austraits-3.0.2, austraits-3.0.2]",[Clarke Lawes Murphy Russell-Smith Nano Bradst...,,,,...,,,,,,,,,,
319,Parsonsia eucalyptophylla,1178,"[Parsonsia eucalyptophylla, Parsonsia eucalypt...","[All, All, All, All, All]","[1, 1, 10, 1, 1]","[austraits-3.0.2, austraits-3.0.2, NSWFFRDv2.1...",[Clarke Lawes Murphy Russell-Smith Nano Bradst...,All,,,...,,,,,,,,,,
320,Rhodanthe diffusa subsp. leucactina,10148,[Rhodanthe diffusa subsp. leucactina],[None],[1],[austraits-3.0.2],[White Sinclair Frood 2020],,,,...,,,,,,,,,,


In [14]:
def summarise_triplet(x,y,z,w):
    """Summarise numeric records as "best (lower -- upper)" with one decimal.

    Parameters
    ----------
    x, y, z : list
        Best, lower and upper estimates of each record.
    w : list
        Weight of each record.
        NOTE(review): the weights are not used in the calculation; confirm
        whether a weighted mean was intended.

    Returns
    -------
    str
        Mean of the best estimates followed by the smallest lower and the
        largest upper estimate in brackets. A missing best estimate is left
        out, a fully missing range is left out, a single missing bound is
        shown as "?", and "*" is returned when nothing is available.
    """
    table = pd.concat({"best": pd.Series(x), "lower": pd.Series(y), "upper": pd.Series(z), "weight": pd.Series(w)}, axis=1)
    stats = (table['best'].mean(), table['lower'].min(), table['upper'].max())
    centre, low, high = ["%0.1f" % stat for stat in stats]
    missing = "nan"
    if centre == missing and low == missing and high == missing:
        return "*"
    low_txt = "?" if low == missing else low
    high_txt = "?" if high == missing else high
    if centre == missing:
        return "(%s -- %s)" % (low_txt, high_txt)
    if low == missing and high == missing:
        return centre
    return "%s (%s -- %s)" % (centre, low_txt, high_txt)

In [15]:
# Numeric traits: same grouping as for the categorical traits, but each record
# carries a best estimate with lower and upper bounds.
# The {} placeholder receives the trait table name from the fixed list below.
qry= """
SELECT "currentScientificName" as spp, "currentScientificNameCode" as sppcode,
    array_agg(species) as nspp,
    array_agg(best) as best,array_agg(lower) as lower,array_agg(upper) as upper,array_agg(weight) as w,
    array_agg(main_source) as refs,
    array_accum(original_sources) as orefs
FROM litrev.{} 
LEFT JOIN species.caps
ON species_code="speciesCode_Synonym"
WHERE species ilike '%euca%' and "currentScientificName" is not NULL AND weight>0
GROUP BY spp,sppcode;
"""

for trait in ['repr3','repr3a','repr4',]:
    cur.execute(qry.format(trait))
    res = cur.fetchall()
    # traits without any matching record are skipped and add no columns
    if len(res)>0:
        df1 = pd.DataFrame(res)
        col1="%s.txn" % trait
        col2="%s.best" % trait
        col3="%s.lower" % trait
        col4="%s.upper" % trait
        col5="%s.w" % trait
        col6="%s.mref" % trait
        col7="%s.oref" % trait

        # columns arrive numbered in SELECT order; give them trait-specific names
        df1=df1.rename(columns={0:"Species",1:"Code",2:col1,3:col2,4:col3,5:col4,6:col5,7:col6,8:col7})
        df1[trait]=df1.apply(lambda row : summarise_triplet(row[col2],row[col3],row[col4],row[col5]), axis = 1)
        # Drop columns left by a previous execution of this cell: merging the
        # same trait twice would produce duplicated '_x'/'_y' columns.
        stale = [col for col in df1.columns if col in df.columns and col not in ("Species", "Code")]
        df = pd.merge(df.drop(columns=stale), df1, on = ["Species","Code"], how = "outer").sort_values(by="Species",ascending=[1])
   


In [16]:
def unique_taxa(row,slc):
    """Join the distinct entries of all list-valued columns matching `slc`.

    Parameters
    ----------
    row : pandas.Series
        One row of the merged trait table.
    slc : str
        Text identifying the columns to collect. A column is selected when
        `slc` occurs in its name anywhere after the first character
        (for example 'txn' selects 'surv1.txn').

    Returns
    -------
    str
        The distinct values in sorted order, separated by "; ". Cells that
        are not lists and None entries inside the lists are ignored.
    """
    valid = []
    # Columns are taken from the row itself. Looking them up through a fixed
    # row label of the global table fails whenever that label does not exist.
    for col, x in row.items():
        if str(col).find(slc) > 0 and isinstance(x, list):
            valid.extend(x)
    # Sorting makes the text reproducible; plain set order changes between runs
    z = sorted({entry for entry in valid if entry is not None})
    return "; ".join(z)



In [17]:

# Collapse the per-trait list columns into one text column for each kind of information
for new_column, suffix in (('orig_species', 'txn'), ('main_refs', 'mref'), ('orig_refs', 'oref')):
    df[new_column] = df.apply(unique_taxa, axis=1, args=(suffix,))
df[['orig_species','main_refs','orig_refs']]

Unnamed: 0,orig_species,main_refs,orig_refs
0,Caladenia xantholeuca,austraits-3.0.2,White Sinclair Frood 2020
1,Eucalyptus eximia,Ooi Myerscough Auld 2007,
2,Eucalyptus maculata,Ooi Myerscough Auld 2007,
3,Eucalyptus trachyphloia,Ooi Myerscough Auld 2007,
4,Craspedia leucantha,austraits-3.0.2,{Australian National Botanic Gardens} 2018; Wh...
...,...,...,...
321,Melaleuca uncinata,NSWFFRDv2.1; austraits-3.0.2,Clarke Lawes Murphy Russell-Smith Nano Bradsto...
322,Muellerina eucalyptoides,austraits-3.0.2,Morgan 2011; Clarke Lawes Murphy Russell-Smith...
323,Parsonsia eucalyptophylla,NSWFFRDv2.1; austraits-3.0.2,Clarke Lawes Murphy Russell-Smith Nano Bradsto...
324,Rhodanthe diffusa subsp. leucactina,austraits-3.0.2,White Sinclair Frood 2020


In [18]:

ws = wb["Summary"]

# Columns to export, in sheet order. Identifier and trait columns keep their
# names as headers; the three source columns get a descriptive header.
id_cols = ['Species', 'Code']
trait_cols = ['surv1', 'surv4', 'germ1', 'germ8', 'rect2', 'repr2', 'repr3', 'repr3a', 'disp1']
source_headers = {'orig_species': 'Original Species name(s) used',
                  'main_refs': 'Main sources',
                  'orig_refs': 'Additional sources'}

ws.append(id_cols + trait_cols + list(source_headers.values()))
rows = dataframe_to_rows(df[id_cols + trait_cols + list(source_headers)], index=False, header=False)

# 1-based positions of the long text columns: wrapped and set in small print
text_columns = (12, 13, 14)
# data starts on row 2, below the header
for r_idx, row in enumerate(rows, 2):
    for c_idx, value in enumerate(row, 1):
        cell = ws.cell(row=r_idx, column=c_idx, value=value)
        if c_idx in text_columns:
            cell.alignment = wrap_align
            cell.font = fontSmall

# the table spans from the header to the last cell written by the loop
tab = Table(displayName="Summary", ref="A1:{}{}".format(get_column_letter(c_idx), r_idx))
tab.tableStyleInfo = table_style["Lists"]
ws.add_table(tab)


In [19]:
def extract_refs(row,slc):
    """Collect the distinct entries of all list-valued columns matching `slc`.

    Parameters
    ----------
    row : pandas.Series
        One row of the merged trait table.
    slc : str
        Text identifying the columns to collect. A column is selected when
        `slc` occurs in its name anywhere after the first character
        (for example 'mref' selects 'surv1.mref').

    Returns
    -------
    list
        Distinct values in order of first appearance. Cells that are not
        lists are ignored.
    """
    valid = []
    # Columns are taken from the row itself. Looking them up through a fixed
    # row label of the global table fails whenever that label does not exist.
    for col, x in row.items():
        if str(col).find(slc) > 0 and isinstance(x, list):
            valid.extend(x)
    # dict.fromkeys removes duplicates while keeping a reproducible order
    return list(dict.fromkeys(valid))

In [20]:
# Pool the main and the additional sources of every species, then keep each
# reference code once; the result feeds the query for the 'References' sheet.
valid_refs = []
for suffix in ('mref', 'oref'):
    refs = df.apply(lambda row: extract_refs(row, suffix), axis=1)
    valid_refs.extend(ref for x in refs if type(x) is list for ref in x)

valid_refs = tuple(set(valid_refs))

In [21]:
# Look up the citation of every reference code collected from the summary table
cur.execute("SELECT ref_code,ref_cite FROM litrev.ref_list WHERE ref_code IN %s ORDER BY ref_code",(valid_refs,))
ref_info = cur.fetchall()

ws = wb["References"]

# Introductory notes go in column B; the two empty strings add spacer rows
description = ("The following table includes bibliographical information for the sources referenced in the 'Summary' sheet",
               "This sheet is protected to avoid accidental changes, but it is not password protected. If you need to filter and reorder entries in the table, please unprotect the sheet first.",
               "", "")
for k, row in enumerate(description, start=1):
    ws.cell(k, 2, value=row).alignment = wrap_align
# First row after the notes; used below as the top row of the table
k = len(description) + 1

ws.append(["Reference code", "Reference information"])
for row in ref_info:
    ws.append(row)

# Wrap the citations and set them in small print; the loop starts one row
# below the table's top row
last_row = ws.max_row
for j in range(k + 1, last_row + 1):
    cell = ws.cell(j, 2)
    cell.alignment = wrap_align
    cell.font = fontSmall

tab = Table(displayName="ReferenceInformation", ref="A{}:B{}".format(k, last_row))
tab.tableStyleInfo = table_style["Lists"]
ws.add_table(tab)
ws.protection.sheet = True

In [22]:
# Save the finished workbook in the report folder defined in the setup section
wb.save(inputdir / "fireveg-trait-report-model.xlsx")

In [23]:
# Close the cursor first, then the database connection
cur.close()
        
if conn is not None:
    conn.close()
    print('Database connection closed.')

Database connection closed.
