# Summary report in a workbook

We want to create a workbook with:

- Authoring information and instructions
- Summary table for species with links
- Trait codes and descriptions
- Vocabularies
- List of references


## Setup

### Import modules

In [1]:
# work with paths in operating system
from pathlib import Path
import os

# datetime support
import datetime

# work with xlsx workbooks
import openpyxl
from openpyxl import Workbook
from openpyxl.worksheet.table import Table, TableStyleInfo
from openpyxl.styles import Alignment, PatternFill, Border, Font # Side, Alignment, Protection,
from openpyxl.formatting import Rule
from openpyxl.styles.differential import DifferentialStyle
from openpyxl.worksheet.datavalidation import DataValidation

from openpyxl.utils.dataframe import dataframe_to_rows
from openpyxl.utils import get_column_letter

# For database connection
from configparser import ConfigParser
import psycopg2
from psycopg2.extras import DictCursor

# Pandas for calculations
import pandas as pd


### Define paths for input and output

In [2]:
# Repository root, two levels above the working directory, and the report folder inside it
repodir = Path("..", "..")
inputdir = repodir.joinpath("data", "output-report")
# Last expression of the cell: list what the report folder currently holds
os.listdir(inputdir)

['fireveg-trait-report-model.xlsx']

### Database connection

Function to parse connection parameters from a file

In [3]:
def read_dbparams(filename,section="postgresql"):
    """Read database connection parameters from one section of an INI file.

    Parameters
    ----------
    filename : str or path-like
        Location of the INI file.
    section : str, optional
        Name of the section to read; defaults to "postgresql".

    Returns
    -------
    dict
        Option names of the section mapped to their string values.

    Raises
    ------
    FileNotFoundError
        If the file is missing or could not be read.
    Exception
        If the file was read but does not contain `section`.
    """
    parser = ConfigParser()
    # ConfigParser.read() skips files it cannot open and returns the list of
    # files it did parse, so an empty result means nothing was loaded. Report
    # that explicitly instead of as a misleading "section not found".
    if not parser.read(filename):
        raise FileNotFoundError('Configuration file {0} not found or not readable'.format(filename))

    if not parser.has_section(section):
        raise Exception('Section {0} not found in the {1} file'.format(section, filename))

    return dict(parser.items(section))


Reading the connection parameters for this session (the `aws-lght-sl` section of `secrets/database.ini`):

In [4]:
# Connection settings come from the 'aws-lght-sl' section of secrets/database.ini
filename = repodir.joinpath('secrets', 'database.ini')
dbparams = read_dbparams(filename, section='aws-lght-sl')

In [5]:
# Connect only when this kernel holds no open connection, so the cell can be re-run
if not ("conn" in globals() and conn.closed == 0):
    print('Connecting to the PostgreSQL database...')
    conn = psycopg2.connect(**dbparams)
# Likewise, create a cursor only when there is no usable one yet
if not ("cur" in globals() and not cur.closed):
    cur = conn.cursor(cursor_factory=DictCursor)

Connecting to the PostgreSQL database...


## Create workbook

### Styles
Define styles to be used across the workbook

In [6]:
# Cell alignments shared by the sheets
cent_align = Alignment(horizontal='center', vertical='center', wrap_text=False)
wrap_align = Alignment(horizontal='left', vertical='top', wrap_text=True)

# Tab colours keyed by the role of the sheet
sheet_colors = {
    "intro": "1072BA",
    "summary": "5AFF5A",
    "default": "505050",
    "addentry": "20CA82",
}

# Table styles keyed by the kind of table. Every key gets its own
# TableStyleInfo instance; last-column and column-stripe highlighting are
# always off, the remaining settings come from the specification below.
table_style = {
    kind: TableStyleInfo(name=style_name, showFirstColumn=first_column,
                         showLastColumn=False, showRowStripes=row_stripes,
                         showColumnStripes=False)
    for kind, style_name, first_column, row_stripes in (
        ("Instructions", "TableStyleMedium9", True, True),
        ("Contributor", "TableStyleMedium18", True, False),
        ("Lists", "TableStyleMedium14", True, False),
        ("Info", "TableStyleMedium14", True, False),
        ("Vocabularies", "TableStyleMedium14", True, False),
        ("Entry", "TableStyleMedium18", False, False),
    )
}




In [7]:
# Empty workbook that the following cells fill sheet by sheet
wb = Workbook()

In [8]:
# Sheets to create, in tab order.
# - "colWidths": pairs of (column letter or tuple of column letters, width)
# - "tabColor": key into sheet_colors
# - "active": when true, the sheet returned by wb.active is renamed and used
#   instead of creating a new sheet
wsheets = (
    {"title": "About", "colWidths":[("A",90),("B",40)], "tabColor":"intro","active":True},
    {"title": "Summary", "colWidths":[("A",70),("B",10),(("C","D","E","F","G"),30)], "tabColor":"summary"},
    {"title": "References", "colWidths":[("A",30),("B",60)], "tabColor":"addentry"},
    {"title": "Trait description", "colWidths":[("A",12),("B",30),("C",70)], "tabColor":"default"}
    )
for item in wsheets:
    # Test the value of the flag, not just its presence, so "active": False
    # behaves like a missing flag
    if item.get("active"):
        ws = wb.active
        ws.title = item['title']
    else:
        ws = wb.create_sheet(item['title'])
    for columns, width in item['colWidths']:
        # A bare string is a single column letter; wrap it so that iterating
        # does not split a multi-letter column such as "AA" into characters
        if isinstance(columns, str):
            columns = (columns,)
        for letter in columns:
            ws.column_dimensions[letter].width = width
    ws.sheet_properties.tabColor = sheet_colors[item["tabColor"]]


In [9]:
# Fill the "About" sheet: authoring information, supporters and reading notes,
# all written in column A from the top down.
ws = wb["About"]

# The export date uses %d (day of the month). The former %s is not a day
# directive: it is a platform-specific extension that yields seconds since the
# epoch where it is supported at all.
info = ("Fire Ecology Traits for Plants",
        "Version 1.00 (April 2022)",
        "This data export reflects the status of the database on the %s" % datetime.date.today().strftime('%d %b %Y'),
        "Developed by  José R. Ferrer-Paris and David Keith",
        "Centre for Ecosystem Science / University of New South Wales",
        "Please cite this work as:",
        "Ferrer-Paris, J. R. and Keith, D. A. (2022) Fire Ecology Traits for Plants: A database for fire research and management. Version 1.00. Centre for Ecosystem Science, University of New South Wales, Sydney, Australia.", 
        )

# k is the next row to write; it keeps running across the three sections below
k = 1
for row in info:
    cell = ws.cell(k,1,value=row)
    cell.alignment=wrap_align
    k=k+1

# Row 1 is the title; row 5 (the research centre) links to its web page
ws.cell(1,1).style='Title'
ws.cell(5,1).hyperlink='https://www.unsw.edu.au/research/ecosystem'
ws.cell(5,1).style='Hyperlink'

supporters = ({'institution':"University of New South Wales",'url':"https://www.unsw.edu.au/"},
              {'institution':"NSW Bushfire Research Hub",'url':"https://www.bushfirehub.org/"},
              {'institution':"NESP Threatened Species Recovery Hub",'url':"https://www.nespthreatenedspecies.edu.au/"},
              {'institution':"NSW Department of Planning & Environment",'url':"https://www.planning.nsw.gov.au/"})

# Leave one empty row, then a heading, then one linked row per supporter
k=k+2
ws.cell(k-1,1,value="This work has been supported by:")
for item in supporters:
    cell=ws.cell(k,1)
    cell.value=item['institution']
    cell.hyperlink=item['url']
    cell.style = "Hyperlink"
    k=k+1

# Leave two empty rows before the notes on how to read the workbook
k=k+2
description = ("For general description of the traits, please refer to the 'Trait description' sheet",
              "Vocabularies for categorical traits are available in the 'Vocabularies' sheet",
              "For categorical traits the values in the 'Summary' sheet show the different values reported in the literature records separated by slashes.",
               "If more than one category has been reported, the values are ordered from higher to lower 'weight', categories receiving less than 10% weight are in round brackets, categories with less than 5% in square brackets",
              "The weight is calculated by multiplying the number of time a value is reported (nr. of records) with the weight given to each record (default to 1), and divided by the weight of all records for a given species.",
              "Data entry errors and uncertainties are indicated by asterisks (*)")

for row in description:
    cell = ws.cell(k,1,value=row)
    cell.alignment=wrap_align
    k=k+1

# Turn on sheet protection (no password is set here)
ws.protection.sheet = True

In [10]:
# Fetch the description of every trait, ordered by trait code
cur.execute("SELECT code,name,description,value_type,life_stage,life_history_process,priority FROM litrev.trait_info ORDER BY code")
trait_info = cur.fetchall()

ws = wb["Trait description"]

description = ("The following table gives a general description of the traits used in the 'Summary' sheet",
               "This sheet is protected to avoid accidental changes, but it is not password protected. If you need to filter and reorder entries in the table, please unprotect the sheet first.",
              "Vocabularies for categorical traits are available in the 'Vocabularies' sheet","","")

# Introductory notes go in column C, one per row starting at row 1.
# NOTE(review): the two trailing empty strings presumably reserve rows 4-5 so
# that ws.append() below starts right after them -- confirm before removing.
for note_row, text in enumerate(description, start=1):
    note = ws.cell(note_row, 3, value=text)
    note.alignment = wrap_align

# First row after the notes: expected position of the table header
k = len(description) + 1

ws.append(["Trait Code", "Trait Name", "Description", "Type", "Life stage", "Life history process", "Priority"])
for record in trait_info:
    ws.append(record)

# Wrap the text of column C from the header row down to the last row
last_row = ws.max_row
for table_row in range(k, last_row + 1):
    ws.cell(table_row, 3).alignment = wrap_align

# Seven columns (A to G), from the header row to the last trait
tab = Table(displayName="TraitInformation", ref="A{}:G{}".format(k, ws.max_row))
tab.tableStyleInfo = table_style["Info"]
ws.add_table(tab)

# Protected against accidental edits; no password is set
ws.protection.sheet = True

In [11]:
def summarise_values(x,w):
    """Condense the categories reported for one species into a single label.

    Parameters
    ----------
    x : list
        Reported category for each record; None marks a record without a value.
    w : list
        Weight of each record, parallel to `x`.

    Returns
    -------
    str
        Categories joined by " / ", ordered by decreasing share of the total
        weight. A share above 0.1 is written plainly, a share above 0.05 in
        round brackets, anything else in square brackets. When `x` contains
        None an asterisk is appended.

    Notes
    -----
    Shares are divided by the weight of all records, including those whose
    value is missing.
    """
    suffix = " * " if None in x else ""

    records = pd.concat({"value": pd.Series(x), "weight": pd.Series(w)}, axis=1)
    total_weight = records.weight.sum()
    shares = records.groupby(by="value").sum() / total_weight
    shares = shares.sort_values(by="weight", ascending=[0])

    labels = []
    for category, share in shares["weight"].items():
        if share > 0.1:
            labels.append(category)
        elif share > 0.05:
            labels.append("(%s)" % category)
        else:
            labels.append("[%s]" % category)

    return (" / ".join(labels) + suffix).strip(" ")

In [12]:
# Build one summary column per categorical trait and join them into a single
# table with one row per species.
# The table is reset on every run, so re-executing this cell never merges onto
# results left in the kernel by an earlier execution.
df = None
for trait in ['surv1','surv4','repr2','rect2','disp1','germ1','germ8']:
    # One row per species, with the normalised values and their weights
    # aggregated into two parallel arrays
    cur.execute("SELECT species,species_code,array_agg(norm_value::text) as val,array_agg(weight) as w from litrev.{} WHERE species ilike '%euca%' GROUP BY species, species_code".format(trait))
    res = cur.fetchall()
    if len(res) == 0:
        # No records for this trait: there is nothing to rename or summarise
        continue
    df1 = pd.DataFrame(res)
    col1="%s.v" % trait
    col2="%s.w" % trait

    # The query columns arrive labelled by position (0..3)
    df1=df1.rename(columns={0:"Species",1:"Code",2:col1,3:col2})
    df1[trait]=df1.apply(lambda row : summarise_values(row[col1],row[col2]), axis = 1)
    if df is None:
        df = df1
    else:
        # Outer join keeps species that have records for only some traits
        df = pd.merge(df, df1, on = ["Species","Code"], how = "outer").sort_values(by="Species",ascending=[1])

In [13]:
# Inspect the merged table: per trait, the raw value and weight arrays (".v", ".w") and the summarised label
df

Unnamed: 0,Species,Code,surv1.v,surv1.w,surv1,surv4.v,surv4.w,surv4,repr2.v,repr2.w,...,rect2,disp1.v,disp1.w,disp1,germ1.v,germ1.w,germ1,germ8.v,germ8.w,germ8
0,?Eucalyptus sp. Truslove (Brooker 7499),,[All],[1],All,,,,,,...,,,,,,,,,,
1,Amyema melaleucae,,[None],[1],,,,,,,...,,[animal-ingestion],[1],animal-ingestion,,,,,,
2,Caladenia xantholeuca,4369,[All],[1],All,,,,,,...,,[wind-unspec.],[1],wind-unspec.,,,,,,
3,Craspedia leucantha,1419,[All],[1],All,,,,,,...,,"[wind-unspec., None]","[1, 1]",wind-unspec. *,,,,,,
4,Eucalyptus X calycogona x gracilis,,[All],[1],All,,,,,,...,,[None],[1],*,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1118,[Eucalyptus x sp. Cattai (Gregson s.n.)],,[All],[1],All,,,,,,...,,,,,,,,,,
1119,[Eucalyptus x viridis var. latiuscula],,[All],[1],All,,,,,,...,,,,,,,,,,
1120,[Eucalyptus x walshii],,[All],[1],All,,,,,,...,,,,,,,,,,
1121,[Eucalyptus x yarriambiack],,[All],[1],All,,,,,,...,,,,,,,,,,


In [14]:
def summarise_triplet(x,y,z,w):
    """Condense numeric records of one species into "best (lower -- upper)".

    Parameters
    ----------
    x, y, z : list
        Best estimate, lower bound and upper bound of each record.
    w : list
        Weight of each record. It is placed in the working table but does not
        enter the calculation: the mean of the best estimates is unweighted.

    Returns
    -------
    str
        Mean of the best estimates followed by the smallest lower bound and
        the largest upper bound, each with one decimal. "*" when none of the
        three is available; a missing mean or a missing pair of bounds is
        left out; a single missing bound is shown as "?".
    """
    records = pd.concat({"best": pd.Series(x), "lower": pd.Series(y),
                         "upper": pd.Series(z), "weight": pd.Series(w)}, axis=1)
    centre = records['best'].mean()
    lowest = records['lower'].min()
    highest = records['upper'].max()
    text = "%0.1f (%0.1f -- %0.1f)" % (centre, lowest, highest)

    # Nothing usable at all
    if text == "nan (nan -- nan)":
        return "*"

    first_nan = text.find("nan")
    if first_nan == 0:
        # No best estimate: keep only the bracketed range
        text = text.replace("nan (", "(")
    elif first_nan > 0:
        # Best estimate without any bound: drop the empty range
        text = text.replace(" (nan -- nan)", "")

    # Whatever is still missing is one of the two bounds
    if text.find("nan") > 0:
        text = text.replace("nan", "?")
    return text

In [15]:
# Add one summary column per numeric trait to the species table built from the
# categorical traits.
for trait in ['repr3','repr3a','repr4',]:
    # One row per species, with best/lower/upper estimates and weights
    # aggregated into parallel arrays
    cur.execute("SELECT species,species_code,array_agg(best) as best,array_agg(lower) as lower,array_agg(upper) as upper,array_agg(weight) as w from litrev.{} WHERE species ilike '%euca%' GROUP BY species, species_code".format(trait))
    res = cur.fetchall()
    # Traits without any record are skipped and get no column
    if len(res)>0:
        df1 = pd.DataFrame(res)
        col1="%s.best" % trait
        col2="%s.lower" % trait
        col3="%s.upper" % trait
        col4="%s.w" % trait

        # The query columns arrive labelled by position (0..5)
        df1=df1.rename(columns={0:"Species",1:"Code",2:col1,3:col2,4:col3,5:col4})
        df1[trait]=df1.apply(lambda row : summarise_triplet(row[col1],row[col2],row[col3],row[col4]), axis = 1)
        # Remove columns left by an earlier run of this cell; merging onto
        # them would duplicate every column with _x/_y suffixes. On a first
        # run none of them exist and errors="ignore" makes this a no-op.
        stale = [col1, col2, col3, col4, trait]
        df = pd.merge(df.drop(columns=stale, errors="ignore"), df1, on = ["Species","Code"], how = "outer").sort_values(by="Species",ascending=[1])
   


In [16]:

# Write the species summary to the "Summary" sheet and wrap it in a table.
ws = wb["Summary"]

# Single definition of the exported columns, used for both header and data
summary_cols = ['Species','Code','surv1','surv4','germ1','rect2','repr2','repr3','repr3a']
ws.append(summary_cols)

# reindex() instead of df[summary_cols]: a trait that was skipped upstream for
# lack of records becomes an empty column rather than raising KeyError
rows = dataframe_to_rows(df.reindex(columns=summary_cols), index=False, header=False)

# Data starts in row 2, below the header; r_idx stays at 1 when there is no data
r_idx = 1
for r_idx, row in enumerate(rows, 2):
    for c_idx, value in enumerate(row, 1):
        ws.cell(row=r_idx, column=c_idx, value=value)

# NOTE(review): a table made of a header row only is presumably not accepted
# by spreadsheet applications, so it is added only when data was written -- confirm
if r_idx > 1:
    tab = Table(displayName="Summary", ref="A1:{}{}".format(get_column_letter(len(summary_cols)),r_idx))
    tab.tableStyleInfo = table_style["Lists"]
    ws.add_table(tab)


In [17]:
# Write the workbook into the report folder held in `inputdir`.
# NOTE(review): a file with this name is already listed in that folder and is presumably overwritten -- confirm
wb.save(inputdir / "fireveg-trait-report-model.xlsx")

In [18]:
# Release the cursor first, then the connection
cur.close()
        
# Only guards against `conn` being None; the name itself must already exist
if conn is not None:
    conn.close()
    print('Database connection closed.')

Database connection closed.
