In [33]:
# work with paths in operating system
from pathlib import Path
import os

# datetime support
import datetime
# For database connection
from configparser import ConfigParser
import psycopg2
from psycopg2.extras import DictCursor

# Pandas for calculations
import pandas as pd

# Regular expressions
import re

In [2]:
repodir = Path("../../") 
inputdir = repodir / "data" / "output-report"
os.listdir(inputdir)

['fireveg-trait-report-model.xlsx']

In [3]:
def read_dbparams(filename,section="postgresql"):
    # create a parser
    parser = ConfigParser()
    # read config file
    parser.read(filename)

    # get section, default to postgresql
    db = {}
    if parser.has_section(section):
        params = parser.items(section)
        for param in params:
            db[param[0]] = param[1]
    else:
        raise Exception('Section {0} not found in the {1} file'.format(section, filename))

    return db


In [4]:
filename = repodir / 'secrets' / 'database.ini'
dbparams=read_dbparams(filename,section='aws-lght-sl')

In [13]:
if "conn" not in globals() or conn.closed!=0:
    print('Connecting to the PostgreSQL database...')
    conn = psycopg2.connect(**dbparams)
if "cur" not in globals() or cur.closed:
    cur = conn.cursor(cursor_factory=DictCursor)

Connecting to the PostgreSQL database...


In [14]:
cur.execute("SELECT * FROM litrev.ref_list ")
ref_info = cur.fetchall()



In [44]:

r = re.compile("[A-Z][a-z]+")
def extract_year(x):
    if x.__contains__("personal communication"):
        y = x[0:x.find(" personal")].replace(",","")
        year = "pers. comm."
    elif x.__contains__("unpublished"):
        y = x[0:x.find("unpublished")].replace(",","")
        year = "unpub."
    else:
        y = x[0:x.find(")")].replace(",","")
        year = ''.join(re.findall("\d+", y))
    return(year)

def extract_authors(x):
    authors = x[0:x.find("(")]
    return(authors)

def extract_rest(x):
    post_year = x[x.find(")")+1:]
    return(post_year)

In [45]:
df=pd.DataFrame(ref_info,columns=ref_info[1].keys())

In [46]:
df['date']=df.apply(lambda row : extract_year(row['ref_cite']), axis = 1)
df['authors']=df.apply(lambda row : extract_authors(row['ref_cite']), axis = 1)
df['ref_info']=df.apply(lambda row : extract_rest(row['ref_cite']), axis = 1)

In [47]:
df

Unnamed: 0,ref_code,ref_cite,alt_code,date,authors,ref_info
0,Peter Byrne Beerwah Qld. unpub.,"Peter Byrne, Beerwah, Qld. (unpublished)",NSWFFRD-NFRR-ref-B,unpub.,"Peter Byrne, Beerwah, Qld.",
1,Baird 1977,"Baird, A.M. (1977). Regeneration after fire in...",NSWFFRD-NFRR-ref-BA,1977,"Baird, A.M.",". Regeneration after fire in King's Park, Pert..."
2,Benson McDougall 1995,"Benson, D. and McDougall, L. (1995). Ecology o...",NSWFFRD-NFRR-ref-BB,1995,"Benson, D. and McDougall, L.",. Ecology of Sydney plant species part 3: Dico...
3,Benson McDougall 1997,"Benson, D. and McDougall, L. (1997). Ecology o...",NSWFFRD-NFRR-ref-BD,1997,"Benson, D. and McDougall, L.",. Ecology of Sydney plant species part 5: Dico...
4,Benson 1985,"Benson, D.H. (1985). Maturation periods for fi...",NSWFFRD-NFRR-ref-BE,1985,"Benson, D.H.",. Maturation periods for fire sensitive shrub ...
...,...,...,...,...,...,...
304,Baskin & Baskin 2014,"Baskin, C. and Baskin, J.M. (2014) Seeds: Ecol...",,2014,"Baskin, C. and Baskin, J.M.","Seeds: Ecology, Biogeography, and Evolution o..."
305,Vening etal 2017,Vening etal 2017 Aust J Bot,,2017,Vening etal 2017 Aust J Bo,Vening etal 2017 Aust J Bot
306,Myerscough 1998,Myerscough 1998 Cunninghamia,,1998,Myerscough 1998 Cunninghami,Myerscough 1998 Cunninghamia
307,Clarke et al 2000,Clarke et al 2000,,200,Clarke et al 200,Clarke et al 2000


In [48]:
df.to_excel(inputdir / "fireveg-db-references.xlsx") 

In [11]:
conn.close()