# Read fire related traits from austraits data

We will download data from [AusTraits](https://austraits.org/) ([pre-print](https://www.biorxiv.org/content/10.1101/2021.01.04.425314v1)) and add entries to the database for resprouting time for each species.

Let's start by loading the libraries.

In [23]:
from pathlib import Path
import os
import json
import urllib
from zipfile import ZipFile
import pandas as pd
import numpy as np
import copy

## Read _austraits_ data 
We will download the file from the [Zenodo repository](https://zenodo.org/record/5112001) using the API URL and save it under the data folder.

In [3]:
# Locations are expressed relative to the notebook's working directory.
repodir = Path("../../")
dataset = "https://zenodo.org/api/records/3568417"
outputdir = repodir / "data/austraits/"

# Make sure the download folder (and any missing parent) exists.
outputdir.mkdir(parents=True, exist_ok=True)

We use urllib to open the url and read the data (if successfully connected!)

In [4]:
def getResponse(url):
    """Open ``url`` and return the raw response body.

    Parameters
    ----------
    url : str
        Address to fetch.

    Returns
    -------
    bytes
        Whatever ``read()`` returns for the opened response.

    Raises
    ------
    RuntimeError
        If the response status code is anything other than 200.
    """
    # Imported locally: a bare ``import urllib`` does not guarantee that the
    # ``urllib.request`` submodule has been loaded.
    import urllib.request

    # The context manager closes the connection even when an error is raised.
    with urllib.request.urlopen(url) as operUrl:
        status = operUrl.getcode()
        if status != 200:
            # Fail with a clear message instead of returning an unbound name.
            raise RuntimeError("Error receiving data: status %s" % status)
        return operUrl.read()
# Fetch the record metadata for the Zenodo API URL stored in `dataset`;
# the result is the raw response body, parsed as JSON in the next cell.
zrecord = getResponse(dataset)

The response data is in JSON format; we need to parse it and read the list of files:

In [5]:
# Decode the record metadata and show the name of every file it lists.
jsonData = json.loads(zrecord)
for file_entry in jsonData['files']:
    print(file_entry['key'])

austraits-3.0.2.rds
austraits-3.0.2.zip
dictionary.html
NEWS.md
readme.txt


We want to download the zip file with the csv_files

In [6]:
# Pick the zip archive by its extension rather than by its position in the
# file list, so a reordered record does not make us download the wrong file.
zip_entries = [entry for entry in jsonData['files'] if entry['key'].endswith('.zip')]
if not zip_entries:
    raise ValueError("No zip archive listed in the Zenodo record")
zip_entry = zip_entries[0]

outputfile = outputdir / zip_entry['key']

if os.path.isfile(outputfile):
    # Reuse the local copy instead of downloading again.
    print('File exists')
else:
    resp = getResponse(zip_entry['links']['self'])
    # `with` guarantees the file is flushed and closed even if the write fails.
    with open(outputfile,'wb') as output:
        output.write(resp)

File exists


We will read from the zipfile the data that we need:

In [7]:
# Open the downloaded archive; `zfobj` is kept open because the following
# cells read individual members from it.
zfobj = ZipFile(outputfile)
# List the members of the archive.
zfobj.namelist()

['austraits-3.0.2/',
 'austraits-3.0.2/taxa.csv',
 'austraits-3.0.2/methods.csv',
 'austraits-3.0.2/definitions.yml',
 'austraits-3.0.2/build_info.md',
 'austraits-3.0.2/contributors.csv',
 'austraits-3.0.2/contexts.csv',
 'austraits-3.0.2/excluded_data.csv',
 'austraits-3.0.2/traits.csv',
 'austraits-3.0.2/taxonomic_updates.csv',
 'austraits-3.0.2/sites.csv',
 'austraits-3.0.2/sources.bib']

### Read files
We will need to read the files with the definitions (in _yaml_ format), the sources or references (in _bibtex_ format) and the traits and taxonomic data (in _csv_ format)

In [8]:
import yaml

# Parse the trait definitions shipped inside the archive.
with zfobj.open('austraits-3.0.2/definitions.yml') as file:
    try:
        ATdefinitions = yaml.safe_load(file)
    except yaml.YAMLError as exc:
        # Show the parser message, then re-raise: later cells need
        # ATdefinitions and would otherwise fail with an unrelated NameError.
        print(exc)
        raise

print(ATdefinitions.keys())

dict_keys(['traits', 'value_type', 'austraits', 'metadata'])


In [9]:
from pybtex.database.input import bibtex
parser = bibtex.Parser()

# ZipFile.read() returns the member's bytes and closes the member handle
# itself, unlike open(...).read(), which leaves the handle open.
ATrefs = parser.parse_bytes(zfobj.read('austraits-3.0.2/sources.bib'))


In [10]:
# Read the traits table straight from the archive; the `with` block closes
# the member handle once pandas has finished reading.
with zfobj.open('austraits-3.0.2/traits.csv') as traits_file:
    ATtraits = pd.read_csv(traits_file,low_memory=False)

In [11]:
# Read the taxa table straight from the archive; the `with` block closes
# the member handle once pandas has finished reading.
with zfobj.open('austraits-3.0.2/taxa.csv') as taxa_file:
    ATtaxa = pd.read_csv(taxa_file)

We will also read the updated species data from BioNET:

In [12]:
# The BioNET spreadsheet sits in the repository's data folder.
inputdir = repodir / "data/"
bionet_path = inputdir / 'vis-survey-datasheet-6000.PowerQuery.20210708.xlsx'
BioNET = pd.read_excel(bionet_path)

## Utility functions

In [13]:
def extract_reflabel(refid):
    """Return a short "<author surnames> <year>" label for a bibliography key.

    The entry is looked up in the module-level ``ATrefs`` bibliography.
    Labels longer than 50 characters are cut to their first 47 characters
    followed by "...".
    """
    entry = ATrefs.entries[refid]
    year = entry.fields['year']
    # Collect every surname part of every author, in order.
    surnames = [name
                for person in entry.persons['author']
                for name in person.last_names]
    label = "%s %s" % (" ".join(surnames), year)
    if len(label) > 50:
        label = label[:47] + "..."
    return label

def extract_refinfo(refid):
    """Return a citation string for a bibliography key.

    The string has the form "<authors> (<year>) <title>", with authors joined
    by "; ", followed by the journal, volume and doi fields (in that order)
    whenever the entry has them. The entry is looked up in the module-level
    ``ATrefs`` bibliography.
    """
    entry = ATrefs.entries[refid]
    fields = entry.fields
    year = fields['year']
    title = fields['title']
    # Joining also covers the single-author case: one name, no separator.
    author_text = "; ".join(str(person) for person in entry.persons['author'])
    pieces = ["%s (%s) %s" % (author_text, year, title)]
    for extra in ('journal', 'volume', 'doi'):
        if extra in fields.keys():
            pieces.append(fields[extra])
    return " ".join(pieces)

def match_spcode(row):
    """Look up the BioNET species code for one trait record.

    Parameters
    ----------
    row : mapping
        Must provide 'taxon_name' and 'original_name'.

    Returns
    -------
    dict
        Always contains 'species' (the taxon name). 'original_notes' is added
        when the original name differs from the taxon name. 'species_code' is
        added only when exactly one row of the module-level ``BioNET`` table
        matches and its 'speciesCode_Synonym' value is not missing; the taxon
        name is tried first, then the original name.
    """
    spname=row['taxon_name']
    altname=row['original_name']
    result={'species':spname}
    if altname!=spname:
        result['original_notes']=['original_name:',altname]

    def _unique_code(name):
        # Return the code for `name` only if the match is unique and the code
        # is an actual value (a missing code counts as "no match").
        codes = BioNET.loc[BioNET['scientificName'] == name, 'speciesCode_Synonym']
        if len(codes)==1 and pd.notna(codes.values[0]):
            return codes.values[0]
        return None

    spcode = _unique_code(spname)
    if spcode is None and spname != altname:
        # Fall back to the name originally used in the source dataset.
        spcode = _unique_code(altname)
        if spcode is not None:
            result['original_notes'].append('original name used to match with BioNET names')
    if spcode is not None:
        result['species_code']=spcode

    return result


In [14]:
# Try both citation helpers on one known reference key.
sample_refid = 'NSWFRD_2014'
print(extract_refinfo(sample_refid))
print(extract_reflabel(sample_refid))

Kenny, Belinda; Orscheg, Corinna; Tasker, Elizabeth; Gill, Malcolm A.; Bradstock, Ross (2014) {NSW Flora Fire Response Database, v2.1}
Kenny Orscheg Tasker Gill Bradstock 2014


In [27]:
def create_record(row):
    """Turn one trait row into a list of database records.

    The 'value' field is split on single spaces and one record is produced per
    token. Each token is translated through the module-level ``switcher``
    mapping; when a translation exists it is stored under 'norm_value'.

    Parameters
    ----------
    row : mapping
        Must provide 'dataset_id', 'value', 'trait_name', 'value_type',
        'site_name', plus the keys read by ``match_spcode``.

    Returns
    -------
    list of dict
        One record per token of ``row['value']``.
    """
    refid=row['dataset_id']
    reflabel = extract_reflabel(refid)
    val = row['value']
    records=list()
    record={'main_source': 'austraits-3.0.2',
            'additional_notes': ['Values reclassified by JRFP',
                                'Automatic extraction with python script'],
            'raw_value': [row['trait_name'],val,row['value_type']],
            'original_notes': list(),
           'original_sources':[reflabel]}
    site_name = row['site_name']
    # A missing site name may be a real NaN or the "nan" placeholder text
    # (written when the table is filled with fillna("nan")); skip both.
    if not pd.isna(site_name) and site_name != "nan":
        record['original_notes'].append('site name:')
        record['original_notes'].append(site_name)

    spinfo=match_spcode(row)
    for key, value in spinfo.items():
        if key == 'original_notes':
            # Append to the notes gathered so far instead of replacing them,
            # otherwise the site name recorded above would be lost.
            record['original_notes'].extend(value)
        else:
            record[key]=value
    # Compare the dataset id, not the human-readable label: the label is built
    # from author surnames and year, so it never equals the id.
    if refid=='NSWFRD_2014':
        record['weight'] = 0
        record['weight_notes'] = ["python-script import","default of 0 for redundant records"]
    else:
        record['weight'] = 1
        record['weight_notes'] = ["python-script import","default of 1"]

    for sw in val.split(" "):
        # Each token gets its own independent copy of the shared fields.
        indrecord=copy.deepcopy(record)
        sw=sw.strip(" ")
        transvalue=switcher.get(sw, None)
        if sw != val:
            # Record which part of a multi-valued entry this record stands for.
            indrecord['raw_value'].extend(['->',sw])
        if transvalue is not None:
            indrecord["norm_value"]=transvalue
        records.append(indrecord)
    return(records)

## Categorical traits

In [49]:
# Distinct trait names present in the traits table, and how many there are.
alltraits = ATtraits['trait_name'].unique()
alltraits.size

448

In [58]:
# Peek at the trait names in positions 112 to 121 of the unique-name array.
alltraits[112:122]


array(['leaf_area', 'leaf_delta13C', 'leaf_N_per_dry_mass', 'leaf_PRI',
       'modified_NDVI', 'specific_leaf_area', 'water_band_index',
       'wood_density', 'seed_length', 'seed_shape', 'seed_texture',
       'seed_width', 'seed_mass', 'germination_treatment',
       'dispersal_appendage', 'dispersal_syndrome',
       'leaf_dry_matter_content', 'plant_growth_form', 'plant_height',
       'leaf_length', 'leaf_thickness', 'leaf_width',
       'sapwood_specific_conductivity_theoretical', 'vessel_density',
       'vessel_diameter', 'vessel_lumen_fraction',
       'vessel_diameter_hydraulic', 'vessel_multiple_fraction',
       'water_use_efficiency_intrinsic', 'leaf_hairs_adult', 'leaf_shape',
       'fire_response', 'regen_strategy', 'fire_cued_seeding',
       'leaf_compoundness', 'seed_breadth', 'leaf_phenology',
       'flowering_time', 'life_history', 'leaf_type',
       'leaf_hydraulic_conductivity', 'leaf_hydraulic_vulnerability',
       'leaf_turgor_loss_point', 'ci_at_Amax', 'c

### Dormancy type

In [44]:
# Print every attribute stored in the definition of the dormancy_type trait.
element=ATdefinitions['traits']['elements']['dormancy_type']
for attr_name, attr_value in element.items():
    print("{key} :: {value}".format(key=attr_name,value=attr_value))

description :: Classification for seed dormancy
type :: categorical
label :: Dormancy type
values :: {'morphophysiological_dormancy': 'Seeds exhibit morphophysiological dormancy', 'non_dormant': 'Seeds are non-dormant', 'physical_dormancy': 'Seeds exhibit physical dormancy', 'physiological_dormancy': 'Seeds exhibit physiological dormancy'}


In [28]:
# Boolean mask selecting the dormancy_type rows, then display those rows.
ss = ATtraits['trait_name'].eq('dormancy_type')
ATtraits[ss]

Unnamed: 0,dataset_id,taxon_name,site_name,context_name,observation_id,trait_name,value,unit,date,value_type,replicates,original_name
479894,Ooi_2007,Acacia binervata,Fredericktown,,Ooi_2007_00001,dormancy_type,physical_dormancy,,1978-11-21,expert_mean,,Acacia binervata
481466,Ooi_2007,Angophora bakeri,Agnes Banks to Castlereagh,,Ooi_2007_01580,dormancy_type,non_dormant,,1977-02-18,expert_mean,,Angophora bakeri
488008,Ooi_2007,Isopogon anemonifolius,Cordeaux Cataract Catchment,,Ooi_2007_08122,dormancy_type,physiological_dormancy,,1975-08-12,expert_mean,,Isopogon anemonifolius
489445,Ooi_2007,Bulbine bulbosa,Blacktown,,Ooi_2007_09558,dormancy_type,morphophysiological_dormancy,,1974-12-04,expert_mean,,Bulbine bulbosa
489745,Ooi_2007,Calotis cuneifolia,Ashford,,Ooi_2007_09858,dormancy_type,non_dormant physiological_dormancy,,1978-11-25,expert_mean,,Calotis cuneifolia


In [35]:
# create_record() recognises the text "nan" as a missing site name, so missing
# values are replaced in the table before iterating.
ATtraits.fillna("nan",inplace=True)
# Select the rows explicitly instead of through the shared `ss` mask, which
# other cells reassign; this keeps the cell correct when it is re-run.
target = ATtraits[ATtraits['trait_name']=='dormancy_type']

# Translation from AusTraits dormancy values to the codes stored in the database.
switcher={
        "non_dormant": "ND",
        'physiological_dormancy': "PD",
        'morphophysiological_dormancy': 'MPD', 
        'physical_dormancy': 'PY'
    }

reflist=list()   # dataset ids seen, in order of first appearance
records=list()   # flat list of records for all rows
for idx, row in target.iterrows():
    # create_record returns a list: one record per value token in the row.
    row_records=create_record(row)
    refid=row['dataset_id']
    if refid not in reflist:
        reflist.append(refid)
    records.extend(row_records)
records

[{'main_source': 'austraits-3.0.2',
  'additional_notes': ['Values reclassified by JRFP',
   'Automatic extraction with python script'],
  'raw_value': ['dormancy_type', 'physical_dormancy', 'expert_mean'],
  'original_notes': ['site name:', 'Fredericktown'],
  'original_sources': ['Ooi Myerscough Auld 2007'],
  'species': 'Acacia binervata',
  'species_code': '3716',
  'weight': 1,
  'weight_notes': ['python-script import', 'default of 1'],
  'norm_value': 'PY'},
 {'main_source': 'austraits-3.0.2',
  'additional_notes': ['Values reclassified by JRFP',
   'Automatic extraction with python script'],
  'raw_value': ['dormancy_type', 'non_dormant', 'expert_mean'],
  'original_notes': ['site name:', 'Agnes Banks to Castlereagh'],
  'original_sources': ['Ooi Myerscough Auld 2007'],
  'species': 'Angophora bakeri',
  'species_code': '3969',
  'weight': 1,
  'weight_notes': ['python-script import', 'default of 1'],
  'norm_value': 'ND'},
 {'main_source': 'austraits-3.0.2',
  'additional_notes

In [36]:
from configparser import ConfigParser
import psycopg2
from psycopg2.extensions import AsIs

# Connection settings are read from an ini file kept outside the notebook.
filename = repodir / 'secrets' / 'database.ini'
section = 'aws-lght-sl'

parser = ConfigParser()
parser.read(filename)

# Stop early when the expected section is missing.
if not parser.has_section(section):
    raise Exception('Section {0} not found in the {1} file'.format(section, filename))

# Every option of the section becomes a connection parameter.
params = parser.items(section)
dbparams = dict(params)

In [37]:
print('Connecting to the PostgreSQL database...')
conn = psycopg2.connect(**dbparams)
# Running total of rows reported by the cursor across BOTH insert loops below.
affected_rows=0

try:
    # The cursor is closed automatically when the `with` block ends.
    with conn.cursor() as cur:
        # First register every reference used by the prepared records.
        for refid in reflist:
            cur.execute("INSERT INTO litrev.ref_list(ref_code,alt_code,ref_cite) values(%s,%s,%s) ON CONFLICT DO NOTHING",
                        (extract_reflabel(refid), refid, extract_refinfo(refid)))
            affected_rows = affected_rows+cur.rowcount
        conn.commit()
        print("total number of lines updated: %s" % affected_rows)

        # Column names come from the record keys; values are passed as one tuple.
        insert_statement = 'insert into litrev.germ8 (%s) values %s ON CONFLICT DO NOTHING'
        print("total of %s records prepared" % len(records))
        for record in records:
            cur.execute(insert_statement, (AsIs(','.join(record.keys())), tuple(record.values())))
            affected_rows = affected_rows+cur.rowcount
        conn.commit()
        # Empty the list only after the commit succeeded, so a failed commit
        # does not lose the prepared records.
        records.clear()
        print("total number of lines updated: %s" % affected_rows)
finally:
    # Always release the connection, even when an insert fails.
    conn.close()
    print('Database connection closed.')


Connecting to the PostgreSQL database...
total number of lines updated: 0
total of 6 records prepared
total number of lines updated: 6
Database connection closed.


### Other traits


In [64]:
# Two groups of trait names; only the dispersal group is inspected here.
fire_traits = ('vegetative_regeneration','fire_response_juvenile','fire_response','fire_cued_seeding','resprouting_proportion_individuals')
dispersal_traits = ('dispersal_appendage', 'dispersal_syndrome',)
traits = dispersal_traits
ss = ATtraits['trait_name'].isin(traits)
# Distinct values recorded for the selected traits.
ATtraits[ss].value.unique()

array(['aril', 'funicle', 'hairs spines', 'barbs', 'floral_parts',
       'floral_parts hairs', 'bristles', 'wings', 'bract', 'hairs',
       'awns', 'pappus', 'none', 'caruncle', 'style', 'paddles',
       'hairs pappus', 'spines', 'beak', 'glumes', 'curved_awn',
       'awns hairs', 'feathery_style', 'placental_endocarp',
       'inflated_parts', 'awns bristles', 'bracts hairs', 'pseudo-wing',
       'short_hairs', 'strophiole', 'enclosing_wing', 'scales', 'bracts',
       'elaiosome', 'barbs hairs', 'bristles style', 'animal_vector',
       'undefined', 'endozoochory', 'adhesion', 'wind', 'unassisted',
       'myrmecochory', 'mobile', 'anemochory', 'hydrochory',
       'unassisted wind', 'ballistic', 'animal_vector hydrochory',
       'anemochory animal_vector hydrochory', 'anemochory animal_vector',
       'water wind', 'indehiscent', 'winged_fruit', 'seed_airsac',
       'bladdery_wings', 'fleshy_wings_capsule',
       'fleshy_dehiscent_capsule', 'fleshy_fruit', 'seed_wing_obsolet

### References

In [117]:
# Use the bibliography parsed earlier in this notebook (ATrefs); the name
# `bib_data` is never defined here, so it fails on a fresh kernel.
# Print every citation key, then display one full entry.
for key in ATrefs.entries.keys():
    print(key)
ATrefs.entries['Adams_1984']

Adams_1984
Ahrens_2019
ANBG_2019
Angevin_2011
Apgaua_2015
Apgaua_2017
Ashton_1975
Ashton_1976
Atkinson_2020
Atkinson_2020_2
Attiwill_1980
Baker_2019
Barlow_1981
Bean_1997
Bell_1985
Bennett_1997
Bevege_1978
Birk_1992
Blackman_2010
Blackman_2014
Blackman_2018
Bloomfield_2018
Bolza_1975
Bragg_2002
BRAIN_2007
Briggs_2010
Brock_1993
Brodribb_2009
Buckton_2019
Burgess_2007
Burrows_2001
Butler_2011
CAB_2009
Caldwell_2016
Canham_2009
Carpenter_1994
Carpenter_2005
Catford_2011
Catford_2014
Cernusak_2006
Cernusak_2011
Chandler_2002
Chave_2009
Cheal_2017
Cheesman_2020
Chen_2017
Chinnock_2007
Choat_2005
Choat_2006
Choat_2012
Chudnoff_1984
CIRAD_2009
Clarke_2015
Cooper_2004
Cooper_2013
Cornwell_2006
CPBR_2002
Craven_1987
Craven_2010
Crisp_2017
Cromer_1975
Cross_2009
Crous_2013
Crous_2019
Cunningham_1999
Curran_2009
Curtis_2012
Denton_2007
Desh_1996
Detombeur_2021
Dong_2017
Dong_2020
Du_2018
Du_2019
Duan_2015
Duncan_1998
Duncan_2011
Dwyer_2017
Dwyer_2018
Eamus_1998
Eamus_1999
Eamus_1999_2
Edwards_20

Entry('article',
  fields=[
    ('year', '1984'), 
    ('journal', 'Australian Journal of Botany'), 
    ('title', '{Role of Acacia spp. in nutrient balance and cycling in regenerating Eucalyptus regnans F. Muell. forests. I. Temporal changes in biomass and nutrient content}'), 
    ('volume', '32'), 
    ('number', '2'), 
    ('pages', '205--215'), 
    ('doi', '10.1071/bt9840205')],
  persons=OrderedCaseInsensitiveDict([('author', [Person('Adams, M. A.'), Person('M, P.'), Person('{Attiwill}')])]))

In [176]:
# Use the bibliography parsed earlier in this notebook (ATrefs); the name
# `bib_data` is never defined here, so it fails on a fresh kernel.
entry = ATrefs.entries['Adams_1984']
print(entry.fields['year'])
for person in entry.persons['author']:
    print(person.__str__())

# Attributes available on the last author object of the loop above.
dir(person)


1984
Adams, M. A.
M, P.
{Attiwill}


['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_parse_string',
 'bibtex_first',
 'bibtex_first_names',
 'first',
 'first_names',
 'get_part',
 'get_part_as_text',
 'last',
 'last_names',
 'lineage',
 'lineage_names',
 'middle',
 'middle_names',
 'prelast',
 'prelast_names',
 'rich_first_names',
 'rich_last_names',
 'rich_lineage_names',
 'rich_middle_names',
 'rich_prelast_names',
 'style1_re',
 'style2_re',
 'valid_roles']