# Status for data in the litrev schema
This represents the existing sources data stream

Compare from version v1.0 and v1.1

In [1]:
# work with paths in operating system
from pathlib import Path
import os, sys
import pandas as pd
# Pyprojroot for easier handling of working directory
import pyprojroot

### Define paths for input and output

Define project directory using the `pyprojroot` functions, and add this to the execution path.

In [2]:
repodir = pyprojroot.find_root(pyprojroot.has_dir(".git"))
sys.path.append(str(repodir))

### Load own functions
Load functions from `lib` folder, we will use a function to read db credentials, one for executing database queries and three functions for extracting data from the reference description string

In [4]:
from lib.parseparams import read_dbparams
from lib.firevegdb import dbquery

### Database credentials

🤫 We use a folder named "secrets" to keep the credentials for connection to different services (database credentials, API keys, etc). This checked this folder in our `.gitignore` so that its content are not tracked by git and not exposed. Future users need to copy the contents of this folder manually.

We read database credentials stored in a `database.ini` file using our own `read_dbparams` function.

In [5]:
db_v1_0 = read_dbparams(repodir / 'secrets' / 'database.ini', 
                         section='fireveg-db-v1.0')
db_v1_1 = read_dbparams(repodir / 'secrets' / 'database.ini', 
                         section='fireveg-db-v1.1')

In [6]:
qrystr = "select count(*) from litrev.ref_list;"
dbquery(qrystr,db_v1_0)

[[309]]

In [7]:
dbquery(qrystr,db_v1_1)

[[347]]

In [8]:
qrystr = "select norm_value,count(*) from litrev.disp1 group by norm_value;"

In [11]:
dbquery(qrystr,db_v1_0)

[['animal-ingestion', 1320],
 ['ant', 4399],
 ['animal-unspec.', 1840],
 ['passive', 696],
 ['ballistic', 141],
 [None, 4038],
 ['wind-hairs', 21],
 ['water', 440],
 ['animal-cohesion', 2179],
 ['wind-unspec.', 4023],
 ['wind-wing', 224]]

In [12]:
dbquery(qrystr,db_v1_1)

[[None, 8433],
 ['animal-cohesion', 2154],
 ['wind-wing', 258],
 ['animal-unspec.', 9378],
 ['animal-ingestion', 1531],
 ['water', 957],
 ['wind-unspec.', 9319],
 ['ant', 2412],
 ['ballistic', 125],
 ['wind-hairs', 24]]

In [60]:
qrystr="select main_source,count(*) from litrev.{} group by main_source;"
records=list()
for trait in ["surv1","surv4", "surv5", "surv6", "surv7", 
              "germ1","germ8",
              "repr2","repr3","repr3a","repr4",
              "grow1",
              "rect2",
              "disp1"]:
    qry = qrystr.format(trait)
    res = dbquery(qry, db_v1_1)
    record={'trait': trait,'version': 'v1.1'}
    for row in res:
        record[row[0]]=row[1]
    records.append(record)
    res = dbquery(qry, db_v1_0)
    record={'trait': trait,'version': 'v1.0'}
    for row in res:
        record[row[0]]=row[1]
    records.append(record)



In [61]:

df = pd.DataFrame(records)


In [64]:
df

Unnamed: 0,trait,version,austraits-6.0.0,NSWFFRDv2.1,Bell Vollmer Gellie 1993,austraits-3.0.2,Ooi Myerscough Auld 2007,None
0,surv1,v1.1,29896.0,11563.0,,,,
1,surv1,v1.0,,11564.0,1.0,18344.0,,
2,surv4,v1.1,,1411.0,,,,
3,surv4,v1.0,,1411.0,,,,
4,surv5,v1.1,,1262.0,,,,
5,surv5,v1.0,,1263.0,,,,
6,surv6,v1.1,,,,,,
7,surv6,v1.0,,7.0,,,,
8,surv7,v1.1,,87.0,,,,
9,surv7,v1.0,,87.0,,,,


In [62]:
ss=df['version']=='v1.1'
df[ss]


Unnamed: 0,trait,version,austraits-6.0.0,NSWFFRDv2.1,Bell Vollmer Gellie 1993,austraits-3.0.2,Ooi Myerscough Auld 2007,None
0,surv1,v1.1,29896.0,11563.0,,,,
2,surv4,v1.1,,1411.0,,,,
4,surv5,v1.1,,1262.0,,,,
6,surv6,v1.1,,,,,,
8,surv7,v1.1,,87.0,,,,
10,germ1,v1.1,4164.0,1635.0,,,,
12,germ8,v1.1,4171.0,,,,,
14,repr2,v1.1,431.0,139.0,,,,1.0
16,repr3,v1.1,,838.0,,,,
18,repr3a,v1.1,,662.0,,,,


In [63]:
ss=df['trait']=='germ1'
df[ss]

Unnamed: 0,trait,version,austraits-6.0.0,NSWFFRDv2.1,Bell Vollmer Gellie 1993,austraits-3.0.2,Ooi Myerscough Auld 2007,None
10,germ1,v1.1,4164.0,1635.0,,,,
11,germ1,v1.0,,1635.0,,,,
