In [1]:
from pathlib import Path
import os
from configparser import ConfigParser
import psycopg2
from psycopg2.extensions import AsIs

repodir = Path("../../") 
filename = repodir / 'secrets' / 'database.ini'
section = 'aws-lght-sl'

parser = ConfigParser()
parser.read(filename)

dbparams = {}
if parser.has_section(section):
    params = parser.items(section)
    for param in params:
        dbparams[param[0]] = param[1]
else:
    raise Exception('Section {0} not found in the {1} file'.format(section, filename))

# Populate VA groups table

In [2]:
print('Connecting to the PostgreSQL database...')
conn = psycopg2.connect(**dbparams)
cur = conn.cursor()
affected_rows=0

qry = """
INSERT INTO vag.va_groups(species,species_code,comments)
SELECT "scientificName", "speciesID",
    '{"Match by speciesID","Import species names from BioNET"}' 
FROM species.caps
ON CONFLICT DO NOTHING;
"""
cur.execute(qry)
affected_rows = affected_rows+cur.rowcount
print("%s rows updated" % affected_rows)
conn.commit()
cur.close()

if conn is not None:
    conn.close()
    print('Database connection closed.')     


Connecting to the PostgreSQL database...
15732 rows updated
Database connection closed.


# Establishment

In [3]:
print('Connecting to the PostgreSQL database...')
conn = psycopg2.connect(**dbparams)
cur = conn.cursor()

qry="""
CREATE TEMP TABLE va_estab (species,spid,species_code,n,rect2) AS (
SELECT species,"speciesID",species_code,
COUNT(DISTINCT r2.record_id),
array_agg(r2.norm_value) 
FROM litrev.rect2 r2
LEFT JOIN species.caps 
  ON "speciesCode_Synonym"=r2.species_code 
WHERE species_code IS NOT NULL 
GROUP BY species,"speciesID",species_code
);
"""
cur.execute(qry)
print("%s rows updated" % cur.rowcount)


qry="""
UPDATE vag.va_groups 
SET establishment=array_append(establishment,'I'),
    status_establishment='{"All literature records refer to this category"}'
WHERE species_code IN (
    SELECT spid FROM va_estab 
    WHERE 'Intolerant'=ANY(rect2)
    );
"""
cur.execute(qry)
print("%s rows updated" % cur.rowcount)

qry="""
UPDATE vag.va_groups 
SET establishment=array_append(establishment,'T'),
    status_establishment='{"All literature records refer to this category"}'
WHERE species_code IN (
    SELECT spid FROM va_estab 
    WHERE 'Tolerant'=ANY(rect2)
    );
"""
cur.execute(qry)
print("%s rows updated" % cur.rowcount)


qry="""
UPDATE vag.va_groups 
SET establishment=array_append(establishment,'R'),
    status_establishment='{"All literature records refer to this category"}'
WHERE species_code IN (
    SELECT spid FROM va_estab 
    WHERE 'Requiring'=ANY(rect2)
    );
"""
cur.execute(qry)
print("%s rows updated" % cur.rowcount)


qry="""
UPDATE vag.va_groups 
SET establishment=array_append(establishment,'I/T'),
    status_establishment='{"All literature records refer to this category"}'
WHERE species_code IN (
    SELECT spid FROM va_estab 
    WHERE 'Intolerant-Tolerant'=ANY(rect2)
    );
"""
cur.execute(qry)
print("%s rows updated" % cur.rowcount)

qry="""
UPDATE vag.va_groups 
SET establishment=array_append(establishment,'T/r'),
    status_establishment='{"All literature records refer to this category"}'
WHERE species_code IN (
    SELECT spid FROM va_estab 
    WHERE 'Tolerant-Requiring'=ANY(rect2)
    );
"""
cur.execute(qry)
print("%s rows updated" % cur.rowcount)



Connecting to the PostgreSQL database...
1026 rows updated
634 rows updated
318 rows updated
34 rows updated
3 rows updated
0 rows updated


In [4]:
qry="""
UPDATE vag.va_groups 
SET status_establishment='{"There are literature records for two different categories"}'
WHERE cardinality(establishment)=2; 
""" ## prob. need some 'unique' function to avoid duplicates
cur.execute(qry)
print("%s rows updated" % cur.rowcount)


50 rows updated


In [5]:

cur.close()
if conn is not None:
    conn.commit()
    conn.close()
    print('Database connection closed.')     


Database connection closed.


# Persistence

## First Rule

    1. If Disp1 = ‘wind-wing/’ OR ‘animal-ingestion/’ THEN 2, otherwise 3
    2. If Surv4 = ‘None’ THEN Persistence VA = D, otherwise Persistence VA = Δ
    3. ...

In [6]:
print('Connecting to the PostgreSQL database...')
conn = psycopg2.connect(**dbparams)
cur = conn.cursor()
affected_rows=0

qry = """
CREATE TEMP TABLE va_rule1 (species,spid,species_code,n_records,n_valid) AS (
SELECT species,"speciesID",species_code,
COUNT(DISTINCT record_id),
SUM(
  CASE WHEN norm_value IN ('wind-wing','animal-ingestion') THEN 1
  ELSE 0 END
)
FROM litrev.disp1 
LEFT JOIN species.caps 
  ON "speciesCode_Synonym"=species_code 
WHERE species_code IS NOT NULL 
GROUP BY species,"speciesID",species_code
--LIMIT 10
);
"""
cur.execute(qry)
print("%s rows updated" % cur.rowcount)

Connecting to the PostgreSQL database...
6253 rows updated


In [7]:
qry="""
UPDATE vag.va_groups 
SET persistence='{D,Δ}',
    rationale_persistence[1]='disp1 = wind-wing OR animal-ingestion',
    rationale_persistence[2]='TO DO',
    status_persistence='{"criteria met for two possible categories","outcome not resolved yet"}'
WHERE species_code IN (
    SELECT spid FROM va_rule1 WHERE n_records > 0 AND n_valid > 0
    );
"""
cur.execute(qry)
print("%s rows updated" % cur.rowcount)

735 rows updated


In [8]:
qry="""
UPDATE vag.va_groups 
SET persistence='{S,Σ,G,Γ,V,U,C,W}',
    rationale_persistence[1]='disp1 does not meet criteria',
    rationale_persistence[2]='skip',
    status_persistence='{"criteria not met","several possible categories","outcome not resolved yet"}'
WHERE species_code IN (
    SELECT spid FROM va_rule1 WHERE n_records > 0 AND n_valid = 0
    );
"""
cur.execute(qry)
print("%s rows updated" % cur.rowcount)

5516 rows updated


In [9]:
qry = """
UPDATE vag.va_groups 
SET rationale_persistence[1]='No records for disp1',
    status_persistence='{"missing information","cannot proceed"}'
WHERE species_code NOT IN (
    SELECT spid FROM va_rule1 
    );
"""
cur.execute(qry)
print("%s rows updated" % cur.rowcount)

9481 rows updated


In [10]:
qry = """
SELECT persistence, establishment, count(*)
FROM vag.va_groups
GROUP BY persistence,establishment;
"""
cur.execute(qry)
res=cur.fetchall()
cur.close()
conn.commit()
if conn is not None:
    conn.close()
    print('Database connection closed.')     


Database connection closed.


In [11]:
print(res)

[(['D', 'Δ'], ['I', 'T'], 10), (None, None, 9345), (['S', 'Σ', 'G', 'Γ', 'V', 'U', 'C', 'W'], ['T'], 204), (['D', 'Δ'], ['R'], 6), (['D', 'Δ'], ['I'], 53), (['S', 'Σ', 'G', 'Γ', 'V', 'U', 'C', 'W'], ['T', 'R'], 4), (None, ['R'], 2), (['D', 'Δ'], ['T', 'R'], 3), (['D', 'Δ'], ['T'], 27), (['S', 'Σ', 'G', 'Γ', 'V', 'U', 'C', 'W'], ['I/T'], 3), (['S', 'Σ', 'G', 'Γ', 'V', 'U', 'C', 'W'], ['I', 'T'], 33), (None, ['I'], 97), (['S', 'Σ', 'G', 'Γ', 'V', 'U', 'C', 'W'], ['R'], 19), (None, ['T'], 37), (['D', 'Δ'], None, 636), (['S', 'Σ', 'G', 'Γ', 'V', 'U', 'C', 'W'], None, 4812), (['S', 'Σ', 'G', 'Γ', 'V', 'U', 'C', 'W'], ['I'], 441)]


## Second Rule

    1. ...
    2. If Surv4 = ‘None’ THEN Persistence VA = D, otherwise Persistence VA = Δ

In [12]:
print('Connecting to the PostgreSQL database...')
conn = psycopg2.connect(**dbparams)
cur = conn.cursor()

qry="""
CREATE TEMP TABLE va_rule2 (species,spid,species_code,n_records,n_valid) AS (
SELECT species,"speciesID",species_code,
COUNT(DISTINCT record_id),
SUM(
  CASE WHEN norm_value IN ('None') THEN 1
  ELSE 0 END
)
FROM litrev.surv1 
LEFT JOIN species.caps 
  ON "speciesCode_Synonym"=species_code 
WHERE species_code IS NOT NULL 
GROUP BY species,"speciesID",species_code
);
"""
cur.execute(qry)
print("%s rows updated" % cur.rowcount)

Connecting to the PostgreSQL database...
6816 rows updated


In [13]:
qry="""
UPDATE vag.va_groups 
SET persistence='{D}', 
    rationale_persistence[2]='surv1 = None',
    status_persistence='{"criteria met for one category"}'
WHERE species_code IN (
    SELECT spid FROM va_rule2 
    WHERE n_records > 0 AND n_valid > 0) 
AND rationale_persistence[2]='TO DO';
"""
cur.execute(qry)
print("%s rows updated" % cur.rowcount)

436 rows updated


In [14]:
qry="""
UPDATE vag.va_groups 
SET persistence='{Δ}',
    rationale_persistence[2]='surv1 other than None',
    status_persistence='{"criteria met for one category"}'
WHERE species_code IN (
    SELECT spid FROM va_rule2 
    WHERE n_records > 0 AND n_valid = 0) 
AND rationale_persistence[2]='TO DO';
"""

cur.execute(qry)
print("%s rows updated" % cur.rowcount)




240 rows updated


In [15]:
qry = """
SELECT persistence, status_persistence, count(*)
FROM vag.va_groups
GROUP BY persistence,status_persistence;
"""
cur.execute(qry)

res=cur.fetchall()

print(res)
cur.close()

if conn is not None:
    conn.commit()
    conn.close()
    print('Database connection closed.')     


[(['S', 'Σ', 'G', 'Γ', 'V', 'U', 'C', 'W'], ['criteria not met', 'several possible categories', 'outcome not resolved yet'], 5516), (None, ['missing information', 'cannot proceed'], 9481), (['Δ'], ['criteria met for one category'], 240), (['D'], ['criteria met for one category'], 436), (['D', 'Δ'], ['criteria met for two possible categories', 'outcome not resolved yet'], 59)]
Database connection closed.


## Third Rule (and fourth and fifth)

    1. If Disp1 = ‘wind-wing/’ OR ‘animal-ingestion/’ THEN 2, otherwise 3
    2. ...
    3. If Surv1 = ‘None’ OR ‘Few’ OR Surv2 = ‘Stem mortality and no resprouting’ OR Surv3 = ‘None’ OR ‘Few’ THEN 4, otherwise 6
    4. If Germ1 = ‘Canopy’ OR ‘Transient’ OR Germ8 = ‘ND’ OR Surv7 <1 THEN Persistence VA = C, otherwise 5

In [16]:
print('Connecting to the PostgreSQL database...')
conn = psycopg2.connect(**dbparams)
cur = conn.cursor()

##cur.execute("DROP TABLE va_rule3")
qry="""
CREATE TEMP TABLE va_rule3 (species,spid,species_code,n_records,surv1,germ1, germ8,surv7) AS (
SELECT species,"speciesID",species_code,
COUNT(DISTINCT s1.record_id),
SUM(
  CASE WHEN s1.norm_value IN ('None','Few') THEN 1
  ELSE 0 END
),
SUM(
  CASE WHEN g1.norm_value IN ('Canopy','Transient') THEN 1
  ELSE 0 END
),
SUM(
  CASE WHEN g8.norm_value IN ('ND') THEN 1
  ELSE 0 END
),
SUM(
  CASE 
  WHEN s7.best < 1 THEN 1
  WHEN s7.lower < 1 THEN 1
  ELSE 0 END
)

FROM litrev.surv1 s1
FULL JOIN litrev.germ1 as g1 
  USING(species,species_code)
FULL JOIN litrev.germ8 as g8 
  USING(species,species_code)
FULL JOIN litrev.surv7 as s7 
  USING(species,species_code)
LEFT JOIN species.caps 
  ON "speciesCode_Synonym"=species_code 
WHERE species_code IS NOT NULL 
GROUP BY species,"speciesID",species_code
);
"""
cur.execute(qry)
print("%s rows updated" % cur.rowcount)

Connecting to the PostgreSQL database...
6816 rows updated


In [17]:
qry="""
UPDATE vag.va_groups 
SET persistence='{C}', 
    rationale_persistence[3]='surv1 = None OR Few',
    rationale_persistence[4]= 'germ1 = Cannopy OR Transient',
    status_persistence='{"criteria met for one category"}'
WHERE species_code IN (
    SELECT spid FROM va_rule3 
    WHERE surv1 > 0 AND (germ1 > 0)
    )
AND rationale_persistence[2]='skip';
"""
cur.execute(qry)
print("%s rows updated" % cur.rowcount)


227 rows updated


In [18]:

qry="""
UPDATE vag.va_groups 
SET persistence='{C}', 
    rationale_persistence[3]='surv1 = None OR Few',
    rationale_persistence[4]= 'germ8 ' ,
    status_persistence='{"criteria met for one category"}'
WHERE species_code IN (
    SELECT spid FROM va_rule3 
    WHERE surv1 > 0 AND (germ8 > 0)
    )
AND rationale_persistence[2]='skip';
"""
cur.execute(qry)
print("%s rows updated" % cur.rowcount)


2 rows updated


In [19]:
qry="""
UPDATE vag.va_groups 
SET persistence='{C}', 
    rationale_persistence[3]='surv1 = None OR Few',
    rationale_persistence[4]= 'surv7 <1',
    status_persistence='{"criteria met for one category"}'
WHERE species_code IN (
    SELECT spid FROM va_rule3 
    WHERE surv1 > 0 AND (surv7 > 0)
    )
AND rationale_persistence[2]='skip';
"""
cur.execute(qry)
print("%s rows updated" % cur.rowcount)


0 rows updated


In [20]:
qry="""
UPDATE vag.va_groups 
SET persistence='{G,S}', 
    rationale_persistence[3]='surv1 = None OR Few' ,
    rationale_persistence[4]='germ1, germ8, surv7 do not meet criteria',
    rationale_persistence[5]='TO DO',
    status_persistence='{"criteria met for two possible categories","outcome not resolved yet"}'
WHERE species_code IN (
    SELECT spid FROM va_rule3 
    WHERE surv1 > 0 AND (germ1 = 0 AND germ8 = 0 AND surv7 = 0)
    )
AND rationale_persistence[2]='skip';
"""
cur.execute(qry)
print("%s rows updated" % cur.rowcount)


2925 rows updated


In [21]:
qry="""
UPDATE vag.va_groups 
SET persistence='{Σ,Γ,U,V,W}', 
    rationale_persistence[3]='surv1 is not None OR Few',
    rationale_persistence[4]= 'skip',
    rationale_persistence[5]= 'skip',
    rationale_persistence[6]= 'TO DO',
    status_persistence='{"criteria not met","several possible categories","outcome not resolved yet"}'
WHERE species_code IN (
    SELECT spid FROM va_rule3 
    WHERE surv1 = 0 
    )
AND rationale_persistence[2]='skip';
"""
cur.execute(qry)
print("%s rows updated" % cur.rowcount)


1895 rows updated


In [22]:
qry = """
SELECT persistence, count(*)
FROM vag.va_groups
GROUP BY persistence;
"""
cur.execute(qry)
print(cur.fetchall())

cur.close()

if conn is not None:
    conn.commit()
    conn.close()
    print('Database connection closed.')     


[(None, 9481), (['S', 'Σ', 'G', 'Γ', 'V', 'U', 'C', 'W'], 477), (['Δ'], 240), (['D'], 436), (['C'], 227), (['D', 'Δ'], 59), (['Σ', 'Γ', 'U', 'V', 'W'], 1895), (['G', 'S'], 2917)]
Database connection closed.


## Sixth Rule (and seventh)

    6. If Germ1 = ‘Soil Persistent’ THEN 7, otherwise 8
    7. If Surv1 = ‘Half’ OR Surv3 = ‘Half’ OR Surv4 = ‘Epicormic’ OR ‘Basal’ THEN Persistence VA = Γ, otherwise Persistence VA = Σ
    8. ...


In [23]:
print('Connecting to the PostgreSQL database...')
conn = psycopg2.connect(**dbparams)
cur = conn.cursor()

##cur.execute("DROP TABLE va_rule3")
qry="""
CREATE TEMP TABLE va_rule6 (species,spid,species_code,n_records,surv1,germ1,surv4) AS (
SELECT species,"speciesID",species_code,
COUNT(DISTINCT s1.record_id),
SUM(
  CASE WHEN s1.norm_value IN ('Half') THEN 1
  ELSE 0 END
),
SUM(
  CASE WHEN g1.norm_value IN ('Soil-persistent') THEN 1
  ELSE 0 END
),
SUM(
  CASE WHEN s4.norm_value IN ('Epicormic','Basal') THEN 1
  ELSE 0 END
)
FROM litrev.surv1 s1
LEFT JOIN species.caps 
  ON "speciesCode_Synonym"=s1.species_code 
LEFT JOIN litrev.germ1 as g1 
  USING(species,species_code)
LEFT JOIN litrev.surv4 as s4 
  USING(species,species_code)
WHERE species_code IS NOT NULL 
GROUP BY species,"speciesID",species_code
);
"""
cur.execute(qry)
print("%s rows updated" % cur.rowcount)

Connecting to the PostgreSQL database...
6816 rows updated


In [24]:
qry="""
UPDATE vag.va_groups 
SET persistence='{U,V,W}', 
    rationale_persistence[6]='germ1 is not Soil persistent',
    rationale_persistence[7]= 'skip',
    status_persistence='{"criteria not met","several possible categories","outcome not resolved yet"}'
WHERE species_code IN (
    SELECT spid FROM va_rule6 
    WHERE germ1 = 0
    )
AND rationale_persistence[5]='skip';
"""
cur.execute(qry)
print("%s rows updated" % cur.rowcount)


1754 rows updated


In [25]:
qry="""
UPDATE vag.va_groups 
SET persistence='{Σ,Γ}', 
    rationale_persistence[6]='germ1 = Soil persistent',
    rationale_persistence[7]= 'TO DO',
    status_persistence='{"criteria met for two possible categories","outcome not resolved yet"}'
    
WHERE species_code IN (
    SELECT spid FROM va_rule6 
    WHERE germ1 > 0
    )
AND rationale_persistence[5]='skip';
"""
cur.execute(qry)
print("%s rows updated" % cur.rowcount)


147 rows updated


In [26]:
qry="""
UPDATE vag.va_groups 
SET persistence='{Γ}', 
    rationale_persistence[6]='germ1 = Soil persistent',
    rationale_persistence[7]= 'surv1 = Half',
     status_persistence='{"criteria met for one category"}'
WHERE species_code IN (
    SELECT spid FROM va_rule6 
    WHERE germ1 > 0 AND surv1 > 0
    )
AND rationale_persistence[5]='skip';
"""
cur.execute(qry)
print("%s rows updated" % cur.rowcount)


12 rows updated


In [27]:
qry="""
UPDATE vag.va_groups 
SET persistence='{Γ}', 
    rationale_persistence[6]='germ1 = Soil persistent',
    rationale_persistence[7]= 'surv4 = Epicormic OR basal',
     status_persistence='{"criteria met for one category"}'
WHERE species_code IN (
    SELECT spid FROM va_rule6 
    WHERE germ1 > 0 AND surv4 > 0
    )
AND rationale_persistence[5]='skip';
"""
cur.execute(qry)
print("%s rows updated" % cur.rowcount)


49 rows updated


In [28]:
qry="""
UPDATE vag.va_groups 
SET persistence='{Σ}', 
    rationale_persistence[6]='germ1 = Soil persistent',
    rationale_persistence[7]= 'surv4 and surv1 do not meet criteria',
     status_persistence='{"criteria met for one category"}'
WHERE species_code IN (
    SELECT spid FROM va_rule6 
    WHERE germ1 > 0 AND surv4 = 0 AND surv1 = 0
    )
AND rationale_persistence[5]='skip';
"""
cur.execute(qry)
print("%s rows updated" % cur.rowcount)


95 rows updated


In [29]:
qry = """
SELECT persistence,  count(*)
FROM vag.va_groups
GROUP BY persistence;
"""
cur.execute(qry)

res=cur.fetchall()

print(res)
cur.close()

if conn is not None:
    conn.commit()
    conn.close()
    print('Database connection closed.')     


[(None, 9481), (['S', 'Σ', 'G', 'Γ', 'V', 'U', 'C', 'W'], 477), (['Δ'], 240), (['D'], 436), (['C'], 227), (['U', 'V', 'W'], 1748), (['Γ'], 52), (['D', 'Δ'], 59), (['Σ'], 95), (['G', 'S'], 2917)]
Database connection closed.


## Eight Rule 
    6. If Germ1 = ‘Soil Persistent’ THEN 7, otherwise 8
    7. ...
    8. If Surv4 = ‘Apical’ OR ‘Tuber’ OR ‘Tussock’ OR ‘Long rhizome’ OR ‘Short rhizome’ OR ‘Stolon’ OR Repr3a ≤1 THEN Persistence VA = U, otherwise 9
    9. If Surv1 = ‘Half’ OR Surv3 = ‘Half’ OR Surv4 = ‘Epicormic’ OR ‘Basal’ THEN Persistence VA = W, otherwise Persistence VA = V


In [30]:
print('Connecting to the PostgreSQL database...')
conn = psycopg2.connect(**dbparams)
cur = conn.cursor()

##cur.execute("DROP TABLE va_rule3")
qry="""
CREATE TEMP TABLE va_rule8 (species,spid,species_code,n_records,surv4,repr3a) AS (
SELECT species,"speciesID",species_code,
COUNT(DISTINCT s4.record_id),
SUM(
  CASE WHEN s4.norm_value IN ('Apical','Tuber','Tussock','Long rhizome or root sucker','Short rhizome','Stolon') THEN 1
  ELSE 0 END
),
SUM(
  CASE WHEN r3.best <= 1 THEN 1
      WHEN r3.lower <= 1 THEN 1
  ELSE 0 END
)
FROM litrev.surv4 s4
LEFT JOIN species.caps 
  ON "speciesCode_Synonym"=s4.species_code 
LEFT JOIN litrev.repr3a as r3 
  USING(species,species_code)
WHERE species_code IS NOT NULL 
GROUP BY species,"speciesID",species_code
);
"""
cur.execute(qry)
print("%s rows updated" % cur.rowcount)

Connecting to the PostgreSQL database...
1257 rows updated


In [31]:
qry="""
UPDATE vag.va_groups 
SET persistence='{U}', 
    rationale_persistence[8]= 'surv4  meet criteria',
    status_persistence='{"criteria met for one category"}'
WHERE species_code IN (
    SELECT spid FROM va_rule8 
    WHERE surv4 > 0 
    )
AND rationale_persistence[7]='skip';
"""
cur.execute(qry)
print("%s rows updated" % cur.rowcount)


148 rows updated


In [32]:
qry="""
UPDATE vag.va_groups 
SET persistence='{U}', 
    rationale_persistence[8]= 'repr3a  meet criteria',
    status_persistence='{"criteria met for one category"}'
WHERE species_code IN (
    SELECT spid FROM va_rule8 
    WHERE repr3a > 0 
    )
AND rationale_persistence[7]='skip';
"""
cur.execute(qry)
print("%s rows updated" % cur.rowcount)


69 rows updated


In [33]:
qry="""
UPDATE vag.va_groups 
SET persistence='{V,W}', 
    rationale_persistence[8]= 'surv4, repr3a do not meet criteria',
    rationale_persistence[9]= 'TO DO',
     status_persistence='{"criteria met for two possible categories","outcome not resolved yet"}'
WHERE species_code IN (
    SELECT spid FROM va_rule8 
    WHERE surv4 =0 AND repr3a = 0
    )
AND rationale_persistence[7]='skip';
"""
cur.execute(qry)
print("%s rows updated" % cur.rowcount)


131 rows updated


In [34]:
qry = """
SELECT persistence, count(*)
FROM vag.va_groups
GROUP BY persistence;
"""
cur.execute(qry)

res=cur.fetchall()

print(res)
cur.close()

if conn is not None:
    conn.commit()
    conn.close()
    print('Database connection closed.')     


[(None, 9481), (['S', 'Σ', 'G', 'Γ', 'V', 'U', 'C', 'W'], 477), (['Δ'], 240), (['V', 'W'], 131), (['U'], 171), (['D'], 436), (['C'], 227), (['U', 'V', 'W'], 1446), (['Γ'], 52), (['D', 'Δ'], 59), (['Σ'], 95), (['G', 'S'], 2917)]
Database connection closed.


## Ninth Rule 
    8. If Surv4 = ‘Apical’ OR ‘Tuber’ OR ‘Tussock’ OR ‘Long rhizome’ OR ‘Short rhizome’ OR ‘Stolon’ OR Repr3a ≤1 THEN Persistence VA = U, otherwise 9
    9. If Surv1 = ‘Half’ OR Surv3 = ‘Half’ OR Surv4 = ‘Epicormic’ OR ‘Basal’ THEN Persistence VA = W, otherwise Persistence VA = V


In [35]:
print('Connecting to the PostgreSQL database...')
conn = psycopg2.connect(**dbparams)
cur = conn.cursor()

##cur.execute("DROP TABLE va_rule3")
qry="""
CREATE TEMP TABLE va_rule9 (species,spid,species_code,n_records,surv4,surv1) AS (
SELECT species,"speciesID",species_code,
COUNT(DISTINCT s4.record_id),
SUM(
  CASE WHEN s4.norm_value IN ('Epicormic','Basal') THEN 1
  ELSE 0 END
),
SUM(
  CASE WHEN s1.norm_value IN ('Half') THEN 1
  ELSE 0 END
)
FROM litrev.surv4 s4
LEFT JOIN species.caps 
  ON "speciesCode_Synonym"=s4.species_code 
LEFT JOIN litrev.surv1 as s1 
  USING(species,species_code)
WHERE species_code IS NOT NULL 
GROUP BY species,"speciesID",species_code
);
"""
cur.execute(qry)
print("%s rows updated" % cur.rowcount)

Connecting to the PostgreSQL database...
1257 rows updated


In [36]:
qry="""
UPDATE vag.va_groups 
SET persistence='{W}', 
    rationale_persistence[9]= 'Surv4 = Epicormic OR Basal',
    status_persistence='{"criteria met for one category"}'
WHERE species_code IN (
    SELECT spid FROM va_rule9 
    WHERE surv4 > 0
    )
AND rationale_persistence[9]='TO DO';
"""
cur.execute(qry)
print("%s rows updated" % cur.rowcount)

108 rows updated


In [37]:
qry="""
UPDATE vag.va_groups 
SET persistence='{W}', 
    rationale_persistence[9]= 'Surv1 = Half',
    status_persistence='{"criteria met for one category"}'
WHERE species_code IN (
    SELECT spid FROM va_rule9 
    WHERE surv1 > 0
    )
AND rationale_persistence[9]='TO DO';
"""
cur.execute(qry)
print("%s rows updated" % cur.rowcount)

0 rows updated


In [38]:
qry="""
UPDATE vag.va_groups 
SET persistence='{V}', 
    rationale_persistence[9]= 'Surv1, Surv4 do not meet criteria',
    status_persistence='{"criteria met for one category"}'
WHERE species_code IN (
    SELECT spid FROM va_rule9 
    WHERE surv1 = 0 and surv4 = 0
    )
AND rationale_persistence[9]='TO DO';
"""
cur.execute(qry)
print("%s rows updated" % cur.rowcount)

23 rows updated


In [39]:
qry = """
SELECT persistence, count(*)
FROM vag.va_groups
GROUP BY persistence;
"""
cur.execute(qry)

res=cur.fetchall()

print(res)
cur.close()

if conn is not None:
    conn.commit()
    conn.close()
    print('Database connection closed.')     


[(None, 9481), (['S', 'Σ', 'G', 'Γ', 'V', 'U', 'C', 'W'], 477), (['Δ'], 240), (['U'], 171), (['V'], 23), (['D'], 436), (['C'], 227), (['U', 'V', 'W'], 1446), (['Γ'], 52), (['W'], 108), (['D', 'Δ'], 59), (['Σ'], 95), (['G', 'S'], 2917)]
Database connection closed.


# Test of pandas dataframe for summarizing information

In [40]:
print('Connecting to the PostgreSQL database...')
conn = psycopg2.connect(**dbparams)
cur = conn.cursor()

qry = "select unnest(persistence) p, unnest(establishment) as e, count(distinct species_code) from vag.va_groups group by p, e ORDER BY p,e"
cur.execute(qry)
res=cur.fetchall()

print(res)
cur.close()

if conn is not None:
    conn.close()
    print('Database connection closed.')     


Connecting to the PostgreSQL database...
[('C', 'I', 74), ('C', 'R', 15), ('C', 'T', 35), ('C', None, 580), ('D', 'I', 52), ('D', 'R', 4), ('D', 'T', 25), ('D', None, 414), ('G', 'I', 300), ('G', 'I/T', 2), ('G', 'R', 1), ('G', 'T', 97), ('G', None, 2994), ('S', 'T', 24), ('S', None, 3370), ('U', 'I', 29), ('U', 'I/T', 1), ('U', 'R', 3), ('U', 'T', 34), ('U', None, 2027), ('V', 'I', 8), ('V', 'T', 1), ('V', None, 1937), ('W', 'I', 14), ('W', 'T', 25), ('W', None, 1992), ('Γ', 'I', 21), ('Γ', 'T', 11), ('Γ', None, 497), ('Δ', 'I', 11), ('Δ', 'R', 2), ('Δ', 'T', 5), ('Δ', None, 281), ('Σ', 'I', 28), ('Σ', 'T', 5), ('Σ', None, 539), (None, 'I', 97), (None, 'R', 9), (None, 'T', 56)]
Database connection closed.


In [41]:
from pandas import DataFrame
df = DataFrame(res)
df=df.rename(columns={0:"Persistence",1:"Establishment",2:"Records"})
df['Records'] = df['Records'].astype(int)
tbl=df.pivot(index='Persistence', columns='Establishment', values='Records')
tbl.fillna(0,inplace=True)

In [42]:
tbl.astype(int)

Establishment,NaN,I,I/T,R,T
Persistence,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
,0,97,0,9,56
C,580,74,0,15,35
D,414,52,0,4,25
G,2994,300,2,1,97
S,3370,0,0,0,24
U,2027,29,1,3,34
V,1937,8,0,0,1
W,1992,14,0,0,25
Γ,497,21,0,0,11
Δ,281,11,0,2,5


In [43]:
tbl.astype(int).values.tolist()

[[0, 97, 0, 9, 56],
 [580, 74, 0, 15, 35],
 [414, 52, 0, 4, 25],
 [2994, 300, 2, 1, 97],
 [3370, 0, 0, 0, 24],
 [2027, 29, 1, 3, 34],
 [1937, 8, 0, 0, 1],
 [1992, 14, 0, 0, 25],
 [497, 21, 0, 0, 11],
 [281, 11, 0, 2, 5],
 [539, 28, 0, 0, 5]]

In [44]:
tbl.columns.values

array([nan, 'I', 'I/T', 'R', 'T'], dtype=object)

In [45]:
tbl.index.values

array([nan, 'C', 'D', 'G', 'S', 'U', 'V', 'W', 'Γ', 'Δ', 'Σ'],
      dtype=object)