In [1]:
import openpyxl
from pathlib import Path
import os
from datetime import datetime
from configparser import ConfigParser
import psycopg2
from psycopg2.extensions import AsIs
import pyprojroot
import re
import pandas as pd

Load helper functions from the `lib` folder: one to read the database credentials, and others for batch inserts and updates.

In [2]:
from lib.parseparams import read_dbparams
from lib.firevegdb import batch_upsert
from lib.firevegdb import validate_and_update_site_records

import lib.fireveg as fv

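Define the repository root directory (the folder containing `.git`); paths to the workbooks and other files are built relative to it.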

In [3]:
# Locate the repository root: the directory that contains a ".git" folder.
git_dir_criterion = pyprojroot.has_dir(".git")
repodir = pyprojroot.find_root(git_dir_criterion)

Database credentials are stored in a `database.ini` file inside the `secrets` folder of the repository.

In [4]:
# Read the connection settings from the 'aws-lght-sl' section of secrets/database.ini.
dbini_path = repodir / 'secrets' / 'database.ini'
dbparams = read_dbparams(dbini_path, section='aws-lght-sl')

In [5]:
# Connect to the PostgreSQL server with the settings loaded into `dbparams`
# and open a cursor; both are closed again right after the lookup query runs.
print('Connecting to the PostgreSQL database...')
conn = psycopg2.connect(**dbparams)
cur = conn.cursor()

Connecting to the PostgreSQL database...


In [6]:
# Lookup query: species.caps entries whose scientific name matches a quadrat
# sample record that still has no species code. Double quotes keep the
# mixed-case column names intact; they need no escaping in single-quoted
# Python literals. Trailing blanks are kept so the text is unchanged.
qry = (
    '\n'
    'SELECT "speciesID","taxonID","currentScientificNameCode","scientificName",\n'
    '"speciesCode_Synonym"\n'
    'FROM species.caps\n'
    'WHERE "scientificName" IN \n'
    '(SELECT species FROM form.quadrat_samples where species_code is NULL); \n'
)

In [7]:
# Run the lookup query and keep all rows in `res`.
# The cursor and connection are released in `finally`, so a failing query
# does not leave an open session behind on the server.
try:
    cur.execute(qry)
    res = cur.fetchall()
finally:
    cur.close()
    conn.close()

In [8]:
# Column labels, in the same order as the SELECT list of the lookup query.
colnames = [
    'speciesID',
    'taxonID',
    'currentScientificNameCode',
    'scientificName',
    'speciesCode_Synonym',
]
# dtype=object keeps the values exactly as returned by the database driver.
splist = pd.DataFrame(data=res, columns=colnames, dtype=object)

In [9]:
# Re-type the taxonID column as integer; every other column stays as object.
splist = splist.assign(taxonID=pd.Series(splist["taxonID"], dtype=int))

In [10]:
# Keep only the rows whose synonym species code can be parsed as a number;
# codes that fail to parse become NaN under errors='coerce' and are dropped.
code_as_number = pd.to_numeric(splist['speciesCode_Synonym'], errors='coerce')
splist = splist[code_as_number.notnull()]
splist

Unnamed: 0,speciesID,taxonID,currentScientificNameCode,scientificName,speciesCode_Synonym
0,2399.0,2399,7121,Goodenia lunata,7121
1,2433.0,2433,8498,Eucalyptus leptophylla,8498
2,2480.0,2480,2855,Eutaxia microphylla,2855
3,2505.0,2505,1712,Vittadinia dissecta,1712
4,2538.0,2538,3721,Acacia brachybotrya,3721
...,...,...,...,...,...
155,13110.0,13110,10393,Austrostipa mollis,10393
156,13281.0,13281,10563,Adriana tomentosa var. hookeri,10563
158,15777.0,15777,11377,Xerochrysum bracteatum,11377
159,17462.0,17462,12232,Convolvulus crispifolius,12232


In [11]:
# Show the taxon id and scientific name of the row labelled 0.
item = splist.loc[0]
tuple(item[field] for field in ('taxonID', 'scientificName'))


(2399, 'Goodenia lunata')

In [12]:
# Renumber the rows 0..n-1 so the index matches the number of rows left after
# filtering. drop=True discards the old labels instead of inserting them as an
# extra "index" column, which also makes this cell safe to run more than once.
splist = splist.reset_index(drop=True)


In [13]:
# Open a fresh connection and cursor for the update statements below
# (the ones used for the lookup query were closed earlier).
print('Connecting to the PostgreSQL database...')
conn = psycopg2.connect(**dbparams)
cur = conn.cursor()

Connecting to the PostgreSQL database...


In [14]:
# Fill in the missing species code of quadrat sample records, one species at a
# time, and count how many records were changed in total.
#
# Both values are passed as bound parameters, so psycopg2 quotes and escapes
# them itself (nothing is pasted into the SQL text verbatim). PostgreSQL
# coerces the quoted code literal to the type of the species_code column.
qrystr="""UPDATE form.quadrat_samples SET species_code=%s WHERE species=%s AND species_code is NULL; """
updated_rows=0
for species_code, species_name in zip(splist['speciesCode_Synonym'], splist['scientificName']):
    # execute() performs the parameter substitution; no separate mogrify() step is needed
    cur.execute(qrystr, (species_code, species_name))
    # rowcount is the number of rows touched by the last statement
    if cur.rowcount > 0:
        updated_rows += cur.rowcount


In [15]:
# Make the updates permanent and report how many records were changed.
# The cursor and connection are closed in `finally`, so they are released
# even if the commit itself fails.
try:
    conn.commit()
    print("%s rows updated" % (updated_rows))
finally:
    cur.close()
    conn.close()
    print('Database connection closed.')

8408 rows updated
Database connection closed.
