In [1]:
import psycopg2
from psycopg2.extensions import AsIs
from uuid import uuid4
import sqlite3, pdb
import pandas as pd

In [2]:
def createTeam1SamplingFeatures():
    """Create the ``team1_sampling_features`` staging table in PostgreSQL.

    The function connects to the ``ncpfast_dev`` database, creates the table,
    adds a ``featuregeometry`` column of type ``geometry(POINT,4326)``, builds
    an SP-GiST index on that column, and finally runs ``ANALYZE`` on the table.

    Returns:
        None. Any exception is printed rather than re-raised, so callers must
        inspect the output to detect a failure (for example when the table
        already exists).
    """
    # Bind both handles up front so the ``finally`` clause never raises a
    # NameError when ``psycopg2.connect`` itself fails.
    conn = None
    cur = None
    try:
        # NOTE(review): the connection credentials are hardcoded in this
        # notebook. Move them to an environment variable or a secrets store
        # and rotate the password.
        conn = psycopg2.connect("dbname='ncpfast_dev' user='django_dev' host='ncpfast-db.edc.renci.org' port='5432' password='MKeeJn9zEqs2T9wm4V3UX'")
        cur = conn.cursor()

        # psycopg2 opens a transaction implicitly on the first statement, so
        # no explicit BEGIN is issued here.
        cur.execute("""SET CLIENT_ENCODING TO UTF8""")
        cur.execute("""SET STANDARD_CONFORMING_STRINGS TO ON""")

        # Create the staging table.
        # samplingfeatureuuid uuid DEFAULT uuid_generate_v4 (),
        cur.execute("""CREATE TABLE team1_sampling_features (
                samplingfeatureid SERIAL PRIMARY KEY,
                samplingfeatureuuid uuid NOT NULL,
                samplingfeaturecode VARCHAR (50) NOT NULL,
                samplingfeaturename VARCHAR (255) NOT NULL,
                samplingfeaturedescription VARCHAR (500) NOT NULL,
                elevation_m double precision,
                samplingfeaturegeotypecv VARCHAR (255),
                samplingfeaturetypecv VARCHAR (255) NOT NULL,
                sitetypecv VARCHAR (255) NOT NULL,
                samplingfeatureexternalidentifier VARCHAR (255) NOT NULL,
                propertyvalue VARCHAR (255),
                longitude NUMERIC NOT NULL,
                latitude NUMERIC NOT NULL
            )""")
        # Create geometry column
        cur.execute("""ALTER TABLE team1_sampling_features ADD COLUMN featuregeometry geometry(POINT,4326)""")
        # Index geometry using SPGIST.
        cur.execute("""CREATE INDEX index_featuregeometry 
                       ON team1_sampling_features 
                       USING SPGIST ( featuregeometry )""")
        conn.commit()

        # ANALYZE starts a new implicit transaction; without the commit below
        # it would be discarded when the connection is closed.
        cur.execute("""ANALYZE team1_sampling_features""")
        conn.commit()

    except Exception as error:
        # psycopg2.DatabaseError is a subclass of Exception, so this single
        # clause covers database errors as well.
        print(error)
    finally:
        if cur is not None:
            cur.close()
        if conn is not None:
            conn.close()

In [3]:
#createTeam1SamplingFeatures()

In [4]:
def selectRoundOneFeatures(db_path='../RoundOneDB/RoundOneDB_final.db'):
    """Read the distinct Round One station/source rows from a SQLite database.

    The query joins ``quant_summary`` to ``geolocation`` on ``EPA_srcID`` and
    orders the result by ``epa_srcid``.

    Args:
        db_path: Path of the SQLite file to read. Defaults to the location
            used by the original notebook.

    Returns:
        A ``pandas.DataFrame`` with columns ``station_id``, ``station_name``,
        ``src_name``, ``src_locati``, ``lon``, ``lat``, ``src_type``,
        ``epa_srcid`` and ``depth`` (all ``object`` dtype, so SQL NULLs stay
        ``None``), or ``None`` when a SQLite error occurred (the error is
        printed).
    """
    # NOTE(review): the query returns g.lat before g.lon, while position 4 is
    # labelled 'lon' and position 5 is labelled 'lat'. The original mapping is
    # preserved here; confirm whether the source columns or the labels are the
    # ones that are swapped.
    columns = ['station_id', 'station_name', 'src_name', 'src_locati',
               'lon', 'lat', 'src_type', 'epa_srcid', 'depth']
    conn = None
    try:
        conn = sqlite3.connect(db_path)
        cur = conn.cursor()
        try:
            cur.execute("""SELECT DISTINCT q.station_id, q.station_name, g.src_name, g.src_locati, 
                       g.lat,g.lon, g.src_type, q.epa_srcid, g.depth
                       FROM quant_summary q 
                       INNER JOIN geolocation g ON g.EPA_srcID = q.EPA_srcID
                       ORDER BY q.epa_srcid""")
            rows = cur.fetchall()
        finally:
            cur.close()

        # Build the frame in one step: DataFrame.append was removed in
        # pandas 2.0 and growing a frame row by row is quadratic.
        # dtype=object keeps None (instead of NaN) for NULL values.
        return pd.DataFrame(rows, columns=columns, dtype=object)
    except sqlite3.Error as e:
        print(e)
        return None
    finally:
        # Always release the connection, including on the error path.
        if conn is not None:
            conn.close()

In [5]:
#df = selectRoundOneFeatures()
#df.head()

In [6]:
def insertTeam1Features(df):
    """Load Round One features into ``team1_sampling_features``.

    Columns of ``df`` are read by position: 0 station_id, 1 station_name,
    2 src_name, 3 src_locati, 4 longitude, 5 latitude, 6 src_type,
    7 epa_srcid, 8 depth. Each row becomes one record with a freshly
    generated UUID. After all rows are inserted, ``featuregeometry`` is
    computed from the stored longitude/latitude for the whole table, the
    transaction is committed, and the table is analyzed.

    Args:
        df: ``pandas.DataFrame`` whose first nine columns follow the layout
            above.

    Returns:
        None. Any exception is printed rather than re-raised; because all
        rows share one transaction, nothing is persisted when a row fails.
    """
    # Bind both handles up front so the ``finally`` clause never raises a
    # NameError when ``psycopg2.connect`` itself fails.
    conn = None
    cur = None
    try:
        # NOTE(review): the connection credentials are hardcoded in this
        # notebook. Move them to an environment variable or a secrets store
        # and rotate the password.
        conn = psycopg2.connect("dbname='ncpfast_dev' user='django_dev' host='ncpfast-db.edc.renci.org' port='5432' password='MKeeJn9zEqs2T9wm4V3UX'")
        cur = conn.cursor()

        # psycopg2 opens a transaction implicitly on the first statement, so
        # no explicit BEGIN is issued here.
        cur.execute("""SET CLIENT_ENCODING TO UTF8""")
        cur.execute("""SET STANDARD_CONFORMING_STRINGS TO ON""")

        insert_sql = """INSERT INTO team1_sampling_features(samplingfeatureuuid, samplingfeaturecode, 
                                        samplingfeaturename, samplingfeaturedescription, samplingfeaturetypecv, 
                                        samplingfeaturegeotypecv, longitude, latitude, sitetypecv, 
                                        samplingfeatureexternalidentifier, propertyvalue)
                            VALUES (%(vsamplingfeatureuuid)s,%(vsamplingfeaturecode)s,%(vsamplingfeaturename)s, 
                                    %(vsamplingfeaturedescription)s,'waterQualityStation','Point',
                                    %(vlongitude)s,%(vlatitude)s,%(vsitetypecv)s,
                                    %(vsamplingfeatureexternalidentifier)s,%(vpropertyvalue)s)"""
        site_types = {'s': 'Stream', 'g': 'Well'}

        # itertuples yields plain positional tuples, avoiding the deprecated
        # integer-position lookup on a label-indexed Series.
        for values in df.itertuples(index=False, name=None):
            (station_id, station_name, src_name, src_locati,
             longitude, latitude, src_type, epa_srcid, depth) = values[:9]

            # src_name + src_locati = samplingfeaturedescription
            if pd.isna(src_locati):
                samplingfeaturedescription = src_name
            else:
                samplingfeaturedescription = src_name + '; ' + src_locati

            # src_type = sitetypecv: s=Stream, g=Well; anything else is
            # stored with a ' CHECK' suffix so it can be reviewed later.
            sitetypecv = site_types.get(src_type.lower(), src_type + ' CHECK')

            # A missing depth (None or NaN) is stored as NULL.
            propertyvalue = None if pd.isna(depth) else depth

            # All values are bound as ordinary parameters so the driver
            # quotes them; AsIs would splice text into the SQL unquoted.
            cur.execute(insert_sql,
                        {'vsamplingfeatureuuid': str(uuid4()),
                         'vsamplingfeaturecode': station_id,
                         'vsamplingfeaturename': station_name,
                         'vsamplingfeaturedescription': samplingfeaturedescription,
                         'vlongitude': longitude,
                         'vlatitude': latitude,
                         'vsitetypecv': sitetypecv,
                         'vsamplingfeatureexternalidentifier': epa_srcid,
                         'vpropertyvalue': propertyvalue})

        # Create geometry data from the lon, lat columns. The statement
        # updates every row of the table, so it runs once after the loop
        # instead of once per inserted row.
        cur.execute("""UPDATE team1_sampling_features 
                           SET featuregeometry = ST_SetSRID(ST_MakePoint(longitude,latitude),4326)""")
        conn.commit()

        # ANALYZE starts a new implicit transaction; without the commit below
        # it would be discarded when the connection is closed.
        cur.execute("""ANALYZE team1_sampling_features""")
        conn.commit()

    except Exception as error:
        # psycopg2.DatabaseError is a subclass of Exception, so this single
        # clause covers database errors as well.
        print(error)
    finally:
        if cur is not None:
            cur.close()
        if conn is not None:
            conn.close()

In [7]:
#insertTeam1Features(df)

In [12]:
def insertFeatures(commit=False):
    """Copy ``team1_sampling_features`` rows into the target feature tables.

    For every staging row this inserts one record into ``sampling_features``
    (capturing the generated ``samplingfeatureid``), one into ``sites`` and
    one into ``sampling_feature_external_identifiers``. When the row has a
    ``propertyvalue`` it also inserts into
    ``sampling_feature_extension_property_values`` and ``specimens``.
    The staging column ``elevation_m`` is not copied.

    Args:
        commit: When true, the inserts are committed and ``sampling_features``
            is analyzed. The default ``False`` matches the original cell, in
            which the COMMIT was commented out: the statements are executed
            but discarded when the connection closes.

    Returns:
        None. Any exception is printed rather than re-raised.
    """
    # Bind both handles up front so the ``finally`` clause never raises a
    # NameError when ``psycopg2.connect`` itself fails.
    conn = None
    cur = None
    try:
        # NOTE(review): the connection credentials are hardcoded in this
        # notebook. Move them to an environment variable or a secrets store
        # and rotate the password.
        conn = psycopg2.connect("dbname='ncpfast_dev' user='django_dev' host='ncpfast-db.edc.renci.org' port='5432' password='MKeeJn9zEqs2T9wm4V3UX'")
        cur = conn.cursor()

        # psycopg2 opens a transaction implicitly on the first statement, so
        # no explicit BEGIN is issued here.
        cur.execute("""SET CLIENT_ENCODING TO UTF8""")
        cur.execute("""SET STANDARD_CONFORMING_STRINGS TO ON""")

        # Name the columns explicitly so the code does not depend on the
        # physical column order of the staging table.
        cur.execute("""SELECT samplingfeatureuuid, samplingfeaturecode, samplingfeaturename,
                              samplingfeaturedescription, samplingfeaturegeotypecv,
                              samplingfeaturetypecv, sitetypecv,
                              samplingfeatureexternalidentifier, propertyvalue,
                              longitude, latitude, featuregeometry
                       FROM team1_sampling_features
                       ORDER BY samplingfeatureid""")
        rows = cur.fetchall()

        for (samplingfeatureuuid, samplingfeaturecode, samplingfeaturename,
             samplingfeaturedescription, samplingfeaturegeotypecv,
             samplingfeaturetypecv, sitetypecv,
             samplingfeatureexternalidentifier, propertyvalue,
             longitude, latitude, featuregeometry) in rows:

            cur.execute("""INSERT INTO sampling_features (samplingfeatureuuid, samplingfeaturecode, 
                                       samplingfeaturename, samplingfeaturedescription, featuregeometry,
                                       samplingfeaturegeotypecv, samplingfeaturetypecv)
                            VALUES (%(vsamplingfeatureuuid)s, %(vsamplingfeaturecode)s, 
                                    %(vsamplingfeaturename)s, %(vsamplingfeaturedescription)s, 
                                    %(vfeaturegeometry)s, %(vsamplingfeaturegeotypecv)s, 
                                    %(vsamplingfeaturetypecv)s)
                            RETURNING samplingfeatureid""",
                        {'vsamplingfeatureuuid': samplingfeatureuuid,
                         'vsamplingfeaturecode': samplingfeaturecode,
                         'vsamplingfeaturename': samplingfeaturename,
                         'vsamplingfeaturedescription': samplingfeaturedescription,
                         'vfeaturegeometry': featuregeometry,
                         'vsamplingfeaturegeotypecv': samplingfeaturegeotypecv,
                         'vsamplingfeaturetypecv': samplingfeaturetypecv})
            # Key generated by the target table; used by all child inserts.
            samplingfeatureid = cur.fetchone()[0]

            # NOTE(review): 10001 is a fixed spatialreferenceid; its meaning
            # is not visible in this notebook - confirm against the target DB.
            cur.execute("""INSERT INTO sites (samplingfeatureid, longitude, latitude, 
                                              sitetypecv,spatialreferenceid)
                           VALUES (%(vsamplingfeatureid)s, %(vlongitude)s, %(vlatitude)s, 
                                   %(vsitetypecv)s,10001)""",
                        {'vsamplingfeatureid': samplingfeatureid,
                         'vlongitude': longitude,
                         'vlatitude': latitude,
                         'vsitetypecv': sitetypecv})

            # NOTE(review): 'TBD' and 99999 look like placeholders for the
            # identifier URI and external identifier system - confirm.
            cur.execute("""INSERT INTO sampling_feature_external_identifiers 
                                          (samplingfeatureexternalidentifier,
                                           samplingfeatureexternalidentifieruri,
                                           externalidentifiersystemid,
                                           samplingfeatureid)
                           VALUES (%(vsamplingfeatureexternalidentifier)s, 'TBD', 99999,
                                   %(vsamplingfeatureid)s)""",
                        {'vsamplingfeatureexternalidentifier': samplingfeatureexternalidentifier,
                         'vsamplingfeatureid': samplingfeatureid})

            if propertyvalue is None:
                # NOTE(review): as in the original control flow, a feature
                # without a property value also gets no specimens row.
                # Confirm that skipping the specimen insert is intended.
                continue

            # NOTE(review): 10015 is a fixed propertyid; its meaning is not
            # visible in this notebook - confirm against the target DB.
            cur.execute("""INSERT INTO sampling_feature_extension_property_values 
                                          (propertyvalue,propertyid,samplingfeatureid)
                           VALUES (%(vpropertyvalue)s, 10015, %(vsamplingfeatureid)s)""",
                        {'vpropertyvalue': propertyvalue,
                         'vsamplingfeatureid': samplingfeatureid})

            cur.execute("""INSERT INTO specimens (samplingfeatureid, isfieldspecimen,
                                                  specimenmediumcv, specimentypecv)
                           VALUES (%(vsamplingfeatureid)s,'t','Liquid aqueous', 'Individual sample')""",
                        {'vsamplingfeatureid': samplingfeatureid})

        if commit:
            conn.commit()
            # ANALYZE runs in a new implicit transaction and needs its own
            # commit to take effect.
            cur.execute("""ANALYZE sampling_features""")
            conn.commit()
        # Without commit the open transaction is discarded on close below.

    except Exception as error:
        # psycopg2.DatabaseError is a subclass of Exception, so this single
        # clause covers database errors as well.
        print(error)
    finally:
        if cur is not None:
            cur.close()
        if conn is not None:
            conn.close()

In [13]:
#insertFeatures()

11606 329 -81.1341105203952 36.5123167818752
11736 500 -81.4963763142934 36.4085678513581
11747 255 -81.4576713367255 36.4127268470303
11748 605 -81.5103772956702 36.4970008158182
11886 230 -81.8682131191255 36.162781981536
11894 175 -81.9893390473742 36.1625129906483
12111 505 -82.3346759011257 35.6282902400988
12369 400 -82.2972349201922 35.6548802263792
14068 205 -83.8029431008024 35.0285896005142
15144 325 -83.852698034692 35.3677844619655
17170 305 -82.1572590052473 35.6411692217204
17612 374 -82.6616336815422 35.8617341663973
17615 440 -82.6701676768075 35.8559731693505
17616 375 -82.8401475712148 35.9057561612564
18600 240 -82.0442440458854 35.8838751115774
18601 None -82.1558239643851 36.017686064078
18648 405 -82.2139140121586 35.2618933866837
18765 645 -80.6752358805351 35.7308220788862
19326 400 -82.2291619854926 35.4282693167467
19637 327 -80.3277751397672 35.2352482668688
20844 602 -80.9477686635127 36.2305258862715
31723 396 -77.1090679951322 36.227395626322
31734 390 -76