### Import libraries

In [7]:
import prestodb  
import datetime
import pandas as pd

def makeconn(hstb,uname,pswd):
    return prestodb.dbapi.connect(
    host=hstb,
    port=80,
    user=uname.strip(),
    catalog='geospock',
    schema='default',
    http_scheme='http'
    )
    
def runquery(cnn,cls,querytext):
    sttime = datetime.datetime.now()
    print("Starting query at: "+str(sttime)+".....")
    cur = cnn.cursor()
    cur.execute(querytext)
    rows = cur.fetchall()
    cur.close()
    endtime = datetime.datetime.now()
    print("Finished query at: "+str(endtime)+".")
    tdelt=(endtime-sttime).seconds
    print("Query duration: "+str(tdelt)+" seconds.")
    print("Rows returned: "+str(len(rows)))
    return pd.DataFrame(rows,columns=cls)

### Create connection object

In [3]:
# Remember to change GeoSpockDBDiscoveryHostname for its respective value
# e.g disco-Prest-26Z2LD7S11IPM-2177915702.ap-southeast-1.elb.amazonaws.com
# username and password are to be left with these default values
host           = """GeoSpockDBDiscoveryHostname""" 
username       = 'username'
password       = ''
#this command returns the connection object but there's no visible printed output
newconn        = makeconn(host, username, password)

### Explore datasets

In [8]:
QUERY_tables = """
SHOW TABLES 
FROM geospock.default
"""
cls     = ['tables']
tables = runquery(newconn,cls,QUERY_tables)
tables

Starting query at: 2020-10-10 15:09:37.007294.....
Finished query at: 2020-10-10 15:09:58.034397.
Query duration: 21 seconds.
Rows returned: 17


Unnamed: 0,tables
0,sg_air_temperature
1,sg_erp1_camera_location
2,sg_erp2_synthesis
3,sg_planning_area_census
4,sg_pm25
5,sg_psi
6,sg_rainfall
7,sg_region_census
8,sg_relative_humidity
9,sg_road_segments


In [10]:
QUERY_describe = """
DESCRIBE geospock.default.sg_erp2_synthesis
"""
cls           = ['column', 'type', 'extra', 'comment']
describe_erp2 = runquery(newconn,cls,QUERY_describe)
describe_erp2

Starting query at: 2020-10-10 15:26:51.920573.....
Finished query at: 2020-10-10 15:26:59.789220.
Query duration: 7 seconds.
Rows returned: 5


Unnamed: 0,column,type,extra,comment
0,longitude,double,,LONGITUDE index
1,latitude,double,,LATITUDE index
2,erp_vehicle_id,varchar,,Nullable
3,timestamp,timestamp,,TIME index
4,erp_vehicle_types,varchar,,Nullable


In [12]:
QUERY_tables = """
SELECT *
FROM geospock.default.sg_erp2_synthesis
LIMIT 10
"""
cls         = ['longitude', 'latitiude', 'vehicle_id', 'timestamp', 'vehicle_type']
sample_erp2 = runquery(newconn,cls,QUERY_tables)
sample_erp2

Starting query at: 2020-10-10 15:28:18.792122.....
Finished query at: 2020-10-10 15:28:25.510147.
Query duration: 6 seconds.
Rows returned: 10


Unnamed: 0,longitude,latitiude,vehicle_id,timestamp,vehicle_type
0,103.780255,1.362286,1001144435f236ea7f82ff55c569760,2019-01-05 21:56:40.085,HGV
1,103.780071,1.362477,1001144435f236ea7f82ff55c569760,2019-01-05 21:56:41.085,HGV
2,103.780042,1.36266,1001144435f236ea7f82ff55c569760,2019-01-05 21:56:42.085,HGV
3,103.779983,1.362844,1001144435f236ea7f82ff55c569760,2019-01-05 21:56:43.085,HGV
4,103.779893,1.363053,1001144435f236ea7f82ff55c569760,2019-01-05 21:56:44.085,HGV
5,103.779944,1.363245,1001144435f236ea7f82ff55c569760,2019-01-05 21:56:45.085,HGV
6,103.779783,1.363428,1001144435f236ea7f82ff55c569760,2019-01-05 21:56:46.085,HGV
7,103.779793,1.363646,1001144435f236ea7f82ff55c569760,2019-01-05 21:56:47.085,HGV
8,103.779806,1.363845,1001144435f236ea7f82ff55c569760,2019-01-05 21:56:48.085,HGV
9,103.779731,1.364045,1001144435f236ea7f82ff55c569760,2019-01-05 21:56:49.085,HGV
