# SQL CARTO QUERY FUNCTION
# written for PUI2016_Python2

##### This notebook is set up to link as a default to the fb55 account.  To turn in the homework use the same account you used in the lab (hvt201)  and query the database that you were querying in class (citibike_feb_2015)

In [2]:
try:
    import urllib2 as ulib
except ImportError:
    import urllib3 as ulib

try:
    from StringIO import BytesIO as io
except ImportError:
    from io import BytesIO as io

try:
    from urllib import urlencode as urlencode
except ImportError:
    from urllib.parse import urlencode as urlencode
    
try:
    from urllib import urlopen as urlopen
except ImportError:
    from urllib.request import urlopen as urlopen
    
try:
    from urllib2 import HTTPError as HTTPError
except ImportError:
    from urllib.error import HTTPError as HTTPError

import ast
import pandas as pd
import geocoder

In [3]:
API_URL = 'https://fb55.carto.com:443/api/v2/sql?q='

def queryCartoDB(query, formatting = 'CSV', source = API_URL):
    '''queries carto datasets from a given carto account
    Arguments: 
    query - string: a valid sql query string
    format - outlut format  OPTIONAL (default CSV)
    source - a valid sql api endpoint OPTIONAL (default carto fb55 account)
    Returns:
    the return of the sql query AS A STRING
    NOTES:
    designed for the carto API, tested only with CSV return format'''
    
    
    data = urlencode({'format': formatting, 'q': query}).encode("utf-8")
    try:
        response = urlopen(source, data)
        return response.read()
    except HTTPError as e:
        raise (ValueError('\n'.join(ast.literal_eval(e.readline())['error'])))
        
def get_data(query):
    try:
        return pd.read_csv(io(queryCartoDB(query)), sep = ',')
    except ValueError as v:
        print (str(v))

## Task 1

In [4]:
## sort data by start_station_id, tripduration (only trips w/ duration <= 3 hours)

query = '''
SELECT *
FROM citibike
WHERE tripduration <= (60*60*3)
ORDER BY start_station_id, tripduration desc;
'''
table = get_data(query)
table.head(5)

Unnamed: 0,the_geom,start_station_name,the_geom_webmercator,field_1,start_station_longitude,tripduration,starttime,stoptime,start_station_id,cartodb_id,start_station_latitude,end_station_id,end_station_name,end_station_latitude,end_station_longitude,bikeid,usertype,birth_year,gender
0,,W 52 St & 11 Ave,,9115,-73.993929,2099,2015-02-03 10:28:00+00,2015-02-03 11:02:00+00,72,9054,40.767272,328,Watts St & Greenwich St,40.724055,-74.00966,18653,Subscriber,1966.0,2
1,,W 52 St & 11 Ave,,2051,-73.993929,1944,2015-02-01 12:39:00+00,2015-02-01 13:12:00+00,72,1983,40.767272,328,Watts St & Greenwich St,40.724055,-74.00966,17849,Customer,,0
2,,W 52 St & 11 Ave,,40355,-73.993929,1914,2015-02-07 09:49:00+00,2015-02-07 10:21:00+00,72,40341,40.767272,328,Watts St & Greenwich St,40.724055,-74.00966,21520,Subscriber,1966.0,2
3,,W 52 St & 11 Ave,,12996,-73.993929,1801,2015-02-04 06:32:00+00,2015-02-04 07:02:00+00,72,12938,40.767272,328,Watts St & Greenwich St,40.724055,-74.00966,15161,Subscriber,1959.0,2
4,,W 52 St & 11 Ave,,11359,-73.993929,1678,2015-02-03 18:21:00+00,2015-02-03 18:49:00+00,72,11298,40.767272,79,Franklin St & W Broadway,40.719116,-74.006667,21500,Subscriber,1964.0,1


In [5]:
## only show the top/last 10 records

query = '''
SELECT *
FROM citibike
WHERE tripduration <= (60*60*3)
ORDER BY start_station_id, tripduration desc;
'''
table = get_data(query)

In [6]:
table.head(10)

Unnamed: 0,the_geom,start_station_name,the_geom_webmercator,field_1,start_station_longitude,tripduration,starttime,stoptime,start_station_id,cartodb_id,start_station_latitude,end_station_id,end_station_name,end_station_latitude,end_station_longitude,bikeid,usertype,birth_year,gender
0,,W 52 St & 11 Ave,,9115,-73.993929,2099,2015-02-03 10:28:00+00,2015-02-03 11:02:00+00,72,9054,40.767272,328,Watts St & Greenwich St,40.724055,-74.00966,18653,Subscriber,1966.0,2
1,,W 52 St & 11 Ave,,2051,-73.993929,1944,2015-02-01 12:39:00+00,2015-02-01 13:12:00+00,72,1983,40.767272,328,Watts St & Greenwich St,40.724055,-74.00966,17849,Customer,,0
2,,W 52 St & 11 Ave,,40355,-73.993929,1914,2015-02-07 09:49:00+00,2015-02-07 10:21:00+00,72,40341,40.767272,328,Watts St & Greenwich St,40.724055,-74.00966,21520,Subscriber,1966.0,2
3,,W 52 St & 11 Ave,,12996,-73.993929,1801,2015-02-04 06:32:00+00,2015-02-04 07:02:00+00,72,12938,40.767272,328,Watts St & Greenwich St,40.724055,-74.00966,15161,Subscriber,1959.0,2
4,,W 52 St & 11 Ave,,11359,-73.993929,1678,2015-02-03 18:21:00+00,2015-02-03 18:49:00+00,72,11298,40.767272,79,Franklin St & W Broadway,40.719116,-74.006667,21500,Subscriber,1964.0,1
5,,W 52 St & 11 Ave,,13695,-73.993929,1537,2015-02-04 08:07:00+00,2015-02-04 08:33:00+00,72,13647,40.767272,487,E 20 St & FDR Drive,40.733143,-73.975739,18900,Subscriber,1977.0,2
6,,W 52 St & 11 Ave,,9802,-73.993929,1535,2015-02-03 14:23:00+00,2015-02-03 14:48:00+00,72,9742,40.767272,334,W 20 St & 7 Ave,40.742388,-73.997262,15117,Subscriber,1956.0,2
7,,W 52 St & 11 Ave,,11301,-73.993929,1518,2015-02-03 18:16:00+00,2015-02-03 18:42:00+00,72,11240,40.767272,476,E 31 St & 3 Ave,40.743943,-73.979661,19039,Subscriber,1971.0,1
8,,W 52 St & 11 Ave,,20364,-73.993929,1493,2015-02-04 19:47:00+00,2015-02-04 20:12:00+00,72,20309,40.767272,79,Franklin St & W Broadway,40.719116,-74.006667,21080,Subscriber,1964.0,1
9,,W 52 St & 11 Ave,,24990,-73.993929,1388,2015-02-05 10:34:00+00,2015-02-05 10:58:00+00,72,24953,40.767272,127,Barrow St & Hudson St,40.731724,-74.006744,19672,Subscriber,1974.0,1


In [7]:
table.tail(10)

Unnamed: 0,the_geom,start_station_name,the_geom_webmercator,field_1,start_station_longitude,tripduration,starttime,stoptime,start_station_id,cartodb_id,start_station_latitude,end_station_id,end_station_name,end_station_latitude,end_station_longitude,bikeid,usertype,birth_year,gender
46141,,South End Ave & Liberty St,,44847,-74.015756,162,2015-02-07 18:55:00+00,2015-02-07 18:57:00+00,3002,44843,40.711512,363,West Thames St,40.708347,-74.017134,17658,Subscriber,1981.0,2
46142,,South End Ave & Liberty St,,39910,-74.015756,151,2015-02-07 08:20:00+00,2015-02-07 08:23:00+00,3002,39892,40.711512,327,Vesey Pl & River Terrace,40.715338,-74.016584,15008,Subscriber,1961.0,1
46143,,South End Ave & Liberty St,,32655,-74.015756,150,2015-02-06 09:02:00+00,2015-02-06 09:05:00+00,3002,32618,40.711512,327,Vesey Pl & River Terrace,40.715338,-74.016584,20131,Subscriber,1978.0,2
46144,,South End Ave & Liberty St,,21924,-74.015756,134,2015-02-05 05:58:00+00,2015-02-05 06:00:00+00,3002,21869,40.711512,363,West Thames St,40.708347,-74.017134,20547,Subscriber,1974.0,1
46145,,South End Ave & Liberty St,,28336,-74.015756,132,2015-02-05 17:37:00+00,2015-02-05 17:39:00+00,3002,28292,40.711512,363,West Thames St,40.708347,-74.017134,20663,Subscriber,1968.0,2
46146,,South End Ave & Liberty St,,44134,-74.015756,118,2015-02-07 17:25:00+00,2015-02-07 17:27:00+00,3002,44128,40.711512,363,West Thames St,40.708347,-74.017134,17272,Subscriber,1982.0,1
46147,,South End Ave & Liberty St,,41351,-74.015756,108,2015-02-07 12:07:00+00,2015-02-07 12:09:00+00,3002,41340,40.711512,363,West Thames St,40.708347,-74.017134,18470,Subscriber,1979.0,1
46148,,South End Ave & Liberty St,,8917,-74.015756,99,2015-02-03 09:37:00+00,2015-02-03 09:39:00+00,3002,8857,40.711512,3002,South End Ave & Liberty St,40.711512,-74.015756,18107,Subscriber,1981.0,1
46149,,South End Ave & Liberty St,,13826,-74.015756,85,2015-02-04 08:16:00+00,2015-02-04 08:18:00+00,3002,13778,40.711512,3002,South End Ave & Liberty St,40.711512,-74.015756,16585,Subscriber,1967.0,1
46150,,South End Ave & Liberty St,,27401,-74.015756,79,2015-02-05 16:29:00+00,2015-02-05 16:30:00+00,3002,27360,40.711512,3002,South End Ave & Liberty St,40.711512,-74.015756,17061,Subscriber,1954.0,1


In [8]:
## List all unique start_station_id values

query = '''
SELECT DISTINCT start_station_id
FROM citibike
'''
table = get_data(query)
table.head(5)

Unnamed: 0,start_station_id,Unnamed: 1
0,120,
1,285,
2,251,
3,195,
4,453,


In [9]:
## Count the number of trips

query = '''
SELECT COUNT(cartodb_id)
FROM citibike
'''
table = get_data(query)
table.head(5)

Unnamed: 0,count,Unnamed: 1
0,46200,


In [10]:
## Find the average/min/max trip duration

query = '''
SELECT AVG(tripduration), MIN(tripduration), MAX(tripduration)
FROM citibike
'''
table = get_data(query)
table.head(5)

Unnamed: 0,avg,min,max
0,675.865823,60,43016


## TASK 2

In [11]:
## Select trips started on Feb-2-2015 only
## Note: Feb 2 should be '2015-01-02', but that doesn't return any trips

query = '''
SELECT cartodb_id
FROM citibike
WHERE CAST(starttime AS DATE) = '2015-02-01'
'''
table = get_data(query)
table.head(5)

Unnamed: 0,cartodb_id,Unnamed: 1
0,107,
1,1088,
2,2759,
3,4893,
4,6090,


In [12]:
## Select trips starting on the weekends

query = '''
SELECT cartodb_id
FROM citibike
WHERE EXTRACT(DOW FROM starttime) in (6,0)
'''
table = get_data(query)
table.head(5)

Unnamed: 0,cartodb_id,Unnamed: 1
0,107,
1,1088,
2,2759,
3,4893,
4,6090,


In [13]:
## What is the average trip duration during the weekend?

query = '''
SELECT AVG(tripduration)
FROM citibike
WHERE EXTRACT(DOW FROM starttime) in (6,0)
'''
table = get_data(query)
table.head(5)

Unnamed: 0,avg,Unnamed: 1
0,662.942181,


In [14]:
## Can we do the same for weekday?

query = '''
SELECT AVG(tripduration)
FROM citibike
WHERE EXTRACT(DOW FROM starttime) not in (6,0)
'''
table = get_data(query)
table.head(5)

Unnamed: 0,avg,Unnamed: 1
0,681.052292,


## Task 3

In [27]:
## Show the list of start station locations using GROUP BY

query = '''
SELECT DISTINCT start_station_name, start_station_latitude, start_station_longitude
FROM citibike
ORDER BY start_station_latitude DESC
'''
table = get_data(query)
table.head(5)

Unnamed: 0,start_station_name,start_station_latitude,start_station_longitude
0,11 Ave & W 59 St,40.771522,-73.990541
1,W 59 St & 10 Ave,40.770513,-73.988038
2,Broadway & W 60 St,40.769155,-73.981918
3,W 56 St & 10 Ave,40.768254,-73.988639
4,W 52 St & 11 Ave,40.767272,-73.993929


## Task 4

In [28]:
## Show the list of start station locations using GROUP BY

query = '''
SELECT DISTINCT start_station_name, start_station_latitude, start_station_longitude
FROM citibike
'''
table = get_data(query)
table.head(5)

Unnamed: 0,start_station_name,start_station_latitude,start_station_longitude
0,Broadway & W 53 St,40.763441,-73.982681
1,W 13 St & 6 Ave,40.736494,-73.997044
2,Cleveland Pl & Spring St,40.721816,-73.997203
3,Hudson St & Reade St,40.71625,-74.009106
4,Henry St & Poplar St,40.700469,-73.991454


In [None]:
g = geocoder.google([table['start_station_latitude'].astype(float), table['start_station_longitude'].astype(float)], method='reverse')

# extra credit: make the function python 2 and 3 compatible so that it works on the  PUI2016_Python3 kernel