### Imports (compatible for Python 2 and Python 3)

In [15]:
#import necassary packages#import 
try:
    import urllib2 as ulib
except ImportError:
    import urllib3 as ulib

try:
    from StringIO import BytesIO as io
except ImportError:
    from io import BytesIO as io

try:
    from urllib import urlencode as urlencode
except ImportError:
    from urllib.parse import urlencode as urlencode
    
try:
    from urllib import urlopen as urlopen
except ImportError:
    from urllib.request import urlopen as urlopen
    
try:
    from urllib2 import HTTPError as HTTPError
except ImportError:
    from urllib.error import HTTPError as HTTPError

import ast
import pandas as pd

In [20]:
API_URL = 'https://fb55.carto.com:443/api/v2/sql?q='

# functions from lab notes for querying data

def queryCartoDB(query, formatting = 'CSV', source = API_URL):
    '''queries carto datasets from a given carto account
    Arguments: 
    query - string: a valid sql query string
    format - outlut format  OPTIONAL (default CSV)
    source - a valid sql api endpoint OPTIONAL (default carto fb55 account)
    Returns:
    the return of the sql query AS A STRING
    NOTES:
    designed for the carto API, tested only with CSV return format'''
    
    
    data = urlencode({'format': formatting, 'q': query}).encode("utf-8")
    try:
        response = urlopen(source, data)
        return response.read()
    except HTTPError as e:
        raise (ValueError('\n'.join(ast.literal_eval(e.readline())['error'])))
        
def get_data(query):
    try:
        return pd.read_csv(io(queryCartoDB(query)), sep = ',')
    except ValueError as v:
        print (str(v))

In [16]:
test = '''
SELECT *
FROM citibike_short
'''

In [18]:
query = '''SELECT * FROM fb55.citibike; '''
table = get_data(query)
table.head(5)

Unnamed: 0,the_geom,start_station_name,the_geom_webmercator,field_1,start_station_longitude,tripduration,starttime,stoptime,start_station_id,cartodb_id,start_station_latitude,end_station_id,end_station_name,end_station_latitude,end_station_longitude,bikeid,usertype,birth_year,gender
0,,E 20 St & Park Ave,,175,-73.98752,1090,2015-02-01 01:23:00+00,2015-02-01 01:42:00+00,503,107,40.738274,229,Great Jones St,40.727434,-73.99379,19718,Subscriber,1961.0,1
1,,W 43 St & 10 Ave,,1159,-73.994618,682,2015-02-01 10:55:00+00,2015-02-01 11:07:00+00,515,1088,40.760094,490,8 Ave & W 33 St,40.751551,-73.993934,21501,Subscriber,1981.0,1
2,,E 6 St & Avenue B,,2827,-73.981854,751,2015-02-01 13:59:00+00,2015-02-01 14:11:00+00,317,2759,40.724537,466,W 25 St & 6 Ave,40.743954,-73.991449,14788,Subscriber,1990.0,1
3,,E 12 St & 3 Ave,,4961,-73.9889,272,2015-02-01 17:28:00+00,2015-02-01 17:32:00+00,483,4893,40.732233,345,W 13 St & 6 Ave,40.736494,-73.997044,16219,Subscriber,1961.0,1
4,,W 41 St & 8 Ave,,6156,-73.990026,240,2015-02-01 21:36:00+00,2015-02-01 21:40:00+00,477,6090,40.756405,490,8 Ave & W 33 St,40.751551,-73.993934,18266,Customer,,0


### Task 1.1
Sort data by start_station_id, tripduration
- Only checking trips with duration <= 3 hours 

In [23]:
# sort by start_station_id
query = '''
SELECT * FROM fb55.citibike
WHERE tripduration <= 10800
ORDER BY start_station_id DESC
limit 10
'''
table = get_data(query)
table.head(10)

Unnamed: 0,the_geom,start_station_name,the_geom_webmercator,field_1,start_station_longitude,tripduration,starttime,stoptime,start_station_id,cartodb_id,start_station_latitude,end_station_id,end_station_name,end_station_latitude,end_station_longitude,bikeid,usertype,birth_year,gender
0,,South End Ave & Liberty St,,1932,-74.015756,689,2015-02-01 12:25:00+00,2015-02-01 12:36:00+00,3002,1863,40.711512,358,Christopher St & Greenwich St,40.732916,-74.007114,19309,Subscriber,1996.0,1
1,,South End Ave & Liberty St,,2094,-74.015756,307,2015-02-01 12:42:00+00,2015-02-01 12:47:00+00,3002,2026,40.711512,147,Greenwich St & Warren St,40.715422,-74.01122,18386,Subscriber,1984.0,2
2,,South End Ave & Liberty St,,1698,-74.015756,745,2015-02-01 11:57:00+00,2015-02-01 12:09:00+00,3002,1628,40.711512,369,Washington Pl & 6 Ave,40.732241,-74.000264,20901,Subscriber,1980.0,1
3,,South End Ave & Liberty St,,1657,-74.015756,858,2015-02-01 11:53:00+00,2015-02-01 12:07:00+00,3002,1587,40.711512,355,Bayard St & Baxter St,40.716021,-73.999744,15748,Subscriber,1959.0,1
4,,South End Ave & Liberty St,,1402,-74.015756,1243,2015-02-01 11:27:00+00,2015-02-01 11:48:00+00,3002,1333,40.711512,360,William St & Pine St,40.707179,-74.008873,18288,Customer,,0
5,,South End Ave & Liberty St,,1542,-74.015756,683,2015-02-01 11:43:00+00,2015-02-01 11:55:00+00,3002,1472,40.711512,308,St James Pl & Oliver St,40.713079,-73.998512,16987,Subscriber,1980.0,2
6,,South End Ave & Liberty St,,361,-74.015756,301,2015-02-01 07:49:00+00,2015-02-01 07:54:00+00,3002,293,40.711512,417,Barclay St & Church St,40.712912,-74.010202,19484,Subscriber,1980.0,2
7,,South End Ave & Liberty St,,1297,-74.015756,1900,2015-02-01 11:14:00+00,2015-02-01 11:45:00+00,3002,1228,40.711512,525,W 34 St & 11 Ave,40.755942,-74.002116,17849,Customer,,0
8,,South End Ave & Liberty St,,1576,-74.015756,7225,2015-02-01 11:46:00+00,2015-02-01 13:47:00+00,3002,1506,40.711512,499,Broadway & W 60 St,40.769155,-73.981918,21202,Customer,,0
9,,South End Ave & Liberty St,,2271,-74.015756,790,2015-02-01 13:01:00+00,2015-02-01 13:14:00+00,3002,2205,40.711512,368,Carmine St & 6 Ave,40.730386,-74.00215,17004,Subscriber,1979.0,1


In [29]:
# sort by tripduration
query = '''
SELECT * FROM fb55.citibike
where tripduration <= 10800
order by tripduration desc
LIMIT 10;
'''
table = get_data(query)
table.head(10)

Unnamed: 0,the_geom,start_station_name,the_geom_webmercator,field_1,start_station_longitude,tripduration,starttime,stoptime,start_station_id,cartodb_id,start_station_latitude,end_station_id,end_station_name,end_station_latitude,end_station_longitude,bikeid,usertype,birth_year,gender
0,,Greenwich Ave & 8 Ave,,41403,-74.002638,10757,2015-02-07 12:17:00+00,2015-02-07 15:16:00+00,284,41392,40.739017,173,Broadway & W 49 St,40.760647,-73.984427,15082,Subscriber,1973,1
1,,W 29 St & 9 Ave,,10975,-73.998393,10660,2015-02-03 17:42:00+00,2015-02-03 20:40:00+00,512,10912,40.750073,435,W 21 St & 6 Ave,40.74174,-73.994156,16099,Subscriber,1950,1
2,,Washington St & Gansevoort St,,40237,-74.008119,10174,2015-02-07 09:31:00+00,2015-02-07 12:20:00+00,405,40221,40.739323,405,Washington St & Gansevoort St,40.739323,-74.008119,21323,Subscriber,1983,1
3,,Centre St & Worth St,,7538,-74.002345,9969,2015-02-02 18:22:00+00,2015-02-02 21:08:00+00,386,7472,40.714948,264,Maiden Ln & Pearl St,40.707065,-74.007319,19921,Subscriber,1987,2
4,,Broadway & W 55 St,,9756,-73.981923,9938,2015-02-03 14:10:00+00,2015-02-03 16:55:00+00,468,9696,40.765265,2023,E 55 St & Lexington Ave,40.759681,-73.970314,19070,Subscriber,1970,1
5,,W 21 St & 6 Ave,,15380,-73.994156,9807,2015-02-04 10:53:00+00,2015-02-04 13:36:00+00,435,15336,40.74174,435,W 21 St & 6 Ave,40.74174,-73.994156,18088,Subscriber,1985,1
6,,Broadway & W 58 St,,25241,-73.981693,9711,2015-02-05 11:12:00+00,2015-02-05 13:54:00+00,457,25203,40.766953,2021,W 45 St & 8 Ave,40.759291,-73.988597,14726,Subscriber,1962,1
7,,Broadway & E 22 St,,19952,-73.989551,9572,2015-02-04 19:09:00+00,2015-02-04 21:48:00+00,402,19897,40.740343,517,Pershing Square South,40.751581,-73.97791,14691,Subscriber,1966,2
8,,Broadway & W 60 St,,7139,-73.981918,9556,2015-02-02 13:25:00+00,2015-02-02 16:05:00+00,499,7073,40.769155,499,Broadway & W 60 St,40.769155,-73.981918,14802,Subscriber,1940,1
9,,DeKalb Ave & Hudson Ave,,11871,-73.981013,9373,2015-02-03 19:25:00+00,2015-02-03 22:01:00+00,324,11810,40.689888,324,DeKalb Ave & Hudson Ave,40.689888,-73.981013,15018,Subscriber,1968,1


In [26]:
#sort by both
query = '''
SELECT * FROM fb55.citibike
ORDER BY start_station_id ASC, tripduration DESC
LIMIT 10;
'''
table = get_data(query)
table.head(10)

Unnamed: 0,the_geom,start_station_name,the_geom_webmercator,field_1,start_station_longitude,tripduration,starttime,stoptime,start_station_id,cartodb_id,start_station_latitude,end_station_id,end_station_name,end_station_latitude,end_station_longitude,bikeid,usertype,birth_year,gender
0,,W 52 St & 11 Ave,,9115,-73.993929,2099,2015-02-03 10:28:00+00,2015-02-03 11:02:00+00,72,9054,40.767272,328,Watts St & Greenwich St,40.724055,-74.00966,18653,Subscriber,1966.0,2
1,,W 52 St & 11 Ave,,2051,-73.993929,1944,2015-02-01 12:39:00+00,2015-02-01 13:12:00+00,72,1983,40.767272,328,Watts St & Greenwich St,40.724055,-74.00966,17849,Customer,,0
2,,W 52 St & 11 Ave,,40355,-73.993929,1914,2015-02-07 09:49:00+00,2015-02-07 10:21:00+00,72,40341,40.767272,328,Watts St & Greenwich St,40.724055,-74.00966,21520,Subscriber,1966.0,2
3,,W 52 St & 11 Ave,,12996,-73.993929,1801,2015-02-04 06:32:00+00,2015-02-04 07:02:00+00,72,12938,40.767272,328,Watts St & Greenwich St,40.724055,-74.00966,15161,Subscriber,1959.0,2
4,,W 52 St & 11 Ave,,11359,-73.993929,1678,2015-02-03 18:21:00+00,2015-02-03 18:49:00+00,72,11298,40.767272,79,Franklin St & W Broadway,40.719116,-74.006667,21500,Subscriber,1964.0,1
5,,W 52 St & 11 Ave,,13695,-73.993929,1537,2015-02-04 08:07:00+00,2015-02-04 08:33:00+00,72,13647,40.767272,487,E 20 St & FDR Drive,40.733143,-73.975739,18900,Subscriber,1977.0,2
6,,W 52 St & 11 Ave,,9802,-73.993929,1535,2015-02-03 14:23:00+00,2015-02-03 14:48:00+00,72,9742,40.767272,334,W 20 St & 7 Ave,40.742388,-73.997262,15117,Subscriber,1956.0,2
7,,W 52 St & 11 Ave,,11301,-73.993929,1518,2015-02-03 18:16:00+00,2015-02-03 18:42:00+00,72,11240,40.767272,476,E 31 St & 3 Ave,40.743943,-73.979661,19039,Subscriber,1971.0,1
8,,W 52 St & 11 Ave,,20364,-73.993929,1493,2015-02-04 19:47:00+00,2015-02-04 20:12:00+00,72,20309,40.767272,79,Franklin St & W Broadway,40.719116,-74.006667,21080,Subscriber,1964.0,1
9,,W 52 St & 11 Ave,,24990,-73.993929,1388,2015-02-05 10:34:00+00,2015-02-05 10:58:00+00,72,24953,40.767272,127,Barrow St & Hudson St,40.731724,-74.006744,19672,Subscriber,1974.0,1


### Task 1.2
Only show the top/last 10 records (aka head and tail in SQL)

In [30]:
#only show last 10
query = '''
SELECT * FROM fb55.citibike
ORDER BY start_station_id ASC, tripduration DESC
LIMIT 10;
'''
table = get_data(query)
table.tail(10)

Unnamed: 0,the_geom,start_station_name,the_geom_webmercator,field_1,start_station_longitude,tripduration,starttime,stoptime,start_station_id,cartodb_id,start_station_latitude,end_station_id,end_station_name,end_station_latitude,end_station_longitude,bikeid,usertype,birth_year,gender
0,,W 52 St & 11 Ave,,9115,-73.993929,2099,2015-02-03 10:28:00+00,2015-02-03 11:02:00+00,72,9054,40.767272,328,Watts St & Greenwich St,40.724055,-74.00966,18653,Subscriber,1966.0,2
1,,W 52 St & 11 Ave,,2051,-73.993929,1944,2015-02-01 12:39:00+00,2015-02-01 13:12:00+00,72,1983,40.767272,328,Watts St & Greenwich St,40.724055,-74.00966,17849,Customer,,0
2,,W 52 St & 11 Ave,,40355,-73.993929,1914,2015-02-07 09:49:00+00,2015-02-07 10:21:00+00,72,40341,40.767272,328,Watts St & Greenwich St,40.724055,-74.00966,21520,Subscriber,1966.0,2
3,,W 52 St & 11 Ave,,12996,-73.993929,1801,2015-02-04 06:32:00+00,2015-02-04 07:02:00+00,72,12938,40.767272,328,Watts St & Greenwich St,40.724055,-74.00966,15161,Subscriber,1959.0,2
4,,W 52 St & 11 Ave,,11359,-73.993929,1678,2015-02-03 18:21:00+00,2015-02-03 18:49:00+00,72,11298,40.767272,79,Franklin St & W Broadway,40.719116,-74.006667,21500,Subscriber,1964.0,1
5,,W 52 St & 11 Ave,,13695,-73.993929,1537,2015-02-04 08:07:00+00,2015-02-04 08:33:00+00,72,13647,40.767272,487,E 20 St & FDR Drive,40.733143,-73.975739,18900,Subscriber,1977.0,2
6,,W 52 St & 11 Ave,,9802,-73.993929,1535,2015-02-03 14:23:00+00,2015-02-03 14:48:00+00,72,9742,40.767272,334,W 20 St & 7 Ave,40.742388,-73.997262,15117,Subscriber,1956.0,2
7,,W 52 St & 11 Ave,,11301,-73.993929,1518,2015-02-03 18:16:00+00,2015-02-03 18:42:00+00,72,11240,40.767272,476,E 31 St & 3 Ave,40.743943,-73.979661,19039,Subscriber,1971.0,1
8,,W 52 St & 11 Ave,,20364,-73.993929,1493,2015-02-04 19:47:00+00,2015-02-04 20:12:00+00,72,20309,40.767272,79,Franklin St & W Broadway,40.719116,-74.006667,21080,Subscriber,1964.0,1
9,,W 52 St & 11 Ave,,24990,-73.993929,1388,2015-02-05 10:34:00+00,2015-02-05 10:58:00+00,72,24953,40.767272,127,Barrow St & Hudson St,40.731724,-74.006744,19672,Subscriber,1974.0,1


### Task 1.3
List all unique start_station_id values 

In [56]:
query = '''
SELECT DISTINCT start_station_id FROM fb55.citibike
ORDER BY start_station_id
'''
table = get_data(query)
table.head(5)

Unnamed: 0,start_station_id,Unnamed: 1
0,72,
1,79,
2,82,
3,83,
4,116,


### Task 1.4
Aggregation functions:
- Count the number of trips (aka wc -l in SQL)
- Find the average/min/max trip duration

In [32]:
query = '''
SELECT count(start_station_id) as trip_count,
AVG(tripduration),
MIN(tripduration),
MAX(tripduration)
FROM fb55.citibike
'''
table = get_data(query)
table.head()

Unnamed: 0,trip_count,avg,min,max
0,46200,675.865823,60,43016


### Task 2.1
Selecting trips started on Feb-02-2015 only

In [34]:
query = '''
SELECT * FROM fb55.citibike
WHERE starttime >= '2015-02-02 00:00' and starttime < '2015-02-03 00:00'
'''
table = get_data(query)
table.head(5)

Unnamed: 0,the_geom,start_station_name,the_geom_webmercator,field_1,start_station_longitude,tripduration,starttime,stoptime,start_station_id,cartodb_id,start_station_latitude,end_station_id,end_station_name,end_station_latitude,end_station_longitude,bikeid,usertype,birth_year,gender
0,,5 Ave & E 29 St,,7433,-73.986831,376,2015-02-02 17:44:00+00,2015-02-02 17:50:00+00,474,7367,40.745168,537,Lexington Ave & E 24 St,40.740259,-73.984092,18048,Subscriber,1977.0,1
1,,Allen St & Hester St,,7779,-73.991908,1217,2015-02-02 20:12:00+00,2015-02-02 20:32:00+00,361,7712,40.716059,461,E 20 St & 2 Ave,40.735877,-73.98205,19732,Customer,,0
2,,E 5 St & Avenue C,,11,-73.979955,1312,2015-02-02 11:07:00+00,2015-02-02 11:29:00+00,393,6920,40.722992,476,E 31 St & 3 Ave,40.743943,-73.979661,17720,Subscriber,1955.0,1
3,,9 Ave & W 16 St,,627,-74.004432,444,2015-02-02 00:40:00+00,2015-02-02 00:48:00+00,463,6477,40.742065,489,10 Ave & W 28 St,40.750664,-74.001768,20233,Subscriber,1971.0,1
4,,9 Ave & W 16 St,,991,-74.004432,438,2015-02-02 00:40:00+00,2015-02-02 00:47:00+00,463,6478,40.742065,489,10 Ave & W 28 St,40.750664,-74.001768,20264,Subscriber,1981.0,2


### Task 2.2
Selecting trips started on the weekends
- What are average trip duration during weekends? 

In [37]:
query = '''
SELECT avg(tripduration) as weekend_avg FROM fb55.citibike
WHERE extract(dow from starttime) in (0,6)
'''
table = get_data(query)
table.head()

Unnamed: 0,weekend_avg,Unnamed: 1
0,662.942181,


### Task 2.3
Can we do the same for weekday?

In [38]:
query = '''
SELECT avg(tripduration) as weekdat_avg FROM fb55.citibike
WHERE extract(dow from starttime) in (1,2,3,4,5)
'''
table = get_data(query)
table.head()

Unnamed: 0,weekdat_avg,Unnamed: 1
0,681.052292,


### Task 3.1
Showing the list of start station locations
- Using GROUP BY

In [45]:
query = '''
SELECT start_station_id, start_station_latitude, start_station_longitude FROM fb55.citibike
GROUP BY start_station_id, start_station_latitude, start_station_longitude
'''
table = get_data(query)
table.head()

Unnamed: 0,start_station_id,start_station_latitude,start_station_longitude
0,349,40.718502,-73.983299
1,470,40.743453,-74.00004
2,485,40.75038,-73.98339
3,335,40.729039,-73.994046
4,267,40.750977,-73.987654


### Task 3.2
Showing the number of trips started per station 

In [48]:
query = '''
SELECT count(start_station_id),
start_station_name
FROM fb55.citibike
GROUP BY start_station_id, start_station_name
'''
table = get_data(query)
table.head()

Unnamed: 0,count,start_station_name
0,123,Lispenard St & Broadway
1,218,W 22 St & 8 Ave
2,108,Madison St & Montgomery St
3,159,Centre St & Chambers St
4,13,Lafayette Ave & Classon Ave


### Task 3.3
 but only for stations within 500m of Time Square!
- The coordinates of Time Square is (40.7577,-73.9857)

In [49]:
query = '''
SELECT count(start_station_id),
start_station_name as start_station_name
FROM fb55.citibike
WHERE st_dwithin(CDB_LatLng(start_station_latitude, start_station_longitude)::geography, CDB_LatLng(40.7577, -73.9857)::geography, 500)
GROUP BY start_station_id, start_station_name
'''
table = get_data(query)
table.head()

Unnamed: 0,count,start_station_name
0,213,Broadway & W 49 St
1,251,Broadway & W 41 St
2,507,W 41 St & 8 Ave
3,141,W 45 St & 6 Ave
4,112,W 43 St & 6 Ave


### Task 4.1
Find the station that had the longest average trip duration during
weekends and within 500m of TimeSquare! 

In [58]:
query = '''
SELECT avg(tripduration) as average_duration, start_station_name FROM fb55.citibike
WHERE extract(dow from starttime) in (0,6) AND st_dwithin(CDB_LatLng(start_station_latitude, start_station_longitude)::geography, CDB_LatLng(40.7577, -73.9857)::geography, 500)
GROUP BY start_station_name, start_station_latitude, start_station_longitude
ORDER BY average_duration DESC
'''
table = get_data(query)
table.head(1)

Unnamed: 0,average_duration,start_station_name
0,1010.104167,Broadway & W 49 St


### Task 4.2
Extra: create lines for trips started from stations within 500m of Times
Squares and lasted less than 2 hours. The number of trips per each
pair of stations are output as attributes of these lines.