In [1]:
import json
import os
import urllib
from contextlib import closing

try:  # Python 3 location of urlopen
    from urllib.request import urlopen
except ImportError:  # Python 2 fallback
    from urllib import urlopen

import geopandas as gp
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from shapely.geometry import Point

%matplotlib inline



In [2]:
# Download the Citi Bike station feed and parse it into a Python dict.
url = "https://feeds.citibikenyc.com/stations/stations.json"
# `closing` releases the HTTP response even if reading or parsing fails, and works
# whether or not the response object is itself a context manager.
with closing(urlopen(url)) as response:
    # Decode the raw bytes first so json.loads receives text on every Python version.
    stations = json.loads(response.read().decode("utf-8"))

In [3]:
# Reduce every station record in the feed to an (id, latitude, longitude, address) tuple.
# A list comprehension is used instead of map() so that `st` is a concrete, reusable
# list on both Python 2 and Python 3 (map() returns a one-shot iterator on Python 3).
st = [
    (station['id'], station['latitude'], station['longitude'], station['stAddress1'])
    for station in stations['stationBeanList']
]

In [4]:
# Tabulate the station tuples, naming the four fields in the order they were extracted.
cb_stations = pd.DataFrame(
    data=st,
    columns=['id', 'latitude', 'longitude', 'station_address'],
)

In [5]:
# Write the station table to disk as CSV (the row index is written as the first column).
cb_stations.to_csv(path_or_buf='../data/citibike_stations.csv')

In [6]:
# Load the Jersey City trip records (file name indicates September 2016) into a DataFrame.
cb = pd.read_csv(
    filepath_or_buffer="../data/jersey_city/JC-201609-citibike-tripdata.csv"
)

In [7]:
# List the trip table's column names to see which fields are available.
cb.columns

Index([u'Trip Duration', u'Start Time', u'Stop Time', u'Start Station ID',
       u'Start Station Name', u'Start Station Latitude',
       u'Start Station Longitude', u'End Station ID', u'End Station Name',
       u'End Station Latitude', u'End Station Longitude', u'Bike ID',
       u'User Type', u'Birth Year', u'Gender'],
      dtype='object')

In [8]:
# Load the Jersey City station layer from its shapefile (one POINT geometry per station).
cb_jc = gp.read_file("../data/jersey_city/jc_stations/jc_cb_stations.shp")

In [9]:
# Preview the first five station rows.
cb_jc.head()

Unnamed: 0,geometry,id,latitude,longitude,station_ad
0,POINT (-74.03345880000001 40.7162469),3183,40.716247,-74.033459,Exchange Place
1,POINT (-74.03355190000001 40.7141454),3184,40.714145,-74.033552,Paulus Hook
2,POINT (-74.043845 40.7177325),3185,40.717732,-74.043845,City Hall
3,POINT (-74.0431174636 40.7195861165),3186,40.719586,-74.043117,Grove St PATH
4,POINT (-74.03805095 40.7211236),3187,40.721124,-74.038051,Warren St


In [10]:
# Load the Jersey City census block layer from its shapefile (one POLYGON per block).
cb_jc_blocks = gp.read_file("../data/jersey_city/jc_census_blocks/jc_census_blocks.shp")

In [11]:
# Preview the first two census block rows and their attribute columns.
cb_jc_blocks.head(2)

Unnamed: 0,ALAND10,AWATER10,BLOCKCE10,COUNTYFP10,FUNCSTAT10,GEOID10,HH100_RD,INTPTLAT10,INTPTLON10,MTFCC10,NAME10,POP100_RD,SHAPE_AREA,SHAPE_LEN,STATEFP10,TRACTCE10,UACE10,UR10,geometry
0,121029.0,0.0,4004,17,S,340170061004004,287,40.6960651,-74.1012406,G5040,Block 4004,813,1302517.0,6098.972938,34,6100,,,"POLYGON ((603128.8201636747 678343.7580376044,..."
1,28649.0,0.0,1008,17,S,340170001001008,65,40.7547592,-74.0553121,G5040,Block 1008,195,308317.8,2703.468388,34,100,,,"POLYGON ((615682.0929573439 700587.5409777761,..."


In [61]:
# Keep only the block code and the polygon geometry from the census block layer.
# NOTE(review): BLOCKCE10 appears to be the within-tract block number (the GEOID10
# values in the preview also embed state/county/tract), so it may not identify a block
# uniquely across tracts — confirm whether GEOID10 should be carried instead.
jc_blocks = cb_jc_blocks.loc[:,['BLOCKCE10', 'geometry']]

In [14]:
# Pull the pick-up and drop-off fields into their own frames. The explicit .copy() makes
# each one an independent DataFrame, so the datetime conversion applied to these frames
# afterwards assigns into them without pandas warning about writing to a slice of `cb`.
cb_picks = cb.loc[:, ['Start Time', 'Start Station ID']].copy()
cb_drops = cb.loc[:, ['Stop Time', 'End Station ID']].copy()

In [15]:
# Convert the 'Start Time' / 'Stop Time' columns to pandas datetimes so trips can be
# bucketed by day. Each frame is rebuilt with the parsed column replacing the original.
cb_picks = cb_picks.assign(**{'Start Time': pd.to_datetime(cb_picks['Start Time'])})
cb_drops = cb_drops.assign(**{'Stop Time': pd.to_datetime(cb_drops['Stop Time'])})

In [21]:
# Count pick-ups per station per calendar day. pd.Grouper(freq='D') buckets the
# 'Start Time' index into daily bins; it replaces pd.TimeGrouper, which newer pandas
# releases deprecated and then removed.
cb_grp_picks = (
    cb_picks
    .set_index(['Start Time'])
    .groupby([pd.Grouper(freq='D'), 'Start Station ID'])
    .agg({'Start Station ID': 'count'})
)

In [25]:
# Count drop-offs per station per calendar day. pd.Grouper(freq='D') buckets the
# 'Stop Time' index into daily bins; it replaces pd.TimeGrouper, which newer pandas
# releases deprecated and then removed.
cb_grp_drops = (
    cb_drops
    .set_index(['Stop Time'])
    .groupby([pd.Grouper(freq='D'), 'End Station ID'])
    .agg({'End Station ID': 'count'})
)

In [30]:
# Give both daily-count tables the same flat layout: Date, StationID, <count column>.
# The count column is renamed first so that moving the index levels (day, station id)
# back into columns does not collide with the identically named count column.
for counts, count_name in ((cb_grp_picks, 'pickups'), (cb_grp_drops, 'drops')):
    counts.columns = [count_name]
    # Both index levels are reset, which matches naming each level explicitly.
    counts.reset_index(inplace=True)
    counts.columns = ['Date', 'StationID', count_name]

In [35]:
# Combine daily pick-up and drop-off counts into one row per (station, day). The outer
# join keeps station-days present on only one side; the missing count is left empty.
citibike_all = cb_grp_picks.merge(
    cb_grp_drops,
    how='outer',
    on=['StationID', 'Date'],
)

In [41]:
# Preview the merged daily pick-up / drop-off counts.
citibike_all.head()

Unnamed: 0,Date,StationID,pickups,drops
0,2016-09-01,3183.0,96.0,69.0
1,2016-09-01,3184.0,28.0,28.0
2,2016-09-01,3185.0,19.0,21.0
3,2016-09-01,3186.0,142.0,126.0
4,2016-09-01,3187.0,30.0,25.0


In [46]:
# Re-check the station layer (id and point geometry) before the spatial join.
cb_jc.head()

Unnamed: 0,geometry,id,latitude,longitude,station_ad
0,POINT (-74.03345880000001 40.7162469),3183,40.716247,-74.033459,Exchange Place
1,POINT (-74.03355190000001 40.7141454),3184,40.714145,-74.033552,Paulus Hook
2,POINT (-74.043845 40.7177325),3185,40.717732,-74.043845,City Hall
3,POINT (-74.0431174636 40.7195861165),3186,40.719586,-74.043117,Grove St PATH
4,POINT (-74.03805095 40.7211236),3187,40.721124,-74.038051,Warren St


In [63]:
# Preview the trimmed block layer; the polygon coordinates here are still in the
# shapefile's original (projected) coordinate system.
jc_blocks.head()

Unnamed: 0,BLOCKCE10,geometry
0,4004,"POLYGON ((603128.8201636747 678343.7580376044,..."
1,1008,"POLYGON ((615682.0929573439 700587.5409777761,..."
2,2001,"POLYGON ((616410.1010157578 700213.4636419415,..."
3,4001,"POLYGON ((621789.2306774259 690035.8127374426,..."
4,3013,"POLYGON ((605238.8641719259 686058.4824375212,..."


In [66]:
# Reproject the block polygons to EPSG:4326, the same CRS the station points are put in,
# so the two layers can be joined spatially.
jc_blocks = jc_blocks.to_crs(epsg=4326)

In [67]:
# Confirm the block layer now reports EPSG:4326.
jc_blocks.crs

{'init': 'epsg:4326', 'no_defs': True}

In [69]:
# Put the station points in EPSG:4326 as well, matching the block polygons.
cb_jc = cb_jc.to_crs(epsg=4326)

In [70]:
# Confirm the station layer now reports EPSG:4326.
cb_jc.crs

{'init': 'epsg:4326', 'no_defs': True}

In [77]:
# Spatially join each station point to a census block polygon, attaching the block's
# BLOCKCE10 (plus index_right, the row label of the matched block).
# NOTE(review): sjoin is called with its default join type and predicate — presumably an
# inner "intersects" join, so a station lying outside every block would be dropped; confirm.
citibike_geo = gp.sjoin(cb_jc, jc_blocks)

In [78]:
# Preview the joined stations with their matched block codes.
citibike_geo.head()

Unnamed: 0,geometry,id,latitude,longitude,station_ad,index_right,BLOCKCE10
0,POINT (-74.03345880000001 40.7162469),3183,40.716247,-74.033459,Exchange Place,1325,1004
1,POINT (-74.03355190000001 40.7141454),3184,40.714145,-74.033552,Paulus Hook,1750,1007
29,POINT (-74.0364857 40.7127742),3214,40.712774,-74.036486,Essex Light Rail,1750,1007
35,POINT (-74.0385255218 40.7124188238),3267,40.712419,-74.038526,Morris Canal,1750,1007
2,POINT (-74.043845 40.7177325),3185,40.717732,-74.043845,City Hall,174,2003


In [79]:
# Reduce the join result to a station id -> block code lookup; geometry and the other
# station attributes are dropped here.
citibike_geo = citibike_geo.loc[:,['id', 'BLOCKCE10']]

In [80]:
# Preview the station id -> block code lookup.
citibike_geo.head()

Unnamed: 0,id,BLOCKCE10
0,3183,1004
1,3184,1007
29,3214,1007
35,3267,1007
2,3185,2003


In [81]:
# Attach each station's census block code to the daily counts. The left join keeps every
# count row even when its StationID has no match in the station/block lookup.
cb_jc_citibike = citibike_all.merge(
    citibike_geo,
    how='left',
    left_on='StationID',
    right_on='id',
)

In [83]:
# Remove the lookup's 'id' column, which duplicates StationID after the merge.
# errors='ignore' plus rebinding (instead of an in-place drop) keeps this cell safe to
# re-run: a second execution is a no-op rather than an error about a missing 'id'.
cb_jc_citibike = cb_jc_citibike.drop('id', axis=1, errors='ignore')

In [84]:
# Give the two remaining raw column names their final labels. Renaming by name (rather
# than assigning a positional list) cannot mislabel a column if the column order ever
# changes, and re-running the cell is harmless.
cb_jc_citibike = cb_jc_citibike.rename(
    columns={'drops': 'dropoffs', 'BLOCKCE10': 'census_block'}
)

In [None]:
# Create the output directory from Python instead of shelling out to `mkdir`, so the
# step behaves the same on every platform and real failures raise instead of being
# reported only through an ignored exit status. The existence check keeps it re-runnable.
# NOTE(review): the directory name "aggrega" looks cut short — confirm the intended name.
agg_dir = "../data/jersey_city/aggrega"
if not os.path.isdir(agg_dir):
    os.makedirs(agg_dir)

In [85]:
# Write the per-station daily counts, with their census block codes, to disk as CSV.
# NOTE(review): the target name 'jc_s' has no extension and looks cut short — confirm
# the intended output file name.
cb_jc_citibike.to_csv(path_or_buf='../data/jersey_city/jc_s')

Unnamed: 0,Date,StationID,pickups,dropoffs,census_block
0,2016-09-01,3183.0,96.0,69.0,1004
1,2016-09-01,3184.0,28.0,28.0,1007
2,2016-09-01,3185.0,19.0,21.0,2003
3,2016-09-01,3186.0,142.0,126.0,3010
4,2016-09-01,3187.0,30.0,25.0,1012
5,2016-09-01,3188.0,1.0,1.0,4002
6,2016-09-01,3190.0,6.0,2.0,2001
7,2016-09-01,3191.0,2.0,1.0,1005
8,2016-09-01,3192.0,18.0,21.0,1013
9,2016-09-01,3193.0,15.0,25.0,1008
