# Example: CitiBike data

Adapted from Kelsey Jordahl
https://gist.github.com/kjordahl/5957573

In [1]:
import requests  
import json
from pyproj import Proj
from shapely.geometry import Point
import geopandas as gp

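NYC borough boundaries downloaded from [Bytes of the Big Apple](http://www.nyc.gov/html/dcp/download/bytes/nybb_13a.zip). Note that the link points to the `nybb_13a` release, while the cell below reads the shapefile from a local `nybb_15b` folder.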

In [2]:
# Read the borough-boundary shapefile into a GeoDataFrame (one row per
# borough) and display it.
boros = gp.read_file('nybb_15b/nybb.shp')
boros

Unnamed: 0,BoroCode,BoroName,Shape_Leng,Shape_Area,geometry
0,5,Staten Island,330466.075042,1623827000.0,(POLYGON ((970217.0223999023 145643.3322143555...
1,3,Brooklyn,741185.900596,1937597000.0,(POLYGON ((1021176.479003906 151374.7969970703...
2,4,Queens,897040.298576,3045168000.0,(POLYGON ((1029606.076599121 156073.8142089844...
3,1,Manhattan,358408.460709,636446700.0,(POLYGON ((981219.0557861328 188655.3157958984...
4,2,Bronx,464400.198868,1186973000.0,(POLYGON ((1012821.805786133 229228.2645874023...


Load real-time bike station data from the [CitiBike](http://citibikenyc.com) JSON API:

In [3]:
# Fetch the live station feed and decode its JSON body into a dictionary.
endpoint_url = 'http://citibikenyc.com/stations/json'
# A timeout keeps the cell from hanging forever if the server never answers.
response = requests.get(endpoint_url, timeout=30)
# Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
response.raise_for_status()
data = response.json()


In [4]:
# The station records live under the 'stationBeanList' key of the response;
# build a GeoDataFrame from them and preview the first rows.
station_records = data['stationBeanList']
df = gp.GeoDataFrame(station_records)
df.head()

Unnamed: 0,altitude,availableBikes,availableDocks,city,id,landMark,lastCommunicationTime,latitude,location,longitude,postalCode,stAddress1,stAddress2,stationName,statusKey,statusValue,testStation,totalDocks
0,,26,5,,304,,2019-02-19 12:46:15 PM,40.704633,,-74.013617,,Broadway & Battery Pl,,Broadway & Battery Pl,1,In Service,False,33
1,,26,34,,359,,2019-02-19 12:46:10 PM,40.755103,,-73.974987,,E 47 St & Park Ave,,E 47 St & Park Ave,1,In Service,False,64
2,,12,33,,377,,2019-02-19 12:45:10 PM,40.722438,,-74.005664,,6 Ave & Canal St,,6 Ave & Canal St,1,In Service,False,45
3,,4,32,,402,,2019-02-19 12:45:15 PM,40.740343,,-73.989551,,Broadway & E 22 St,,Broadway & E 22 St,1,In Service,False,39
4,,36,5,,520,,2019-02-19 12:46:13 PM,40.759923,,-73.976485,,W 52 St & 5 Ave,,W 52 St & 5 Ave,1,In Service,False,41


In [5]:
# Each row is one bike station, so the row count is the number of stations.
df.shape[0]

814

In [6]:
# The station table has longitude/latitude columns but no geometry column,
# so build one Point per station (x = longitude, y = latitude).
lon_lat_pairs = zip(df['longitude'], df['latitude'])
s = gp.GeoSeries([Point(lon, lat) for lon, lat in lon_lat_pairs])
df['geometry'] = s

# Declare the coordinates as EPSG:4326 (longitude/latitude in degrees).
df.crs = {'init': 'epsg:4326', 'no_defs': True}

# Sanity check: the bounds should be (minx, miny, maxx, maxy) in degrees.
df.geometry.total_bounds

array([-74.0836394 ,  40.65539977, -73.9077436 ,  40.81439444])

In [7]:
# Reproject the stations into the borough layer's CRS so the two layers can
# be compared spatially. Printing the bounds afterwards is a quick way to
# confirm the coordinates are now in the same units as the boroughs.
target_crs = boros.crs
df.to_crs(target_crs, inplace=True)
df.geometry.total_bounds

array([ 961064.73659426,  178056.37414829, 1009803.53178432,
        235986.42166716])

In [8]:
# Geometry objects support spatial predicates. For example, count the
# stations that fall inside Manhattan.

# Select the polygon by borough name rather than by row position, so the
# result does not depend on the order of the rows in the shapefile.
manhattan = boros.loc[boros['BoroName'] == 'Manhattan', 'geometry'].iloc[0]
# Boolean Series: True where the station point lies within the polygon.
in_mn = df.geometry.within(manhattan)
print(in_mn.sum(), 'stations in Manhattan')

414 stations in Manhattan


# Your turn

You can read about the range of operations available in geopandas here: 
    
http://geopandas.org/index.html

Your assignment is to: 

1. Calculate how many stations are in each borough
2. Calculate how many bikes are currently available in each borough
3. Read about and try at least two new spatial or geometric operations (beyond what I've covered here). 

In [9]:
# Select the Bronx polygon by borough name rather than by row position, so
# the result does not depend on the order of the rows in the shapefile.
bronx = boros.loc[boros['BoroName'] == 'Bronx', 'geometry'].iloc[0]
# Boolean Series: True where the station point lies within the Bronx.
in_bx = df.geometry.within(bronx)

In [10]:
# Select the Queens polygon by borough name rather than by row position, so
# the result does not depend on the order of the rows in the shapefile.
queens = boros.loc[boros['BoroName'] == 'Queens', 'geometry'].iloc[0]
# Boolean Series: True where the station point lies within Queens.
in_Q = df.geometry.within(queens)

In [11]:
# Select the Brooklyn polygon by borough name rather than by row position, so
# the result does not depend on the order of the rows in the shapefile.
brooklyn = boros.loc[boros['BoroName'] == 'Brooklyn', 'geometry'].iloc[0]
# Boolean Series: True where the station point lies within Brooklyn.
in_bk = df.geometry.within(brooklyn)

In [12]:
# Select the Staten Island polygon by borough name rather than by row
# position, so the result does not depend on the order of the rows in the
# shapefile.
statenisland = boros.loc[boros['BoroName'] == 'Staten Island', 'geometry'].iloc[0]
# Boolean Series: True where the station point lies within Staten Island.
in_si = df.geometry.within(statenisland)

In [13]:
# Report the station count per borough: each mask is a boolean Series, so
# summing it counts the True entries.
station_masks = [
    (in_mn, 'stations in Manhattan'),
    (in_bx, 'stations in Bronx'),
    (in_Q, 'stations in Queens'),
    (in_bk, 'stations in Brooklyn'),
    (in_si, 'stations in Staten Island'),
]
for mask, caption in station_masks:
    print(sum(mask), caption)

414 stations in Manhattan
0 stations in Bronx
82 stations in Queens
268 stations in Brooklyn
0 stations in Staten Island


In [14]:
# Inspect the per-station count of bikes currently available.
df['availableBikes']

0      26
1      26
2      12
3       4
4      36
5       2
6      12
7       4
8      23
9      25
10     35
11      5
12     13
13     30
14     26
15     21
16     13
17     18
18      0
19     26
20      3
21     33
22     30
23      3
24     41
25     38
26      4
27     13
28     27
29      7
       ..
784    32
785    36
786    25
787     4
788    36
789     0
790    11
791    25
792     4
793     3
794     0
795    25
796    11
797    46
798    27
799    44
800     1
801     2
802     6
803    25
804     0
805     4
806    21
807     2
808    15
809    10
810     1
811     5
812     8
813     7
Name: availableBikes, Length: 814, dtype: int64

In [15]:
# Total bikes currently available at stations inside the Bronx
# (row 4 of `boros`).
# One vectorized .within() call builds a boolean mask over all stations;
# summing 'availableBikes' over that mask replaces a Python-level loop that
# tested each row individually.
bx_mask = df.geometry.within(boros.geometry[4])
bikes_bx = df.loc[bx_mask, 'availableBikes'].sum()

In [16]:
# Total bikes currently available at stations inside Queens
# (row 2 of `boros`).
# One vectorized .within() call builds a boolean mask over all stations;
# summing 'availableBikes' over that mask replaces a Python-level loop that
# tested each row individually.
q_mask = df.geometry.within(boros.geometry[2])
bikes_Q = df.loc[q_mask, 'availableBikes'].sum()

In [17]:
# Total bikes currently available at stations inside Brooklyn
# (row 1 of `boros`).
# One vectorized .within() call builds a boolean mask over all stations;
# summing 'availableBikes' over that mask replaces a Python-level loop that
# tested each row individually.
bk_mask = df.geometry.within(boros.geometry[1])
bikes_bk = df.loc[bk_mask, 'availableBikes'].sum()

In [18]:
# Total bikes currently available at stations inside Staten Island
# (row 0 of `boros`).
# One vectorized .within() call builds a boolean mask over all stations;
# summing 'availableBikes' over that mask replaces a Python-level loop that
# tested each row individually.
si_mask = df.geometry.within(boros.geometry[0])
bikes_si = df.loc[si_mask, 'availableBikes'].sum()

In [19]:
# Total bikes currently available at stations inside Manhattan
# (row 3 of `boros`).
# One vectorized .within() call builds a boolean mask over all stations;
# summing 'availableBikes' over that mask replaces a Python-level loop that
# tested each row individually.
mn_mask = df.geometry.within(boros.geometry[3])
bikes_mn = df.loc[mn_mask, 'availableBikes'].sum()

In [20]:
# Report the available-bike totals computed in the preceding cells, one line
# per borough.
bike_totals = [
    ('available bikes in Manhattan', bikes_mn),
    ('available bikes in Staten Island', bikes_si),
    ('available bikes in Brooklyn', bikes_bk),
    ('available bikes in Queens', bikes_Q),
    ('available bikes in Bronx', bikes_bx),
]
for caption, total in bike_totals:
    print(caption, total)

available bikes in Manhattan 6422
available bikes in Staten Island 0
available bikes in Brooklyn 3313
available bikes in Queens 417
available bikes in Bronx 0
