# Example: CitiBike data

Adapted from Kelsey Jordahl
https://gist.github.com/kjordahl/5957573

In [69]:
%matplotlib inline
import requests  
import json
from pyproj import Proj
from shapely.geometry import Point
import geopandas as gp
import matplotlib.pyplot as plt
import pandas as pd

NYC borough boundaries downloaded from [Bytes of the Big Apple](http://www.nyc.gov/html/dcp/download/bytes/nybb_13a.zip)

In [40]:
boros = gp.GeoDataFrame.from_file('nybb_15b/nybb.shp')

Load real time bike station data from [CitiBike](http://citibikenyc.com) json API:

In [41]:
# get the bike data and convert it to a ditcionary
endpoint_url = 'http://citibikenyc.com/stations/json'
response = requests.get(endpoint_url)
data = json.loads(response.text)


In [42]:
# convert the relevant part to a geodataframe
df = gp.GeoDataFrame(data['stationBeanList'])
df.head()

Unnamed: 0,altitude,availableBikes,availableDocks,city,id,landMark,lastCommunicationTime,latitude,location,longitude,postalCode,stAddress1,stAddress2,stationName,statusKey,statusValue,testStation,totalDocks
0,,6,31,,72,,2017-02-16 04:16:00 PM,40.767272,,-73.993929,,W 52 St & 11 Ave,,W 52 St & 11 Ave,1,In Service,False,39
1,,23,8,,79,,2017-02-16 04:18:02 PM,40.719116,,-74.006667,,Franklin St & W Broadway,,Franklin St & W Broadway,1,In Service,False,33
2,,17,9,,82,,2017-02-16 04:15:35 PM,40.711174,,-74.000165,,St James Pl & Pearl St,,St James Pl & Pearl St,1,In Service,False,27
3,,28,33,,83,,2017-02-16 04:15:37 PM,40.683826,,-73.976323,,Atlantic Ave & Fort Greene Pl,,Atlantic Ave & Fort Greene Pl,1,In Service,False,62
4,,8,31,,116,,2017-02-16 04:18:56 PM,40.741776,,-74.001497,,W 17 St & 8 Ave,,W 17 St & 8 Ave,1,In Service,False,39


In [43]:
# there is one row for each bike station.  How many stations are there? 
len(df)

666

In [44]:
# in the file above, there are lon-lats, but no geometry field
# we need to set that up

s = gp.GeoSeries([Point(x, y) for x, y in zip(df['longitude'], df['latitude'])])
df['geometry'] = s
df.crs = {'init': 'epsg:4326', 'no_defs': True}
df.geometry.total_bounds

(-74.096936600000006,
 40.661063371900603,
 -73.929891100000006,
 40.804212999999997)

In [45]:
# make sure they are on the same CRS.  
# checking the bounds is a nice way of seeing this
df.to_crs(boros.crs, inplace=True)
df.geometry.total_bounds

(957370.14732175611,
 180120.27032614074,
 1003695.8507545569,
 232275.23054640222)

In [46]:
# the geometry objects can do lots of cool stuff.  For example: 

manhattan = boros.geometry[3]
in_mn = df.geometry.within(manhattan)
print(sum(in_mn), 'stations in Manhattan')

367 stations in Manhattan


# Your turn

You can read about the range of operations available in geopandas here: 
    
http://geopandas.org/index.html

Your assignment is to: 

1. Calculate how many stations are in each borough
2. Calculate how many bikes are currently available in each borough
3. Read about and try at least two new spatial or geometric operations (beyond what I've covered here). 

In [47]:
def bike_to_boro(x):
    boro = boros.geometry[x]
    in_boro = df.geometry.within(boro)
    return print(sum(in_boro), 'stations in ', boros.loc[x,'BoroName'])

In [48]:
#This function calls for the number of bikes in each boro
for i in range(0,5):
    bike_to_boro(i)

0 stations in  Staten Island
236 stations in  Brooklyn
13 stations in  Queens
367 stations in  Manhattan
0 stations in  Bronx


In [49]:
def bike_in_boro(x):  
    boro = boros.geometry[x]
    in_boro = df.geometry.within(boro)
    return in_boro

In [52]:
b = bike_in_boro(0)
df['Statisland']=b
df.head()

Unnamed: 0,altitude,availableBikes,availableDocks,city,id,landMark,lastCommunicationTime,latitude,location,longitude,postalCode,stAddress1,stAddress2,stationName,statusKey,statusValue,testStation,totalDocks,geometry,Statisland
0,,6,31,,72,,2017-02-16 04:16:00 PM,40.767272,,-73.993929,,W 52 St & 11 Ave,,W 52 St & 11 Ave,1,In Service,False,39,POINT (985931.706502895 218814.7571059909),False
1,,23,8,,79,,2017-02-16 04:18:02 PM,40.719116,,-74.006667,,Franklin St & W Broadway,,Franklin St & W Broadway,1,In Service,False,33,POINT (982402.0068864136 201269.770346975),False
2,,17,9,,82,,2017-02-16 04:15:35 PM,40.711174,,-74.000165,,St James Pl & Pearl St,,St James Pl & Pearl St,1,In Service,False,27,POINT (984204.131576321 198376.4207269395),False
3,,28,33,,83,,2017-02-16 04:15:37 PM,40.683826,,-73.976323,,Atlantic Ave & Fort Greene Pl,,Atlantic Ave & Fort Greene Pl,1,In Service,False,62,POINT (990816.693171227 188413.5778061197),False
4,,8,31,,116,,2017-02-16 04:18:56 PM,40.741776,,-74.001497,,W 17 St & 8 Ave,,W 17 St & 8 Ave,1,In Service,False,39,POINT (983835.0432398689 209525.6347344065),False


In [61]:
for j in range(0,5):
    if j == 0:
        b = bike_in_boro(j)
        df['Statisland']=b
    elif j == 1:
        b = bike_in_boro(j)
        df['Brooklyn']=b
    elif j == 2:
        b = bike_in_boro(j)
        df['Queens']=b
    elif j == 3:
        b = bike_in_boro(j)
        df['Manhattan']=b
    elif j == 4:
        b = bike_in_boro(j)
        df['Bronx']=b

In [66]:
df.head()

Unnamed: 0,altitude,availableBikes,availableDocks,city,id,landMark,lastCommunicationTime,latitude,location,longitude,...,statusKey,statusValue,testStation,totalDocks,geometry,Statisland,Brooklyn,Queens,Manhattan,Bronx
0,,6,31,,72,,2017-02-16 04:16:00 PM,40.767272,,-73.993929,...,1,In Service,False,39,POINT (985931.706502895 218814.7571059909),False,False,False,True,False
1,,23,8,,79,,2017-02-16 04:18:02 PM,40.719116,,-74.006667,...,1,In Service,False,33,POINT (982402.0068864136 201269.770346975),False,False,False,True,False
2,,17,9,,82,,2017-02-16 04:15:35 PM,40.711174,,-74.000165,...,1,In Service,False,27,POINT (984204.131576321 198376.4207269395),False,False,False,True,False
3,,28,33,,83,,2017-02-16 04:15:37 PM,40.683826,,-73.976323,...,1,In Service,False,62,POINT (990816.693171227 188413.5778061197),False,True,False,False,False
4,,8,31,,116,,2017-02-16 04:18:56 PM,40.741776,,-74.001497,...,1,In Service,False,39,POINT (983835.0432398689 209525.6347344065),False,False,False,True,False


In [80]:
bikes = pd.DataFrame(columns = ['Statisland','Brooklyn','Queens','Manhattan','Bronx'])
bikes

Unnamed: 0,Statisland,Brooklyn,Queens,Manhattan,Bronx


In [78]:
df.iloc[0,23]

False

In [89]:
for r in range (0,667):
        if df.iloc[r,19] == True:
            bikes.iloc[r,0] = df.iloc[r,1]
        else:
            bikes.iloc[r,0] = 0

IndexError: single positional indexer is out-of-bounds

In [87]:
for k in range (19,24):
    for r in range (0,667):
        if df.iloc[r,k] == True:
            bikes.iloc[r,k-19] = df.iloc[r,1]
        else:
            bikes.iloc[r,k-19] = 0

IndexError: single positional indexer is out-of-bounds