Let's get the data from the Citibike API

In [None]:
import requests

# Endpoint queried for the Citibike data; the part of the JSON payload used
# in this notebook sits under the "stationBeanList" key.
url = 'http://www.citibikenyc.com/stations/json'

# A timeout keeps the cell from hanging forever on an unresponsive server, and
# raise_for_status() surfaces HTTP errors (4xx/5xx) right away instead of
# letting them show up later as a confusing JSON-decoding or KeyError failure.
response = requests.get(url, timeout=30)
response.raise_for_status()
data = response.json()["stationBeanList"]

And for convenience we put the data in a dataframe:

In [None]:
import pandas

# Build a DataFrame from the records downloaded above.
df = pandas.DataFrame(data)

# Preview only the first rows: dumping the whole frame bloats the notebook
# output without adding information.
df.head()

We set up plotting within the notebook

In [None]:
%matplotlib inline
import matplotlib as plt
import seaborn as sns

# This defines the size of the image below
plt.rcParams['figure.figsize'] = (6, 4)

Let's first create a histogram of available bikes across the stations

In [None]:
# Histogram of the "availableBikes" column, split into 20 bins
available_bikes = df["availableBikes"]
available_bikes.hist(bins=20)

And now let's plot the same, but let's normalize by the size of each station. We will also add a little bit of transparency in the plot using the `alpha` parameter. (Try setting it using different values from 0 to 1.)

In [None]:
# Available bikes divided by total docks, per station, drawn semi-transparent
bikes_per_dock = df["availableBikes"] / df["totalDocks"]
bikes_per_dock.hist(alpha=0.5)

And now let's do a plot of available bikes vs total docks in a station:

In [None]:
# One semi-transparent point per station: total docks (x) vs available bikes (y)
df.plot.scatter(x="totalDocks", y="availableBikes", alpha=0.5)

#### Poor man's mapping

We will now make a small attempt at using a scatterplot, together with the longitude and latitude coordinates, to plot things on a map.

In [None]:
# First pass at a "map": use each station's longitude as x and latitude as y.
# Switch to a square figure for this and the following plots.
plt.rcParams['figure.figsize'] = (6, 6)
df.plot.scatter(x='longitude', y='latitude')

In [None]:
# Plot in-service stations in dark blue and out-of-service stations in red.
# Each call below draws its own, separate figure.
station_status = df["statusValue"]
inservice = station_status == 'In Service'
outofservice = station_status == 'Not In Service'

df.loc[inservice].plot.scatter(
    x='longitude',
    y='latitude',
    color='DarkBlue',
    label='In Service',
)

df.loc[outofservice].plot.scatter(
    x='longitude',
    y='latitude',
    color='Red',
    label='Out of Service',
)

In [None]:
# Two separate figures are not ideal: both groups belong on the same axes.
# The first call's return value is handed to the second call through the
# "ax" argument, so the red points are drawn on top of the blue ones.
inservice = df["statusValue"].eq('In Service')
outofservice = df["statusValue"].eq('Not In Service')

firstplot = df.loc[inservice].plot.scatter(
    x='longitude',
    y='latitude',
    color='DarkBlue',
    label='In Service',
)

secondplot = df.loc[outofservice].plot.scatter(
    x='longitude',
    y='latitude',
    color='Red',
    label='Out of Service',
    ax=firstplot,
)


In [None]:
# Now make the size of each in-service point proportional to the share of the
# station's docks that hold an available bike, using the "s" option of the plot.
inservice = (df["statusValue"] == 'In Service')
outofservice = (df["statusValue"] == 'Not In Service')

# The marker sizes must be computed from the same rows that are plotted.
# Sizes are matched to points by position, so a series built from the full
# frame would pair stations with the wrong sizes (or be rejected outright for
# having a different length than x and y).
in_service_stations = df[inservice]
docks = in_service_stations['totalDocks']
# Stations reporting zero docks would give NaN/inf sizes; mask the zero
# denominators and draw those stations with size 0 instead.
fill_ratio = (in_service_stations['availableBikes'] / docks.where(docks > 0)).fillna(0)

# We also add "alpha=0.5" to add transparency
firstplot = in_service_stations.plot(kind='scatter',
                                     x='longitude',
                                     y='latitude',
                                     color='DarkBlue',
                                     alpha=0.5,
                                     label='In Service',
                                     s=50 * fill_ratio)

# Out-of-service stations get a small fixed-size red marker on the same axes
secondplot = df[outofservice].plot(kind='scatter',
                                   x='longitude',
                                   y='latitude',
                                   color='Red',
                                   ax=firstplot,
                                   s=10, alpha=0.5,
                                   label='Out of Service')
