In [None]:
# Star import: names used below that this notebook never defines (Table,
# make_array, are, Marker, Circle) presumably come from here.
from datascience import *
import numpy as np

# Render matplotlib figures inside the notebook output.
%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')

# Configure for presentation: summarise arrays longer than 50 elements, keep
# printed array lines within 50 characters, and set the default plot font size to 16.
np.set_printoptions(threshold=50, linewidth=50)
import matplotlib as mpl
mpl.rc('font', size=16)

## Drinks

In [None]:
# Small menu table: one row per (drink, cafe) pair together with its price.
drinks = Table(['Drink', 'Cafe', 'Price']).with_rows(
    [
        ['Milk Tea', 'Panda Tea Lounge', 4],
        ['Espresso', 'Gimme', 2],
        ['Latte', 'Gimme', 3],
        ['Espresso', 'Cafe Gola', 2],
    ]
)
drinks

In [None]:
# Coupons by location; 'Gimme' appears twice, so it has two different coupons.
discounts = Table().with_columns(
    'Coupon % off',
    make_array(25, 50, 5),
    'Location',
    make_array('Panda Tea Lounge', 'Gimme', 'Gimme'),
)
discounts

In [None]:
# Pair every drink with every coupon whose 'Location' equals the drink's 'Cafe'.
# NOTE(review): datascience's join keeps only matching rows, so a cafe with no
# coupon ('Cafe Gola') is expected to drop out — confirm against the join docs.
t = drinks.join('Cafe', discounts, 'Location')
t

In [None]:
# Price after applying each coupon: price * (1 - percent_off / 100).
# Columns are addressed by label instead of by position so the result does not
# silently depend on the column order produced by the join that built `t`.
t.with_column(
    'Discounted',
    t.column('Price') * (1 - t.column('Coupon % off') / 100),
)

In [None]:
# Self-join on 'Cafe': pairs up drinks sold at the same cafe.
# The second copy's columns get a '_2' suffix (e.g. 'Price_2', used in the next cell).
two = drinks.join('Cafe', drinks)
two

In [None]:
# Combined price of the two drinks in each pairing.
two.with_column('Total', two.column('Price') + two.column('Price_2'))

## Bikes

In [None]:
# Load the bike-trip records from a CSV expected in the working directory.
# NOTE(review): the file name suggests NYC Citi Bike trips from July 2017 — confirm provenance.
citi = Table.read_table('citibike_nyc_201707_500000.csv')
citi

In [None]:
# Reduce each trip to its start station, end station and duration.
# "duration" is tripduration / 60 (later cells treat it as minutes).
trips = (
    citi.relabeled("start station name", "start")
        .relabeled("end station name", "end")
        .select("start", "end")
        .with_column("duration", citi.column("tripduration") / 60)
)
trips

How long do people generally borrow a bike for?

In [None]:
# Distribution of trip durations across all trips (no upper cutoff applied yet).
trips.hist("duration")

In [None]:
# Largest value in the duration column; dividing by 60 once more gives the
# figure that the message reports as hours.
longest = np.max(trips.column("duration"))
print("Longest trip took", longest / 60, "hours.")

Citi Bike charges an extra $2.50 for rides longer than 45 minutes.

In [None]:
# Keep only trips whose duration is below 46, then plot them in 45 bins.
# NOTE(review): the surcharge described above starts after 45 minutes, but this
# cutoff also keeps rides between 45 and 46 minutes — confirm that is intended.
commute = trips.where('duration', are.below(46))
commute.hist('duration', bins=45, unit='minute')

Where did people pick up bikes?

In [None]:
# Number of commute trips starting at each station, busiest station first.
starts = (
    commute
    .group('start')
    .sort('count', descending=True)
)
starts

* West St & Chambers St is near Stuyvesant High School, Borough of Manhattan Community College, and another ferry terminal, on the Hudson greenway bike trail
* 12 Ave & W 40 St is a ferry terminal on the Hudson greenway bike trail
* Pershing Square North is across from Grand Central Station

In [None]:
# Cross-tabulate the commute trips by station pair.
# NOTE(review): with datascience's pivot(columns, rows) this should yield one
# column per start station and one row per end station, each cell a trip count — confirm.
commute.pivot('start', 'end')

In [None]:
# For every (start, end) pair keep the minimum duration seen.
# The aggregated column is labelled "duration min" (relied on further down).
shortest = commute.group(["start", "end"], min)
shortest

Which stations can you get to fastest from Grand Central Station ('Pershing Square North')?

In [None]:
# Fastest recorded trips out of Grand Central ('Pershing Square North'),
# quickest destination first.
# Sort by the "duration min" label rather than positional index 2, so the
# ordering cannot silently switch to another column if `shortest` gains or
# reorders columns.
from_gc = (
    shortest
    .where("start", are.containing('Pershing Square North'))
    .sort("duration min")
)
from_gc

## Maps

In [None]:
# One row per trip, reduced to the start station's name and coordinates.
stations = (
    citi.relabeled("start station name", "name")
        .relabeled("start station latitude", "lat")
        .relabeled("start station longitude", "lon")
        .select("name", "lat", "lon")
)
stations

How many trips started at each station?

In [None]:
# Collapse to one row per distinct (name, lat, lon). The resulting "count"
# column (used below) is the number of trips that started at that station.
unique_stations = stations.group(["name", "lat", "lon"])
unique_stations

Map it!

In [None]:
# Place one marker per station at its (lat, lon).
# NOTE(review): the trailing "name" column is presumably used as the marker label — confirm.
Marker.map_table(unique_stations.select("lat", "lon", "name"))

In [None]:
# Same stations drawn as green circles with a fixed radius of 100
# (radius units are not shown here — check the Circle docs).
Circle.map_table(unique_stations.select("lat", "lon", "name"), color='green', radius=100)

In [None]:
# Add per-station drawing attributes: a constant colour, and an area that is
# 50 times the station's trip count.
blue_stations = unique_stations.with_columns(
    "color",
    np.full(unique_stations.num_rows, "blue"),
    "area",
    unique_stations.column("count") * 50,
)
blue_stations

In [None]:
# Draw the stations using the "color" and "area" columns added above, so
# stations with more trips get a larger area value.
Circle.map_table(blue_stations.select("lat", "lon", "name", "color", "area"))

How long does it take to get to any other station from Grand Central?

In [None]:
# Re-display the table of shortest trips starting at Pershing Square North.
from_gc

In [None]:
# Attach station details (lat, lon, count) to each destination by matching the
# trip's "end" against the station "name".
gc_dest = from_gc.join("end", unique_stations, "name")
gc_dest

In [None]:
# Drawing attributes for each destination: constant colour, and an area that
# scales with the shortest recorded duration from Grand Central.
color_from_gc = gc_dest.with_columns(
    "color",
    np.full(gc_dest.num_rows, "blue"),
    "area",
    gc_dest.column("duration min") * 10000,
)
color_from_gc

In [None]:
# Map each destination station; a larger area value corresponds to a longer
# minimum trip duration from Pershing Square North.
Circle.map_table(color_from_gc.select("lat", "lon", "end", "color", "area"))

About how long do people spend on trips from each station?

In [None]:
# Rebinds `stations`: same per-trip start-station rows as before, now with the
# trip's duration (tripduration / 60) alongside.
stations = (
    citi.relabeled("start station name", "name")
        .relabeled("start station latitude", "lat")
        .relabeled("start station longitude", "lon")
        .select("name", "lat", "lon")
        .with_column("duration", citi.column("tripduration") / 60)
)
stations

In [None]:
# Median trip duration for each start station (np.median applied per group).
stations.group(["name", "lat", "lon"], np.median)

In [None]:
def round_median(duration, interval=5):
    """Return the median of `duration`, snapped to the nearest multiple of `interval`.

    duration: array-like of numbers.
    interval: bucket width; the result is always a whole multiple of it.

    Exact halfway cases follow np.round, which rounds to the nearest even
    value (e.g. a median of 12.5 with interval 5 gives 10, not 15).
    """
    midpoint = np.median(duration)
    whole_steps = np.round(midpoint / interval, 0)
    return whole_steps * interval

In [None]:
# Median is 10, already a multiple of 5 -> 10.
round_median([10,15,5])

In [None]:
# Median is 11, which rounds down to the nearest multiple of 5 -> 10.
round_median([11,15,5])

In [None]:
# Median is 14, which rounds up to the nearest multiple of 5 -> 15.
round_median([11,14,16])

In [None]:
# Median is 21 (the outlier 5 does not drag it down) -> 20.
round_median([21,22,5])

In [None]:
# Per-station median duration, rounded to the nearest multiple of 5 by round_median.
# The aggregated column comes out labelled "duration round_median".
duration_by_station = stations.group(["name", "lat", "lon"], round_median)
duration_by_station

In [None]:
# Give the aggregated column a plain "duration" label.
# The guard makes this cell safe to re-run: after the first run the old label
# no longer exists, and relabeling it again would raise.
if "duration round_median" in duration_by_station.labels:
    duration_by_station = duration_by_station.relabeled("duration round_median", "duration")
duration_by_station

In [None]:
# How many stations fall into each rounded-duration bucket.
duration_by_station.group('duration')

In [None]:
# Assign a marker colour to every rounded-duration bucket below 30.
colors = duration_by_station.group('duration')\
                        .where('duration', are.below(30))
# Size the palette to the number of buckets actually present, instead of
# assuming the data yields exactly five. np.resize truncates the palette, or
# repeats it cyclically if there are more buckets than colours (in which case
# colours are reused). With five buckets the result is the original mapping.
colors = colors.with_column(
    'color',
    np.resize(make_array('blue', 'green', 'purple', 'red', 'orange'),
              colors.num_rows))
colors

In [None]:
# Look up each station's colour from its duration bucket and keep only the
# columns that are passed on to the map.
colored = (
    duration_by_station
    .join('duration', colors)
    .select('lat', 'lon', 'name', 'color')
)
colored

In [None]:
# Plot one marker per station.
# NOTE(review): the "color" column is presumably picked up by map_table to
# colour each marker by its duration bucket — confirm against the Marker docs.
Marker.map_table(colored)