Visualization of pickup and drop-off locations with Bokeh and Datashader, inspired by https://anaconda.org/jbednar/nyc_taxi/notebook

In [1]:
import math
import pandas as pd
from functools import partial
from IPython.core.display import HTML, display


from bokeh.plotting import figure, output_notebook, show
from bokeh.tile_providers import STAMEN_TERRAIN

import datashader as ds
from datashader import transfer_functions as tf
from datashader.bokeh_ext import InteractiveImage
from datashader.utils import export_image
from datashader.colors import colormap_select, Greys9, Hot, viridis, inferno

# Configure Bokeh to render plots inline in the notebook output cells.
output_notebook()

In [2]:
# Only the coordinates and passenger_count are used by the maps below, so the
# read is restricted to those columns (shared by the train and test files).
USECOLS = ['pickup_longitude', 'pickup_latitude',
           'dropoff_longitude', 'dropoff_latitude', 'passenger_count']

train = pd.read_csv('../input/train.csv', usecols=USECOLS)
test = pd.read_csv('../input/test.csv', usecols=USECOLS)

# .shape reports (rows, columns); .size would report rows * columns instead.
print(train.shape, test.shape)

In [3]:
# adapted from http://www.neercartography.com/latitudelongitude-tofrom-web-mercator/
def toWebMercator(LonLat):
    """Project a (longitude, latitude) pair in degrees to Web Mercator.

    Parameters
    ----------
    LonLat : sequence
        Indexable pair; element 0 is the longitude and element 1 the latitude,
        both in decimal degrees.

    Returns
    -------
    list or None
        ``[easting, northing]`` in the units of the semimajor axis (metres),
        or ``None`` when the longitude is outside [-180, 180], the latitude is
        outside [-90, 90], or the latitude is at a pole, where the projection
        is undefined.
    """
    xLon = LonLat[0]
    yLat = LonLat[1]
    # Reject the pair when EITHER coordinate is out of range (the check used
    # to require both to be invalid, letting bad values through).
    if abs(xLon) > 180 or abs(yLat) > 90:
        return None

    semimajorAxis = 6378137.0  # WGS84 spheroid semimajor axis
    degToRad = 0.017453292519943295  # pi / 180
    east = xLon * degToRad
    north = yLat * degToRad

    sinNorth = math.sin(north)
    # At a pole (or a latitude that rounds to one) sin() is exactly +/-1 and
    # the logarithm below would divide by zero or take log(0).
    if abs(sinNorth) >= 1.0:
        return None

    # Spherical Mercator: y = R/2 * ln((1 + sin(lat)) / (1 - sin(lat)))
    northing = (semimajorAxis / 2.0) * math.log((1.0 + sinNorth) / (1.0 - sinNorth))
    easting = semimajorAxis * east

    return [easting, northing]

In [4]:
%%time
train[['pickup_x', 'pickup_y']] = train[['pickup_longitude', 'pickup_latitude']].apply(toWebMercator, axis=1)
train[['dropoff_x', 'dropoff_y']] = train[['dropoff_longitude', 'dropoff_latitude']].apply(toWebMercator, axis=1)

In [5]:
%%time
test[['pickup_x', 'pickup_y']] = test[['pickup_longitude', 'pickup_latitude']].apply(toWebMercator, axis=1)
test[['dropoff_x', 'dropoff_y']] = test[['dropoff_longitude', 'dropoff_latitude']].apply(toWebMercator, axis=1)

In [6]:
# Initial map window, expressed in the projected x/y coordinates.
x_range = (-8226000, -8224000)
y_range = (4963500, 4980000)
NYC = (x_range, y_range)

# Figure size in pixels; the height is the width floor-divided by 1.5.
plot_width = 790
plot_height = int(plot_width // 1.5)

# Bokeh toolbar tools, as a comma-separated string.
tools = 'pan, wheel_zoom, box_zoom, undo, reset'

def base_plot(tools=tools,plot_width=plot_width, plot_height=plot_height, **plot_args):
    """Create a bare Bokeh figure: no axes, grid lines, outline or borders.

    The default ``tools``, ``plot_width`` and ``plot_height`` are the values
    the module-level names held when this function was defined. ``x_range``
    and ``y_range`` are not parameters; they are read from the module-level
    names each time the function is called. Any extra keyword arguments are
    forwarded to ``figure``.

    Returns the configured figure.
    """
    fig = figure(
        tools=tools,
        plot_width=plot_width,
        plot_height=plot_height,
        x_range=x_range,
        y_range=y_range,
        outline_line_color=None,
        min_border=0,
        min_border_left=0,
        min_border_right=0,
        min_border_top=0,
        min_border_bottom=0,
        **plot_args
    )
    # Hide the axes, then blank the grid lines in both directions.
    fig.axis.visible = False
    for grid in (fig.xgrid, fig.ygrid):
        grid.grid_line_color = None
    return fig
    
# Glyph styling: no outline, green fill, size 5.
options = {'line_color': None, 'fill_color': 'green', 'size': 5}

In [7]:
troptions = dict(line_color=None, fill_color='red', size=1, alpha=0.1)
# Fixed seed so the scatter is identical on every Restart & Run All; the cap
# keeps sample() from raising if the frame has fewer than 100000 rows.
trsamples = train.sample(n=min(100000, len(train)), random_state=0)

p = base_plot(webgl=True)
# Depending on your setup you may need to use this instead
# p = base_plot()
# p.output_backend = "webgl"

p.add_tile(STAMEN_TERRAIN)
p.circle(x=trsamples['pickup_x'], y=trsamples['pickup_y'], **troptions)
show(p)

In [8]:
teoptions = dict(line_color=None, fill_color='blue', size=1, alpha=0.1)
# Fixed seed so the scatter is identical on every Restart & Run All; the cap
# keeps sample() from raising if the frame has fewer than 100000 rows.
tesamples = test.sample(n=min(100000, len(test)), random_state=0)

p = base_plot(webgl=True)
# Depending on your setup you may need to use this instead
# p = base_plot()
# p.output_backend = "webgl"

p.add_tile(STAMEN_TERRAIN)
p.circle(x=tesamples['pickup_x'], y=tesamples['pickup_y'], **teoptions)
show(p)

In [9]:
# Wider map window for the datashader images. Rebinding the module-level
# x_range / y_range also changes the ranges base_plot uses from here on,
# because it reads those names when it is called.
x_range = (-8242500, -8210000)
y_range = (4958000, 4995000)
NYC = (x_range, y_range)

# Image size in pixels; the height is the width floor-divided by 1.2.
plot_width = 780
plot_height = int(plot_width // 1.2)

background = "black"
# export_image pre-bound to the "export" path and the background colour.
export = partial(export_image, export_path="export", background=background)
# colormap_select with reverse switched on when the background is black.
cm = partial(colormap_select, reverse=(background == "black"))

def create_image(dataset, x_range, y_range, width, height):
    """Render the drop-off points of ``dataset`` as a datashader image.

    The ``dropoff_x`` / ``dropoff_y`` columns are aggregated on a
    ``width`` x ``height`` canvas covering ``x_range`` / ``y_range`` with
    ``ds.count('passenger_count')``. The aggregate is shaded with the ``Hot``
    colormap using ``eq_hist`` and the result is passed through
    ``tf.dynspread`` before being returned.
    """
    canvas = ds.Canvas(
        x_range=x_range,
        y_range=y_range,
        plot_width=width,
        plot_height=height,
    )
    dropoff_counts = canvas.points(
        dataset, 'dropoff_x', 'dropoff_y', ds.count('passenger_count')
    )
    shaded = tf.shade(dropoff_counts, cmap=Hot, how='eq_hist')
    return tf.dynspread(shaded, threshold=0.4, max_px=5)

def create_image_train(x, y, w=plot_width, h=plot_height):
    """Image callback for ``train``: forwards the ranges and size to ``create_image``."""
    return create_image(dataset=train, x_range=x, y_range=y, width=w, height=h)

# base_plot's default size was fixed when it was defined, so the current
# plot_width / plot_height are passed explicitly; otherwise the figure would
# not match the size used for the exported image.
plot = base_plot(plot_width=plot_width, plot_height=plot_height,
                 background_fill_color=background)
export(create_image_train(*NYC),"NYCT_hot")
InteractiveImage(plot, create_image_train)

In [10]:
def create_image_test(x, y, w=plot_width, h=plot_height):
    """Image callback for ``test``: forwards the ranges and size to ``create_image``."""
    return create_image(dataset=test, x_range=x, y_range=y, width=w, height=h)

# base_plot's default size was fixed when it was defined, so the current
# plot_width / plot_height are passed explicitly; otherwise the figure would
# not match the size used for the exported image.
plot = base_plot(plot_width=plot_width, plot_height=plot_height,
                 background_fill_color=background)
export(create_image_test(*NYC),"NYCT_hot_test")
InteractiveImage(plot, create_image_test)

In [11]:
def merged_images(dataset, x_range, y_range, width, height, how='log'):
    """Render where pickups outnumber drop-offs and vice versa.

    Pickup and drop-off points of ``dataset`` are aggregated separately on the
    same ``width`` x ``height`` canvas with ``ds.count('passenger_count')``.
    Pixels with more pickups than drop-offs are shaded from darkred to
    orangered, pixels with more drop-offs than pickups from darkgreen to cyan
    (both using ``how``). The two layers are stacked, pickups first, and
    passed through ``tf.dynspread``.
    """
    canvas = ds.Canvas(
        x_range=x_range,
        y_range=y_range,
        plot_width=width,
        plot_height=height,
    )
    counter = ds.count('passenger_count')
    pickup_counts = canvas.points(dataset, 'pickup_x', 'pickup_y', counter)
    dropoff_counts = canvas.points(dataset, 'dropoff_x', 'dropoff_y', counter)

    # Keep only the pixels where one kind strictly outnumbers the other.
    pickup_heavy = tf.shade(
        pickup_counts.where(pickup_counts > dropoff_counts),
        cmap=["darkred", 'orangered'],
        how=how,
    )
    dropoff_heavy = tf.shade(
        dropoff_counts.where(dropoff_counts > pickup_counts),
        cmap=["darkgreen", 'cyan'],
        how=how,
    )

    layered = tf.stack(pickup_heavy, dropoff_heavy)
    return tf.dynspread(layered, threshold=0.4, max_px=4)

def merged_images_train(x, y, w=plot_width, h=plot_height):
    """Image callback for ``train``: forwards the ranges and size to ``merged_images``."""
    return merged_images(dataset=train, x_range=x, y_range=y, width=w, height=h)

# base_plot's default size was fixed when it was defined, so the current
# plot_width / plot_height are passed explicitly; otherwise the figure would
# not match the size used for the exported image.
plot = base_plot(plot_width=plot_width, plot_height=plot_height,
                 background_fill_color=background)
export(merged_images_train(*NYC),"NYCT_pickups_vs_dropoffs")
InteractiveImage(plot, merged_images_train)

In [12]:
testsize = test.shape[0]
# Draw as many training rows as there are test rows so both layers of the
# comparison image are built from the same number of points. The fixed seed
# keeps the sample, and therefore the exported image, the same across runs.
df = train.sample(n=testsize, random_state=0)
assert len(df) == len(test)

def compare_images(x_range, y_range, width=plot_width, height=plot_height, how='log'):
    """Overlay pickup locations of the sampled training rows and of ``test``.

    Both layers use the ``pickup_x`` / ``pickup_y`` columns, aggregated with
    ``ds.count('passenger_count')`` on the same canvas: the module-level
    ``df`` is shaded red and ``test`` is shaded white (both using ``how``).
    The layers are stacked with the red layer first and passed through
    ``tf.dynspread``.
    """
    canvas = ds.Canvas(
        x_range=x_range,
        y_range=y_range,
        plot_width=width,
        plot_height=height,
    )
    counter = ds.count('passenger_count')
    train_pickups = canvas.points(df, 'pickup_x', 'pickup_y', counter)
    test_pickups = canvas.points(test, 'pickup_x', 'pickup_y', counter)

    train_layer = tf.shade(train_pickups, cmap='red', how=how)
    test_layer = tf.shade(test_pickups, cmap='white', how=how)

    layered = tf.stack(train_layer, test_layer)
    return tf.dynspread(layered, threshold=0.3, max_px=4)

# base_plot's default size was fixed when it was defined, so the current
# plot_width / plot_height are passed explicitly; otherwise the figure would
# not match the size used for the exported image.
plot = base_plot(plot_width=plot_width, plot_height=plot_height,
                 background_fill_color=background)
export(compare_images(*NYC),"NYCT_pickups_in_train_vs_test")
InteractiveImage(plot, compare_images)