In [None]:
import datashader as ds
import datashader.transfer_functions as tf

import pandas as pd

from bokeh.plotting import figure, output_notebook, show
from bokeh.models import ColumnDataSource, CustomJS, Range1d
from bokeh.io import push_notebook
from bokeh.tile_providers import STAMEN_TONER

# Configure Bokeh so that later show() calls render inline in the notebook.
output_notebook()

## Load NYC Taxi data (takes a dozen seconds or so...)

In [None]:
# Read only the pickup/dropoff coordinates and the passenger count from the CSV.
df = pd.read_csv(
    'data/nyc_taxi.csv',
    usecols=['pickup_x', 'pickup_y', 'dropoff_x', 'dropoff_y', 'passenger_count']
)

# Initial viewport passed to every figure below.
# NOTE(review): values look like Web Mercator metres (they are drawn over a tile layer) -- confirm.
x_range = (-8240227.037, -8231283.905)
y_range = (4974203, 4979238)

# Show the last few rows as the cell output.
df.tail()

## Define a simple plot

In [None]:
def base_plot():
    """Build the empty map figure that every example in this notebook draws on.

    The figure is 800x500 with pan, wheel-zoom and box-zoom tools, starts at
    the module-level ``x_range`` / ``y_range`` viewport, has the
    ``STAMEN_TONER`` tile layer added and its axes hidden.

    Returns
    -------
    The newly created Bokeh figure.
    """
    figure_kwargs = dict(
        tools='pan,wheel_zoom,box_zoom',
        plot_width=800,
        plot_height=500,
        x_range=x_range,
        y_range=y_range,
    )
    fig = figure(**figure_kwargs)
    fig.add_tile(STAMEN_TONER)
    # Hide both axes on the figure.
    fig.axis.visible = False
    return fig
    
# Shared glyph styling for the scatter examples: black outline, red fill.
options = {
    'line_color': 'black',
    'fill_color': 'red',
}

## A small sample (1,000 points) plots fine

In [None]:
# Draw 1,000 random pickups. The fixed random_state makes the sample (and so
# the rendered figure) identical on every Restart & Run All; change the seed
# to see a different draw.
samples = df.sample(n=1000, random_state=42)

# Plot each sampled pickup as a circle glyph on a fresh base map.
p = base_plot()
p.circle(x=samples['pickup_x'], y=samples['pickup_y'], **options)
show(p)

## With more than a couple thousand points (here 10,000), the study area becomes saturated

In [None]:
# Draw 10,000 random pickups. The fixed random_state makes the sample (and so
# the rendered figure) identical on every Restart & Run All; change the seed
# to see a different draw.
samples = df.sample(n=10000, random_state=42)

# Same scatter as a plain circle-glyph plot, now with ten times as many points.
p = base_plot()
p.circle(x=samples['pickup_x'], y=samples['pickup_y'], **options)
show(p)

## Using datashader, you can easily aggregate points and conquer over-saturation

In [None]:
from datashader.callbacks import IPythonKernelCallback

def create_image(ranges, agg_fn=ds.count):
    """Aggregate pickups for the given viewport and draw the result on ``p``.

    Parameters
    ----------
    ranges : dict
        Must contain ``'x_range'`` and ``'y_range'`` (each indexed at ``[0]``
        and ``[1]``) plus ``'h'`` and ``'w'``, which are used as the canvas
        height and width.
    agg_fn : callable, optional
        Called with ``'passenger_count'`` to build the reduction handed to
        ``Canvas.points``. Defaults to ``ds.count``.

    Returns
    -------
    None. The image is added to the module-level figure ``p`` as a side
    effect, so ``p`` must exist before this function is called.
    """
    x_bounds = ranges['x_range']
    y_bounds = ranges['y_range']
    height = ranges['h']
    width = ranges['w']

    # Aggregate the module-level DataFrame `df` onto a canvas of the requested size.
    canvas = ds.Canvas(plot_width=width, plot_height=height,
                       x_range=x_bounds, y_range=y_bounds)
    aggregate = canvas.points(df, 'pickup_x', 'pickup_y', agg_fn('passenger_count'))

    # Colour the aggregate between (255, 204, 204) and 'red' with how='log'.
    shaded = tf.interpolate(aggregate, (255, 204, 204), 'red', how='log')

    # Anchor the image at the viewport's lower bounds and span its full extent.
    span_y = y_bounds[1] - y_bounds[0]
    span_x = x_bounds[1] - x_bounds[0]
    p.image_rgba(image=[shaded.img], x=x_bounds[0], y=y_bounds[0],
                 dw=span_x, dh=span_y, dilate=False)

# Fresh base map; create_image() draws onto this module-level `p`.
p = base_plot()

# Hook create_image up to the figure through datashader's kernel callback,
# forwarding ds.count as agg_fn.
# NOTE(review): presumably this re-runs create_image when the viewport changes,
# throttled by `throttle=500` -- confirm against datashader.callbacks.
dsplot = IPythonKernelCallback(p, create_image, throttle=500, agg_fn=ds.count)

# Draw the initial image for the starting viewport, sized to match the figure.
create_image({
    'x_range': x_range,
    'y_range': y_range,
    'w': p.plot_width,
    'h': p.plot_height,
})
show(p)