In [None]:
import datashader as ds
import datashader.transfer_functions as tf
import pandas as pd

from bokeh.plotting import figure, output_notebook, show
from bokeh.models import ColumnDataSource, CustomJS, Range1d
from bokeh.io import push_notebook
from bokeh.tile_providers import STAMEN_TONER

# Direct Bokeh output to the notebook so that later show() calls display inline.
output_notebook()

## Load NYC Taxi data (may take about 30 seconds...)

In [None]:
# Read the taxi trips and keep only the columns this notebook references.
df = pd.read_csv('data/nyc_taxi.csv')
df = df[['pickup_longitude', 'pickup_latitude', 'dropoff_longitude', 'dropoff_latitude', 'passenger_count']]

# Initial viewport shared by every figure in this notebook.
# NOTE(review): the magnitudes suggest Web Mercator metres (matching the tile
# layer the figures add) rather than degrees — confirm against the CSV.
xmin = -8240227.037
ymin = 4974203
xmax = -8231283.905
ymax = 4979238

# Call form of print: identical output on Python 2, and valid syntax on Python 3
# (the bare `print '...'` statement is a SyntaxError there).
print('Data loaded...')

## A few points are fine

In [None]:
# A random subset of 1,000 trips: few enough that individual glyphs stay readable.
a_few_points = df.sample(n=1000)

# Pan/zoom-enabled figure over the shared viewport, with a map tile backdrop.
p = figure(
    tools='pan,wheel_zoom',
    plot_width=800,
    plot_height=500,
    x_range=(xmin, xmax),
    y_range=(ymin, ymax),
)
p.add_tile(STAMEN_TONER)
p.axis.visible = False

# One red circle (black outline) per sampled pickup location.
p.circle(
    x=a_few_points['pickup_longitude'],
    y=a_few_points['pickup_latitude'],
    line_color='black',
    fill_color='red',
)

In [None]:
# Display the scatter figure built in the previous cell.
show(p)

## When plotting more than a couple thousand points, the study area is saturated.

In [None]:
output_notebook()

# Ten times as many sampled trips as the previous example.
a_few_points_more = df.sample(n=10000)

# Same figure configuration as before: pan/zoom tools, shared viewport, tile backdrop.
p = figure(
    tools='pan,wheel_zoom',
    plot_width=800,
    plot_height=500,
    x_range=(xmin, xmax),
    y_range=(ymin, ymax),
)
p.add_tile(STAMEN_TONER)
p.axis.visible = False

# One red circle (black outline) per sampled pickup location.
p.circle(
    x=a_few_points_more['pickup_longitude'],
    y=a_few_points_more['pickup_latitude'],
    line_color='black',
    fill_color='red',
)

In [None]:
# Display the 10,000-point scatter figure built in the previous cell.
show(p)

In [None]:
## Using datashader, you can easily aggregate points and conquer over-saturation

In [None]:
# Direct Bokeh output to the notebook before building the datashader-backed figure.
output_notebook()
# JavaScript template for the CustomJS range callbacks.
#
# The single %s placeholder is filled (via the % operator) with the name of the
# Python function to call. When run in the browser the script:
#   1. debounces itself through window._range_timer (150 ms),
#   2. looks up the plot tagged 'datashader-plot' in Bokeh.index,
#   3. collects the current x/y range bounds and the plot frame width/height,
#   4. executes "<function>(<ranges as JSON>)" on the notebook kernel.
#
# Because the template goes through %-formatting, it must not contain any other
# percent sign.
#
# `callbacks` is declared with `var` so it stays local to update_plot instead of
# leaking onto the global object (and failing outright under strict mode).
jscode = """
        // Define a callback to capture errors on the Python side
        function callback(msg){
            console.log("Python callback returned unexpected message:", msg)
        }
        
        function getPlotByTag(tagName) {
            var plot;
            $.each(Bokeh.index, function(index, value) {
                if ($.inArray(tagName, value.model.attributes.tags) > -1) {
                    plot = value;
                    return;
                }
            });
            return plot;
        };
        
        function update_plot() {
            var callbacks = {iopub: {output: callback}};
            var plot = getPlotByTag('datashader-plot');
            
            // Generate a command to execute in Python
            var ranges = {xmin: x_range.attributes.start,
                          ymin: y_range.attributes.start,
                          xmax: x_range.attributes.end,
                          ymax: y_range.attributes.end,
                          w: plot.frame.get('width'),
                          h: plot.frame.get('height')}
            console.log(ranges);
                          
            var range_str = JSON.stringify(ranges)
            var cmd = "%s(" + range_str + ")"

            // Execute the command on the Python kernel
            var kernel = IPython.notebook.kernel;
            kernel.execute(cmd, callbacks, {silent : false});
        }
        
        if (window._range_timer) {
            clearTimeout(window._range_timer);
        }

        window._range_timer = setTimeout(update_plot, 150, "replace");    
"""


# Base figure for the datashader overlay: pan/zoom tools, shared viewport, tile backdrop.
p = figure(
    tools='pan,wheel_zoom',
    plot_width=800,
    plot_height=500,
    x_range=(xmin, xmax),
    y_range=(ymin, ymax),
)
p.add_tile(STAMEN_TONER)
p.axis.visible = False
# The JavaScript callback locates this plot in the browser by this tag.
p.tags = ['datashader-plot']

def create_image(ranges):
    """Aggregate pickups inside ``ranges`` and draw them on ``p`` as an RGBA image.

    Parameters
    ----------
    ranges : dict
        Must provide ``xmin``, ``ymin``, ``xmax``, ``ymax`` (viewport bounds in
        the same coordinates as the figure ranges) and ``w``, ``h`` (raster
        size in pixels).

    Side effects
    ------------
    Adds an ``image_rgba`` glyph covering the viewport to the module-level
    figure ``p``. Reads the module-level DataFrame ``df``.
    """
    xmin, ymin, xmax, ymax = ranges['xmin'], ranges['ymin'], ranges['xmax'], ranges['ymax']
    # w/h can arrive from the browser (plot frame width/height serialised as
    # JSON) and are not guaranteed to be whole numbers; the canvas needs
    # integer pixel counts.
    w, h = int(round(ranges['w'])), int(round(ranges['h']))
    cvs = ds.Canvas(plot_width=w, plot_height=h, x_range=(xmin, xmax), y_range=(ymin, ymax))
    agg = cvs.points(df, 'pickup_longitude', 'pickup_latitude', count=ds.count('passenger_count'))
    # Map counts onto a light-pink -> red ramp using a log scale.
    pix = tf.interpolate(agg.count, (255, 204, 204), 'red', how='log')
    dh = ymax - ymin
    dw = xmax - xmin
    # Anchor the image at the viewport's lower-left corner and stretch it over the full extent.
    p.image_rgba(image=[pix.img], x=xmin, y=ymin, dw=dw, dh=dh, dilate=False)

def update_image(ranges):
    """Replace the current datashader image with one rendered for ``ranges``.

    This is the function name substituted into the JavaScript template, so the
    browser executes ``update_image({...})`` on the kernel when the x or y
    range changes. ``ranges`` is the same mapping ``create_image`` expects
    (``xmin``, ``ymin``, ``xmax``, ``ymax``, ``w``, ``h``).
    """
    # Drop the most recently added renderer — presumably the image from the
    # previous create_image call; verify nothing else is appended after it.
    p.renderers.pop()
    create_image(ranges)
    # Push the modified figure to the plot already displayed in the notebook.
    push_notebook()

# Fill the JavaScript template once with the Python entry point to call back into.
callback_code = jscode % 'update_image'

# Attach a separate CustomJS instance to each axis range; both receive the two
# ranges as arguments so the script can read the full viewport.
for watched_range in (p.x_range, p.y_range):
    watched_range.callback = CustomJS(
        args=dict(x_range=p.x_range, y_range=p.y_range),
        code=callback_code,
    )

# Initial render over the starting viewport. No frame size has come back from
# the browser at this point, so the raster size is hard-coded to 700x400.
# NOTE(review): confirm this is close to the real plot frame of the 800x500 figure.
create_image({
    'xmin': xmin,
    'ymin': ymin,
    'xmax': xmax,
    'ymax': ymax,
    'w': 700,
    'h': 400,
})

In [None]:
# Display the datashader-backed figure; range changes then call update_image through the JS callbacks.
show(p)