In [None]:
# This notebook is a copy of the following notebook:
# https://anaconda.org/jbednar/nyc_taxi/notebook
# I made this notebook while learning how to use Bokeh.
# I plan to make other notebooks in the future.

import pandas as pd
import numpy as np
import os
# Show which files are available under ../input before loading anything.
print(os.listdir("../input"))

In [None]:
def convert_lon(d, latvar):
    '''
    Convert a latitude column to its Web Mercator y coordinate.

    Despite the name (kept so existing callers keep working), this function
    operates on *latitude* values: it applies the spherical Mercator formula
    y = scale * ln(tan((90 + lat) * pi / 360)) / (pi / 180).

    Parameters
    ----------
    d : mapping from column name to numeric data
        Anything indexable by ``latvar`` whose value supports numpy
        arithmetic, e.g. a pandas DataFrame or a dict holding a float or
        an array.
    latvar : str
        Key/column in ``d`` holding latitudes in decimal degrees.

    Returns
    -------
    The converted values, with the same shape as ``d[latvar]``.
    The input is not modified.

    Notes
    -----
    Latitudes whose tangent term is not positive (for example values
    outside the -90..90 degree range) produce NaN or -inf instead of
    raising, because the logarithm is evaluated by numpy.
    '''
    # 20037508.34 is the Web Mercator half-extent; dividing by 180 gives the
    # scale per degree.
    scale = 20037508.34 / 180
    latitude = d[latvar]
    # The previous implementation first made an unused copy of the column;
    # the arithmetic below already produces a new object.
    return scale * (np.log(np.tan((90. + latitude) * np.pi/360))/(np.pi/180.))

def convert_df(df):
    '''
    Return a new DataFrame with pickup/dropoff positions in Web Mercator.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``key``, ``fare_amount``,
        ``pickup_longitude``, ``pickup_latitude``, ``dropoff_longitude``,
        ``dropoff_latitude`` and ``passenger_count``.

    Returns
    -------
    pandas.DataFrame
        Columns ``key``, ``fare_amount``, ``pickup_x``, ``pickup_y``,
        ``dropoff_x``, ``dropoff_y`` and ``passenger_count``. The input
        frame is left untouched.
    '''
    # Longitude maps linearly onto the Web Mercator x axis.
    scale = 20037508.34 / 180

    pickup_x = df.pickup_longitude * scale
    dropoff_x = df.dropoff_longitude * scale

    # convert_lon (despite its name) turns a latitude column into the y value.
    pickup_y = convert_lon(df, "pickup_latitude")
    # Bug fix: this used to read "pickup_latitude", which placed every
    # dropoff at its pickup's y coordinate.
    dropoff_y = convert_lon(df, "dropoff_latitude")

    return pd.DataFrame({"key": df["key"],
                         "fare_amount": df["fare_amount"],
                         "pickup_x": pickup_x,
                         "pickup_y": pickup_y,
                         "dropoff_x": dropoff_x,
                         "dropoff_y": dropoff_y,
                         "passenger_count": df["passenger_count"]})

In [None]:
# Read the first 100,000 rows of the training CSV and convert them with
# convert_df; the %time magic reports how long the statement takes.
%time df = convert_df(pd.read_csv("../input/train.csv", nrows = 100000))

In [None]:
# Preview the first rows of the converted frame.
df.head()

In [None]:
# Column dtypes and non-null counts.
df.info()

In [None]:
# Summary statistics of the numeric columns.
df.describe()

In [None]:
from bokeh.models import BoxZoomTool # lets the user draw a rectangle and zoom into it
from bokeh.plotting import figure, output_notebook, show

# Send Bokeh output to the notebook.
output_notebook()

# Plot extent in Web Mercator coordinates. NYC holds the (x_range, y_range)
# pair, and the same two tuples are also bound to x_range and y_range.
NYC = x_range, y_range = ((-8242000, -8210000), (4965000, 4990000))

# Default figure dimensions used by base_plot and the datashader canvases;
# the height is the width floor-divided by 1.2, converted to int.
plot_width = int(750)
plot_height = int(plot_width//1.2)

def base_plot(tools = "pan, wheel_zoom, reset", plot_width = plot_width, plot_height = plot_height, **plot_args ):
    '''
    Create an undecorated Bokeh figure covering the module-level NYC extent.

    The figure uses mercator axis types, has no outline, no borders, hidden
    axes and no grid lines. Any extra keyword arguments are forwarded to
    ``figure``.

    Parameters
    ----------
    tools : str
        Comma-separated Bokeh tool names.
    plot_width, plot_height : int
        Figure dimensions; default to the module-level values.
    **plot_args
        Additional keyword arguments passed through to ``figure``.

    Returns
    -------
    The configured Bokeh figure.
    '''
    no_borders = dict(min_border = 0, min_border_left = 0, min_border_right = 0,
                      min_border_top = 0, min_border_bottom = 0)
    fig = figure(tools = tools, plot_width = plot_width, plot_height = plot_height,
                 x_range = x_range, y_range = y_range, outline_line_color = None,
                 x_axis_type = "mercator", y_axis_type = "mercator",
                 **no_borders, **plot_args)

    # Strip the decorations so only the data (and optional tiles) are visible.
    fig.axis.visible = False
    for grid in (fig.xgrid, fig.ygrid):
        grid.grid_line_color = None

    return fig

# Marker styling shared by the scatter plots in the following cells:
# no outline, blue fill, size 5.
options = dict(line_color = None, fill_color = "blue", size = 5)


In [None]:
%%time

from bokeh.tile_providers import STAMEN_TERRAIN, CARTODBPOSITRON

# Plot a random subset of the rows instead of the whole frame.
nr_sample = 10000
samples = df.sample(n = nr_sample)
p = base_plot()
# Add the STAMEN_TERRAIN tile layer as the map background.
p.add_tile(STAMEN_TERRAIN)
# The plot also renders without **options; they only style the markers.
p.circle(x = samples["pickup_x"], y = samples["pickup_y"], **options)
show(p)

In [None]:
%%time

# Pickup scatter on a fresh random sample of nr_sample rows, this time
# without a tile layer behind the points.
samples = df.sample(n=nr_sample)
p = base_plot()
p.circle(x = samples["pickup_x"], y = samples["pickup_y"], **options)
show(p)

In [None]:
# Print the frame summary before and after removing rows with missing values.
df.info()
# inplace=True modifies df itself, so every later cell sees the reduced frame.
df.dropna(inplace = True)
df.info()

In [None]:
%%time
options = dict(line_color=None, fill_color='blue', size=1, alpha=0.1)
samples = df.sample(n=100000)
p = base_plot()
p.circle(x=samples['dropoff_x'], y=samples['dropoff_y'], **options)
show(p)

In [None]:
#df = df.astype(np.float32)
# Keep only rows whose pickup_x is non-zero (a longitude of 0 converts to
# x == 0), then take at most the first 100,000 of the remaining rows.
df = df[df["pickup_x"] != 0][:100000]

In [None]:
import datashader as ds
from datashader import transfer_functions as tf
from datashader.colors import Greys9
# Greys9 in reverse order, with the last two entries of the reversed list removed.
Greys9_r = list(reversed(Greys9))[:-2]

# Canvas with the default figure dimensions over the NYC extent.
cvs = ds.Canvas(plot_width=plot_width, plot_height=plot_height, x_range=x_range, y_range=y_range)
# Aggregate the dropoff points onto the canvas using a count reduction on
# the passenger_count column.
agg = cvs.points(df, 'dropoff_x', 'dropoff_y',  ds.count('passenger_count'))
# Colour the aggregate from white to dark blue with linear scaling.
img = tf.shade(agg, cmap=["white", 'darkblue'], how='linear')
img

In [None]:
import numpy as np

def histogram(x,colors=None):
    '''
    Show a 100-bin histogram of ``x`` as a small Bokeh figure.

    The first bin is left out of the drawing; the bars start at the second
    bin edge. The minimum and maximum of ``x`` are printed before the figure
    is shown.

    Parameters
    ----------
    x : array-like
        Values to bin.
    colors : optional
        Accepted but not used.
    '''
    counts, bin_edges = np.histogram(x, bins=100)
    zero_borders = dict(min_border=0, min_border_left=0, min_border_right=0,
                        min_border_top=0, min_border_bottom=0)
    fig = figure(y_axis_label="Pixels", tools='', height=130,
                 outline_line_color=None, **zero_borders)
    # Bars for every bin except the first one.
    fig.quad(top=counts[1:], bottom=0, left=bin_edges[1:-1], right=bin_edges[2:])
    print("min: {}, max: {}".format(np.min(x),np.max(x)))
    show(fig)
    
# Distribution of the raw aggregate values.
histogram(agg.values)


In [None]:
# Distribution after log1p, followed by the aggregate shaded with how='log'.
histogram(np.log1p(agg.values))

tf.shade(agg, cmap=Greys9_r, how='log')

In [None]:
# Distribution after tf.eq_hist, followed by the aggregate shaded with how='eq_hist'.
histogram(tf.eq_hist(agg.values))
    
tf.shade(agg, cmap=Greys9_r, how='eq_hist')

In [None]:
import datashader as ds
from datashader.bokeh_ext import InteractiveImage
from functools import partial
from datashader.utils import export_image
from datashader.colors import colormap_select, Greys9, Hot, inferno

# Background colour shared by the exported images and the plots below.
background = "black"
# export_image pre-bound with export_path="export" and that background.
export = partial(export_image, export_path="export", background=background)
# colormap_select pre-bound with reverse=True when the background is black.
cm = partial(colormap_select, reverse=(background=="black"))

def create_image(x_range, y_range, w=plot_width, h=plot_height):
    '''
    Render the dropoff points of the module-level ``df`` for a given extent.

    The points are aggregated with a count reduction on ``passenger_count``,
    shaded with the ``Hot`` colormap using ``eq_hist`` scaling, and spread
    with ``dynspread`` (threshold 0.5, at most 4 pixels).

    Parameters
    ----------
    x_range, y_range : tuple
        Extent of the canvas along each axis.
    w, h : int
        Canvas dimensions; default to the module-level plot size.

    Returns
    -------
    The spread datashader image.
    '''
    canvas = ds.Canvas(plot_width=w, plot_height=h, x_range=x_range, y_range=y_range)
    dropoff_counts = canvas.points(df, 'dropoff_x', 'dropoff_y',  ds.count('passenger_count'))
    shaded = tf.shade(dropoff_counts, cmap=Hot, how='eq_hist')
    return tf.dynspread(shaded, threshold=0.5, max_px=4)

# Base figure filled with the configured background colour.
p = base_plot(background_fill_color=background)
# Export a rendering of the full NYC extent under the name "NYCT_hot".
export(create_image(*NYC),"NYCT_hot")
# Hand the figure and the rendering callback to InteractiveImage
# (presumably re-invoked on pan/zoom; see datashader.bokeh_ext).
InteractiveImage(p, create_image)

In [None]:
import numpy as np
from functools import partial

def create_image90(x_range, y_range, w=plot_width, h=plot_height):
    '''
    Render only the dropoff bins above the 90th percentile of the aggregate.

    The dropoff points of the module-level ``df`` are aggregated with a count
    reduction on ``passenger_count``; bins that do not exceed the 90th
    percentile of that aggregate are masked out. The remainder is shaded with
    the ``inferno`` colormap using ``eq_hist`` scaling and spread with
    ``dynspread`` (threshold 0.3, at most 4 pixels).

    Parameters
    ----------
    x_range, y_range : tuple
        Extent of the canvas along each axis.
    w, h : int
        Canvas dimensions; default to the module-level plot size.

    Returns
    -------
    The spread datashader image.
    '''
    canvas = ds.Canvas(plot_width=w, plot_height=h, x_range=x_range, y_range=y_range)
    dropoff_counts = canvas.points(df, 'dropoff_x', 'dropoff_y',  ds.count('passenger_count'))
    cutoff = np.percentile(dropoff_counts,90)
    busiest = dropoff_counts.where(dropoff_counts>cutoff)
    shaded = tf.shade(busiest, cmap=inferno, how='eq_hist')
    return tf.dynspread(shaded, threshold=0.3, max_px=4)
    
p = base_plot()
# Map tiles behind the thresholded dropoff image.
p.add_tile(STAMEN_TERRAIN)
# Export the same 90th-percentile rendering that is shown interactively
# below; this previously called create_image, so the "NYCT_90th" file held
# the unthresholded image instead.
export(create_image90(*NYC),"NYCT_90th")
InteractiveImage(p, create_image90)

In [None]:
def merged_images(x_range, y_range, w=plot_width, h=plot_height, how='log'):
    '''
    Render where pickups outnumber dropoffs and vice versa.

    Pickup and dropoff points of the module-level ``df`` are aggregated
    separately on the same canvas, and their coordinate names are renamed to
    ``x``/``y`` so the two aggregates can be compared. Bins with more
    dropoffs are shaded in blues, bins with more pickups in reds; the two
    images are stacked and spread with ``dynspread`` (threshold 0.3, at most
    4 pixels).

    Parameters
    ----------
    x_range, y_range : tuple
        Extent of the canvas along each axis.
    w, h : int
        Canvas dimensions; default to the module-level plot size.
    how : str
        Scaling mode passed to ``tf.shade`` for both images.

    Returns
    -------
    The spread, stacked datashader image.
    '''
    canvas = ds.Canvas(plot_width=w, plot_height=h, x_range=x_range, y_range=y_range)

    pickup_counts = canvas.points(
        df, 'pickup_x',  'pickup_y',  ds.count('passenger_count')
    ).rename({'pickup_x': 'x', 'pickup_y': 'y'})
    dropoff_counts = canvas.points(
        df, 'dropoff_x', 'dropoff_y', ds.count('passenger_count')
    ).rename({'dropoff_x': 'x', 'dropoff_y': 'y'})

    dropoff_heavy = tf.shade(dropoff_counts.where(dropoff_counts > pickup_counts),
                             cmap=["darkblue", 'cornflowerblue'], how=how)
    pickup_heavy = tf.shade(pickup_counts.where(pickup_counts > dropoff_counts),
                            cmap=["darkred", 'orangered'], how=how)

    return tf.dynspread(tf.stack(pickup_heavy, dropoff_heavy), threshold=0.3, max_px=4)

# Base figure filled with the configured background colour.
p = base_plot(background_fill_color=background)
# Export the pickups-versus-dropoffs rendering of the full NYC extent.
export(merged_images(*NYC),"NYCT_pickups_vs_dropoffs")
# Hand the figure and the rendering callback to InteractiveImage
# (presumably re-invoked on pan/zoom; see datashader.bokeh_ext).
InteractiveImage(p, merged_images)