In [1]:
# Import Modules
%load_ext autoreload
%autoreload 2

import sys
from math import pi
from pathlib import Path

import numpy as np
import pandas as pd
from dotenv import load_dotenv

import bokeh
from bokeh.layouts import column, gridplot
from bokeh.models import ColumnDataSource, CustomJS, HoverTool, Select, Slider
from bokeh.plotting import figure, output_notebook, show

In [2]:
# Set paths

PROJECT_ROOT = !git rev-parse --show-toplevel
PROJECT_ROOT = Path(PROJECT_ROOT[0])
print(PROJECT_ROOT)
sys.path.append(str(PROJECT_ROOT))
sys.path.append("/workspaces/template.python.project/tests")

/workspaces/template.python.project


In [3]:
from mock_dataset import mock_dataset
from src.data.utils import bin_and_agg

In [4]:
# Request one mock column per supported kind; every kind gets the same
# [200, 1, 0] specification.
# NOTE(review): the meaning of the three numbers is defined by `mock_dataset`
# (not visible here) - the resulting frame has 200 rows and one column per kind.
specs = {
    kind: [200, 1, 0]
    for kind in ("float", "int", "categorical", "bool", "str", "datetime")
}
df = mock_dataset(specs=specs)


In [5]:
# Display the mock dataset: a bare last expression renders the rich DataFrame repr.
df

Unnamed: 0,float_0,int_0,categorical_0,bool_0,str_0,datetime_0
0,0.157690,33619.0,CBA,1.0,FAIWTUHOSAWAUBGHJAXGCLLWZEMSWPGHEUQJIRPPFFSAIU...,2021-02-10 21:17:00.790316
1,0.190693,219953.0,ADC,1.0,RCAGONMBAKUEVUOXMLGOTOYFZZEUHVSLYPCQYJBKYVZBKC...,2021-02-11 21:17:00.790316
2,0.123471,490804.0,DDC,1.0,ASYEJELMZQSDVVMSNJNTQQWSSJONODTAWKZTHLKPBLICLJ...,2021-02-12 21:17:00.790316
3,0.949201,150439.0,BBD,0.0,SOXTEENRKQJOCVKTXSPKWUVQLVQTEERWTKVLJKQEQLREGT...,2021-02-13 21:17:00.790316
4,0.844878,141164.0,CDB,0.0,PMEHZEOBDHYFLYBVLLQCDTIQUJACPBYOBFYTLQTUUDBRMM...,2021-02-14 21:17:00.790316
...,...,...,...,...,...,...
195,0.447951,54791.0,ADA,1.0,NDCUYAAAPYGEPBMGXMGMFXJLOXFMSWUOJCANTZMEAXIUCS...,2021-08-24 21:17:00.790316
196,0.022397,2981.0,CAD,0.0,PNLYPRNRUIPAFKTNGWNOWZUZSXREFZMQXZMNKPFEWCNSIZ...,2021-08-25 21:17:00.790316
197,0.257260,161225.0,DCD,1.0,OMYAAYOISDDNCNVYGJOYBISYXAFPMKRQTKYKLJTVPVURAH...,2021-08-26 21:17:00.790316
198,0.816394,225668.0,CCB,1.0,XINZRLMPHLSBXVCCDCSHRQIZNTGCLHNXTQJMUILDWWKLPE...,2021-08-27 21:17:00.790316


In [6]:
# Bin `datetime_0` with the "M" boundaries and aggregate `bool_0` per bin,
# then turn the resulting index back into a regular column so the bin label
# can be used as a plotting field.
data = bin_and_agg(
    "datetime_0",
    data=df,
    secondary_feature="bool_0",
    bins_boundaries="M",
).reset_index()
data

Unnamed: 0,datetime_0,count_bool_0,cum_count_bool_0,proportions_bool_0,cum_proportions_bool_0,min_bool_0,mean_bool_0,25%_bool_0,50%_bool_0,75%_bool_0,max_bool_0
0,2021-02-28,19,19,0.095,0.095,0.0,0.631579,0.0,1.0,1.0,1.0
1,2021-03-31,31,50,0.155,0.25,0.0,0.451613,0.0,0.0,1.0,1.0
2,2021-04-30,30,80,0.15,0.4,0.0,0.433333,0.0,0.0,1.0,1.0
3,2021-05-31,31,111,0.155,0.555,0.0,0.548387,0.0,1.0,1.0,1.0
4,2021-06-30,30,141,0.15,0.705,0.0,0.6,0.0,1.0,1.0,1.0
5,2021-07-31,31,172,0.155,0.86,0.0,0.483871,0.0,0.0,1.0,1.0
6,2021-08-31,28,200,0.14,1.0,0.0,0.571429,0.0,1.0,1.0,1.0


# Line Plot

## Simple Plot

In [10]:
# This is the first plotting cell in document order, so it must route Bokeh
# output to the notebook itself; otherwise `show` does not render inline on a
# fresh kernel.
output_notebook()

x = "datetime_0"
y = "float_0"

# create a plot and style its properties
line = figure(x_axis_type="datetime", plot_width=1000, plot_height=400, title=f"{y} vs {x}", toolbar_location="above")
line.yaxis.axis_label = y
line.xaxis.axis_label = x

# Format the tooltip. Datetime columns reach the browser as milliseconds since
# the epoch, so the x field needs an explicit datetime format to be readable.
tooltips = [(y, f"@{y}"), (x, f"@{x}{{%F %T}}")]

# Add a single HoverTool to the figure (no unused duplicate instance)
line.add_tools(HoverTool(tooltips=tooltips, formatters={f"@{x}": "datetime"}))

# create line renderer
line.line(x=x, y=y, line_width=2, source=df)
line.circle(x=x, y=y, source=df)

show(line)

# Bar Plot

In [9]:
output_notebook()

x = "datetime_0"
y = "int_0"

bar = figure(x_axis_type="datetime", plot_width=1000, plot_height=400, title=f"{y} vs {x}")

# On a datetime axis, glyph widths are expressed in milliseconds. The mock data
# has one row per day, so use 80% of a day to get visible, non-touching bars.
bar_width_ms = 0.8 * 24 * 60 * 60 * 1000
bar.vbar(x=x, top=y, width=bar_width_ms, source=df)

# Format the tooltip. The datetime field needs an explicit format, otherwise
# it is shown as milliseconds since the epoch.
tooltips = [(y, f"@{y}"), (x, f"@{x}{{%F %T}}")]

# Configure a renderer to be used upon hover: an invisible circle at the top
# of each bar that becomes visible when hovered.
hover_glyph = bar.circle(x=x, y=y, source=df, size=15, alpha=0, hover_fill_color='black', hover_alpha=0.5)

# Add the HoverTool to the figure, restricted to the hover-only renderer
bar.add_tools(HoverTool(tooltips=tooltips, formatters={f"@{x}": "datetime"}, renderers=[hover_glyph]))

bar.grid.grid_line_alpha=0.3

show(bar)

# Scatter Plot

In [11]:
output_notebook()

x = "float_0"
y = "int_0"

# Figure with a faint grid; the title names both plotted columns.
scatter = figure(plot_width=1000, plot_height=400, title=f"{y} vs {x}")
scatter.grid.grid_line_alpha = 0.3

# One circle marker per row of `df`.
scatter.circle(x=x, y=y, size=10, source=df)

# Hover tooltip listing the y value first, then the x value.
scatter_hover = HoverTool(tooltips=[(field, f"@{field}") for field in (y, x)])
scatter.add_tools(scatter_hover)

# Render the figure in the notebook.
show(scatter)

# Histogram

## Simple

In [13]:
# src: https://stackoverflow.com/questions/45807960/bokeh-histogram-will-not-plot

output_notebook()

x = "float_0"
# density=True normalises the bin heights so the histogram integrates to 1.
hist, edges = np.histogram(df[x], density=True, bins=50)

# The title depends only on `x`: a histogram has a single variable, and the
# previous title relied on a `y` left over from an earlier cell.
histogram = figure(plot_width=1000, plot_height=400, title=f"Histogram of {x}")
histogram.xaxis.axis_label = x
histogram.yaxis.axis_label = "Density"

# One rectangle per bin, spanning consecutive bin edges.
histogram.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:], line_color="white")

histogram.grid.grid_line_alpha=0.3

show(histogram)

## Interactive

In [15]:
# src: https://towardsdatascience.com/interactive-histograms-with-bokeh-202b522265f3

output_notebook()

x = "float_0"
# density=True: bin heights are probability densities, not raw counts.
hist, edges = np.histogram(df[x], density=True, bins=50)

# One row per bin: its height, its edges and a human-readable interval label.
hist_df = pd.DataFrame({"density": hist,
                        "left": edges[:-1],
                        "right": edges[1:]})
hist_df["interval"] = ["%0.2f to %0.2f" % (left, right) for left,
                        right in zip(hist_df["left"], hist_df["right"])]

src = ColumnDataSource(hist_df)

# The y axis and the tooltip are labelled "Density" to match what
# np.histogram(..., density=True) actually returns.
plot = figure(plot_height = 600, plot_width = 600,
      title = "Histogram of {}".format(x.capitalize()),
      x_axis_label = x.capitalize(),
      y_axis_label = "Density")

plot.quad(bottom = 0, top = "density", left = "left",
    right = "right", source = src,
    line_color = "black", fill_alpha = 0.7,
    hover_fill_alpha = 1.0)

hover = HoverTool(tooltips = [('Interval', '@interval'),
                          ('Density', '@density')])
plot.add_tools(hover)

show(plot)

# Combine Line and Bar Plots

In [16]:
output_notebook()

x = "datetime_0"
y_line = "float_0"
y_bar = "int_0"

# On a datetime axis, glyph widths are expressed in milliseconds. The mock data
# has one row per day, so use 80% of a day to get visible, non-touching bars.
bar_width_ms = 0.8 * 24 * 60 * 60 * 1000

# --- Top panel: line plot -------------------------------------------------
# create a plot and style its properties
line = figure(x_axis_type="datetime", plot_width=1000, plot_height=400, title=f"{y_line} vs {x}")
line.yaxis.axis_label = y_line

# create line renderer
line.line(x=x, y=y_line, line_width=2, source=df)
line.circle(x=x, y=y_line, source=df)

# The x axis is shared with the bar panel below, so it is only drawn there.
line.grid.visible = False
line.xaxis.visible = False

# Format the tooltip. The datetime field needs an explicit format, otherwise
# it is shown as milliseconds since the epoch.
line_tooltips = [(y_line, f"@{y_line}"), (x, f"@{x}{{%F %T}}")]

# Configure a renderer to be used upon hover
line_hover_glyph = line.circle(x=x, y=y_line, source=df, size=15, alpha=0, hover_fill_color='black', hover_alpha=0.5)

# Add the HoverTool to the figure
line.add_tools(HoverTool(tooltips=line_tooltips, formatters={f"@{x}": "datetime"}, renderers=[line_hover_glyph]))

# --- Bottom panel: bar plot -------------------------------------------------
# Sharing `x_range` keeps pan/zoom of both panels in sync.
bar = figure(x_axis_type="datetime", plot_width=1000, plot_height=400, title=f"{y_bar} vs {x}", x_range=line.x_range)

# Use this cell's own `y_bar`; the bare `y` is whatever an earlier cell left behind.
bar.vbar(x=x, top=y_bar, width=bar_width_ms, source=df)

# Format the tooltip
bar_tooltips = [(y_bar, f"@{y_bar}"), (x, f"@{x}{{%F %T}}")]

# Configure a renderer to be used upon hover
bar_hover_glyph = bar.circle(x=x, y=y_bar, source=df, size=15, alpha=0, hover_fill_color='black', hover_alpha=0.5)

# Add the HoverTool to the figure
bar.add_tools(HoverTool(tooltips=bar_tooltips, formatters={f"@{x}": "datetime"}, renderers=[bar_hover_glyph]))

# Axis labels and grid styling for the bottom panel
bar.yaxis.axis_label = y_bar
bar.xaxis.axis_label = x
bar.grid.visible = False


# Stack the two panels vertically
show(gridplot([[line],[bar]]))

# Interactive Line Plot

In [17]:
# src: https://stackoverflow.com/questions/61792141/select-and-update-pandas-dataframe-columns-in-bokeh-plot
# https://stackoverflow.com/questions/64103793/how-to-update-axis-labels-from-a-callback-with-bokeh


#!TODO: fix issue with hover (https://stackoverflow.com/questions/56518254/trigger-display-of-the-hovertool-tooltips-via-customjs-in-bokeh)

output_notebook()

ds = ColumnDataSource(data)

x = "datetime_0"
y = "mean_bool_0"

# create a plot and style its properties
line = figure(x_axis_type="datetime", plot_width=1000, plot_height=400, title=f"{y} vs {x}", toolbar_location="above")
line.yaxis.axis_label = y
line.xaxis.axis_label = x

# Format the tooltip. The datetime field needs an explicit format, otherwise
# it is shown as milliseconds since the epoch.
# NOTE: the tooltip is not touched by the callback below, so it keeps showing
# the initially selected column (see the TODO above).
tooltips = [(y, f"@{y}"), (x, f"@{x}{{%F %T}}")]

# Add a single HoverTool to the figure (no unused duplicate instance)
line.add_tools(HoverTool(tooltips=tooltips, formatters={f"@{x}": "datetime"}))

# create line renderer; keep handles so the callback can re-point their y field
line_renderer = line.line(x=x, y=y, line_width=2, source=ds)
circle_renderer = line.circle(x=x, y=y, source=ds)

line.grid.grid_line_alpha=0.3

# Start the dropdown on the column that is actually plotted, so the widget and
# the figure agree before the first user interaction.
select = Select(title="Column", value=y, options=list(data.columns.drop(x)))

# Browser-side callback: `cb_obj` is the Select widget that fired the change.
# It re-points both glyphs at the chosen column and keeps the title and the
# y-axis label in sync with it.
handler = CustomJS(args=dict(line_renderer=line_renderer
                            ,circle_renderer=circle_renderer
                            ,plot=line
                            ,yaxis=line.yaxis[0]
                            ,x=x
                            )
                    ,code="""
                        const column_name = cb_obj.value;
                        line_renderer.glyph.y = {field: column_name};
                        circle_renderer.glyph.y = {field: column_name};
                        plot.title.text = column_name + " vs " + x;
                        yaxis.axis_label = column_name;
                        """
                    )

select.js_on_change('value', handler)

# No explicit layout id here: the "Test Zone" cell looks its own layout up via
# Bokeh.index["myplot"], and reusing that id on this layout would collide.
show(column(select, line))

# Test Zone

In [192]:
output_notebook()

# Random demo data; the generator is seeded so the figure is reproducible.
# The arrays get their own names so the column-name strings `x` / `y` used by
# the cells above are not silently replaced by arrays.
N = 100
rng = np.random.default_rng(42)
x_values = rng.random(N) * 100
y_values = rng.random(N) * 100

radii = rng.random(N) * 1.5
# Colour each point from its position: red grows with x, green with y.
colors = [
    "#%02x%02x%02x" % (int(r), int(g), 150) for r, g in zip(50 + 2 * x_values, 30 + 2 * y_values)
]

TOOLS="hover,crosshair,pan,wheel_zoom,zoom_in,zoom_out,box_zoom,undo,redo,reset,tap,save,box_select,poly_select,lasso_select,"

p = figure(tools=TOOLS)

source = ColumnDataSource(dict(x=x_values, y=y_values, radius=radii, colors=colors))

renderer = p.scatter("x", "y", radius="radius",
          fill_color="colors", fill_alpha=0.6,
          line_color=None, source=source)

# The slider selects a point by index, so the last valid value is N - 1
# (an end of N would index one element past the data).
slider = Slider(start=0, end=N - 1, value=0, step=1)

# Browser-side callback: look up the selected point, convert its data
# coordinates to screen coordinates and ask the hover tool to inspect there.
# NOTE(review): `_inspect`, `child_views`, `tool_views` and `renderer_views`
# are BokehJS view internals - confirm they exist in the installed version.
code = """
let ind = slider.value;
let x = source.data.x[ind];
let y = source.data.y[ind];
let fig_view = Bokeh.index["myplot"].child_views[1];
let hover_view = fig_view.tool_views[hovertool.id];
let renderer_view = fig_view.renderer_views[renderer.id];
let xs = renderer_view.xscale.compute(x);
let ys = renderer_view.yscale.compute(y);
hover_view._inspect(xs, ys);
"""

callback = CustomJS(args=dict(
    slider=slider,
    hovertool=p.select_one(HoverTool),
    source=source,
    renderer=renderer
    ), code=code)
slider.js_on_change('value', callback)

# The layout id must stay "myplot": the callback finds the figure view through
# Bokeh.index["myplot"], with the figure as the second child of the column.
show(column(slider, p, id="myplot"))