In [1]:
import numpy as np
from bokeh.io import output_notebook, show
from bokeh.plotting import figure
import bokeh

In [2]:
# Configure Bokeh to render plots inline in notebook output cells; every
# later show(p) call in this notebook relies on this having run first.
output_notebook()

# Bokeh Notes

For a good tutorial, see the bokeh-notebooks repository (https://github.com/bokeh/bokeh-notebooks), cloned locally at /projects/cloned/bokeh-notebooks.

## Styles

### Colors
Color reference: https://docs.bokeh.org/en/latest/docs/reference/colors.html

I can use any of the named colors as an argument to functions that accept colors.

### Labels
Title for the figure is an argument to the `figure` function.
For setting the x- and y-axis labels use `Figure.xaxis.axis_label` and `Figure.yaxis.axis_label`.

### Grids
By default, Bokeh charts show a grid. To hide the grid lines, set `Figure.xgrid.grid_line_color = None`. I can also change the alpha of the grid lines, e.g. `Figure.ygrid.grid_line_alpha = 0.8`.

## Scatter Plots

In [3]:
# Demo data for the plots below: x is 1..10, and each y is a random integer
# in [5, 50) plus a random fraction in [0, 1).
# A seeded, local Generator keeps the points identical on every
# Restart & Run All without touching NumPy's global random state.
rng = np.random.default_rng(42)
x = np.arange(1, 11)
y = rng.integers(5, 50, x.shape[0]) + rng.random(x.shape[0])

In [4]:
# Show the generated (x, y) pairs so the plotted points can be checked by eye.
pairs = list(zip(x, y))
print(pairs)

[(1, 28.794710363143633), (2, 19.035379920812854), (3, 28.961559277462296), (4, 22.51917672383398), (5, 49.896263877638816), (6, 24.711822940233333), (7, 8.289340374679252), (8, 40.95105518417206), (9, 27.818875019751108), (10, 20.79433792342964)]


In [5]:
# Basic scatter plot: one fixed-size circle per (x, y) pair.
# `width`/`height` are used instead of `plot_width`/`plot_height`, which were
# deprecated in Bokeh 2.4 and removed in Bokeh 3.0.
p = figure(width=400, height=400)
p.circle(x, y, size=15, line_color="navy", fill_color="orange", fill_alpha=0.5)
show(p)

In the above, the `size` parameter is in screen units (pixels). I can also use the `radius` parameter, but it is specified in data-space units. That is unintuitive when the x- and y-axes have very different scales: which axis is the radius measured along? (By default Bokeh measures it along the x-axis; the `radius_dimension` property changes this.) So it is usually best to use the `size` parameter.

I can also create Bubble Charts by varying the `size` parameter.

In [6]:
# Marker sizes for the bubble chart: ten evenly spaced values in [1000, 2000],
# shuffled in place and floor-divided by 100 so they land between 10 and 20.
z = np.linspace(1000, 2000, 10)
# A seeded Generator makes the shuffled order (and therefore the chart)
# reproducible across kernel restarts.
np.random.default_rng(7).shuffle(z)
sizes = z // 100
sizes

array([11., 14., 18., 17., 13., 20., 12., 15., 10., 16.])

In [7]:
# Bubble chart: each circle's size is taken from the matching entry of `sizes`.
p = figure()
p.circle(x=x, y=y, size=sizes)
show(p)

In addition to circles bokeh has the following marker types:

  * asterisk
  * circle_cross
  * circle_x
  * cross
  * diamond
  * diamond_cross
  * hex
  * inverted_triangle
  * square
  * square_cross
  * square_x
  * triangle
  * x

## Line Plots

In [8]:
# Simple line plot connecting the (x, y) points in order.
# `width`/`height` are used instead of `plot_width`/`plot_height`, which were
# deprecated in Bokeh 2.4 and removed in Bokeh 3.0.
p = figure(width=400, height=400)
p.line(x, y)
show(p)

## Timeseries

In [10]:
# Bokeh's bundled glucose sample dataset. As the output below shows, `data` is
# a DataFrame indexed by datetime with `isig` and `glucose` columns.
# NOTE(review): presumably requires the sample data to be downloaded first
# (bokeh.sampledata.download()) — confirm for the installed Bokeh version.
from bokeh.sampledata.glucose import data
# Preview the first five rows.
data.head()

Unnamed: 0_level_0,isig,glucose
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2010-03-24 09:51:00,22.59,258
2010-03-24 09:56:00,22.52,260
2010-03-24 10:01:00,22.23,258
2010-03-24 10:06:00,21.56,254
2010-03-24 10:11:00,20.79,246


In [11]:
# Summary statistics for the numeric columns. Only `glucose` appears in the
# output because `isig` is stored with object dtype (see data.info()).
data.describe()

Unnamed: 0,glucose
count,52281.0
mean,137.421912
std,53.880476
min,40.0
25%,99.0
50%,126.0
75%,163.0
max,400.0


In [12]:
# Index range, per-column dtypes and non-null counts for the glucose frame.
data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 52281 entries, 2010-03-24 09:51:00 to 2010-10-10 23:57:00
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   isig     52281 non-null  object
 1   glucose  52281 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 1.2+ MB


In [13]:
# Keep only the readings from 2010-10-01 through 2010-10-08 (both ends
# inclusive) by label-slicing the DatetimeIndex, then report (rows, columns).
week = data.loc["2010-10-01":"2010-10-08"]
week.shape

(2217, 2)

In [14]:
# Time-series line of one week of glucose readings on a datetime x-axis.
# `width`/`height` are used instead of `plot_width`/`plot_height`, which were
# deprecated in Bokeh 2.4 and removed in Bokeh 3.0.
p = figure(title="Glucose Range", x_axis_type="datetime", height=350, width=800)
# Hide the vertical grid lines and fade the horizontal ones.
p.xgrid.grid_line_color = None
p.ygrid.grid_line_alpha = 0.5
p.xaxis.axis_label = "Time"
p.yaxis.axis_label = "Value"
p.line(week.index, week.glucose)
show(p)

In [15]:
from bokeh.sampledata.stocks import AAPL

# AAPL is already "columnar": a plain dict mapping column names to lists.
# Print its type, its column names, and the number of rows (via the date column),
# one per line.
print(type(AAPL), AAPL.keys(), len(AAPL["date"]), sep="\n")

<class 'dict'>
dict_keys(['date', 'open', 'high', 'low', 'close', 'volume', 'adj_close'])
3270


In [16]:
# AAPL opening price over time. The date column is converted to datetime64
# values before being handed to the datetime x-axis.
p = figure(x_axis_type="datetime", title="AAPL")
dates = np.array(AAPL["date"], dtype=np.datetime64)
p.line(x=dates, y=AAPL["open"])
show(p)

## Segment

In [17]:
# Segments: one straight line per row, from the (x0, y0) point in
# `start_points` to the (x1, y1) point in the same row of `end_points`.
# `width`/`height` are used instead of `plot_width`/`plot_height`, which were
# deprecated in Bokeh 2.4 and removed in Bokeh 3.0.
p = figure(width=400, height=400)
start_points = np.array([[1, 1],
                         [2, 2],
                         [3, 3]])
end_points = np.array([[1.2, 1.2],
                       [2.4, 2.5],
                       [3.1, 3.7]])
p.segment(
    x0=start_points[:, 0],
    y0=start_points[:, 1],
    x1=end_points[:, 0],
    y1=end_points[:, 1],
)
show(p)

## Rays

In [18]:
# Rays: one ray per row of `start_points`, leaving the point at the given
# angle (radians, one angle per ray).
start_points = np.array([[1, 1],
                         [2, 2],
                         [3, 3]])
# `width`/`height` are used instead of `plot_width`/`plot_height`, which were
# deprecated in Bokeh 2.4 and removed in Bokeh 3.0.
p = figure(width=400, height=400)
# NOTE(review): length=None is kept from the original notebook; confirm against
# the Ray glyph reference how the installed Bokeh version treats a missing
# length (presumably the ray extends to the edge of the plot).
p.ray(
    x=start_points[:, 0],
    y=start_points[:, 1],
    length=None,
    angle=[np.pi/6, np.pi/4, np.pi/3],
)
show(p)

## Multiple Plots

In [19]:
# Several glyphs on one figure: a line through the points, with white-filled
# circle markers drawn on top of it. Note that this rebinds `x` and `y`.
x = [1, 2, 3, 4, 5]
y = [6, 7, 8, 7, 3]
# `width`/`height` are used instead of `plot_width`/`plot_height`, which were
# deprecated in Bokeh 2.4 and removed in Bokeh 3.0.
p = figure(width=400, height=400)
p.line(x, y, line_width=2)
p.circle(x, y, fill_color="white", size=8)
show(p)

## Categorical Charts

In [20]:
# Categorical x-axis: passing the list of category names as `x_range` makes
# the axis categorical, so glyphs can use those names directly as x values.
nodes = ["master", "worker1", "worker2"]
cpus = [0.1, 0.6, 0.4]
# `height` is used instead of `plot_height`, which was deprecated in
# Bokeh 2.4 and removed in Bokeh 3.0.
p = figure(x_range=nodes, height=250, title="CPU Utilization")
# A line is drawn here; a bar chart over the same categories would be
# p.vbar(x=nodes, top=cpus, width=0.9).
p.line(x=nodes, y=cpus)
p.xgrid.grid_line_color = None
# Anchor the y-axis at zero so utilization values are not visually exaggerated.
p.y_range.start = 0
show(p)

In [21]:
from bokeh.models import ColumnDataSource
from bokeh.palettes import Spectral6

# Bar chart driven by a ColumnDataSource: each glyph property below names a
# column ("fruits", "counts", "color") instead of passing the values directly.
fruits = ["Apples", "Pears", "Nectarines", "Plums", "Grapes", "Strawberries"]
counts = [5, 3, 4, 2, 4, 6]
# NOTE(review): this rebinds `data`, which the Timeseries cells use for the
# glucose DataFrame; re-run the glucose import before re-running those cells.
data = {
    "fruits": fruits,
    "counts": counts,
    "color": Spectral6
}
source = ColumnDataSource(data=data)

# `height` is used instead of `plot_height`, which was deprecated in
# Bokeh 2.4 and removed in Bokeh 3.0.
p = figure(x_range=fruits, height=250, y_range=(0, 9), title="Fruit Counts")
# `legend_field` builds one legend entry per distinct value of the "fruits"
# column; it replaces the `legend=` keyword, deprecated in Bokeh 1.4.
p.vbar(source=source, x="fruits", top="counts", width=0.9, color="color", legend_field="fruits")

p.xgrid.grid_line_color = None
p.legend.orientation = "horizontal"
p.legend.location = "top_center"

show(p)

