# Basic Plotting with Bokeh

## Plotting with Glyphs

In [1]:
from bokeh.io import output_notebook, show
from bokeh.plotting import figure

# Render Bokeh plots inline in the notebook.
output_notebook()

# Five points whose marker size grows from left to right.
demo_x = [1, 2, 3, 4, 5]
demo_y = [8, 6, 5, 3, 2]
demo_sizes = [10, 20, 30, 40, 50]

plot = figure(plot_width=400, plot_height=400, tools="pan,box_zoom")
plot.circle(demo_x, demo_y, size=demo_sizes)
show(plot)

## A simple scatter plot
Create a figure, assign x-axis and y-axis labels, and plot female_literacy vs fertility using the circle glyph

In [2]:
# Read the literacy / birth-rate table and pull out the two columns to plot.
import pandas as pd

df = pd.read_csv("data/literacy_birth_rate.csv")
fertility, female_literacy = df["fertility"], df["female literacy"]

In [3]:
# Bokeh entry points used by this section.
from bokeh.io import show
from bokeh.plotting import figure, output_notebook

output_notebook()

# Fertility goes on the x-axis, female literacy on the y-axis.
axis_labels = dict(x_axis_label="fertility", y_axis_label="female literacy")
p = figure(**axis_labels)
p.circle(x=fertility, y=female_literacy)
show(p)

## A Scatter Plot with different shapes
- By calling multiple glyph functions on the same figure object, we can overlay multiple data sets in the same figure.
- plot female literacy vs fertility for two different regions, Africa and Latin America
- plot the Africa data with the circle() glyph, and the Latin America data with the x() glyph.

In [4]:
# Prep data: split fertility and female literacy by continent code.
# Each boolean mask is built once, and rows + column are selected in a single
# .loc call rather than through chained indexing (df.loc[mask][col]).
is_africa = df["Continent"] == "AF"
is_latam = df["Continent"] == "LAT"

fertility_africa = df.loc[is_africa, "fertility"]
fertility_latam = df.loc[is_latam, "fertility"]
literacy_africa = df.loc[is_africa, "female literacy"]
literacy_latam = df.loc[is_latam, "female literacy"]

In [5]:
p = figure(x_axis_label="Fertility", y_axis_label="Female Literacy")

# Overlay both regions on one figure: circles for Africa,
# red x markers for Latin America.
p.circle(x=fertility_africa, y=literacy_africa)
p.x(x=fertility_latam, y=literacy_latam, color="red")

show(p)

## Customizing the scatter plots
The three most important arguments to customize scatter glyphs are color, size, and alpha. 
- Bokeh accepts colors as hexadecimal strings, tuples of RGB values between 0 and 255, and any of the 147 CSS color names. 
- Size values are supplied in screen space units (pixels) and give the diameter of the marker, so a glyph keeps the same on-screen size when you zoom.
- The alpha parameter controls transparency. It takes in floating point numbers between 0.0, meaning completely transparent, and 1.0, meaning completely opaque.

In [6]:
p = figure(x_axis_label="fertility", y_axis_label="female literacy")

# Both regions share marker size and transparency; only the colour differs.
shared_style = dict(size=10, alpha=0.5)
regions = [
    (fertility_africa, literacy_africa, "blue"),
    (fertility_latam, literacy_latam, "red"),
]
for region_x, region_y, region_color in regions:
    p.circle(region_x, region_y, color=region_color, **shared_style)

show(p)

## Additional Glyphs
- Line
- Line and marker together

In [7]:
import numpy as np

# Draw the sample points from a seeded generator so the figure is identical
# after every "Restart & Run All". The generator is local, so NumPy's global
# random state is left untouched.
rng = np.random.RandomState(0)
x = rng.randn(10)
y = rng.randn(10)

plot = figure(plot_width=500, plot_height=400)
# The line and the markers use the same coordinates, so every vertex of the
# line is also drawn as a circle.
plot.line(x, y, line_width=1)
plot.circle(x, y, size=5)
show(plot)

- Patches
  - Useful for showing geographic regions
  - Data given as "list of lists"

In [8]:
# Four random five-vertex polygons: each entry of xs / ys holds the vertex
# coordinates of one patch ("list of lists" layout expected by patches()).
# A seeded local generator keeps the shapes reproducible across runs.
rng = np.random.RandomState(1)
n_patches, n_vertices = 4, 5
xs = [rng.rand(n_vertices) for _ in range(n_patches)]
ys = [rng.rand(n_vertices) for _ in range(n_patches)]

plot = figure()
plot.patches(xs, ys, fill_color=["red", "blue", "green", "orange"], alpha=0.6)
show(plot)

In [9]:
p = figure(plot_width=600, plot_height=400, x_axis_label="buckets", y_axis_label="Savings in USD")

base = 1500000
threshold = 500000
best = 800000

# Bar 1 shows base, bar 2 stacks threshold on top of it, bar 3 spans from the
# adjusted value up to base + threshold, and bar 4 shows the adjusted value
# measured from zero.
stacked_total = base + threshold
adjusted_savings = stacked_total - best

bar_left = [1, 2, 3, 4]
bar_right = [1.5, 2.5, 3.5, 4.5]
bar_top = [base, stacked_total, stacked_total, adjusted_savings]
bar_bottom = [0, base, adjusted_savings, 0]
bar_colors = ["#2ca25f", "#2ca25f", "#dd1c77", "#3182bd"]

p.quad(top=bar_top, bottom=bar_bottom, left=bar_left, right=bar_right,
       color=bar_colors, alpha=0.8)
show(p)

## Lines
- can draw lines on Bokeh plots with the line() glyph function
- plot the daily adjusted closing price of Apple Inc.'s stock (AAPL) from 2000 to 2013
  - The data are read from `data/aapl.csv`: `date` holds the dates converted to datetime objects for the x-axis and `price_close` holds the closing prices for the y-axis.
  - Since we are plotting dates on the x-axis, you must add x_axis_type='datetime' when creating the figure object.

In [10]:
import pandas as pd
from bokeh.plotting import figure

# Load the AAPL quotes; the date column is parsed so it can drive a datetime axis.
aapl = pd.read_csv("data/aapl.csv")
date = pd.to_datetime(aapl["date"])
price_close = aapl["close"]

# x_axis_type="datetime" gives the x-axis date-formatted ticks.
p = figure(x_axis_type="datetime", x_axis_label="Date", y_axis_label="US Dollars")
p.line(date, price_close)
# Optional: overlay markers with p.circle(date, price_close, fill_color="white", size=4).
show(p)

## Data Format: Numpy

In [11]:
from bokeh.io import output_notebook, show
from bokeh.plotting import figure
import numpy as np

output_notebook()

# Noisy sine wave; NumPy arrays are passed to the glyph method directly.
# The noise comes from a seeded local generator so the curve is reproducible
# across kernel restarts.
rng = np.random.RandomState(2)
x = np.linspace(0, 10, 1000)
y = np.sin(x) + rng.random_sample(1000) * 0.1

plot = figure(plot_height=400)
plot.line(x, y)
show(plot)

In [12]:
import numpy as np

# 100 evenly spaced samples of cos(x) on [0, 5], drawn as circle markers.
x = np.linspace(start=0, stop=5, num=100)
y = np.cos(x)

p = figure(plot_height=300)
p.circle(x, y)
show(p)

## Data Formats: Pandas

In [13]:
from bokeh.io import output_notebook, show
from bokeh.plotting import figure
from bokeh.sampledata.iris import flowers

output_notebook()

# DataFrame columns can be handed to glyph methods as-is.
sepal_width = flowers["sepal_width"]
sepal_length = flowers["sepal_length"]

plot = figure(plot_height=400)
plot.circle(sepal_width, sepal_length, size=10, fill_alpha=0.5)
show(plot)

In [14]:
import pandas as pd
from bokeh.plotting import figure

df = pd.read_csv("data/auto-mpg.csv")

# Horsepower against mileage; each marker takes its colour from the "color" column.
p = figure(plot_width=600, plot_height=400, x_axis_label="HP", y_axis_label="MPG")
p.circle(df["hp"], df["mpg"], color=df["color"], size=10, fill_alpha=0.5)
show(p)

## Data Formats: Column Data Source
- Common fundamental data structure for Bokeh
- Maps string column names to sequences of data
- Often created automatically
- Can be shared between glyphs to link selections
- Extra columns can be used with hover tooltips
- **All the columns in the CDS must have the same length**

In [15]:
# Load the sprint table and preview its first rows.
sprint_csv = "data/sprint.csv"
df = pd.read_csv(sprint_csv)
df.head()

Unnamed: 0,Name,Country,Medal,Time,Year,color
0,Usain Bolt,JAM,GOLD,9.63,2012,goldenrod
1,Yohan Blake,JAM,SILVER,9.75,2012,silver
2,Justin Gatlin,USA,BRONZE,9.79,2012,saddlebrown
3,Usain Bolt,JAM,GOLD,9.69,2008,goldenrod
4,Richard Thompson,TRI,SILVER,9.89,2008,silver


In [16]:
from bokeh.models import ColumnDataSource

# Wrap the DataFrame in a ColumnDataSource so the glyph can refer to its
# columns by name instead of receiving the data arrays directly.
source = ColumnDataSource(df)

p = figure(plot_height=400)
glyph_columns = dict(x="Year", y="Time", color="color")
p.circle(source=source, size=8, **glyph_columns)
show(p)

In [17]:
# Selection demo on the shared source: selected points are drawn in red,
# unselected points are faded to alpha 0.1.
select_tools = ["box_select", "reset"]
p_select = figure(x_axis_label="Year", y_axis_label="Time", tools=select_tools)
p_select.circle(
    x="Year",
    y="Time",
    source=source,
    selection_color="red",
    nonselection_alpha=0.1,
)
show(p_select)

## Hover glyphs
- plot the blood glucose levels for an unknown patient. The blood glucose levels were recorded every 5 minutes on October 7th starting at 3 minutes past midnight

In [18]:
df = pd.read_csv("data/glucose.csv")

# Parse the timestamps for the datetime x-axis and keep the readings for y.
x = pd.to_datetime(df["datetime"])
y = df["glucose"]

# Keep the preview as the cell's final expression: a bare expression in the
# middle of a cell is evaluated but never displayed.
df.head()

In [19]:
from bokeh.models import HoverTool

p = figure(plot_height=400, x_axis_type="datetime")

# Faint grey markers by default; the hover_* properties restyle a marker
# while it is being hovered.
base_style = dict(size=10, fill_color="grey", alpha=0.1, line_color=None)
hover_style = dict(hover_fill_color="firebrick", hover_alpha=0.5, hover_line_color="white")
p.circle(x, y, **base_style, **hover_style)

# tooltips=None suppresses the tooltip box; mode="vline" triggers the hover
# on the vertical line through the cursor rather than only under the pointer.
hover = HoverTool(tooltips=None, mode="vline")
p.add_tools(hover)
show(p)

## Colormapping
- using the CategoricalColorMapper to color each glyph by a categorical property.

In [20]:
# Reload the auto-mpg table and preview its first rows.
auto_csv = "data/auto-mpg.csv"
df = pd.read_csv(auto_csv)
df.head()

Unnamed: 0,mpg,cyl,displ,hp,weight,accel,yr,origin,name,color,size
0,18.0,6,250.0,88,3139,14.5,71,US,ford mustang,blue,15.0
1,9.0,8,304.0,193,4732,18.5,70,US,hi 1200d,blue,20.0
2,36.1,4,91.0,60,1800,16.4,78,Asia,honda civic cvcc,red,10.0
3,18.5,6,250.0,98,3525,19.0,77,US,ford granada,blue,15.0
4,34.3,4,97.0,78,2188,15.8,80,Europe,audi 4000,green,10.0


In [21]:
from bokeh.models import CategoricalColorMapper, ColumnDataSource

source = ColumnDataSource(df)

# Map each value of the "origin" column to a fixed colour; factors and
# palette pair up by position.
origin_factors = ["Europe", "Asia", "US"]
origin_palette = ["red", "green", "blue"]
color_mapper = CategoricalColorMapper(factors=origin_factors, palette=origin_palette)
origin_color = dict(field="origin", transform=color_mapper)

p = figure(plot_height=400)
# NOTE(review): newer Bokeh releases replace the `legend=` keyword with
# legend_field / legend_group / legend_label — confirm against the installed version.
p.circle(x="weight", y="mpg", source=source, legend="origin", color=origin_color)
show(p)