In [1]:
from bokeh.plotting import figure
from bokeh.io import output_notebook, output_file, show
import pandas as pd
from bokeh.models.sources import ColumnDataSource
from datetime import datetime as dt

# Configure Bokeh so that subsequent show() calls render plots inline in the notebook
output_notebook()

#### Corn prices dataset.
Download the dataset here: https://www.kaggle.com/nickwong64/corn2015-2017#corn2015-2017.txt

This lists the price of one bushel of corn in the United States between 2015 and 2017. 

In [2]:
# Read the corn price data from a local CSV; the path is relative to the
# notebook's working directory
corn = pd.read_csv('datasets/corn_prices.csv')
# Preview the first rows to confirm which columns were loaded
corn.head()

Unnamed: 0,Date,Price
0,2015-01-04,5.21365
1,2015-01-11,5.1813
2,2015-01-18,5.00922
3,2015-01-25,5.006
4,2015-02-01,4.8708


#### Convert the date to datetime format
This will be required for us to be able to use the dates in a time series plot. If we don't do this, the dates will be treated as strings, which Bokeh will interpret as categorical values.

In [3]:
# Add a parsed datetime copy of the dates for plotting; the original string
# 'Date' column is kept unchanged alongside it
corn['FormattedDate'] = pd.to_datetime(corn['Date'])
corn.head()

Unnamed: 0,Date,Price,FormattedDate
0,2015-01-04,5.21365,2015-01-04
1,2015-01-11,5.1813,2015-01-11
2,2015-01-18,5.00922,2015-01-18
3,2015-01-25,5.006,2015-01-25
4,2015-02-01,4.8708,2015-02-01


#### Examine the data
We are interested in knowing the range of data. Among other things, it will help us define the range for the axes

In [4]:
# include='all' summarizes every column, not only the numeric ones
corn.describe(include='all')

Unnamed: 0,Date,Price,FormattedDate
count,144,144.0,144
unique,144,,144
top,2015-06-21,,2015-07-19 00:00:00
freq,1,,1
first,,,2015-01-04 00:00:00
last,,,2017-10-01 00:00:00
mean,,4.260489,
std,,0.437833,
min,,3.507,
25%,,3.895335,


#### Create a ColumnDataSource from the data
This will make it easier for us to use the data in our plot

In [5]:
# Wrap the DataFrame so that glyphs and tooltips can refer to its columns by name
data_source = ColumnDataSource(corn)

#### Define a tooltip for the plot
We would like the plot to display the Date and Price when we hover over a particular point on the plot. For this, we define a tooltip which comprises a list of tuples. 

Each tuple contains a title for the field followed by a reference to a field in the data source (the column name prefixed with '@'). Note that we are referencing the 'Date' field in the data source rather than 'FormattedDate'. This is because, without an explicit formatter, a datetime field is displayed in the tooltip as a raw numeric timestamp rather than in human-readable form.

In [6]:
# Hover tooltip rows: (label shown to the user, '@column' reference into the data source)
tooltips = [
    ('Date', '@Date'),
    ('Price', '@Price'),
]

#### Get the indexes for the maximum and minimum prices in the series
We will mark the maximum and minimum prices on our plot

In [7]:
# Row labels of the highest and lowest prices in the series
max_index, min_index = corn['Price'].idxmax(), corn['Price'].idxmin()

# One value per line: maximum first, then minimum
print(max_index, min_index, sep='\n')

28
142


#### Define the figure
* we specify the <b>X axis range</b> in datetime format. We leave some extra space on the right of our plot
* the <b>Y range</b> is set by our knowledge of the range of corn prices in our dataset
* for Bokeh to know that the X axis values are in datetime format, we set <b>x_axis_type</b> to be 'datetime'
* the <b>tooltips</b> we defined are added to the plot
* a <b>title</b> is set for our plot

In [8]:
# Build the empty figure for the time series.
# NOTE: plot_width / plot_height are the sizing names used by older Bokeh
# releases; Bokeh 3.x expects width / height instead.
p = figure(
    title='Corn Prices',
    plot_width=600,
    plot_height=300,
    # Treat X values as datetimes so the axis is labelled with dates
    x_axis_type='datetime',
    # Fixed X window with extra room on the right-hand side
    x_range=(dt(2015, 1, 1), dt(2017, 11, 30)),
    # Y bounds chosen by hand from the observed price range
    y_range=(3, 6),
    tooltips=tooltips,
)

#### Draw a line tracing the corn prices
* the X values are obtained from the FormattedDate field which is in datetime format
* the corn prices are represented by the Y axis
* we format the line by setting its width and color
* the source for the data is the ColumnDataSource we created

Hover over different points in the line to view the hovertext we have set with our tooltip

In [9]:
# Trace the price series: both axes are read by column name from data_source
p.line(
    source=data_source,
    x='FormattedDate',
    y='Price',
    color='#FFD700',
    line_width=2,
)

show(p)

#### Get the dates when the highest and lowest prices were reached
We get these in date format (as opposed to datetime) which we will use as the x values for all annotation objects

In [10]:
# Look up the timestamps of the extreme prices and reduce them to plain dates
maxpricedate = corn.at[max_index, 'FormattedDate'].date()
minpricedate = corn.at[min_index, 'FormattedDate'].date()

print('Max price date: ', maxpricedate)
print('Min price date: ', minpricedate)

Max price date:  2015-07-19
Min price date:  2017-09-24


#### Get the values of the maximum and minimum price
These will be the Y values for our annotation objects

In [11]:
# Price values at the rows found by idxmax / idxmin
maxprice = corn.at[max_index, 'Price']
minprice = corn.at[min_index, 'Price']

print('Max price: ', maxprice)
print('Min price: ', minprice)

Max price:  5.29672
Min price:  3.507


#### Mark the maximum and minimum price points with circles

In [12]:
# Mark the two extreme points; each x entry pairs with the y entry at the same position
p.circle(
    x=[maxpricedate, minpricedate],
    y=[maxprice, minprice],
    size=10,
)

show(p)

### Labels
We can use labels in order to add text annotation to our plot. Since we have more than one label, we will use a LabelSet object

In [13]:
from bokeh.models import LabelSet

#### Define a dictionary for our labels
This contains:
* the X and Y coordinates of the labels
* the text to display in the labels

In [14]:
# Column-oriented label data: position i of each list describes label i
data = {
    'x': [maxpricedate, minpricedate],
    'y': [maxprice, minprice],
    'text': ['Max', 'Min'],
}
data

{'x': [datetime.date(2015, 7, 19), datetime.date(2017, 9, 24)],
 'y': [5.29672, 3.507],
 'text': ['Max', 'Min']}

#### Create a ColumnDataSource from the dictionary

In [15]:
# Wrap the label dictionary so the LabelSet can refer to its keys ('x', 'y', 'text') by name
label_source = ColumnDataSource(data)

#### Define a LabelSet with our label data source
We specify the fields in our data source from which the X and Y coordinates and the text will come.

The offset fields are needed to ensure that the labels do not overlap with the circle markers we have set previously. The offset values are in screen space units. Positive values shift a label to the right of its X coordinate (x_offset) and above its Y coordinate (y_offset).

In [16]:
# One text label per row of label_source, nudged by 5 screen units in x and y
# relative to the data point
labels = LabelSet(
    source=label_source,
    x='x',
    y='y',
    text='text',
    x_offset=5,
    y_offset=5,
)

#### Add the labels to the figure's layout

In [17]:
# Attach the label annotation to the figure, then render the figure again
p.add_layout(labels)

show(p)

### Span objects
We can have lines span the entire width or height of the plot using the Span object. We will use this to represent the mean price over the duration of our dataset.

In [18]:
import numpy as np
from bokeh.models import Span

#### Calculate the mean price

In [19]:
# Arithmetic mean of the Price column
price_mean = corn['Price'].mean()
price_mean

4.260488611111111

#### Draw a horizontal line using the Span object
The location along with the dimension sets the X or Y value of the line which will span the plot. Here, we draw a horizontal line by specifying the dimension to be 'width' and the Y value will correspond to the value of the location property.

Bokeh also supports a Slope object, which lets us define a sloped line rather than the simple horizontal or vertical line that Span creates.

We can format this line as we do with any other line. 

A Span object is added to a figure by using the renderers.extend() function

In [20]:
# Horizontal reference line at the mean price: dimension='width' makes the
# line run across the plot's width at Y = location
hline = Span(
    dimension='width',
    location=price_mean,
    line_dash='dashed',
    line_color='green',
    line_width=1,
)

p.renderers.extend([hline])

In [21]:
# Render the figure again, now including the mean-price line
show(p)

### Box Annotations
These are very useful to highlight a range of values within one's plot. In our plot, we use this to mark the range within one standard deviation of the mean

In [22]:
from bokeh.models import BoxAnnotation

#### Calculate the standard deviation

In [23]:
# Population standard deviation (ddof=0 is NumPy's default, spelled out here);
# this is why the value is slightly lower than the 'std' row of describe()
price_stdev = np.std(corn['Price'], ddof=0)
price_stdev

0.43631035086709263

#### Define a box annotation to highlight the range
We set the box attributes to the range which we wish to highlight. Though we have only set the top and bottom, we can also define the left and right of the range if needed.

In [24]:
# Shade the band within one standard deviation of the mean; only top and
# bottom are given, so no left/right limits are set on the band
box = BoxAnnotation(
    top=price_mean + price_stdev,
    bottom=price_mean - price_stdev,
    fill_color='green',
    fill_alpha=0.1,
)

p.add_layout(box)

In [25]:
# Render the final figure with the shaded standard-deviation band
show(p)