In [25]:
import numpy as np
import pandas as pd

from bokeh.plotting import ColumnDataSource


# Read the building energy / water-use metrics CSV; its first column becomes the row index
building_data = pd.read_csv(
    '../datasets/Building_Energy_and_Water_Use_Metrics.csv',
    index_col=0,
)

# Wrap the DataFrame in a Bokeh ColumnDataSource
building_cds = ColumnDataSource(data=building_data)

# Preview the first two rows as the cell output
building_data.head(2)

Unnamed: 0,Property_Name,Address,ZIP,Property_Type,Gross_Sq_Ft,Property_Uses,Site_EUI,EnergyStar_Score,EnergyStar_Certified,Year_Built,GHG_Emissions,GHG_Intensity,Site_Energy_Use,Percent_Electr,Percent_Gas,Percent_Steam,built_before
0,50 West Broadway,50 West Broadway,2127,Multifamily Housing,250755.0,"Multifamily Housing, Parking",39.3,97,,2008,585.0,2.9,"* 7,898,243",48%,52%,0%,Built after 1950
1,Boston Trinity Academy,17 Hale Street,2136,K-12 School,53000.0,K-12 School,50.4,98,,1956,142.4,2.7,"* 2,673,498",0%,100%,0%,Built after 1950


## Basic Plot with no Data
* Review:
    * figure()
    * output_notebook() or output_file()
    * show()

In [26]:
from bokeh.io import output_notebook, show
from bokeh.plotting import figure

# Route Bokeh output to the notebook, then render an empty 300x300 figure
output_notebook()

p = figure(plot_height=300, plot_width=300)
show(p)

<img src="images/blank-figure-small.png">

## Plotting Using NumPy Arrays
* NumPy - Scientific Computing Library
* ndarray - multidimensional array object
* Can pass ndarray object to Bokeh

In [27]:
from bokeh.io import output_notebook, show
from bokeh.plotting import figure

# Sample data: 101 evenly spaced x-values on [0, 10] and their exponentials
x = np.linspace(start=0, stop=10, num=101)
y = np.exp(x)

# Empty 300x300 canvas for the glyphs
p = figure(plot_height=300, plot_width=300)

# The circle glyph accepts the NumPy arrays directly
p.circle(x=x, y=y)

output_notebook()
show(p)

# Confirm that both inputs are NumPy ndarrays
for values in (x, y):
    print(type(values))

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>


<img src="images/numpy.png">

## Using Pandas DataFrames
* pandas - Python package for relational data
* Built on top of the NumPy library.
* DataFrame - Pandas' primary data structure
    * analogous in appearance to an Excel workbook or R data frame

In [28]:
from bokeh.plotting import figure
from bokeh.io import output_notebook, show

# Pull the two columns to compare out of the DataFrame (each one is a pandas Series)
sqft = building_data['Gross_Sq_Ft']    # plotted on the y-axis
ghg = building_data['GHG_Emissions']   # plotted on the x-axis

# Collect the figure options, then build the figure from them
figure_options = dict(
    plot_width=500,
    plot_height=300,
    x_axis_label='Greenhouse Gas Emissions',
    y_axis_label='Gross Square Feet',
)
p = figure(**figure_options)

# The circle glyph accepts the Series directly
p.circle(x=ghg, y=sqft)

output_notebook()
show(p)

# Confirm that both inputs are pandas Series
for column in (ghg, sqft):
    print(type(column))

<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>


<img src="images/pandas.png">

## Bokeh's ColumnDataSource
* In the background bokeh is transforming these data formats into the main data format for bokeh - **`ColumnDataSource`**
* **`ColumnDataSource`** is the main data structure in bokeh.
* **`ColumnDataSource`** has a data attribute that matches a string name to a sequence of data.
    * In the case of the pandas DataFrame the string name is the column name and the sequence of data is the values from the column

## Equivalent DataFrame and ColumnDataSource for comparison

In [29]:
# Small example table: one inner list per row, one label per column
table_columns = ['name', 'number', 'height']
table_rows = [
    ['Greg', 2, 68],
    ['Tim', 4, 70],
]
table = pd.DataFrame(table_rows, columns=table_columns)

print(table)

   name  number  height
0  Greg       2      68
1   Tim       4      70


In [30]:
# Build the same table as a ColumnDataSource: each key is a column name and
# each value is that column's sequence of values.
# It is bound to its own name so the DataFrame `table` defined in the previous
# cell is not overwritten and both objects stay available for comparison.
table_cds = ColumnDataSource(data={
    'name': ['Greg', 'Tim'],
    'number': [2, 4],
    'height': [68, 70],
})

# Show the underlying column-name -> values mapping
table_cds.data

{'height': [68, 70], 'name': ['Greg', 'Tim'], 'number': [2, 4]}

* Benefits of the **`ColumnDataSource`**:
    * Can be used to link selections between plots
    * Can be used to create extra hover tooltips

## Transform a Pandas DataFrame to a ColumnDataSource

In [31]:
# Convert the pandas DataFrame building_data into a ColumnDataSource
building_cds = ColumnDataSource(data=building_data)

# The keys of .data are the DataFrame's column headers
building_cds.data.keys()

dict_keys(['Property_Name', 'Address', 'ZIP', 'Property_Type', 'Gross_Sq_Ft', 'Property_Uses', 'Site_EUI', 'EnergyStar_Score', 'EnergyStar_Certified', 'Year_Built', 'GHG_Emissions', 'GHG_Intensity', 'Site_Energy_Use', 'Percent_Electr', 'Percent_Gas', 'Percent_Steam', 'built_before', 'index'])

## Plotting with the ColumnDataSource
* pass the dictionary keys as inputs to the x and y parameters of the circle glyph
* **`GHG_Emissions`** and **`Gross_Sq_Ft`** are keys from the ColumnDataSource
* set the source equal to the ColumnDataSource object **`building_cds`**
* NOTE you are pulling from the ColumnDataSource object and NOT the pandas DataFrame

In [32]:
from bokeh.io import output_notebook, show
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure

# Rebuild the ColumnDataSource from the DataFrame
building_cds = ColumnDataSource(data=building_data)

# Collect the figure options, then build the figure from them
figure_options = dict(
    plot_width=500,
    plot_height=300,
    x_axis_label='Greenhouse Gas Emissions',
    y_axis_label='Gross Square Feet',
)
p = figure(**figure_options)

# x and y are column names here; the values are looked up in `source`
p.circle(x='GHG_Emissions', y='Gross_Sq_Ft', source=building_cds)

output_notebook()
show(p)

print(type(building_cds))

<class 'bokeh.models.sources.ColumnDataSource'>


<img src="images/cds.png">

## Color Mapping
* You can color points based on categorical values
* **`from bokeh.models import CategoricalColorMapper`**
    * `CategoricalColorMapper` inputs:
        * factors
        * palette
* To color a glyph by category, pass a dictionary to the glyph's color property with two keys:
    * field - the name of the column to map
    * transform - the color mapper to apply to that column's values

In [45]:
from bokeh.io import output_notebook, show
from bokeh.models import ColumnDataSource, CategoricalColorMapper
from bokeh.plotting import figure

# Rebuild the ColumnDataSource from the DataFrame
building_cds = ColumnDataSource(data=building_data)

# Map each 'built_before' category to a fixed color
color_mapper = CategoricalColorMapper(
    factors=['Built after 1950', 'Built before 1950'],
    palette=['red', 'blue'],
)

# Color specification: take the category from the 'built_before' column and
# run it through the mapper
built_before_color = {'field': 'built_before', 'transform': color_mapper}

# Set up the figure
p_basic = figure(
    plot_width=500,
    plot_height=300,
    x_axis_label='Greenhouse Gas Emissions',
    y_axis_label='Gross Square Feet',
)

# Scatter the buildings, colored and labelled in the legend by 'built_before'
p_basic.circle(
    x='GHG_Emissions',
    y='Gross_Sq_Ft',
    source=building_cds,
    color=built_before_color,
    legend='built_before',
)

output_notebook()
show(p_basic)

print(type(building_cds))

<class 'bokeh.models.sources.ColumnDataSource'>


<img src="images/cmap.png">

## Using an imported color palette
* bokeh includes a number of useful color palettes for import
* **`from bokeh.palettes import Colorblind`**
* specify the number of colors you want from the palette

In [34]:
from bokeh.plotting import figure
from bokeh.io import output_notebook, show
from bokeh.models import CategoricalColorMapper
from bokeh.palettes import Colorblind, viridis

# Create a list of the unique 'built_before' values, in order of first appearance
built_before_list = building_data['built_before'].unique().tolist()

# Map each category to a color from the 3-color Colorblind palette
color_mapper = CategoricalColorMapper(
    factors=built_before_list,
    palette=Colorblind[3],
)

# Color specification: take the category from the 'built_before' column and
# run it through the mapper
built_before_color = {'field': 'built_before', 'transform': color_mapper}

# Set up the figure
p_cat = figure(
    plot_width=500,
    plot_height=300,
    x_axis_label='Greenhouse Gas Emissions',
    y_axis_label='Gross Square Feet',
)

# Scatter the buildings from the existing building_cds source
p_cat.circle(
    x='GHG_Emissions',
    y='Gross_Sq_Ft',
    source=building_cds,
    color=built_before_color,
    legend='built_before',
)

output_notebook()
show(p_cat)

print(type(building_cds))

<class 'bokeh.models.sources.ColumnDataSource'>


<img src="images/cmap2.png">

### Larger Palettes
The bokeh.palettes module also has some larger palettes with 256 colors. 
* The large palettes available are shown below:

<img src="images/large-palettes.png">

In [36]:
from bokeh.plotting import figure
from bokeh.io import output_notebook, show
from bokeh.models import CategoricalColorMapper
from bokeh.palettes import viridis

# Set up the figure
p_scale = figure(plot_width=500,
                 plot_height=300,
                 x_axis_label='Greenhouse Gas Emissions',
                 y_axis_label='Gross Square Feet')

# Create a sorted list of the unique construction years.
# Read them from the DataFrame rather than from building_cds.data: the columns
# stored in a ColumnDataSource are not guaranteed to be pandas Series (they may
# be plain NumPy arrays, which have no .unique() method). This also matches how
# the 'built_before' categories are collected for the Colorblind example.
year_built_list = sorted(building_data['Year_Built'].unique())

# Create the CategoricalColorMapper object with one viridis color per distinct year
color_mapper = CategoricalColorMapper(factors=year_built_list,
                                      palette=viridis(len(year_built_list)))

# The legend is deliberately left off for this plot (presumably because one
# entry per year would crowd the figure -- confirm before re-enabling).
p_scale.circle(x='GHG_Emissions',
               y='Gross_Sq_Ft',
               source=building_cds,
               color={'field': 'Year_Built', 'transform': color_mapper})

output_notebook()
show(p_scale)

print(type(building_cds))

<class 'bokeh.models.sources.ColumnDataSource'>


<img src="images/cmap3.png">

## Bokeh Layouts
* row method - aligns plots & menu objects in rows
* column method - aligns plots & menu objects in columns

## Row Layout

In [37]:
from bokeh.layouts import row, column

# Place the three figures side by side in a single row
row_figures = [p_basic, p_cat, p_scale]
layout = row(children=row_figures)

show(layout)

## Column Layout

In [38]:
from bokeh.layouts import row, column

# Stack the three figures vertically in a single column
column_figures = [p_basic, p_cat, p_scale]
layout = column(children=column_figures)

show(layout)

## Combination Layout

In [39]:
from bokeh.layouts import row, column

# First pair two figures in a row, then stack the third figure beneath that row
top_row = row(p_basic, p_cat)
layout = column(top_row, p_scale)

show(layout)

## Creating grid plots
* Benefit is that you have one toolbar for all the plots

In [44]:
from bokeh.layouts import gridplot

# One inner list per grid row; None marks the bottom-right cell as empty
grid_rows = [
    [p_basic, p_cat],
    [p_scale, None],
]
grid_layout = gridplot(grid_rows)

output_notebook()
# Rendering is left disabled in this cell; to display the grid, run:
# show(grid_layout)

<img src="images/combo.png">

## Changing the Toolbar Location
* use **`toolbar_location`** to modify

In [42]:
from bokeh.layouts import gridplot

# One inner list per grid row; None marks the bottom-right cell as empty
grid_rows = [
    [p_basic, p_cat],
    [p_scale, None],
]

# Same grid as before, scaled to the available width, with the toolbar on the left
grid_layout = gridplot(children=grid_rows,
                       sizing_mode='scale_width',
                       toolbar_location='left')

output_notebook()
# Rendering is left disabled in this cell; to display the grid, run:
# show(grid_layout)

<img src="images/tbar-layout.png">