*data and tutorial provided by [Cloud Academy](https://github.com/cloudacademy/interactive-data-visualization-with-bokeh)*

# $Data$ $Visualization$ $with$ $Bokeh$

## Setup

*Setup of imports, functions, and transformations for all subsequent exercises*

In [23]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
from bokeh.layouts import row, column, gridplot
from bokeh.models import ColumnDataSource, HoverTool, NumeralTickFormatter, Panel, Tabs
from bokeh.palettes import Colorblind3, Category20_5
from bokeh.plotting import figure, show, output_notebook
from bokeh.sampledata.autompg import autompg_clean as cars
from bokeh.transform import factor_cmap

In [3]:
# Load each company's price history; the paths are relative to the notebook.
apple, fb, google = (
    pd.read_csv(csv_path)
    for csv_path in ('./data/AAPL.csv', './data/FB.csv', './data/GOOGL.csv')
)

# Display name -> DataFrame, in the order the later loops pair with a palette.
stocks = dict(
    zip(
        ('Apple (AAPL)', 'Facebook (FB)', 'Google (GOOGL)'),
        (apple, fb, google),
    )
)

# Route every subsequent show() call to inline notebook output.
output_notebook()

In [4]:
def daily_change(row):
    """Return the fractional change between consecutive elements of ``row``.

    Thin wrapper around the pandas ``pct_change`` method of ``row``. The
    result is a fraction (0.01 means 1%), and the first element has no
    predecessor so it comes back as NaN.
    """
    change = row.pct_change()
    return change

def creating_figure(data_src, title, colour, plot_width=500, plot_height=300):
    """Build a toolbar-less line chart of ``Close`` against ``Date``.

    data_src    -- source passed to the line glyph; must expose ``Date`` and
                   ``Close`` columns.
    title       -- text shown (centred) above the plot.
    colour      -- colour of the line.
    plot_width  -- figure width in pixels (default 500).
    plot_height -- figure height in pixels (default 300).

    Returns the configured figure; nothing is shown here.
    """
    figure_options = dict(
        title=title,
        x_axis_type='datetime',
        x_axis_label='Date',
        y_axis_label='Closing Price',
        plot_width=plot_width,
        plot_height=plot_height,
        toolbar_location=None,
    )
    p = figure(**figure_options)

    # Shared look and feel for the notebook's helper-built plots.
    p.title.align = 'center'
    p.title.text_font_size = '15pt'
    p.axis.axis_label_text_font_size = '12pt'

    p.line(x='Date', y='Close', source=data_src, color=colour)
    return p

def compare_returns(src1, src2, title):
    """Scatter the ``PDC`` column of one source against that of another.

    src1  -- source whose ``PDC`` values go on the x-axis.
    src2  -- source whose ``PDC`` values go on the y-axis.
    title -- text shown (centred) above the plot.

    The values are read out of each source's ``.data`` mapping and handed to
    the glyph directly, so the returned figure is not bound to either source.
    """
    x_returns = src1.data['PDC']
    y_returns = src2.data['PDC']

    p = figure(
        title=title,
        plot_width=1000,
        plot_height=500,
        toolbar_location=None,
    )

    p.axis.axis_label_text_font_size = '12pt'
    p.title.align = 'center'
    p.title.text_font_size = '15pt'

    p.circle(x_returns, y_returns, color='rgb(150, 0, 150)')
    return p

def plot_returns(src, title, colour):
    """Scatter a source's daily returns (``PDC``) against ``Date``.

    src    -- source passed to the glyph; must expose ``Date`` and ``PDC``
              columns.
    title  -- text shown (centred) above the plot.
    colour -- colour of the markers.

    Returns the configured 500x300 figure; nothing is shown here.
    """
    p = figure(
        plot_width=500,
        plot_height=300,
        x_axis_type='datetime',
        toolbar_location=None,
        title=title
    )

    p.title.text_font_size = '15pt'
    p.title.align = 'center'
    p.axis.axis_label_text_font_size = '12pt'

    # PDC is produced by pct_change and is therefore a fraction (0.01 == 1%).
    # The plots built with this helper are titled "Daily Change (%)", so the
    # ticks are rendered as percentages to match.
    p.yaxis.formatter = NumeralTickFormatter(format='0.0%')

    p.circle('Date', 'PDC', source=src, line_width=3, color=colour)
    return p

In [5]:
# Derive the per-stock columns used by the later exercises. The frames are
# modified in place, so `apple`, `fb` and `google` gain the new columns too.
# (A second loop that merely repeated the Date conversion has been dropped.)
for frame in stocks.values():
    frame['Date'] = pd.to_datetime(frame['Date'])
    # Fractional day-over-day change of the closing price (NaN on first row).
    frame['PDC'] = daily_change(frame['Close'])
    # Absolute day-over-day change of the closing price (NaN on first row).
    frame['Diff'] = frame['Close'].diff().abs()

# Number of cars per origin, flattened to the columns `origin` and `n`.
cars_origin = (
    cars[['origin', 'name']]
    .groupby('origin')
    .count()
    .rename(columns={'name': 'n'})
    .reset_index()
)

# Sources are built after the columns above exist so the plots can use them.
apple_src = ColumnDataSource(apple)
fb_src = ColumnDataSource(fb)
google_src = ColumnDataSource(google)
cars_src = ColumnDataSource(cars_origin)

## Part One: Plotting with Glyphs

* Create a plot of the FB.csv with Date as the x-axis and Close as the y-axis using ColumnDataSource
* Create a scatter plot of the relationship between daily closing price and volume for the fb dataset
* Create a bar plot of the relationship between date and volume for the fb dataset

In [6]:
# Line chart of Facebook's closing price over time, read from the shared source.
fb_plot = figure(x_axis_type='datetime')
fb_plot.line(x='Date', y='Close', source=fb_src)
show(fb_plot)

In [7]:
# Scatter of traded volume (x) against the absolute daily change in closing
# price (y). Both columns are plain numbers, so the axes are left linear: a
# datetime x-axis would render the volumes as dates.
fb_scatterplot = figure(
    x_axis_label='Volume',
    y_axis_label='Absolute daily change in closing price',
)
fb_scatterplot.circle('Volume', 'Diff', source=fb_src)
show(fb_scatterplot)

In [8]:
# Bar chart of traded volume per date. On a datetime axis one data unit is a
# millisecond, so the bar width has to be given in milliseconds; without it
# each bar is only about a millisecond wide and shows up as a hairline.
bar_width_ms = 0.8 * 24 * 60 * 60 * 1000  # 80% of one day
fb_barplot = figure(x_axis_type='datetime')
fb_barplot.vbar(x='Date', top='Volume', width=bar_width_ms, source=fb_src)
show(fb_barplot)

## Part Two: Customising Plots

* Change the figure width and height for fb_plot and add a title
* Add labels for the x and y axis
* Use the circle function to add markers to fb_plot
* Create a new plot showing fb, apple, and google stock closing price by date
* Add a legend to the plot
* Amend the plot to show daily change as the y-axis
* Finally, make the plot dynamic using the muted color parameters

In [9]:
# Customised Facebook closing-price chart: sized, titled and labelled, with
# small black markers drawn over the line.
fb_plot = figure(
    title="Facebook stock closing prices through 2020",
    x_axis_type='datetime',
    x_axis_label='Date',
    y_axis_label='Price',
    plot_width=1000,
    plot_height=250,
    toolbar_location=None,
)

fb_plot.line(x='Date', y='Close', source=fb_src, legend_label="Facebook")
fb_plot.circle(x='Date', y='Close', source=fb_src, size=2, color='black')

# Title and axis-label styling.
fb_plot.title.align = 'center'
fb_plot.title.text_color = 'rgb(0, 100, 150)'
fb_plot.title.text_font_size = '15pt'
fb_plot.axis.axis_label_text_font_size = '12pt'

show(fb_plot)

In [10]:
# Daily change of all three stocks on one plot, with a clickable legend that
# mutes individual lines. The title now describes what is plotted (the PDC
# column), not the closing prices the plot originally showed.
stocks_price = figure(
    plot_width=1000,
    plot_height=400,
    x_axis_type='datetime',
    title="Daily change in stock closing prices through 2020",
    toolbar_location=None,
    x_axis_label='Date',
    y_axis_label='Daily Change (%)',
)

# One line per stock; each stock is paired with one colour of the palette.
for (name, data), colour in zip(stocks.items(), Colorblind3):
    stocks_price.line(
        data['Date'],
        data['PDC'],
        line_width=1,
        color=colour,
        legend_label=name,
        # Same colour when muted; only the alpha drops.
        muted_color=colour,
        muted_alpha=0.2,
    )

# PDC comes from pct_change and is a fraction (0.01 == 1%); format the ticks
# as percentages so they agree with the "(%)" axis label.
stocks_price.yaxis.formatter = NumeralTickFormatter(format='0.0%')

stocks_price.title.text_font_size = '15pt'
stocks_price.title.align = 'center'
stocks_price.axis.axis_label_text_font_size = '12pt'
stocks_price.legend.location = 'top_left'
stocks_price.legend.click_policy = 'mute'
stocks_price.legend.border_line_color = 'black'
show(stocks_price)

## Part Three: Bokeh Inspectors

* Create a scatterplot for the apple stock date and closing price with a colour of lightgrey and a low alpha
* Create a HoverTool that increases the alpha and changes the colour of the points to dark orange
* Add a tooltip to the hover object to display date, volume, and close price
* Set the date to display correctly, aggregate volume, and add the currency symbol to close price

In [11]:
# Apple closing prices as muted grey markers that light up under the cursor,
# with a tooltip showing date, traded volume and closing price.
apple_scatterplot = figure(
    x_axis_type='datetime',
    x_axis_label='Date',
    y_axis_label='Closing Price',
    plot_width=1000,
    plot_height=400,
    title='Apple stock closing prices through 2020',
)

apple_scatterplot.circle(
    'Date',
    'Close',
    source=apple_src,
    color='lightgrey',
    alpha=0.7,
    size=8,
    # Appearance of the marker currently under the cursor.
    hover_fill_color='rgb(150, 50, 50)',
    hover_alpha=1,
)

apple_scatterplot.title.text_color = 'rgb(150, 50, 50)'
apple_scatterplot.title.text_font_size = '15pt'
apple_scatterplot.title.align = 'center'

hover = HoverTool(
    tooltips=[
        # %F is the strftime-style ISO date; needs the datetime formatter below.
        ('Date: ', '@Date{%F}'),
        # Numeral format: two decimals plus an abbreviation suffix.
        ('Volume Traded: ', '@Volume{0.00 a}'),
        # Numeral format for two decimals. The previous "{0.2f}" mixed printf
        # syntax into a field that uses the default numeral formatter.
        ('Closing Price: ', '$@Close{0.00}'),
    ],
    formatters={'@Date': 'datetime'},
    mode='mouse',
)
apple_scatterplot.add_tools(hover)

show(apple_scatterplot)

## Part Four: Multiple Plots

* Use creating_figure() to generate lineplots for apple, facebook, and google
* Display these with rowplots
* Create a new plot showing Google's daily returns in one row, with two columns comparing it against Apple and Facebook respectively on a second row
* Create a gridplot showing the PDC for all three stocks
* Finally, recreate the gridplot as a tabplot

In [12]:
# One closing-price line chart per company, laid out side by side.
r1 = creating_figure(apple_src, 'Apple stock closing prices through 2020', 'rgb(200, 100, 00)')
r2 = creating_figure(fb_src, 'Facebook stock closing prices through 2020', 'rgb(0, 50, 150)')
r3 = creating_figure(google_src, 'Google stock closing prices through 2020', 'rgb(0, 200, 50)')

stocks_rowplot = row(children=[r1, r2, r3])
show(stocks_rowplot)

In [13]:
# Top row: Google's closing-price line. Bottom row: two return-vs-return
# scatters. Both containers scale with the available width.
googl = creating_figure(
    google_src,
    "Google Stock Prices",
    'rgb(150, 0, 150)',
    plot_width=600,
    plot_height=300,
)
aapl_googl = compare_returns(apple_src, google_src, 'Google vs Apple')
googl_fb = compare_returns(google_src, fb_src, 'Google vs Facebook')

rows = row(children=[aapl_googl, googl_fb], sizing_mode='scale_width')
layout = column(children=[googl, rows], sizing_mode='scale_width')
show(layout)

In [14]:
# Daily-change scatter per company; note that `fb_plot` is rebound here.
aapl_plot = plot_returns(apple_src, 'Daily Change (%) - AAPL', 'rgb(200, 100, 00)')
fb_plot = plot_returns(fb_src, 'Daily Change (%) - FB', 'rgb(0, 50, 150)')
googl_plot = plot_returns(google_src, 'Daily Change (%) - GOOGL', 'rgb(0, 200, 50)')

# One grid row per company: closing-price line on the left, returns on the right.
PDR_grid = gridplot(
    children=[
        [r1, aapl_plot],
        [r2, fb_plot],
        [r3, googl_plot],
    ]
)
show(PDR_grid)

In [15]:
# Same pairs of plots as the grid, but one company per tab.
atab = Panel(title='Apple', child=row(children=[r1, aapl_plot]))
ftab = Panel(title='Facebook', child=row(children=[r2, fb_plot]))
gtab = Panel(title='Google', child=row(children=[r3, googl_plot]))

tab_plot = Tabs(tabs=[atab, ftab, gtab])
show(tab_plot)

## Part Five: Categorical Variables

* Import the auto_mpg dataset and generate a simple barchart comparing car origin to frequency
* Recolour the barplot so each origin has a unique colour
* Add a second barplot which includes cylinders using the vbar_stack function

In [27]:
# Distinct origins, in order of first appearance; they serve both as the
# categorical x-range and as the factors of the colour mapping.
factors = cars['origin'].drop_duplicates().tolist()
colourmap = factor_cmap(field_name='origin', factors=factors, palette=Colorblind3)

cars_barplot = figure(
    title="Cars by Origin",
    x_range=factors,
    plot_width=500,
    plot_height=300,
    toolbar_location=None,
)

# One bar per origin, height taken from the precomputed count column `n`.
cars_barplot.vbar(
    x='origin',
    top='n',
    width=0.5,
    source=cars_src,
    fill_color=colourmap,
    line_color='white',
    legend_field='origin',
)

# Cosmetics; the figure is not shown here but placed in a tab later on.
cars_barplot.title.align = 'center'
cars_barplot.title.text_font_size = '15pt'
cars_barplot.xgrid.grid_line_color = None
cars_barplot.y_range.start = 0
cars_barplot.legend.location = 'top_right'
cars_barplot.legend.orientation = 'horizontal'

In [28]:
# Count cars per (origin, cylinder) pair, flattened to columns origin/cyl/n.
cars_origin_cyl = (
    cars[['origin', 'name', 'cyl']]
    .groupby(['origin', 'cyl'])
    .count()
    .rename(columns={'name': 'n'})
    .reset_index()
)
# Cylinder counts become strings so they can act as column names / stackers.
cars_origin_cyl['cyl'] = cars_origin_cyl['cyl'].astype('str')

# Wide layout: one row per origin, one column per cylinder count, 0 where a
# combination does not occur.
cars_wide = cars_origin_cyl.pivot_table(
    index='origin',
    columns='cyl',
    values='n',
    aggfunc='sum',
    fill_value=0,
)
cars_pivot = cars_wide.rename_axis(None, axis=1).reset_index()

car_list = cars_pivot.to_dict('list')
origins = car_list['origin']
# Every column after the leading `origin` column is a cylinder count.
cyls = list(cars_pivot)[1:]

car_plot = figure(
    title="Cars by Origin and Cylinder",
    x_range=origins,
    plot_width=500,
    plot_height=300,
    toolbar_location=None,
)

# One stacked segment per cylinder column; the palette supplies one colour
# per stacker, in order.
car_plot.vbar_stack(
    stackers=cyls,
    x='origin',
    width=0.5,
    color=Category20_5,
    legend_label=cyls,
    source=car_list,
)

# Cosmetics.
car_plot.title.align = 'center'
car_plot.title.text_font_size = '15pt'
car_plot.x_range.range_padding = 0.1
car_plot.y_range.start = 0
car_plot.xgrid.grid_line_color = None
car_plot.axis.minor_tick_line_color = None
car_plot.outline_line_color = None
car_plot.legend.orientation = 'horizontal'
car_plot.legend.location = 'top_left'

In [29]:
# Put the two car bar charts on separate tabs of a single widget.
tab1 = Panel(title="Origin", child=cars_barplot)
tab2 = Panel(title="Origin + Cyl", child=car_plot)

cars_tabplot = Tabs(tabs=[tab1, tab2])
show(cars_tabplot)