In [39]:
from bokeh.io import output_notebook, show, curdoc
from bokeh.plotting import figure, output_file, show
from bokeh.models import ColumnDataSource, BasicTickFormatter, HoverTool, Legend, Column
from bokeh.models.widgets import Tabs, Panel
from bokeh.palettes import Category20
from bokeh.layouts import column, row, gridplot
import pandas as pd
import pandas as pd
from math import pi

In [40]:
output_notebook()

In [68]:
csv_url = "https://health-infobase.canada.ca/src/data/covidLive/covid19.csv"

df = pd.read_csv(csv_url)

#get individual province data frames
df['dates'] = pd.to_datetime(df['date'], dayfirst = True)
df['YMD'] = df['dates'].dt.date
df = df.drop(['date', 'dates', 'prnameFR', 'pruid'], axis = 1)

#List of unique provinces, Canada, ordered alphabetically with Canada first
provnames = df.prname.unique()
provnames = list(provnames)
provnames.pop()
provnames.sort()
myorder = [2, 0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
provnames = [provnames[i] for i in myorder]

colors = (list(Category20[14]))

def make_dataset(pnames):
    
    by_prov = pd.DataFrame()

    # Iterate through all the provinces
    for i in pnames:

        # Subset to the province
        sub = df[df['prname'] == i]

        # Add to the overall dataframe
        by_prov = by_prov.append(sub)
    
    return by_prov

#to make barchart dataset, must retrieve data from most recent date only
def make_barchart_dataset(pnames):
    df_sub = df[df['prname'].isin(provnames)]
    recent = max(df_sub['YMD'])
    recent_date = df_sub[df_sub['YMD'] == recent]

    return recent_date

#must pivot dataset to plot so prov names are columns
def format_dataset(df, value):
    df_pivot = pd.pivot_table(df,
    columns = 'prname',
    index = 'YMD',
    values = value)
    
    df_pivot = df_pivot.rename_axis(None)

    return df_pivot


def plot_lines_multi(src, title, yaxisname):

    numlines = len(src)

    #import color pallete
    mypalette = colors[0:numlines]

    # make a list of our columns
    col = []
    [col.append(i) for i in src.columns]
    
    #source = ColumnDataSource(df)

    # make the figure, 
    p = figure(x_axis_type="datetime", title= title, width = 600, height = 600, 
              tools='pan, wheel_zoom, reset')
    p.xaxis.axis_label = 'Date'
    p.yaxis.axis_label = yaxisname

    # loop through our columns and colours
    for (columnnames, colore) in zip(col, mypalette):
        p.line(src.index, src[columnnames], legend_label = columnnames, color = colore, line_width=4, alpha = 0.8)

    #format axes and legend
    p.xaxis.major_label_orientation = "vertical"
    p.yaxis.formatter = BasicTickFormatter(use_scientific=False)
    p.legend.location = "top_left"
    
    # Hover tool
    hover = HoverTool(mode = 'mouse')
    hover.tooltips = [('Date', '$x{%F}'), ('Cases', '$y{use_scientific = False}')]
    hover.formatters = { "$x": "datetime"}
        
    p.add_tools(hover)
    p.legend.click_policy = 'hide'

    output_file("Canada_COVID-19_Dashboard.html", title="Canada_COVID-19_Dashboard.py example")

    return p 
    


def plot_bar_multi(src, yaxis, title, yaxisname, cols):
    numlines = len(src)

    #import color pallete
    mypalette = colors[0:numlines]

    # make a list of our columns
    col = []
    [col.append(i) for i in src.columns]
    
    source = ColumnDataSource(data = src)
    regions = source.data['prname'].tolist()

    # make the figure, 
    b = figure(title= title, width = 600, height = 600, 
              tools='pan, wheel_zoom, reset', x_range = regions)
    b.xaxis.axis_label = 'Region'
    b.yaxis.axis_label = yaxisname
    
    b.vbar(x = 'prname', top = yaxis, source = source, alpha = 0.8, width = 0.8)

    b.xaxis.major_label_orientation = "vertical"

    # Hover tool
    hover = HoverTool()

    hover.tooltips = [(c, '@' + c) for c in cols]

    b.add_tools(hover)

    return b 


def plot_pie(src):
    
    cols = (list(Category20))
    # remove canada
    src = src[src['prname'] != 'Canada']
    src['numactive'] = src['numactive'].fillna(0.0)
    total_active = src['numactive'].sum()
    src['frac_active'] = src['numactive'] / total_active
    src['angle'] = src['frac_active'] / src['frac_active'].sum() * 2 * pi
    src['color'] = Category20[len(src)]
    
    pie = figure (plot_height = 600, plot_width = 800, title = 'Active Cases By Region', 
                tools = "hover, pan, wheel_zoom, reset", tooltips = "@prname: @numactive")
    
    pie.wedge(x=0, y=1, radius=0.5,
        start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
        line_color="white", fill_color='color', legend_field='prname', source=src)
    
    pie.legend.location = "top_left"
    
    return(pie)


#Plots

#Total Cases
src = format_dataset(make_dataset(provnames), 'numtotal')
p = plot_lines_multi(src, 'Total Cases', 'Cases')
src_bar = make_barchart_dataset(provnames)
b1 = plot_bar_multi(src_bar, 'numtoday', 'Number of Cases Identified Today', 'Cases', ['prname', 'numtoday'])
b4 = plot_bar_multi(src_bar, 'percentoday', 'Percent of Cases Identified Today', 'Percent (%)', ['prname', 'percentoday'])
src6 = format_dataset(make_dataset(provnames), 'percentactive')
p6 = plot_lines_multi(src6, 'Percent of Cases Active Historically', 'Percent (%)')
layout = gridplot([[p, b1], [p6, b4]])

tab = Panel(child=layout, title = 'Total Cases')

#Active Cases By Region
#layout = [percent_active, num_active]
pie = plot_pie(src_bar)
layout_pie = row(pie)
tab_cases = Panel(child = layout_pie, title = 'Active Cases by Region')


#Recovered Cases
src1 = format_dataset(make_dataset(provnames), 'numrecover')
p1 = plot_lines_multi(src1, 'Total Recoveries', 'Cases')
b2 = plot_bar_multi(src_bar, 'recoveredtoday', 'Recoveries Today', 'Recoveries', ['prname', 'recoveredtoday'])
layout1 = row(p1, b2)
tab1 = Panel(child=layout1, title = 'Recoveries')

#Deaths and Mortality
src2 = format_dataset(make_dataset(provnames), 'numdeaths')
p2 = plot_lines_multi(src2, 'Total Deaths', 'Deaths')
b3 = plot_bar_multi(src_bar, 'deathstoday', 'Deaths Today', 'Deaths', ['prname', 'deathstoday'])
src4 = format_dataset(make_dataset(provnames), 'percentdeath')
p4 = plot_lines_multi(src4, 'Historical Percent Mortality', 'Percent (%)')
layout2 = gridplot([[p2, b3], [p4, None]])
tab2 = Panel(child=layout2, title = 'Deaths')

#Case rate per 100,000 population
src3 = format_dataset(make_dataset(provnames), 'ratetotal')
p3 = plot_lines_multi(src3, 'Rate of Infection', 'Rate of Infection (per 100,000 population)')
layout3 = row(p3)
tab3 = Panel(child=layout3, title = 'Rate of Infection')

#Testing Rate per 1 million population
src5 = format_dataset(make_dataset(provnames), 'ratetested')
p5 = plot_lines_multi(src5, 'Testing Rate', 'Testing Rate (per 1 million population)')
layout4 = row(p5)
tab4 = Panel(child=layout4, title = 'Testing Rate')


tabs = Tabs(tabs=[tab, tab_cases, tab1, tab2, tab3, tab4])

# Add it to the current document (displays plot)
curdoc().add_root(tabs)
show(tabs)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user