In [2]:
%autosave 10

Autosaving every 10 seconds


In [3]:
import numpy as np
import pandas as pd
from datetime import date, timedelta
from datetime import datetime

In [4]:
from bokeh.io import show, output_notebook
from bokeh.plotting import figure

from bokeh.models import HoverTool, ColumnDataSource, DatetimeTickFormatter, LinearAxis, Range1d
from bokeh.models.widgets import Dropdown,DateRangeSlider, Select

from bokeh.layouts import column, row

from bokeh.application.handlers import FunctionHandler
from bokeh.application import Application

# Direct Bokeh output to the notebook so that the later show() call renders inline.
output_notebook()

In [5]:
# Candidate report dates, newest first: today, then yesterday.
cur_day = date.today()
dates = [cur_day - timedelta(days=days_back) for days_back in (0, 1)]

In [6]:
# `import urllib` alone does not load the `urllib.error` submodule; import it
# explicitly so the except clause below does not depend on another library
# having imported it first.
import urllib.error

# Common prefix of the dated spreadsheet URL; the date and ".xlsx" are appended.
url = 'https://www.ecdc.europa.eu/sites/default/files/documents/COVID-19-geographic-disbtribution-worldwide-'

# Try each candidate date in order (today first, then yesterday) and keep the
# first spreadsheet that can be downloaded.
last_error = None
for candidate in dates:
    dat = candidate.strftime("%Y-%m-%d")
    try:
        df = pd.read_excel(url + dat + ".xlsx")
        break
    except urllib.error.URLError as err:
        # Remember the failure and fall through to the next (older) date.
        last_error = err
else:
    # No candidate date could be downloaded: surface the most recent failure.
    raise last_error

# Keep a local CSV snapshot named after the date that was actually downloaded.
df.to_csv("covid-19-" + dat + ".csv", index=False)

In [7]:
# Columns to keep, keyed by their position in the downloaded sheet and mapped
# to the short names used throughout the rest of the notebook.
column_names = {0: 'Date', 4: 'Cases', 5: 'Deaths', 6: 'Country', 9: 'Population'}
dfx = df.iloc[:, list(column_names)]
dfx.columns = list(column_names.values())
dfx.head()

Unnamed: 0,Date,Cases,Deaths,Country,Population
0,2020-04-14,58,3,Afghanistan,37172386.0
1,2020-04-13,52,0,Afghanistan,37172386.0
2,2020-04-12,34,3,Afghanistan,37172386.0
3,2020-04-11,37,0,Afghanistan,37172386.0
4,2020-04-10,61,1,Afghanistan,37172386.0


In [8]:
# Earliest and latest report dates in the data set; used as the default date
# range and as the limits of the date slider.
datemin, datemax = dfx['Date'].min(), dfx['Date'].max()

In [9]:
import matplotlib.pyplot as plt
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

# Static preview of the per-country table for Italy.
# chart_type is either 'Day by Day' or 'Cumulative'; it is also picked up as
# the default chart type of make_dataset() inside modify_doc().
chart_type = 'Cumulative'

# Index by date, restrict to the available date span, then keep one country.
# .copy() makes the result an independent frame, so adding the cumulative
# columns below does not trigger pandas' chained-assignment warning.
by_country = dfx.set_index('Date').sort_index().loc[datemin:datemax]
by_country = by_country.loc[by_country['Country'] == 'Italy'].copy()
by_country['CumC'] = by_country['Cases'].cumsum()
by_country['CumD'] = by_country['Deaths'].cumsum()

if chart_type == 'Day by Day':
    # Daily figures only.
    by_country = by_country[['Cases', 'Deaths', 'Country']]
elif chart_type == 'Cumulative':
    # Running totals take over the 'Cases'/'Deaths' names; rename() returns a
    # fresh frame, so the per-capita columns can be added safely.
    by_country = by_country[['CumC', 'CumD', 'Country', 'Population']].rename(
        columns={'CumC': 'Cases', 'CumD': 'Deaths'})
    by_country['Cases per 1M people'] = by_country['Cases'] / by_country['Population'] * 1e6
    by_country['Deaths per 1M people'] = by_country['Deaths'] / by_country['Population'] * 1e6
by_country

Unnamed: 0_level_0,Cases,Deaths,Country,Population,Cases per 1M people,Deaths per 1M people
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-12-31,0,0,Italy,60431283.0,0.000000,0.000000
2020-01-01,0,0,Italy,60431283.0,0.000000,0.000000
2020-01-02,0,0,Italy,60431283.0,0.000000,0.000000
2020-01-03,0,0,Italy,60431283.0,0.000000,0.000000
2020-01-04,0,0,Italy,60431283.0,0.000000,0.000000
...,...,...,...,...,...,...
2020-04-10,143626,18281,Italy,60431283.0,2376.682951,302.508884
2020-04-11,147577,18851,Italy,60431283.0,2442.062996,311.941085
2020-04-12,152271,19470,Italy,60431283.0,2519.737997,322.184124
2020-04-13,156363,19901,Italy,60431283.0,2587.451271,329.316192


In [10]:
# Country names ordered from the highest to the lowest total number of
# reported cases; used as the option list of the country selector.
sorted_by_cases = (
    dfx.groupby('Country')['Cases']
    .sum()
    .sort_values(ascending=False)
    .index
    .tolist()
)
def _to_timestamp(value):
    """Convert one date-range bound to a timezone-naive ``pd.Timestamp``.

    Plain numbers are interpreted as milliseconds since the Unix epoch and
    converted without applying the local timezone, so a bound that sits on
    midnight stays on midnight. Anything else (``pd.Timestamp``, ``datetime``,
    ``date``, date string) is passed to ``pd.Timestamp``.
    """
    if isinstance(value, (int, float, np.integer, np.floating)):
        return pd.to_datetime(value, unit='ms')
    return pd.Timestamp(value)


def _padded_bounds(first, second, pad=0.05, fallback=(-1, 2)):
    """Return ``(low, high)`` enclosing the finite values of both sequences.

    Each bound is pushed outwards by ``pad`` times its own magnitude, which
    equals ``0.95 * min`` / ``1.05 * max`` for positive data and still
    encloses the data when the minimum is negative. NaN and infinite entries
    are ignored. ``fallback`` is returned when no finite value is left.
    """
    values = np.concatenate([np.asarray(first, dtype=float).ravel(),
                             np.asarray(second, dtype=float).ravel()])
    values = values[np.isfinite(values)]
    if values.size == 0:
        return fallback
    low, high = values.min(), values.max()
    return low - pad * abs(low), high + pad * abs(high)


def modify_doc(doc):
    """Build the interactive Covid-19 chart and add it to a Bokeh document.

    Creates a country selector, a chart-type selector and a date-range slider,
    plots cases and deaths (left axis) together with their per-million rates
    (right axis), and re-computes the plotted data whenever a control changes.

    Parameters
    ----------
    doc
        Document object the finished layout is attached to via ``add_root``.
    """
    def make_dataset(country, chart_type = chart_type, range_start = datemin, range_end = datemax):
        """Return a ColumnDataSource for one country, chart type and date range.

        ``range_start`` / ``range_end`` may be timestamps or epoch-millisecond
        numbers; each bound is normalised independently.
        """
        range_start = _to_timestamp(range_start)
        range_end = _to_timestamp(range_end)

        by_country = dfx.set_index('Date').sort_index().loc[range_start:range_end]
        # .copy() gives an independent frame so the column assignments below
        # do not raise pandas' chained-assignment warning.
        by_country = by_country.loc[by_country['Country'] == country].copy()
        by_country['CumC'] = by_country['Cases'].cumsum()
        by_country['CumD'] = by_country['Deaths'].cumsum()
        if chart_type == 'Day by Day':
            by_country = by_country.loc[:, ['Cases','Deaths','Country','Population']].copy()
        elif chart_type == 'Cumulative':
            # The running totals take over the 'Cases' / 'Deaths' column names.
            by_country = by_country.loc[:, ['CumC','CumD','Country','Population']].copy()
            by_country.columns = ['Cases','Deaths','Country','Population']
        # NOTE(review): population is reduced by the 'Deaths' column of the
        # selected chart type (daily or cumulative) before the per-million
        # rates are computed - confirm this is intended for 'Day by Day'.
        by_country['Population'] = by_country['Population'] - by_country['Deaths']
        by_country['avgc'] = by_country['Cases'] / by_country['Population'] * 1e6
        by_country['avgd'] = by_country['Deaths'] / by_country['Population'] * 1e6
        return ColumnDataSource(by_country)

    def style(p):
        """Apply title, axis-label and tick-label fonts to the figure and return it."""
        # Title
        p.title.align = 'center'
        p.title.text_font_size = '20pt'
        p.title.text_font = 'serif'

        # Axis titles
        p.yaxis.axis_label_text_font_size = '14pt'
        p.yaxis.axis_label_text_font_style = 'bold'

        # Tick labels
        p.xaxis.major_label_text_font_size = '12pt'
        p.yaxis.major_label_text_font_size = '12pt'

        return p

    def fit_avg_axis(plot, source):
        """Rescale the secondary 'Avg' axis to the per-million columns of ``source``."""
        low, high = _padded_bounds(source.data['avgc'], source.data['avgd'])
        plot.extra_y_ranges['Avg'].start = low
        plot.extra_y_ranges['Avg'].end = high

    def make_plot(src):
        """Create the figure with four lines and a hover tool, fed from ``src``."""
        # Blank plot with correct labels
        p = figure(plot_width = 900,
                  sizing_mode = "stretch_height",
                  title = 'Covid-19 Cases and Deaths for ' + sel_country.value + ': '+sel_chart.value,
                  x_axis_type="datetime")
        p.xaxis[0].formatter = DatetimeTickFormatter(days='%b %d')

        # Secondary y-axis (right) for the per-million rates; the initial
        # range is a placeholder that fit_avg_axis() replaces below.
        p.extra_y_ranges = {"Avg": Range1d(start=-1, end=2)}
        p.add_layout(LinearAxis(y_range_name="Avg"), 'right')

        # Absolute counts on the default (left) axis.
        p.line('Date','Cases',source = src, legend_label = 'Cases',
               line_color = 'blue',line_width = 4)
        p.line('Date','Deaths',source = src, legend_label = 'Deaths',
               line_color = 'red', line_width = 4)
        # Per-million rates on the secondary (right) axis.
        p.line('Date','avgc',source = src, legend_label = 'Avg Cases per 1M people',
               line_color = 'green',line_width = 4, y_range_name = "Avg")
        p.line('Date','avgd',source = src, legend_label = 'Avg Deaths per 1M people',
               line_color = 'orange', line_width = 4,y_range_name = "Avg")

        fit_avg_axis(p, src)

        # Hover tool with mouse mode
        hover = HoverTool(tooltips=[('Date', '@Date{%F}'),
                                    ('Deaths', '@Deaths'),
                                    ('Cases', '@Cases'),
                                    ('Avg Cases','@avgc'),
                                    ('Avg Deaths','@avgd')],
                          formatters={'@Date': 'datetime'},
                          mode='mouse')

        p.add_tools(hover)
        p.legend.location = "top_left"
        # Styling
        p = style(p)
        return p

    def update(attr, old, new):
        """Widget callback: rebuild the data for the current control values."""
        new_src = make_dataset(sel_country.value,sel_chart.value,
                               range_start = dateslider.value[0],
                               range_end = dateslider.value[1])
        p.title.text = 'Covid-19 Cases and Deaths for ' + sel_country.value + ': '+ sel_chart.value
        src.data.update(new_src.data)
        fit_avg_axis(p, src)

    # Controls
    sel_country = Select(value = "India", options = sorted_by_cases,width =220)
    sel_chart = Select(value = 'Day by Day', options = ['Day by Day','Cumulative'], width = 120)
    dateslider = DateRangeSlider(start = datemin, end = datemax, value = (datemin, datemax),
                                 title = 'Date Range', sizing_mode = "scale_width")

    # Every control triggers the same refresh.
    sel_country.on_change('value', update)
    sel_chart.on_change('value', update)
    dateslider.on_change('value', update)

    # Initial data and plot, then assemble the layout.
    src = make_dataset(sel_country.value,sel_chart.value,
                       range_start = datemin,
                       range_end = datemax)
    p = make_plot(src)
    controls = row(sel_country,sel_chart, dateslider)
    layout = column(controls,p)
    doc.add_root(layout)
# Wrap modify_doc in a function handler, build an Application from that
# handler, and pass the application to show().
handler = FunctionHandler(modify_doc)
mapp = Application(handler)
show(mapp)