In [None]:
from IPython.display import HTML, display

# Emit a "Reveal/Hide Code" button as this cell's output. The embedded script
# toggles the visibility of every `div.input` element; code_toggle is also
# registered as the document-ready handler, so with code_show starting as true
# the inputs are hidden when the page first loads.
# The script uses `$`, so it assumes jQuery is available on the hosting page.
HTML('''<script>
code_show=true;
function code_toggle() {
  if (code_show){
  $('div.input').hide();
  } else {
  $('div.input').show();
  }
  code_show = !code_show
}
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()">
<input type="submit" value="Reveal/Hide Code"></form>''')

In [None]:
# Package imports
import os
import re

import altair as alt
from altair_saver import save
from vega_datasets import data

import json
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns

import datetime

from pycountry_convert import country_alpha2_to_continent_code


# Package configurations
# Disable Altair's maximum-rows check so large frames can be passed to charts.
alt.data_transformers.disable_max_rows()


# Configuration for notebook
aggregate_states = True # option to roll-up multiple states to a top level per country view
show_charts_for_all_countries = False # option to show full long list of charts for all countries
normalise_vs_population = False # option to normalise figures per Million population

num_cases_threshold = 1500 # optional number of cases threshold to filter out what would otherwise be 'noisy' statistics; value is None, or integer threshold

# Countries whose per-state rows are summed into one row when aggregating states
countries_to_aggregate = ['Australia', 'Canada', 'China']

# Date the notebook was run, formatted like 05-Apr-2020
run_date_str = "{:%d-%b-%Y}".format(datetime.datetime.now())

# Slide export configuration
export_slides = True
slides_file_name = 'COVID_Summary.pptx'
template_pptx = 'widescreen.pptx' # input slide template, to help set aspect ratio
saved_image_name = 'image.png' # chart image file, overwritten for each exported chart

In [None]:
def display_h1(myString):
    """Render ``myString`` in the cell output as an HTML level-1 heading."""
    heading_markup = '<h1>%s</h1>' % myString
    display(HTML(heading_markup))

def display_h2(myString):
    """Render ``myString`` in the cell output as an HTML level-2 heading."""
    heading_markup = '<h2>%s</h2>' % myString
    display(HTML(heading_markup))

def display_table(myDF):
    """Display a DataFrame as a styled HTML table, without its index column.

    The frame is rendered with ``DataFrame.to_html``; the generated ``<table>``
    and ``<thead>`` tags are given ids so the injected CSS can target them.

    Args:
        myDF: the pandas DataFrame to show.
    """

    total_id = 'totalID'
    header_id = 'headerID'
    data_in_html = myDF.to_html(index=False)
    tr_style = '<style>.dataframe td { text-align: left; }</style>'
    # CSS declarations take the form `property: value`. The earlier
    # `color='black'` and `height='250'` forms are invalid CSS and were ignored
    # by browsers: the colour is now a valid declaration, and the height (which
    # never took effect) is dropped so the rendered size does not change.
    style_in_html = """<style>
        table#{total_table} {{color: black; font-size:13px; text-align:center; border:0.2px solid black;
                             border-collapse:collapse; max-width: 900px; word-wrap:break-word; white-space:pre-wrap; }}
        thead#{header_table} {{background-color: #4D4D4D; color:#ffffff}}
        </style>""".format(total_table=total_id, header_table=header_id)
    # count=1: tag only the first (outer) table / thead, and quote the id values.
    data_in_html = re.sub(r'<table', r'<table id="%s" ' % total_id, data_in_html, count=1)
    data_in_html = re.sub(r'<thead', r'<thead id="%s" ' % header_id, data_in_html, count=1)

    display(HTML(style_in_html + tr_style + data_in_html))

def display_set_full_width():
    """Inject CSS that sets the widths of the notebook container, menubar and toolbar."""
    full_width_css = """
    <style>
        div#notebook-container    { width: 100%; }
        div#menubar-container     { width: 65%; }
        div#maintoolbar-container { width: 99%; }
    </style>
    """
    display(HTML(full_width_css))

# load country information
def load_country_data():
    """Download the Johns Hopkins CSSE UID/ISO/FIPS lookup table.

    Returns:
        pandas.DataFrame: the lookup CSV as parsed by ``pd.read_csv``, with
        only empty fields treated as missing values.
    """
    base_url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data'
    file_name = 'UID_ISO_FIPS_LookUp_Table.csv'
    # URLs always use '/'; os.path.join would insert a backslash on Windows.
    url = f'{base_url}/{file_name}'

    # pandas' default NA strings include "NA", which is also a legitimate ISO
    # alpha-2 code (Namibia). Restrict missing-value parsing to empty fields so
    # that code survives as a string and can be mapped to a continent.
    df = pd.read_csv(url, keep_default_na=False, na_values=[''])

    return df

def get_continent_from_iso2_code(iso2):
    """Return the continent code for an ISO alpha-2 country code, or 'Unknown'.

    Any failure of the lookup (for example an unrecognised or missing code)
    yields 'Unknown' instead of raising.
    """
    try:
        return country_alpha2_to_continent_code(iso2)
    except Exception:
        # Deliberately broad best-effort fallback, but no longer a bare
        # `except:` so KeyboardInterrupt / SystemExit still propagate.
        return 'Unknown'

def add_continent(df):
    """Left-join country lookup data onto ``df`` and add a 'continent' column.

    ``df`` must carry a 'Combined_Key' column; the lookup contributes
    Country_Region, Province_State, iso2, iso3 and Population. Continent codes
    are expanded to full names; codes that cannot be resolved stay 'Unknown'.
    """
    lookup_columns = ['Country_Region', 'Province_State', 'Combined_Key', 'iso2', 'iso3', 'Population']
    lookup = load_country_data().filter(lookup_columns)

    # fix missing space in Johns Hopkins online data before merge
    key_fixes = {'Northwest Territories,Canada': 'Northwest Territories, Canada'}
    lookup['Combined_Key'] = lookup['Combined_Key'].replace(key_fixes)

    merged = df.merge(lookup, how='left', on='Combined_Key').drop_duplicates()

    merged['continent'] = merged['iso2'].apply(get_continent_from_iso2_code)

    # add missing lookup for Sint Marten (Dutch part)
    is_sint_maarten = merged['iso2'] == 'SX'
    merged.loc[is_sint_maarten, 'continent'] = 'NA'

    continent_names = {'AN':'Antarctica', 'AS':'Asia', 'EU':'Europe', 'SA':'South America', 'NA':'North America', 'OC':'Oceania', 'AF':'Africa'}
    merged['continent'] = merged['continent'].replace(continent_names)

    return merged

# load data series, valid options are: 'confirmed', 'deaths', or 'recovered'
def load_individual_timeseries(name, agg_states=True, keep_latlong=True):
    """Download one JHU CSSE global time series and reshape it to long form.

    Args:
        name: series name; used verbatim in the file name and lower-cased for
            the 'type' column.
        agg_states: when True, rows of the countries listed in the module-level
            ``countries_to_aggregate`` are summed into one row per country and
            date (their 'state' value is then missing).
        keep_latlong: when True, keep 'lat'/'long' columns (averaged over the
            states when aggregating); when False, drop them.

    Returns:
        pandas.DataFrame indexed by 'date' (datetime) with columns country,
        state, [lat, long,] type and cases. 'Diamond Princess' rows are removed.
    """
    base_url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series'
    file_name = f'time_series_covid19_{name}_global.csv'
    # URLs always use '/'; os.path.join would insert a backslash on Windows.
    url = f'{base_url}/{file_name}'
    df = pd.read_csv(url, index_col=['Country/Region', 'Province/State', 'Lat', 'Long'])
    df['type'] = name.lower()
    df.columns.name = 'date'

    df = df.set_index('type', append=True)
    if not keep_latlong:
        df = df.reset_index(['Lat', 'Long'], drop=True)
        column_list = ['country', 'state', 'type', 'cases']
    else:
        column_list = ['country', 'state', 'lat', 'long', 'type', 'cases']

    # wide (one column per date) -> long (one row per region and date)
    df = df.stack()
    df = df.reset_index()
    df = df.set_index('date')

    df.index = pd.to_datetime(df.index)
    df.columns = column_list

    df['cases'] = df['cases'].astype(int)

    # Aggregate large countries split by states
    if agg_states:
        is_aggregated = df['country'].isin(countries_to_aggregate)
        df_non_states = df[~is_aggregated]
        df_states     = df[is_aggregated]

        # Only aggregate lat/long when those columns were kept; asking for them
        # after they were dropped would raise inside .agg().
        agg_spec = {'cases': 'sum'}
        if keep_latlong:
            agg_spec = {'lat': 'mean', 'long': 'mean', 'cases': 'sum'}

        df_states = df_states.groupby(['country', 'date', 'type']).agg(agg_spec)
        df_states = df_states.reset_index(['country', 'type'])
        df = pd.concat([df_non_states, df_states])

        df = df.reset_index()
        df = df.set_index('date')

    countries_to_drop = ['Diamond Princess'] # drop the cruise ship
    df = df[~df['country'].isin(countries_to_drop)]

    return df

def load_covid_data(agg_states=False, p_crit=.05, filter_n_days_100=None, keep_latlong=True):
    """Build the combined confirmed/deaths time series with derived columns.

    Args:
        agg_states: passed to ``load_individual_timeseries`` for both series.
        p_crit: fraction of confirmed cases used for 'critical_estimate'.
        filter_n_days_100: when not None, keep only countries that have at
            least one row with ``days_since_100 >= filter_n_days_100``.
        keep_latlong: passed to ``load_individual_timeseries``.

    Returns:
        pandas.DataFrame with one row per date and region, including
        confirmed, deaths, critical_estimate, days_since_100, Combined_Key,
        the columns added by ``add_continent`` and the per-million figures.
    """
    df = load_individual_timeseries('confirmed', agg_states=agg_states, keep_latlong=keep_latlong)
    df = df.rename(columns={'cases': 'confirmed'})

    df['state'] = df['state'].fillna('')

    # Estimated critical cases
    df = df.assign(critical_estimate=df.confirmed*p_crit)

    # Compute days relative to when 100 confirmed cases was crossed.
    # Within each (country, state), rows are numbered in their existing order
    # from -(rows below 100) up to (rows at or above 100) - 1. A grouped
    # computation replaces the nested loop that re-scanned the whole frame for
    # every region. Positional arrays are used because the date index repeats.
    grouped = (
        df.assign(_below_100=(df['confirmed'] < 100).astype(int))
          .groupby(['country', 'state'], sort=False)
    )
    rows_below_100 = grouped['_below_100'].transform('sum').values
    row_position = grouped.cumcount().values
    df['days_since_100'] = (row_position - rows_below_100).astype(float)

    # Recovered cases are not joined.

    # Add deaths. Load them with the same aggregation as the confirmed series
    # and apply the same '' fill to 'state', so the (date, country, state)
    # index join below lines up row for row.
    df_deaths = load_individual_timeseries('deaths', agg_states=agg_states, keep_latlong=keep_latlong)
    df_deaths = df_deaths.assign(state=df_deaths['state'].fillna(''))
    df_d = df_deaths.set_index(['country', 'state'], append=True)[['cases']]
    df_d.columns = ['deaths']

    df = (df.set_index(['country', 'state'], append=True)
            .join(df_d)
            .reset_index(['country', 'state'])
    )

    if filter_n_days_100 is not None:
        # Select countries for which we have at least some information
        countries = pd.Series(df.loc[df.days_since_100 >= filter_n_days_100].country.unique())
        df = df.loc[lambda x: x.country.isin(countries)]

    # "State, Country", or just "Country" when the state is empty
    df['Combined_Key'] = (df['state'] + ", " + df['country']).str.strip(', ')

    df = df.reset_index()

    df = add_continent(df) # consider to add continent for time series data too

    df['deaths_per_M_population']    = df['deaths']    / df['Population'] * 1E6 # per 1M population
    df['confirmed_per_M_population'] = df['confirmed'] / df['Population'] * 1E6 # per 1M population

    return df

def get_totals_from_time_series(df):
    """Return one row per (country, state) holding each column's last value.

    The grouping keys are returned as ordinary columns rather than as the index.
    """
    region_keys = ['country', 'state']
    latest_per_region = df.groupby(region_keys).last()
    # move country/state back out of the index so callers can use them as columns
    return latest_per_region.reset_index()

def get_continent_totals(df, confirmed_field, deaths_field):
    """Aggregate per-region totals to one row per continent.

    Args:
        df: frame with 'continent', 'lat', 'long', 'Population' and the columns
            needed for the requested fields.
        confirmed_field: column to report for confirmed cases.
        deaths_field: column to report for deaths.

    Returns:
        pandas.DataFrame with columns continent, lat (mean), long (mean),
        ``confirmed_field``, ``deaths_field`` and Population (sum).

    Fields ending in '_per_M_population' are rates, so they are not summed:
    the underlying absolute column (the name without that suffix) is summed
    per continent and divided by the continent population. Adding up country
    rates would overstate the continent figure.
    """
    per_m_suffix = '_per_M_population'

    by_continent = df.groupby('continent')
    totals = by_continent.agg({'lat': 'mean', 'long': 'mean', 'Population': 'sum'})

    for field in (confirmed_field, deaths_field):
        if field.endswith(per_m_suffix):
            base_field = field[:-len(per_m_suffix)]
            # Count cases only where a population is known, so numerator and
            # denominator cover the same regions.
            counted = df[base_field].where(df['Population'].notna())
            numerator = counted.groupby(df['continent']).sum()
            totals[field] = numerator / totals['Population'] * 1E6
        else:
            totals[field] = by_continent[field].sum()

    totals = totals.reset_index()
    return totals[['continent', 'lat', 'long', confirmed_field, deaths_field, 'Population']]

# Set the notebook layout widths and show the report headings.
display_set_full_width()
display_h1('COVID Data Visualisation')
display_h2('Load COVID data')
# Full time series; filter_n_days_100=1 keeps only countries having at least
# one row with days_since_100 >= 1.
df_covid_time_series = load_covid_data(agg_states=aggregate_states, filter_n_days_100=1, keep_latlong=True)

# Last value of every column per (country, state).
df_totals = get_totals_from_time_series(df_covid_time_series)

# optionally filter by num cases, to exclude what would otherwise be 'noisy' statistics
if num_cases_threshold:
    selected_list = df_totals.loc[df_totals['confirmed']>=num_cases_threshold, 'Combined_Key']
    exclude_list  = df_totals.loc[df_totals['confirmed']<num_cases_threshold, 'Combined_Key']
    print('Excluding cases <', num_cases_threshold)
    print('- list:', list(exclude_list))
    # Apply the same region selection to the totals and to the time series.
    df_totals = df_totals[df_totals['Combined_Key'].isin(selected_list)]
    df_covid_time_series = df_covid_time_series[df_covid_time_series['Combined_Key'].isin(selected_list)]

# Report any regions whose continent could not be resolved.
df_unknown_continents = df_totals[df_totals['continent']=='Unknown']
if len(df_unknown_continents):
    print('Unknown continents:')
    display(df_totals[df_totals['continent']=='Unknown'])

# Distinct region -> continent mapping.
df_country_continent = df_totals.copy().filter(['continent', 'Combined_Key', 'country', 'state']).drop_duplicates()

# Most recent date present in the (filtered) time series, formatted like 05-Apr-2020.
latest_date = df_covid_time_series['date'].max()
latest_date_str = "{:%d-%b-%Y}".format(latest_date)
print('Most recent data: ', latest_date_str)

# Column names used by later cells for confirmed cases and deaths.
if normalise_vs_population:
    confirmed_field = 'confirmed_per_M_population'
    deaths_field    = 'deaths_per_M_population'
else:
    confirmed_field = 'confirmed'
    deaths_field    = 'deaths'

In [None]:
__version__ = "0.1.00"

import os # For file path handling  

# to obtain the pptx module:
#   pip install --user python-pptx
import pptx
from pptx.util import Mm


class PPT():
    """Convenience wrapper around a python-pptx presentation built from a template.

    ``slide_template_layout`` names the layout indices used from the template,
    and ``placeholders`` maps each layout to the placeholder ids expected on it.
    Both the output file name and the placeholder map can be overridden per
    instance through the constructor.
    """
    OUTPUT_FILE_NAME = 'test_slides.pptx'
    IMAGE_FILE_NAME = 'test_chart.png'
    DIRECTORY_NAME = '.' # option to put in sub-directory

    # folder structure
    generated_chart = os.path.join(DIRECTORY_NAME, IMAGE_FILE_NAME)
    generated_ppt = os.path.join(DIRECTORY_NAME, OUTPUT_FILE_NAME)

    # slide template enumerations
    from enum import IntEnum
    class slide_template_layout(IntEnum):
        TITLE = 0
        TITLE_AND_CONTENT = 1
        TITLE_AND_MAX_CONTENT = 2

    # layout -> placeholder name -> placeholder id and kind
    placeholders = {
        slide_template_layout.TITLE:{'title':{'id':0, 'type':'text'}, 'subtitle':{'id':1, 'type':'text'}},
        slide_template_layout.TITLE_AND_CONTENT:{'title':{'id':0, 'type':'text'}, 'content':{'id':1, 'type':'content'}},
        slide_template_layout.TITLE_AND_MAX_CONTENT:{'title':{'id':0, 'type':'text'}, 'content':{'id':1, 'type':'content'}},
    }

    def __init__(self, template=None, outfile=None, placeholders=None, slide_width_mm=None, slide_height_mm=None):
        """Open ``template`` (or python-pptx's default when None).

        Args:
            template: path of the template .pptx, or None.
            outfile: default path used by ``save``; falls back to ``generated_ppt``.
            placeholders: optional replacement for the class-level placeholder map.
            slide_width_mm, slide_height_mm: when both are given, resize the slides.
        """
        self.template = template
        self.outfile = outfile if outfile else self.generated_ppt
        self.placeholders = placeholders if placeholders else self.placeholders

        if template:
            print('Opening slides:', self.template)

        self.ppt = pptx.Presentation(self.template)
        self.slides = self.ppt.slides

        if (slide_width_mm is not None) and (slide_height_mm is not None):
            self.ppt.slide_width = Mm(slide_width_mm)
            self.ppt.slide_height = Mm(slide_height_mm)

    def _find_placeholder(self, slide, placeholder_name):
        """Return the placeholder of ``slide`` registered under ``placeholder_name``.

        Raises whatever the underlying lookups raise when the slide's layout or
        the placeholder name is not known; callers handle that.
        """
        layout = self.ppt.slide_layouts.index(slide.slide_layout)
        placeholder_id = self.placeholders.get(layout,{}).get(placeholder_name,{}).get('id',None)
        return slide.placeholders[placeholder_id]

    # Add slide
    def add_slide(self, layout):
        """Append a slide using the template layout at index ``layout`` and return it."""
        slide_layout = self.ppt.slide_layouts[layout]
        slide = self.slides.add_slide(slide_layout)
        return slide

    def set_text(self, slide, placeholder_name, text):
        """Set the text of a named placeholder; failures are printed, not raised."""
        try:
            placeholder = self._find_placeholder(slide, placeholder_name)
            placeholder.text = text
        except Exception as e:
            print('Failed to add text to placeholder: ', placeholder_name)
            print(e)

    def get_slide_placeholder_size(self, slide, placeholder_name):
        """Return (left, top, width, height) in mm for a named placeholder.

        Returns None when the placeholder cannot be found; the failure is
        printed. Previously the failure path returned an unbound local.
        """
        try:
            placeholder = self._find_placeholder(slide, placeholder_name)
            return (placeholder.left.mm, placeholder.top.mm, placeholder.width.mm, placeholder.height.mm)
        except Exception as e:
            print('Failed to get placeholder: ', placeholder_name)
            print(e)
            return None

    def add_image(self, slide, placeholder_name, image_file, keep_aspect_ratio=True, position='centre', delete_placeholder=True):
        """Place ``image_file`` over a named placeholder.

        Args:
            slide: target slide.
            placeholder_name: placeholder whose position and size are used.
            image_file: path of the image to insert.
            keep_aspect_ratio: when True only one dimension is fixed so the
                image keeps its proportions and fits inside the placeholder.
            position: 'centre' centres the image along the free dimension.
            delete_placeholder: remove the placeholder shape afterwards.

        Failures are printed, not raised.
        """
        try:
            # read image file to check actual aspect ratio
            img = plt.imread(image_file)
            img_height, img_width = img.shape[0], img.shape[1]
            image_aspect_ratio = img_width/img_height

            # get placeholder details, including aspect ratio
            placeholder = self._find_placeholder(slide, placeholder_name)
            placeholder_aspect_ratio = placeholder.width/placeholder.height

            # default positions; later calculate offset for centering if appropriate
            x_pos = placeholder.left
            y_pos = placeholder.top
            width = placeholder.width
            height = placeholder.height

            if keep_aspect_ratio: # only set 1 dimension of image, to keep aspect ratio
                # A relatively wider image is limited by the placeholder width,
                # a relatively taller one by its height.
                use_image_width = image_aspect_ratio >= placeholder_aspect_ratio

                if use_image_width:
                    if position=='centre':
                        new_img_height = img_height/img_width*width
                        # offsets are whole EMU values
                        y_pos = int(y_pos + (height - new_img_height)/2)
                    slide.shapes.add_picture(image_file, x_pos, y_pos, width=width)
                else:
                    if position=='centre':
                        new_img_width = img_width/img_height*height
                        x_pos = int(x_pos + (width - new_img_width)/2)
                    slide.shapes.add_picture(image_file, x_pos, y_pos, height=height)

            else: # set both dimensions of image, which can therefore change aspect ratio
                slide.shapes.add_picture(image_file, x_pos, y_pos, width=width, height=height)

            if delete_placeholder:
                sp = placeholder._sp # get shape xml element
                sp.getparent().remove(sp) # remove shape

        except Exception as e:
            print('Failed to add image to placeholder: ', placeholder_name, ', image filename:', image_file)
            print(e)

    def save(self, outfile=None):
        """Write the presentation to ``outfile`` (default: the instance's outfile).

        Raises:
            ValueError: when no file name is available. Raising replaces the
                earlier ``quit()``, which would terminate the whole session.
        """
        outfile = outfile if outfile else self.outfile
        if not outfile:
            raise ValueError('Need to specify filename to save')
        self.ppt.save(outfile)

    def slide_describe(self, slide, layout_name):
        """Annotate ``slide`` with its layout name and a list of its placeholders."""
        print('Slide layout:', layout_name)
        # set_text reports its own failures; the earlier call to an undefined
        # set_title always failed silently, so the title was never filled in.
        self.set_text(slide, placeholder_name='title', text=layout_name)

        txBox = slide.shapes.add_textbox(0, 0, 100, 100)
        tf = txBox.text_frame
        tf.text = 'Placeholders:'
        for placeholder in slide.placeholders:
            size_description = ("left=%d, top=%d, width=%d, height=%d (mm)" % (placeholder.left.mm, placeholder.top.mm, placeholder.width.mm, placeholder.height.mm))
            num_name = '%d %s' % (placeholder.placeholder_format.idx, placeholder.name)
            name_name_size = num_name + " : " + size_description

            p = tf.add_paragraph()
            p.text = name_name_size

    def new_slide_describe(self, layout):
        """Add a slide for ``layout`` (a slide_template_layout member) and annotate it."""
        slide = self.add_slide(layout)
        self.slide_describe(slide, layout.name)

    def add_describe_template(self, known_layouts_only = True):
        """Add one annotated slide per known layout."""
        # to do: derive slide list to populate from template
        if not known_layouts_only:
            print('Proposed option to include and annotate all layouts in the template; not yet implemented')
        else:
            self.new_slide_describe(self.slide_template_layout.TITLE)
            self.new_slide_describe(self.slide_template_layout.TITLE_AND_CONTENT)
            self.new_slide_describe(self.slide_template_layout.TITLE_AND_MAX_CONTENT)

In [None]:
# Only open the slide template when slide export is enabled; later cells use
# `prs` under the same `export_slides` guard.
if export_slides:
    # Create new presentation from (empty) template
    prs = PPT(template_pptx) # create new presentation using default template

In [None]:
slide_title = 'COVID-19 Summary'
display_h1(slide_title)

# Summary text, one entry per line; empty entries produce blank lines.
summary_lines = [
    'Analysis run: ' + run_date_str,
    'Latest data: ' + latest_date_str,
    'Total confirmed cases: {:,d}'.format(int(df_totals['confirmed'].sum())),
    'Total deaths: {:,d}'.format(int(df_totals['deaths'].sum())),
    '',
    'Data from: COVID Data Repository by Johns Hopkins CSSE',
    '',
    'Analysis & summary by Colin Osborne',
]
# The threshold is optional (None disables the filter); only mention it when
# one is in force, since formatting None with ',d' would raise.
if num_cases_threshold:
    summary_lines.append('Country/region included in trends if confirmed cases above: {:,d}'.format(num_cases_threshold))

text_str = '\n'.join(summary_lines)

print(text_str)

if export_slides:
    # Add a new slide ready for chart and set title text
    slide = prs.add_slide(layout = prs.slide_template_layout.TITLE_AND_CONTENT)
    prs.set_text(slide, placeholder_name='title', text=slide_title)
    prs.set_text(slide, placeholder_name='content', text=text_str)

In [None]:
# Title used for the notebook heading here and for the exported slide later in this cell.
slide_title = 'Continents'
display_h1(slide_title)

# category: 'continent', 'country' or 'Combined_Key'; data_field: 'confirmed' or 'deaths'
def get_bar_chart(df, category='continent', data_field='confirmed', row_title='', column_title=''):
    """Build a horizontal bar chart of ``data_field`` per ``category`` with value labels.

    ``row_title`` becomes the y-axis title and ``column_title`` the chart title.
    Bars keep the row order of ``df``.
    """
    value_encoding = data_field+':Q'
    category_encoding = category+':N'

    hover_fields = [alt.Tooltip(category_encoding),
                    alt.Tooltip(value_encoding, format=',d')]
    y_axis = alt.Y(category_encoding, sort=None, title=row_title) # sort='-x' to sort locally in this chart

    bar_layer = alt.Chart(df).mark_bar().encode(
        x=value_encoding,
        y=y_axis,
        tooltip=hover_fields
    )

    # dx nudges text to right so it doesn't appear on top of the bar
    label_style = dict(align='left', baseline='middle', dx=3)
    label_layer = bar_layer.mark_text(**label_style).encode(
        text=alt.Text(value_encoding, format=',d') #,.2r gives rounded to 2 significant figures
    )

    return (bar_layer + label_layer).properties(title=column_title)

def configure_bar_chart(chart):
    """Apply the shared bar-chart styling: blue, size-20 y-axis titles and chart titles."""
    y_axis_style = dict(titleColor='blue', titleFontSize=20)
    title_style = dict(fontSize=20, color='blue')

    styled = chart.configure_axisY(**y_axis_style)
    return styled.configure_title(**title_style)

# Continent-level totals, once for the absolute fields and once for the per-million fields.
df_continent_totals_absolute       = get_continent_totals(df_totals, 'confirmed', 'deaths')
df_continent_totals_per_population = get_continent_totals(df_totals, 'confirmed_per_M_population', 'deaths_per_M_population')

sort_by_confirmed = False # sort by confirmed cases or deaths
sort_field_absolute = 'confirmed' if sort_by_confirmed else 'deaths'
sort_field_per_population = 'confirmed_per_M_population' if sort_by_confirmed else 'deaths_per_M_population'

# The charts use sort=None, so this row order is the bar order.
print(f'Totals sorted by {sort_field_absolute}; '+latest_date_str)
df_continent_totals_absolute = df_continent_totals_absolute.sort_values(by=sort_field_absolute, ascending=False)
df_continent_totals_per_population = df_continent_totals_per_population.sort_values(by=sort_field_per_population, ascending=False)

# Top row: absolute figures (this row carries the column titles).
chart_confirmed = get_bar_chart(df_continent_totals_absolute, category='continent', data_field='confirmed', row_title='Absolute', column_title='Confirmed Cases')
chart_deaths    = get_bar_chart(df_continent_totals_absolute, category='continent', data_field='deaths', column_title='Deaths')
chart_population    = get_bar_chart(df_continent_totals_absolute, category='continent', data_field='Population', column_title='Population')
row_absolute = chart_confirmed | chart_deaths | chart_population

# Bottom row: per-million figures, in their own sort order.
chart_confirmed = get_bar_chart(df_continent_totals_per_population, category='continent', data_field='confirmed_per_M_population', row_title='Per M Population')
chart_deaths    = get_bar_chart(df_continent_totals_per_population, category='continent', data_field='deaths_per_M_population')
chart_population    = get_bar_chart(df_continent_totals_per_population, category='continent', data_field='Population')
row_per_population = chart_confirmed | chart_deaths | chart_population

# Stack the two rows and apply the shared styling.
chart = (row_absolute & row_per_population).resolve_scale(color='independent').configure_legend(title=None, labelFontSize=11, symbolSize=15)
chart = configure_bar_chart(chart)

chart.display()

if export_slides:
    # Save the chart to the shared image file, then place it on a new slide.
    chart.save(saved_image_name)

    # Add a new slide ready for chart and set title text
    slide = prs.add_slide(layout = prs.slide_template_layout.TITLE_AND_MAX_CONTENT)
    prs.set_text(slide, placeholder_name='title', text=slide_title)
    prs.add_image(slide, placeholder_name='content', image_file=saved_image_name)

In [None]:
# Title used for the notebook heading and for the exported slide.
slide_title = 'Countries'
display_h1(slide_title)
top_n = 20 # number of regions shown in each row of charts
sort_by_confirmed = False # sort by confirmed cases or deaths

sort_field_absolute = 'confirmed' if sort_by_confirmed else 'deaths'
sort_field_per_population = 'confirmed_per_M_population' if sort_by_confirmed else 'deaths_per_M_population'

print(f'Totals, top {top_n} by {sort_field_absolute}; '+latest_date_str)
df_totals_filtered_absolute       = df_totals.copy()
df_totals_filtered_per_population = df_totals.copy()

# Rank separately by the absolute and by the per-million field ...
df_totals_filtered_absolute       = df_totals_filtered_absolute.sort_values(by=sort_field_absolute, ascending=False)
df_totals_filtered_per_population = df_totals_filtered_per_population.sort_values(by=sort_field_per_population, ascending=False)

# ... and keep the top_n of each ranking, so the two rows may list different regions.
df_totals_filtered_absolute = df_totals_filtered_absolute[:top_n]
df_totals_filtered_per_population = df_totals_filtered_per_population[:top_n]

# Top row: absolute figures (this row carries the column titles).
chart_confirmed = get_bar_chart(df_totals_filtered_absolute, category='Combined_Key', data_field='confirmed', row_title='Absolute', column_title='Confirmed Cases')
chart_deaths    = get_bar_chart(df_totals_filtered_absolute, category='Combined_Key', data_field='deaths', column_title='Deaths')
chart_population    = get_bar_chart(df_totals_filtered_absolute, category='Combined_Key', data_field='Population', column_title='Population')
row_absolute = chart_confirmed | chart_deaths | chart_population

# Bottom row: per-million figures.
chart_confirmed = get_bar_chart(df_totals_filtered_per_population, category='Combined_Key', data_field='confirmed_per_M_population', row_title='Per M Population')
chart_deaths    = get_bar_chart(df_totals_filtered_per_population, category='Combined_Key', data_field='deaths_per_M_population')
chart_population    = get_bar_chart(df_totals_filtered_per_population, category='Combined_Key', data_field='Population')
row_per_population = chart_confirmed | chart_deaths | chart_population

# Stack the two rows and apply the shared styling.
chart = (row_absolute & row_per_population).resolve_scale(color='independent').configure_legend(title=None, labelFontSize=11, symbolSize=15)

chart = configure_bar_chart(chart)

chart.display()

if export_slides:
    # Save the chart to the shared image file, then place it on a new slide.
    chart.save(saved_image_name)

    # Add a new slide ready for chart and set title text
    slide = prs.add_slide(layout = prs.slide_template_layout.TITLE_AND_MAX_CONTENT)
    prs.set_text(slide, placeholder_name='title', text=slide_title)
    prs.add_image(slide, placeholder_name='content', image_file=saved_image_name)

In [None]:
display_h1('Trend Charts by Continent')
small_subset = False # True restricts the trend data to the hand-picked list below
top_n = 10 # regions charted per continent; read as a global by trend_dashboard_charts

# Both flags are read as globals by trend_dashboard_charts.
measure_daily = True
include_total_confirmed_bar_chart = True

# NOTE: sort_by_confirmed is not set in this cell; it carries over from the cell above.
sort_field_absolute = 'confirmed' if sort_by_confirmed else 'deaths'
sort_field_per_population = 'confirmed_per_M_population' if sort_by_confirmed else 'deaths_per_M_population'

if small_subset:
    Combined_Key_list = ['Italy', 'Sweden', 'United Kingdom', 'France', 'US', 'Germany', 'Brazil']
    print('Country list:', Combined_Key_list)
    df_covid_time_series_filtered = df_covid_time_series[df_covid_time_series['Combined_Key'].isin(Combined_Key_list)].copy()
else:
    df_covid_time_series_filtered = df_covid_time_series.copy()

print(f'Per continent, top {top_n} by total {sort_field_absolute}; '+latest_date_str)


def trend_chart_continent(df, continent, scale_type='linear', x_metric='date', metric='deaths', x_min=None, x_max=None, measure_daily=True, row_title='', column_title=''):
    """Line chart of the rolling weekly mean of ``metric``, one line per Combined_Key.

    Args:
        df: time series rows for the regions to plot.
        continent: accepted for call compatibility; not used by the chart.
        scale_type: y-axis scale type, e.g. 'linear' or 'log'.
        x_metric: 'date' (temporal axis) or 'days_since_100' (quantitative axis,
            restricted to values >= 0).
        metric: column to plot.
        x_min, x_max: x-axis domain; x_min is forced to 0 for 'days_since_100'.
        measure_daily: when True, plot day-on-day increases instead of totals.
        row_title: y-axis title.
        column_title: chart title.
    """
    df = df.copy()
    df = df.reset_index()

    if measure_daily:
        # Difference within each region. A plain diff() over the whole frame
        # subtracts the previous region's last value from the next region's
        # first row. Negative corrections are clipped to 0.
        df[metric] = df.groupby('Combined_Key')[metric].diff().clip(lower=0)

    x_metric_type = ':T' # for date
    if x_metric == 'days_since_100':
        df = df[df['days_since_100']>=0]
        x_metric_type = ':Q' # for count of days
        x_min = 0 # no need for negative x-axis

    x_encoding = x_metric+x_metric_type

    line = alt.Chart(df).mark_line().transform_window(
        weekAvg=f'mean({metric})',
        frame=[-6, 0], # current row plus the 6 before it: a 7-row (one week) window
        groupby=['Combined_Key']
    ).transform_filter(
        alt.datum.weekAvg > 0
    ).encode(
        x=alt.X(x_encoding, scale=alt.Scale(domain=[x_min, x_max])),
        y=alt.Y('weekAvg:Q', title=row_title, scale=alt.Scale(type=scale_type)),
        color=alt.Color('Combined_Key:N', sort=None),
        tooltip=[alt.Tooltip('Combined_Key:N'),
                 alt.Tooltip(x_encoding),
                 alt.Tooltip('weekAvg:Q', format=',d')]
    ).properties(
        height=110, width=450,
    )

    chart = line
    chart = chart.properties(title=column_title)

    return chart

# category: 'continent', 'country' or 'Combined_Key'; data_field: 'confirmed' or 'deaths'
def get_bar_chart_coloured(df, continent, category='continent', data_field='confirmed', row_title='', column_title=''):
    """Bar chart of ``data_field`` per ``category``, coloured by Combined_Key, with value labels.

    ``continent`` is accepted but not used by the chart. Bars keep the row
    order of ``df``.
    """
    value_encoding = data_field+':Q'
    category_encoding = category+':N'

    hover_fields = [alt.Tooltip(category_encoding),
                    alt.Tooltip(value_encoding, format=',d')]

    bar_layer = alt.Chart(df).mark_bar().encode(
        x=alt.X(value_encoding, title=data_field),
        y=alt.Y(category_encoding, sort=None, title=row_title), # sort='-x' to sort locally in this chart
        color=alt.Color('Combined_Key:N', sort=None),
        tooltip=hover_fields
    )
    bar_layer = bar_layer.properties(height=110, width=200)

    # dx nudges text to right so it doesn't appear on top of the bar
    label_style = dict(align='left', baseline='middle', dx=3)
    label_layer = bar_layer.mark_text(**label_style).encode(
        text=alt.Text(value_encoding, format=',d') #,.2r gives rounded to 2 significant figures
    )

    return (bar_layer + label_layer).properties(title=column_title)

def configure_trend_chart_continents(chart):
    """Apply the shared styling for the per-continent trend dashboard.

    Y-axis titles and chart titles are blue at size 20, and chart titles are
    anchored in the middle. The x-axis configuration is invoked with no options.
    """
    y_axis_style = dict(titleColor='blue', titleFontSize=20)
    title_style = dict(fontSize=20, anchor='middle', color='blue')

    styled = chart.configure_axisY(**y_axis_style)
    styled = styled.configure_axisX()
    return styled.configure_title(**title_style)

def trend_dashboard_charts(df, sort_field, confirmed_field, deaths_field, scale_type='linear', x_metric='date'):
    """Build the dashboard: one row of trend and total charts per continent.

    Each row holds daily confirmed and daily deaths trend lines followed by
    total bar charts. All rows share the same x-axis domain.

    Reads the module-level ``df_totals``, ``top_n``, ``measure_daily`` and
    ``include_total_confirmed_bar_chart``.

    Args:
        df: time series rows to chart.
        sort_field: ``df_totals`` column used to rank regions within a continent.
        confirmed_field, deaths_field: columns plotted for cases and deaths.
        scale_type: y-axis scale type for the trend charts.
        x_metric: x-axis column, 'date' or 'days_since_100'.
    """
    chart = alt.vconcat()
    first_row = True
    x_min = df[x_metric].min()
    x_max = df[x_metric].max()

    for continent in df['continent'].unique():
        df_filtered = df[df['continent']==continent]

        # Totals for this continent, highest first. These are always computed,
        # so the bar charts have data even when top_n is None (previously the
        # name was then undefined, or left over from the previous continent).
        df_totals_filtered = df_totals[df_totals['continent']==continent]
        df_totals_filtered = df_totals_filtered.sort_values(by=sort_field, ascending=False)
        if top_n is not None:
            df_totals_filtered = df_totals_filtered[:top_n]
            combined_key_top_n = df_totals_filtered['Combined_Key']
            df_filtered = df_filtered[df_filtered['Combined_Key'].isin(combined_key_top_n)] # would like to put it in the order of highest totals

        # Column titles are only shown on the first row.
        trend_confirmed_cases = trend_chart_continent(df_filtered, continent, scale_type, x_metric=x_metric, metric=confirmed_field, x_min=x_min, x_max=x_max,
                                                      measure_daily=measure_daily, row_title=continent, column_title='Daily Confirmed Cases' if first_row else '')
        trend_deaths          = trend_chart_continent(df_filtered, continent, scale_type, x_metric=x_metric, metric=deaths_field, x_min=x_min, x_max=x_max,
                                                      measure_daily=measure_daily, row_title='', column_title='Daily Deaths' if first_row else '')

        chart_confirmed = get_bar_chart_coloured(df_totals_filtered, continent, category='Combined_Key', data_field=confirmed_field, column_title='Total Confirmed Cases' if first_row else '')
        chart_deaths    = get_bar_chart_coloured(df_totals_filtered, continent, category='Combined_Key', data_field=deaths_field, column_title='Total Deaths' if first_row else '')
        first_row = False

        row = (trend_confirmed_cases | trend_deaths)
        if include_total_confirmed_bar_chart:
            row = row  | chart_confirmed
        row = row | chart_deaths
        chart = chart & row

    # Scale resolution and legend styling apply to the whole dashboard, so they
    # are set once after all rows have been added.
    chart = chart.resolve_scale(color='independent').configure_legend(title=None, labelFontSize=11, symbolSize=15)

    return chart

# --- Dashboard 1: absolute figures over calendar date ---
slide_title = 'COVID-19 Trend - Absolute'
display_h2(slide_title)

chart = trend_dashboard_charts(df_covid_time_series_filtered, sort_field=sort_field_absolute, confirmed_field='confirmed', deaths_field='deaths')
chart = configure_trend_chart_continents(chart)

chart.display()

if export_slides:
    # Save the chart to the shared image file, then place it on a new slide.
    chart.save(saved_image_name)

    # Add a new slide ready for chart and set title text
    slide = prs.add_slide(layout = prs.slide_template_layout.TITLE_AND_MAX_CONTENT)
    prs.set_text(slide, placeholder_name='title', text=slide_title)
    prs.add_image(slide, placeholder_name='content', image_file=saved_image_name)

# --- Dashboard 2: per-million figures over calendar date ---
slide_title = 'COVID-19 Trend - Per M Population'
display_h2(slide_title)

chart = trend_dashboard_charts(df_covid_time_series_filtered, sort_field=sort_field_per_population, confirmed_field='confirmed_per_M_population', deaths_field='deaths_per_M_population')
chart = configure_trend_chart_continents(chart)

chart.display()

if export_slides:
    # Save the chart to the shared image file, then place it on a new slide.
    chart.save(saved_image_name)

    # Add a new slide ready for chart and set title text
    slide = prs.add_slide(layout = prs.slide_template_layout.TITLE_AND_MAX_CONTENT)
    prs.set_text(slide, placeholder_name='title', text=slide_title)
    prs.add_image(slide, placeholder_name='content', image_file=saved_image_name)

In [None]:
# --- Dashboard 3: absolute figures, log y-axis, x-axis in days since 100 cases ---
slide_title = 'COVID-19 Trend - Log Absolute Since 100 cases'
display_h2(slide_title)

chart = trend_dashboard_charts(df_covid_time_series_filtered, sort_field=sort_field_absolute, confirmed_field='confirmed', deaths_field='deaths', scale_type='log', x_metric='days_since_100')
chart = configure_trend_chart_continents(chart)

chart.display()

if export_slides:
    # Save the chart to the shared image file, then place it on a new slide.
    chart.save(saved_image_name)

    # Add a new slide ready for chart and set title text
    slide = prs.add_slide(layout = prs.slide_template_layout.TITLE_AND_MAX_CONTENT)
    prs.set_text(slide, placeholder_name='title', text=slide_title)
    prs.add_image(slide, placeholder_name='content', image_file=saved_image_name)

# --- Dashboard 4: per-million figures, log y-axis, x-axis in days since 100 cases ---
slide_title = 'COVID-19 Trend - Log Per M Population Since 100 cases'
display_h2(slide_title)

chart = trend_dashboard_charts(df_covid_time_series_filtered, sort_field=sort_field_per_population, confirmed_field='confirmed_per_M_population', deaths_field='deaths_per_M_population', scale_type='log', x_metric='days_since_100')
chart = configure_trend_chart_continents(chart)

chart.display()

if export_slides:
    # Save the chart to the shared image file, then place it on a new slide.
    chart.save(saved_image_name)

    # Add a new slide ready for chart and set title text
    slide = prs.add_slide(layout = prs.slide_template_layout.TITLE_AND_MAX_CONTENT)
    prs.set_text(slide, placeholder_name='title', text=slide_title)
    prs.add_image(slide, placeholder_name='content', image_file=saved_image_name)

In [None]:
# Title used for the notebook heading and for the exported slide.
slide_title = 'Trend Charts - Selected Countries'
display_h1(slide_title)
small_subset = True # restrict the charts to the hand-picked list below
measure_daily = True # plot day-on-day changes rather than running totals
independent_y_scales = True # read as a global by trend_chart: each facet row gets its own y scale

if small_subset:
    Combined_Key_list = ['Italy', 'Sweden', 'United Kingdom', 'US', 'Ireland']

    print('Country list:', Combined_Key_list, latest_date_str)
    df_covid_time_series_filtered = df_covid_time_series[df_covid_time_series['Combined_Key'].isin(Combined_Key_list)].copy()
else:
    df_covid_time_series_filtered = df_covid_time_series.copy()

def trend_chart(df, metric='deaths', measure_daily=True, column_title=''):
    """Build a faceted trend chart: an area of the metric over time plus a red 7-row average line.

    One facet row is drawn per ``Combined_Key`` value.

    Parameters
    ----------
    df : pandas.DataFrame
        Time series holding at least ``date``, ``Combined_Key`` and the
        ``metric`` column. The caller's frame is not modified.
        Rows are assumed to be in date order within each ``Combined_Key``
        -- TODO confirm against the code that builds the frame.
    metric : str
        Name of the column to plot.
    measure_daily : bool
        If True, the metric is replaced by its row-to-row increase within
        each ``Combined_Key``; negative steps are clipped to 0 and the first
        row of each ``Combined_Key`` has no value.
    column_title : str
        Title placed on the faceted chart.

    Returns
    -------
    Altair chart
        The faceted chart. Each row gets its own y scale when the
        notebook-level ``independent_y_scales`` flag is truthy.
    """
    # reset_index returns a new frame, so the assignment below never touches the caller's data
    df = df.reset_index()

    if measure_daily:
        # Difference within each Combined_Key: a plain .diff() over the whole frame
        # would compare the first row of one country with the last row of the
        # previous one, producing a spurious spike (or a clipped 0) at every boundary.
        df[metric] = df.groupby('Combined_Key')[metric].diff().clip(lower=0)

    area = alt.Chart().mark_area().encode(
        x='date:T',
        y=metric+':Q',
        tooltip=[alt.Tooltip('Combined_Key:N'),
                 alt.Tooltip('date:T'),
                 alt.Tooltip(metric+':Q', format=',d')]
    ).properties(height=100, width=500)

    line = alt.Chart().mark_line(
        color='red',
    ).transform_window(
        weekAvg=f'mean({metric})',
        # The frame bounds are inclusive: the 6 preceding rows plus the current row = 7 rows
        frame=[-6, 0],
        # Group by the same field the chart is faceted on, so the average never mixes rows
        groupby=['Combined_Key']
    ).encode(
        x='date:T',
        y='weekAvg:Q',
        tooltip=[alt.Tooltip('Combined_Key:N'),
                 alt.Tooltip('date:T'),
                 alt.Tooltip('weekAvg:Q', format=',d')]
    )

    # Both layers share the data frame, which is supplied once at the layer level
    chart = alt.layer(area, line, data=df).facet(row='Combined_Key:N')
    if independent_y_scales:
        chart = chart.resolve_scale(y='independent')

    chart = chart.properties(title=column_title)

    return chart

def configure_trend_chart(chart):
    """Apply the shared header-row and title styling to a trend chart.

    Facet row headers lose their title and get blue, size-20 labels; the
    chart title is set to blue, size 20, anchored in the middle.
    Returns the configured chart produced by the two configure calls.
    """
    header_row_style = dict(title=None, labelColor='blue', labelFontSize=20)
    title_style = dict(fontSize=20, anchor='middle', color='blue')

    return chart.configure_headerRow(**header_row_style).configure_title(**title_style)

# Build the two panels with identical settings: confirmed cases first, then deaths
trend_confirmed_cases, trend_deaths = (
    trend_chart(df_covid_time_series_filtered, metric=panel_metric,
                measure_daily=measure_daily, column_title=panel_title)
    for panel_metric, panel_title in (
        (confirmed_field, 'Daily Confirmed Cases'),
        (deaths_field, 'Daily Deaths'),
    )
)

# Place the panels side by side and apply the shared header/title styling
chart = configure_trend_chart(trend_confirmed_cases | trend_deaths)
chart.display()

if export_slides:
    # Render the chart to the shared image file, then place it on a new titled slide
    chart.save(saved_image_name)

    slide = prs.add_slide(layout=prs.slide_template_layout.TITLE_AND_MAX_CONTENT)
    prs.set_text(slide, text=slide_title, placeholder_name='title')
    prs.add_image(slide, image_file=saved_image_name, placeholder_name='content')

In [None]:
display_h1('Maps')

# Country shapes are read from a locally edited copy of the world-110m TopoJSON
# instead of alt.topo_feature(data.world_110m.url, 'countries').
# Reason: in the stock map data, country code 250 appears to be used twice,
# for both France and French Guiana; world-110m_edited.json patches this.
filepath = 'world-110m_edited.json'
with open(filepath) as json_data:
    d = json.load(json_data)

# The parsed TopoJSON is handed to Altair as inline data, selecting its 'countries' feature
countries_map_data = alt.Data(
    format=alt.TopoDataFormat(type='topojson', feature='countries'),
    values=d,
)

# Table of country codes; map_covid looks up the 'name' column by 'id' for the map tooltip
country_code_url = 'https://raw.githubusercontent.com/alisle/world-110m-country-codes/master/world-110m-country-codes.json'
country_code_df = pd.read_json(country_code_url)

In [None]:
# Size of each world-map panel below; map_covid derives the height as width / aspect_ratio
aspect_ratio = 750/450
width = 700

display_h2('World COVID Cases')
print(latest_date_str)

def map_covid(metric='deaths', project='equirectangular', width=800, aspect_ratio=750/450, clip_europe=False):
    """Layer red circles sized by a COVID metric on top of a grey country map.

    Reads three notebook-level objects: ``countries_map_data`` (country
    shapes), ``country_code_df`` (looked up by ``id`` to show the country
    ``name`` in the map tooltip) and ``df_totals`` (one circle per row,
    positioned by its ``long`` / ``lat`` columns).

    Parameters
    ----------
    metric : str
        Column of ``df_totals`` that drives circle size and tooltip value;
        also used in the chart titles.
    project : str
        Projection type handed to Altair.
    width : number
        Chart width; the height is ``width / aspect_ratio``.
    aspect_ratio : number
        Width-to-height ratio of the chart.
    clip_europe : bool
        If True, both layers are re-projected with scale 350, centred on
        [20, 50] and clipped to the chart's own extent.

    Returns
    -------
    Altair chart
        The map layer and the circle layer combined with ``+``.
    """
    height = width / aspect_ratio

    # Grey base map; the country name is joined in by id so it can appear in the tooltip
    base_map = alt.Chart(countries_map_data).mark_geoshape(fill='#666666', stroke='white')
    base_map = base_map.encode(tooltip=['name:N'])
    base_map = base_map.transform_lookup(
        lookup='id',
        from_=alt.LookupData(country_code_df, 'id', ['name']),
    )
    base_map = base_map.properties(title='Covid '+metric, width=width, height=height)
    base_map = base_map.project(project)

    # One red circle per df_totals row, sized by the chosen metric
    circles = alt.Chart(df_totals).mark_circle().encode(
        longitude='long:Q',
        latitude='lat:Q',
        size=alt.Size(metric+':Q', title='Number of '+metric),
        color=alt.value('red'),
        tooltip=[alt.Tooltip('country:N'),
                 alt.Tooltip(metric+':Q', format=',d')],
    ).properties(title='Number of covid '+metric)

    if clip_europe:
        def europe_view():
            # Fresh arguments per call so the two layers never share list objects
            return dict(
                type=project,
                scale=350,                               # magnify
                center=[20, 50],                         # [lon, lat]
                clipExtent=[[0, 0], [width, height]],    # [[left, top], [right, bottom]]
            )

        base_map = base_map.project(**europe_view())
        circles = circles.project(**europe_view())

    return base_map + circles

# Two equirectangular world maps built with identical settings: deaths first, then confirmed cases
world_covid_deaths, world_covid_cases = (
    map_covid(metric=map_metric, project='equirectangular', width=width, aspect_ratio=aspect_ratio)
    for map_metric in (deaths_field, confirmed_field)
)

# Cases on the left, deaths on the right; each map keeps its own circle-size scale
chart = alt.hconcat(world_covid_cases, world_covid_deaths)
chart = chart.resolve_scale(size='independent')

chart.display()

if export_slides:
    # Render the chart to the shared image file, then place it on a new titled slide
    chart.save(saved_image_name)

    slide = prs.add_slide(layout=prs.slide_template_layout.TITLE_AND_MAX_CONTENT)
    prs.set_text(slide, text='World COVID Cases', placeholder_name='title')
    prs.add_image(slide, image_file=saved_image_name, placeholder_name='content')

In [None]:
display_h2('Europe COVID Cases')
print(latest_date_str)

# Panel size for the Europe maps
width = 400
aspect_ratio = 4/3

# Mercator maps clipped to Europe, built with identical settings: deaths first, then confirmed cases
europe_covid_deaths, europe_covid_cases = (
    map_covid(metric=map_metric, project='mercator', width=width,
              aspect_ratio=aspect_ratio, clip_europe=True)
    for map_metric in (deaths_field, confirmed_field)
)

# The maps are shown one after the other rather than via
#   alt.hconcat(europe_covid_cases, europe_covid_deaths).resolve_scale(size='independent')
# because the concatenated version displays with a different center position.
europe_covid_cases.display()
europe_covid_deaths.display()

In [None]:
# Settings for the projection comparison: the projections to draw and the size of each panel
projections = ['equirectangular', 'mercator', 'orthographic'] # , 'gnomonic'
aspect_ratio = 4/3
width = 500

display_h2('World COVID Cases - alternative map projections')
print(latest_date_str)

def project(chart, proj, width):
    """Return `chart` re-projected with `proj`, titled with the projection name and resized.

    The height is `width` divided by the notebook-level `aspect_ratio`.
    """
    projected = chart.project(proj)
    sizing = dict(title=proj, width=width, height=width/aspect_ratio)
    return projected.properties(**sizing)

# One deaths map per projection, each titled with the name of its projection
charts = []
for proj in projections:
    deaths_map = map_covid(metric=deaths_field, project=proj, width=width, aspect_ratio=aspect_ratio)
    charts.append(deaths_map.properties(title=proj))

# Last expression of the cell: the maps laid out in a two-column grid
alt.concat(*charts, columns=2)

In [None]:
display_h1('Full country data')
display_h2('Bar charts - confirmed cases and deaths')

if not show_charts_for_all_countries:
    print('Not selected to show full country data')
else:
    print(latest_date_str)
    chart_confirmed = get_bar_chart(df_totals, category='Combined_Key', data_field=confirmed_field)
    chart_deaths    = get_bar_chart(df_totals, category='Combined_Key', data_field=deaths_field)

    # Jupyter only auto-renders the final top-level expression of a cell; a bare
    # expression inside this else branch is evaluated and discarded, so the
    # side-by-side chart has to be displayed explicitly.
    (chart_confirmed | chart_deaths).display()

In [None]:
display_h2('Trend charts')

if show_charts_for_all_countries:
    print(latest_date_str)
    df = df_covid_time_series.copy()
    measure_daily = True

    # One confirmed | deaths chart pair per distinct Combined_Key value
    for country in df['Combined_Key'].unique():
        print(country)
        df_filtered = df.loc[df['Combined_Key'] == country].copy()

        trend_confirmed_cases = trend_chart(df_filtered, measure_daily=measure_daily, metric=confirmed_field)
        trend_deaths = trend_chart(df_filtered, measure_daily=measure_daily, metric=deaths_field)

        # Side by side: confirmed cases on the left, deaths on the right
        chart = trend_confirmed_cases | trend_deaths
        chart.display()
else:
    print('Not selected to show full country data')

In [None]:
if export_slides:
    # Write the presentation that the export steps in the cells above added
    # slides to out to the file named by slides_file_name
    print('Saving slides')
    prs.save(slides_file_name)