# **Covid19 Dashboard**

## Daily Statistics ##

This Dashboard plots new cases of Covid-19 by specimen date against hospital admissions and new deaths within 28 days of a Covid-19 diagnosis.

Please select desired area from the toggle box:

In [1]:
#The First step to running my dashboard is to perform the necessary imports.
#I'm importing a number of modules to process and plot the data, and ipywidgets to provide interactive controls.

from IPython.display import clear_output
import ipywidgets as wdg
from ipywidgets import HBox
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json
from uk_covid19 import Cov19API
from datetime import datetime

In [2]:
%matplotlib inline
# Raise the figure resolution to 100 dpi to make the figures larger.
plt.rcParams['figure.dpi'] = 100

In [3]:
#The next step is to import startup data:

# Load the start-up data shipped with the dashboard into module-level names.
# access_api() rebinds these same names when the user refreshes the data.
# (No `global` statement is needed here: names assigned at module level are
# already global.)

def _load_startup_json(filename):
    """Return the parsed contents of one of the bundled JSON start-up files."""
    # Be explicit about the encoding instead of relying on the platform default.
    with open(filename, "rt", encoding="utf-8") as infile:
        return json.load(infile)


# UK-wide data first, then one dataset per nation.
cases_and_deaths = _load_startup_json("start_cases_and_deaths.json")
eng_only = _load_startup_json("eng_only_start_cases_and_deaths.json")
sco_only = _load_startup_json("sco_only_start_cases_and_deaths.json")
wal_only = _load_startup_json("wal_only_start_cases_and_deaths.json")
ni_only = _load_startup_json("ni_only_start_cases_and_deaths.json")


In [4]:
#Next, I will create my Data-Wrangling function

#A date parsing function, to turn string dates into Pandas date objects.
def parse_date(datestring):
    """Convert a ``YYYY-MM-DD`` date string into a pandas date object."""
    iso_day_format = "%Y-%m-%d"
    parsed = pd.to_datetime(datestring, format=iso_day_format)
    return parsed

#The main data wrangling function.
#it creates a data frame and populates it with cases, hospitalisations, deaths and a 7-day average of cases.
def wrangle_cases_and_deaths_data(data):
    """Build a daily time series of cases, admissions and deaths.

    Parameters
    ----------
    data : dict
        API response whose ``'data'`` key holds a list of dictionaries with
        the keys ``'date'`` (``YYYY-MM-DD`` string), ``'cases'``,
        ``'admissions'`` and ``'deaths'``. The list may be in any order.

    Returns
    -------
    pandas.DataFrame
        One row per calendar day between the earliest and latest date in the
        response, with the columns ``'Cases'``, ``'Hospital Admissions'``,
        ``'Deaths (within 28 days of +test)'`` and ``'7 Day Average Cases'``.
        A value reported as ``None`` (or missing from an entry) becomes
        ``0.0``; a day that is absent from the response altogether is left as
        NaN. An empty response yields an empty frame with the same columns.
    """
    value_columns = ['cases', 'admissions', 'deaths']
    average_column = "7 Day Average Cases"
    neat_names = {'cases': 'Cases',
                  'admissions': 'Hospital Admissions',
                  'deaths': 'Deaths (within 28 days of +test)'}

    # extract the list of dictionaries from the API response.
    datalist = data['data']

    if not datalist:
        # Nothing to wrangle: return an empty frame with the usual columns.
        empty = pd.DataFrame(index=pd.DatetimeIndex([]),
                             columns=value_columns + [average_column],
                             dtype=float)
        return empty.rename(columns=neat_names)

    raw = pd.DataFrame(datalist, columns=['date'] + value_columns)
    raw['date'] = pd.to_datetime(raw['date'], format="%Y-%m-%d")
    # If a date appears more than once, the first entry for it wins.
    raw = raw.drop_duplicates(subset='date', keep='first').set_index('date')

    # Reported-but-empty values count as zero.
    values = raw[value_columns].astype(float).fillna(0.0)

    # Re-index onto a gap-free daily range in chronological order; days the
    # API did not return at all stay NaN.
    drange = pd.date_range(values.index.min(), values.index.max(), freq="D")
    timeseriesdf = values.reindex(drange).rename_axis(None)

    # Trailing 7-day mean of cases, computed over the date-ordered series.
    # The first six days have no full window, so they stay NaN, as does any
    # window that contains a day with no data.
    timeseriesdf[average_column] = timeseriesdf['cases'].rolling(window=7).mean()

    # making column labels neat
    return timeseriesdf.rename(columns=neat_names)

In [5]:
#API access code - as my cases and deaths graphs are separated by nation, the data is fetched separately for the UK as a whole and for each nation.

# For the individual nations, I've created a get data function that takes area as a parameter and uses it in the filter call.

def get_area_data(area):
    """Fetch date, cases, admissions and deaths for one nation from the API.

    `area` is inserted into the ``areaName`` filter alongside
    ``areaType=nation``. The result of ``Cov19API.get_json()`` is returned
    unchanged.
    """
    # Maps the column names used by the dashboard to the API metric names.
    metrics = {
        "date": "date",
        "cases": "newCasesBySpecimenDate",
        "admissions": "newAdmissions",
        "deaths": "newDeaths28DaysByDeathDate",
    }
    nation_filters = ["areaType=nation", f"areaName={area}"]

    return Cov19API(filters=nation_filters, structure=metrics).get_json()

#for the UK data, I've used "areaType=overview"

def get_uk_data():
    """Fetch date, cases, admissions and deaths for the whole UK from the API.

    Uses the ``areaType=overview`` filter and returns the result of
    ``Cov19API.get_json()`` unchanged.
    """
    # Maps the column names used by the dashboard to the API metric names.
    metrics = {
        "date": "date",
        "cases": "newCasesBySpecimenDate",
        "admissions": "newAdmissions",
        "deaths": "newDeaths28DaysByDeathDate",
    }
    overview_filters = ["areaType=overview"]

    return Cov19API(filters=overview_filters, structure=metrics).get_json()

#the access api function itself - declares the global variables from earlier and rebinds them using the functions I just defined.

def access_api():
    """Refresh all five module-level datasets from the API.

    Every download is completed before any global is rebound. If one of the
    requests raises, the exception propagates and the previously loaded data
    is left untouched, rather than ending up with a mixture of old and new
    datasets.
    """
    global cases_and_deaths, eng_only, sco_only, wal_only, ni_only

    # Fetch everything first (same request order as before)...
    fresh_uk = get_uk_data()
    fresh_eng = get_area_data("England")
    fresh_wal = get_area_data("Wales")
    fresh_sco = get_area_data("Scotland")
    fresh_ni = get_area_data("Northern Ireland")

    # ...then swap the new data in as a single step.
    cases_and_deaths, eng_only, wal_only, sco_only, ni_only = (
        fresh_uk, fresh_eng, fresh_wal, fresh_sco, fresh_ni)

In [6]:
#the button callback function
def api_button_callback(button):
    """Refresh the data from the API and report the outcome on the button.

    Parameters
    ----------
    button
        The widget that was clicked. Its ``description``, ``icon`` and
        ``tooltip`` are updated to reflect the result.

    On success the description and tooltip show the time of the update and
    the icon becomes a check mark. On failure the error is printed, the
    warning icon is restored and the tooltip carries the error message; the
    description is left as it was.
    """
    try:
        # update the json data
        access_api()
        # refresh the graph using a 'trick' function, that switches the graph
        # option to another option and back again.
        refresh_graph()
    except Exception as error:
        # Best effort: a failed refresh must not break the dashboard, but it
        # should be visible to the user rather than silently swallowed.
        print(f"update failed: {error}")
        button.icon = "exclamation-triangle"
        button.tooltip = f"update failed: {error}"
        return

    # Only claim success once both the download and the redraw have worked.
    updated_at = datetime.now().strftime("%H:%M:%S")
    button.description = f"data updated at: {updated_at}"
    button.icon = "check"
    button.tooltip = f"data updated at: {updated_at}"

# Create my update button, with appropriate parameters.
apibutton = wdg.Button(
    description='Refresh Data',
    tooltip="Click to refresh data",
    icon="exclamation-triangle",
    button_style='info',
    # 200px leaves room for the whole "data updated at: HH:MM:SS" label.
    layout=wdg.Layout(width='200px'),
    disabled=False,
)

# Run api_button_callback every time the button is clicked.
apibutton.on_click(api_button_callback)


In [7]:
# function that returns desired JSON data from one of the global variables.

def get_data(area):
    """Return the module-level JSON dataset that belongs to `area`.

    Recognised areas are "UK", "England", "Wales", "Scotland" and
    "Northern Ireland"; any other value gives ``None``.
    """
    selected = None
    if area == "UK":
        selected = cases_and_deaths
    elif area == 'England':
        selected = eng_only
    elif area == 'Wales':
        selected = wal_only
    elif area == 'Scotland':
        selected = sco_only
    elif area == 'Northern Ireland':
        selected = ni_only
    return selected

# plotting function - recalculates the dataframe based on the fresh data and plots it
def plot_cases_and_deaths(area):
    """Rebuild the dataframe for `area` from the current data and plot it."""
    frame = wrangle_cases_and_deaths_data(get_data(area))
    # 12x6 gives the plot enough room to show plenty of information.
    frame.plot(figsize=(12, 6))
    plt.legend()

def refresh_graph():
    """Force the graph to redraw by toggling the area selector.

    The selector is switched to a different option and straight back to the
    current one, so the widget's value changes twice and the plot is
    recomputed for the area the user had selected.
    """
    current = whicharea.value
    options = whicharea.options
    # Pick any option that differs from the current one: the second option,
    # unless that is the one currently selected, in which case the first.
    other = options[0] if current == options[1] else options[1]
    whicharea.value = other
    whicharea.value = current

# Toggle buttons for choosing which area's data is plotted; starts on the UK.
whicharea = wdg.ToggleButtons(
    description='Area: ',
    options=['UK', 'England', 'Wales', 'Scotland', 'Northern Ireland'],
    value='UK',
    disabled=False,
)

toggle_box = HBox(children=[whicharea])

# Feed the selected area into plot_cases_and_deaths.
graph = wdg.interactive_output(plot_cases_and_deaths, dict(area=whicharea))

display(toggle_box, graph)

<Figure size 600x400 with 0 Axes>

HBox(children=(ToggleButtons(description='Area: ', options=('UK', 'England', 'Wales', 'Scotland', 'Northern Ir…

Output()

Initial data downloaded on 5/12/21. 
Please click the button below to update the graph with the latest available data.

In [8]:
# Render the refresh-data button.
display(apibutton)

Button(button_style='info', description='Refresh Data', icon='exclamation-triangle', layout=Layout(width='200p…

## The Vaccine Effect ##

This graph shows the effect of vaccines on the number of daily deaths from Covid-19. It plots the cumulative number of vaccinated people against the number of deaths from Covid (x 100,000 so the numbers are comparable). In the first few months of the vaccine rollout, a clear drop in the number of deaths can be seen.

One can also observe the effects of waning immunity - the number of deaths begins creeping up from July, around 6 months after most vulnerable cohorts were vaccinated. However, since the UK booster jab rollout began in November, this number has started falling again.

The slider below allows the user to zoom in on a particular range of dates.

__(This graph automatically downloads the latest available data from the Public Health England (PHE) API. If the graph is not displaying, there may be issues with the PHE API.)__

In [59]:
# Fetch UK-wide cumulative vaccinations and daily deaths from the API.
filters = ["areaType=overview"]

vaccines_and_deaths = {"date": "date",
                       "vaccines": "cumVaccinesGivenByPublishDate",
                       "deaths": "newDeaths28DaysByDeathDate"}

api = Cov19API(filters=filters, structure=vaccines_and_deaths)
vaccinations = api.get_json()


# Work out the maximum value for the range slider.
# first, load the data list
datalist = vaccinations['data']

# v_index is the position of the first entry with no vaccination data, which
# equals the number of leading entries that do have it. If every entry has
# vaccination data, the whole list is usable. (A module-level name is already
# global, so no `global` statement is needed.)
v_index = next(
    (position for position, entry in enumerate(datalist)
     if entry['vaccines'] is None),
    len(datalist),
)

# A slice excludes its end position, so [:v_index] keeps exactly the entries
# that have vaccination data - nothing needs to be subtracted first.
datalist = datalist[:v_index]
dates = [entry['date'] for entry in datalist]  # find valid dates
# Largest valid list index; used as the upper bound of the range slider.
maxi = len(dates) - 1

#creating the range slider - I used an int slider so I could use the values as list indexes

range_slider = wdg.IntRangeSlider(
    description='Date range selector: ',
    value=[0, maxi],
    min=0,
    max=maxi,
    step=1,
    readout=False,
    continuous_update=True,
    disabled=False,
    orientation='horizontal',
    layout={'width': '850px'},
    # 'initial' lets the long description display in full.
    style={'description_width': 'initial'},
)
def get_date_range(i, j, datalist):
    """Return the daily date range between the i-th and j-th sorted dates.

    The ``'date'`` strings of `datalist` are sorted in ascending order, then
    the entries at positions `i` and `j` become the start and end of a
    day-frequency ``pd.date_range``.
    """
    ordered_dates = sorted(record['date'] for record in datalist)

    first_day = parse_date(ordered_dates[i])
    last_day = parse_date(ordered_dates[j])
    return pd.date_range(first_day, last_day, freq="D")
    
#wrangle the data, taking values from the slider
def wrangle_and_plot_vaccines(vals):
    """Plot cumulative vaccinations against scaled daily deaths.

    Parameters
    ----------
    vals
        The range slider's value: a pair of list indexes ``(start, end)``
        that get_date_range() turns into the first and last date to plot.

    Entries whose date falls outside the selected window are skipped. Deaths
    are multiplied by 100,000 so they are visible on the same axis as the
    cumulative vaccine count, which reaches the millions.
    """
    start, end = vals[0], vals[1]
    # Only the leading v_index entries carry vaccination data.
    datalist = vaccinations['data'][:v_index]
    drange = get_date_range(start, end, datalist)

    # create the empty data frame
    vdf = pd.DataFrame(index=drange, columns=['vaccines', 'deaths'])

    for entry in datalist:
        date = parse_date(entry["date"])
        # The slider selects a sub-range of the dates, so many entries lie
        # outside the frame's index; skip them explicitly rather than relying
        # on a KeyError from .loc.
        if date not in vdf.index:
            continue
        for column in ['vaccines', 'deaths']:
            if pd.isna(vdf.loc[date, column]):
                raw_value = entry[column]
                value = int(raw_value) if raw_value is not None else 0
                if column == 'deaths':
                    # Scale deaths up to a level comparable with the vaccines.
                    value *= 100000
                vdf.loc[date, column] = value

    # renaming columns to make them neat and informative
    vdf = vdf.rename(columns={'vaccines': 'Cumulative Vaccines',
                              'deaths': 'Deaths (x100,000)'})
    vdf.plot(figsize=(12, 6))  # matching the size of the other graph
    

plt.show()  # show graphs

# The interactive output passes the slider's value to the
# wrangling/plotting function as `vals`.
v_graph = wdg.interactive_output(wrangle_and_plot_vaccines, dict(vals=range_slider))
display(v_graph, range_slider)

Output()

IntRangeSlider(value=(0, 331), description='Date range selector: ', layout=Layout(width='850px'), max=331, rea…

_DIY Covid-19 Dashboard (C) Harry Evans 2021 (ec21939@qmul.ac.uk)._
_Based on UK Government data published by Public Health England._ 
_Template by Dr. Fabrizio Smeraldi_