In [1]:
from IPython.display import clear_output
import ipywidgets as wdg
import pandas as pd
import matplotlib.pyplot as plt
import json
from uk_covid19 import Cov19API
import time
%matplotlib inline
# Raise the figure resolution (dots per inch) so the inline plots render larger.
plt.rcParams['figure.dpi'] = 100

# Covid-19 Dashboard

## A plot graph about the number of new cases & new vaccines given by publish date

This line plot (drawn with matplotlib.pyplot) shows the number of new cases alongside the number of new vaccines given, by publish date, in England. All data is retrieved as JSON through a web-based API from [Public Health England](https://www.gov.uk/government/organisations/public-health-england) (PHE). The data wrangling uses the pandas library to build the DataFrame; the visualisation uses ipywidgets and matplotlib.pyplot to create and display the interactive controls and the graphs. 

Above the graph, an "Update data" button retrieves the most recent 90 days of data and then refreshes the graph immediately. 
In addition, a ```SelectMultiple``` widget lets you select the stats to be shown in the graph (select multiple values by holding shift and/or ctrl (or command) while clicking), and a ```RadioButtons``` widget lets you choose between a linear and a log scale for the graph.  
Note: To facilitate data analysis and keep the graph readable, the graph only shows data from the 3 months before the retrieval date. 

In [2]:
# Path of the local JSON file that holds the cases/vaccines data
# (read by wrangle_data_case below).
jsondata_case = 'casevaccine.json'

def parse_date(datestring):
    """Parse a 'YYYY-MM-DD' date string with pandas and return the result."""
    iso_format = "%Y-%m-%d"
    timestamp = pd.to_datetime(datestring, format=iso_format)
    return timestamp

def wrangle_data_case(rawdata):
    """Read the cases/vaccines JSON file and return it as a daily DataFrame.

    Parameters:
        rawdata - path of a JSON file whose 'data' entry is a list of records
                  with a 'date' ('YYYY-MM-DD') and, optionally, 'cases' and
                  'vaccines' values.

    Returns:
        A DataFrame indexed by every calendar day between the oldest and the
        newest of the 91 most recent records, with float columns 'cases' and
        'vaccines'. Missing days and missing/null values are filled with 0.0.

    Raises:
        ValueError - if the file contains no records.
    """
    with open(rawdata, "rt") as INFILE:
        data = json.load(INFILE)
    records = data['data']
    if not records:
        raise ValueError("No records found in {!r}".format(rawdata))
    # Keep the 91 most recent records. Sorting explicitly (ISO dates order
    # correctly as strings) avoids relying on the file being newest-first.
    # The sort is stable, so duplicated dates keep their original file order.
    records = sorted(records, key=lambda entry: entry['date'], reverse=True)[:91]
    startdate = parse_date(records[-1]['date'])
    enddate = parse_date(records[0]['date'])
    index = pd.date_range(startdate, enddate, freq='D')   # one row per calendar day
    # A float frame starts out as all-NaN and stays numeric, so no implicit
    # object -> float downcast is needed when the gaps are filled below.
    casetest_df = pd.DataFrame(index=index, columns=['cases', 'vaccines'], dtype=float)
    for entry in records:
        date = parse_date(entry['date'])
        for column in ['cases', 'vaccines']:
            # Only fill empty cells: the first record seen for a date wins.
            if pd.isna(casetest_df.loc[date, column]):
                value = entry.get(column)
                casetest_df.loc[date, column] = float(value) if value is not None else 0.0
    return casetest_df.fillna(0.0)

# Build the initial DataFrame from the file named by jsondata_case;
# casetest_df is the global that casetest_graph plots.
casetest_df=wrangle_data_case(jsondata_case)



In [3]:
def access_api_case():
    """Download the England cases/vaccines series and save it as JSON.

    Queries Cov19API for the nation-level England records (date, new cases
    and new vaccines given by publish date), writes the response to
    "casevaccine.json" and returns that file name.
    """
    output_path = "casevaccine.json"
    england_filters = ['areaType=nation', 'areaName=England']
    requested_fields = {
        "date": "date",
        "cases": "newCasesByPublishDate",
        "vaccines": "newVaccinesGivenByPublishDate",
    }
    client = Cov19API(filters=england_filters, structure=requested_fields)
    response = client.get_json()    # the response is a dict
    with open(output_path, "wt") as outfile:
        json.dump(response, outfile)
    return output_path
      
def api_button_callback_case(button):
    """Button callback: download fresh data, rebuild casetest_df and redraw.

    Parameters:
        button - the widget that was clicked (passed in by ``on_click``); its
                 icon, description, style and tooltip are updated to report
                 the outcome.

    If the download fails, the button is switched to an error state and left
    enabled so the user can retry; the previously loaded data is kept. The
    success state is only shown after the data has actually been refreshed.
    """
    global casetest_df
    try:
        apidata = access_api_case()
    except Exception as error:    # not a bare except: Ctrl-C must still propagate
        message = ('The data cannot be updated as the Public Health England (PHE) '
                   'server cannot be reached right now')
        button.icon = "exclamation-triangle"
        button.description = "Update failed"
        button.button_style = 'danger'
        button.tooltip = "{} ({})".format(message, error)
        button.disabled = False
        print(message)
        return
    casetest_df = wrangle_data_case(apidata)
    refresh_graph()
    # Give the user some feedback once the graph has been refreshed.
    button.icon = "check"
    button.description = "Data updated"
    button.button_style = 'success'
    button.disabled = False
    
# "Update data" button for the cases/vaccines graph.
apibutton_case=wdg.Button(
    description='Update data', 
    disabled=False,
    button_style='warning', 
    tooltip="Click to download current Public Health England data",
    icon='repeat'
)

apibutton_case.on_click(api_button_callback_case)     # run api_button_callback_case whenever the button is clicked



In [4]:
def casetest_graph(cols, scale):
    """Plot the selected columns of casetest_df, or print a hint if none is selected.

    cols  - sequence of column names of casetest_df to plot
    scale - 'linear' for a linear y-axis; any other value gives a log y-axis
    """
    if len(cols) == 0:
        # Nothing selected: tell the user how to pick a series instead of plotting.
        print("Click to select data for graph")
        print("(CTRL-Click to select more than one category)")
        return
    use_log_axis = scale != 'linear'
    selected = casetest_df[list(cols)]
    selected.plot(logy=use_log_axis, linewidth=1, marker='o', markersize=3, markerfacecolor='w')
    plt.title(label='Plot graph about the number of new cases & new vaccines given', fontsize=18)
    plt.ylabel('Number', fontsize=14)
    plt.xlabel('Date', fontsize=14)
    plt.show()  # important - graphs won't update if this is missing
        
# Control 1: which columns of casetest_df to plot. refresh_graph() also
# toggles this widget's value to force a redraw.
serie=wdg.SelectMultiple(
    options=['cases', 'vaccines'],
    value=['cases'],
    rows=2,
    description='Stats:',
    disabled=False
)
# Control 2: linear or logarithmic y-axis.
scale=wdg.RadioButtons(
    options=['linear', 'log'],
    layout={'width': 'max-content'}, # If the items' names are long
    description='Scale:',
    disabled=False
)
def refresh_graph(selector=None, pause=0.2):
    """Force the graph bound to a SelectMultiple widget to be redrawn.

    The widget's value is switched to a different selection and then back,
    so that the change observers attached to it run again.

    Parameters:
        selector - the selection widget to toggle; defaults to the global
                   ``serie`` widget.
        pause    - seconds to wait between the two assignments (0 to skip).
    """
    if selector is None:
        selector = serie
    current = tuple(selector.value)
    everything = tuple(selector.options)
    # The temporary selection must differ from the current one, and it must
    # be a tuple: assigning a bare option string is not a valid
    # multiple-selection value.
    if current != everything:
        other = everything
    elif len(everything) > 1:
        other = everything[-1:]
    else:
        other = ()
    selector.value = other      # forces the redraw
    if pause:
        time.sleep(pause)
    selector.value = current    # restore the user's selection

controls = wdg.HBox([serie,scale])      # lay the two controls out side by side

# Call casetest_graph(cols=<value of serie>, scale=<value of scale>) and
# capture what it draws in an output widget.
graph=wdg.interactive_output(casetest_graph, {'cols': serie, 'scale': scale})

# Show the update button, the controls and the graph.
display(apibutton_case, controls, graph)



HBox(children=(SelectMultiple(description='Stats:', index=(0,), options=('cases', 'vaccines'), rows=2, value=(…

Output()

## Age distribution data

Here is a grouped bar chart which compares the distribution of cases by sex and age bands in England.

Again, a ```SelectMultiple``` widget lets you select the stats to be shown in the graph (select multiple values by holding shift and/or ctrl (or command) while clicking). Above the graph, an "Update data" button retrieves the latest data and refreshes the graph immediately. 

In [5]:
# Path of the local JSON file that holds the age-distribution data.
jsondata_age = "agedistribution.json" 

def min_age(agerange):
    """Return the lower bound of an age band label as an int.

    Any '+' is dropped first (so '90+' gives 90); the text before the first
    '_' is then converted to an integer.
    """
    lower_bound, _, _ = agerange.replace('+', '').partition('_')
    return int(lower_bound)

def wrangle_data_age(rawdata):
    """Read the age-distribution JSON file and return it as a DataFrame.

    rawdata is the path of a JSON file; the first record of its 'data' list
    must hold 'males' and 'females' lists of {'age': ..., 'value': ...}
    entries. The returned frame is indexed by the age bands found in the
    'males' list (ordered by their lower bound) and has the columns
    'males', 'females' and 'total' (males + females).
    """
    with open(rawdata, "rt") as infile:
        payload = json.load(infile)
    record = payload['data'][0]     # 'data' is a list; only its first record is used
    by_sex = {'males': record['males'], 'females': record['females']}
    bands = sorted((item['age'] for item in by_sex['males']), key=min_age)
    # Start from an empty frame and fill it cell by cell, keyed by age band.
    age_df = pd.DataFrame(index=bands, columns=['males', 'females', 'total'])
    for sex, entries in by_sex.items():
        for item in entries:
            age_df.loc[item['age'], sex] = item['value']
    age_df['total'] = age_df['males'] + age_df['females']
    return age_df

# Build the initial DataFrame from the file named by jsondata_age;
# age_df is the global that age_graph plots.
age_df=wrangle_data_age(jsondata_age)

def access_api():
    """Download the England male/female case counts and save them as JSON.

    Queries Cov19API for the nation-level England records (maleCases and
    femaleCases), writes the response to "agedistribution.json" and returns
    that file name.
    """
    output_path = "agedistribution.json"
    england_filters = ['areaType=nation', 'areaName=England']
    requested_fields = {
        "males": "maleCases",
        "females": "femaleCases",
    }
    client = Cov19API(filters=england_filters, structure=requested_fields)
    response = client.get_json()    # the response is a dict
    with open(output_path, "wt") as outfile:
        json.dump(response, outfile)
    return output_path

def api_button_callback(button):
    """Button callback: download fresh data, rebuild age_df and redraw the bar chart.

    Parameters:
        button - the widget that was clicked (passed in by ``on_click``); its
                 icon, description, style and tooltip are updated to report
                 the outcome.

    If the download fails, the button is switched to an error state and left
    enabled so the user can retry; the previously loaded data is kept. On
    success the button is marked as updated and disabled.
    """
    global age_df    # the frame age_graph actually plots
    try:
        apidata = access_api()
    except Exception as error:    # not a bare except: Ctrl-C must still propagate
        message = ('The data cannot be updated as the Public Health England (PHE) '
                   'server cannot be reached right now')
        button.icon = "exclamation-triangle"
        button.description = "Update failed"
        button.button_style = 'danger'
        button.tooltip = "{} ({})".format(message, error)
        button.disabled = False
        print(message)
        return
    age_df = wrangle_data_age(apidata)
    # Force the bar chart to redraw: clear the selection and restore it so the
    # observers of agecols run again. With nothing selected there is no chart
    # to redraw.
    selected = agecols.value
    if selected:
        agecols.value = ()
        agecols.value = selected
    # Give the user some feedback once the graph has been refreshed.
    button.icon = "check"
    button.description = "Data updated"
    button.button_style = 'success'
    button.disabled = True
    
# "Update data" button for the age-distribution chart.
apibutton=wdg.Button(
    description='Update data', 
    disabled=False,
    button_style='warning', 
    tooltip="Click to download current Public Health England data",
    icon='repeat'
)
apibutton.on_click(api_button_callback)     # run api_button_callback whenever the button is clicked

# Selector for the columns of age_df shown in the bar chart.
agecols=wdg.SelectMultiple(
    options=['males', 'females', 'total'], # options available
    value=['males','females'], # initial value
    rows=3, # rows of the selection box
    description='Sex',
    disabled=False
)

def age_graph(graphcolumns):
    """Draw a grouped bar chart of the selected age_df columns.

    graphcolumns - sequence of column names of age_df to plot. If it is
                   empty, a hint on how to select data is printed instead.
    """
    if len(graphcolumns) == 0:
        # if the user has not selected any column, print a message instead
        print("Click to select data for graph")
        print("(CTRL-Click to select more than one category)")
        return
    # graphcolumns is a tuple - we need a list
    age_df.plot(kind='bar', y=list(graphcolumns), width=0.7, alpha=0.8)
    plt.title('Bar chart about the distribution of cases by sex and age bands', fontsize=18)
    # kind='bar' draws vertical bars: the index (age bands) runs along the
    # x-axis and the counts along the y-axis.
    plt.xlabel('Age bands', fontsize=14)
    plt.ylabel('Number', fontsize=14)
    plt.show() # important - graphs won't update properly if this is missing
    
# Call age_graph(graphcolumns=<value of agecols>) and capture what it
# draws in an output widget.
output=wdg.interactive_output(age_graph, {'graphcolumns': agecols})

# Show the update button, the selector and the chart.
display(apibutton, agecols, output)



SelectMultiple(description='Sex', index=(0, 1), options=('males', 'females', 'total'), rows=3, value=('males',…

Output()

**Acknowledgement and Copyright Notice** 

This website and its content are only to be used by the signatories for coursework and research work at Queen Mary University. All rights reserved. 

Data Source: *Based on UK Government [data](https://coronavirus.data.gov.uk/) published by [Public Health England](https://www.gov.uk/government/organisations/public-health-england).*

Author: *Wong Chuen Lik Daniel*

Last modified date: *2021-12-9* 