# Tracking Covid Cases in Wales Using GOV Data - Fawwaz Chowdhury

This is a project to create an interactive dashboard to track the number of cases, hospitalisations and deaths occurring in Wales.

In [31]:
import ipywidgets as wdg
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
from uk_covid19 import Cov19API
import datetime

In [32]:
%matplotlib inline
# make figures larger
plt.rcParams['figure.dpi'] = 100

The code running in the background acquires the most recent data from the PHE API through the SDK provided by GOV.UK.

The data is then wrangled so that it is compatible with graphical visualisation. I created a single graph with interactive controls, which fulfils both tasks: developing one graphical visualisation and one with interactive features. The data is stored in JSON files, and there is a button that triggers the API to refresh the data, which simultaneously updates the graph.

In [33]:
# Placeholder for raw data loaded from JSON files; starts out as an empty dict.
jsondata = dict()

In [34]:
# Restrict the query to the nation-level record for Wales.
filters = ['areaType=nation', 'areaName=Wales']

# Map our own column names (keys) onto the metric names requested from the API.
structure = dict(
    date="date",
    cases="newCasesByPublishDate",
    hospital="newAdmissions",
    deaths="cumDeaths28DaysByDeathDateRate",
)

# Run the query and keep the JSON response.
api = Cov19API(structure=structure, filters=filters)
timeseries = api.get_json()

In [35]:
# Save the downloaded data to disk so later cells can reload it from the file.
with open("timeseries.json", mode="wt") as OUTF:
    json.dump(obj=timeseries, fp=OUTF)

In [36]:
# an iPython  "magic" that enables the embedding of matplotlib output
%matplotlib inline
# make figures larger
plt.rcParams['figure.dpi'] = 100

with open("timeseries.json", "rt") as INFILE:
    data=json.load(INFILE)

In [37]:
def wrangle_data(rawdata):
    """Turn raw API/JSON data into a daily time-series dataframe.

    Parameters:
        rawdata - data from the JSON file or an API call: a dictionary whose
            'data' entry is a list of dictionaries with the keys 'date'
            (a "YYYY-MM-DD" string), 'cases', 'hospital' and 'deaths'.
            An empty dictionary or None is accepted.

    Returns a dataframe with float columns 'cases', 'hospital' and 'deaths',
    indexed by every calendar day between the earliest and the latest date in
    the data. Missing days and missing (None) values are set to 0.0. If there
    is nothing to wrangle, an empty dataframe with those columns is returned.
    """
    columns = ['cases', 'hospital', 'deaths']
    entries = rawdata.get('data', []) if rawdata else []
    if not entries:
        # nothing to wrangle yet (e.g. the dashboard starts with an empty dict)
        return pd.DataFrame(columns=columns, dtype=float)

    dates = [pd.to_datetime(entry['date'], format="%Y-%m-%d") for entry in entries]
    # a continuous daily index, so days absent from the data still get a row
    index = pd.date_range(min(dates), max(dates), freq='D')
    frame = pd.DataFrame(index=index, columns=columns, dtype=float)

    for date, entry in zip(dates, entries):
        for column in columns:
            value = entry.get(column)
            # only fill empty cells, and skip None, so that a duplicated date
            # can still contribute a value that an earlier entry lacked
            if value is not None and pd.isna(frame.loc[date, column]):
                frame.loc[date, column] = float(value)

    # whatever is still empty was missing from the data altogether
    return frame.fillna(0.0)

# Because the wrangling lives in a function, it can be run again whenever the
# data is refreshed through the API. It is also run once here, on the JSON
# data, so that a dataframe exists as soon as the dashboard starts.
df = wrangle_data(rawdata=jsondata)  # df is the dataframe for plotting

In [38]:
# Pull out the list of records stored under the 'data' key of the loaded JSON.
datalist=data['data']

In [39]:
# Collect the date string of every record, in ascending order.
dates = sorted(record['date'] for record in datalist)

In [40]:
def parse_date(datestring):
    """ Parse a "YYYY-MM-DD" date string and return it as a pandas datetime object """
    iso_day_format = "%Y-%m-%d"
    parsed = pd.to_datetime(datestring, format=iso_day_format)
    return parsed

In [41]:
# The sorted list starts with the earliest date and ends with the latest one.
startdate, enddate = parse_date(dates[0]), parse_date(dates[-1])

In [42]:
# One row per calendar day between the first and the last date; all cells start empty.
index = pd.date_range(start=startdate, end=enddate, freq='D')
timeseriesdf = pd.DataFrame(columns=['cases', 'hospital', 'deaths'], index=index)

In [43]:
for entry in datalist: # each entry is a dictionary with date, cases, hospital and deaths
    date=parse_date(entry['date'])
    for column in ['cases', 'hospital', 'deaths']:
        value=entry[column]
        # Skip missing values instead of storing 0.0 for them: a stored zero
        # would make the cell look filled and block a later entry for the same
        # date from supplying the real number. The fillna below zeroes
        # whatever is still empty at the end.
        if value is None:
            continue
        # check that nothing is there yet - just in case some dates are duplicated,
        # maybe with data for different columns in each entry
        if pd.isna(timeseriesdf.loc[date, column]):
            # this is the way you access a specific location in the dataframe - use .loc
            # and put index,column in a single set of [ ]
            timeseriesdf.loc[date, column]=float(value)

# fill in any remaining "holes" due to missing dates or missing values
timeseriesdf.fillna(0.0, inplace=True)

In [44]:
# Place your API access code in this function. Do not call this function directly; it will be called by 
# the button callback. 
def access_api():
    """ Accesses the PHE API. Returns raw data in the same format as data loaded from the "canned" JSON file.

    Queries the nation-level record for Wales, saves the response to
    "timeseries.json" (overwriting the previous file) and returns that same
    freshly downloaded response.
    """
    filters = [
        'areaType=nation',
        'areaName=Wales'
    ]
    # keys are our own column names, values are the metric names requested from the API
    structure = {
        "date": "date",
        "cases": "newCasesByPublishDate",
        "hospital":"newAdmissions",
        "deaths":"cumDeaths28DaysByDeathDateRate"
    }
    api = Cov19API(filters=filters, structure=structure)
    timeseries=api.get_json()

    # keep the file on disk in step with what was just downloaded
    with open("timeseries.json", "wt") as OUTF:
        json.dump(timeseries, OUTF)

    # return the fresh download, not the global `data` that was loaded from
    # the file when the notebook started
    return timeseries

In [45]:
# our API access function. This will be called by the button when it is clicked
# Printout from this function will be lost in Voila unless captured in an
# output widget - therefore, we give feedback to the user by changing the 
# appearance of the button
def api_button_callback(button):
    """ Button callback - it must take the button as its parameter (unused in this case).
    Accesses the API, wrangles the fresh data and stores the result in the global
    variable dataframe. On success the button shows a "check" icon and is disabled;
    if the download or the wrangling fails, the button shows an "unlink" icon and
    the text "Unavailable", and stays enabled so the user can try again. """
    global dataframe
    try:
        # get fresh data from the API...
        apidata=access_api()
        # ...and wrangle that fresh data (not the copy loaded at start-up)
        dataframe = wrangle_data(apidata)
    except Exception as error:
        # Top-level UI boundary: report the failure on the button itself
        # instead of letting the exception disappear inside the widget machinery.
        apibutton.icon="unlink"
        apibutton.description="Unavailable"
        print("Data update failed: " + str(error))
        return
    # NOTE(review): timeseries_graph plots the global timeseriesdf, which this
    # callback does not touch, so the graph keeps showing the data loaded at
    # start-up - confirm whether the refreshed frame should replace it.
    apibutton.icon="check"
    apibutton.disabled=True
    print("Data updated at " + str(datetime.datetime.now()))
    
# The refresh button. button_style may be 'success', 'info', 'warning',
# 'danger' or ''; icon is a FontAwesome name without the `fa-` prefix.
apibutton = wdg.Button(
    icon='download',
    description='Press to Update',
    tooltip="Click to download updated data from PHE",
    button_style='',
    disabled=False,
)

# Run api_button_callback whenever the button is clicked.
apibutton.on_click(api_button_callback)

display(apibutton)

# run all cells before clicking on this button

Button(description='Press to Update', icon='download', style=ButtonStyle(), tooltip='Click to download updated…

Data updated at2020-11-27 02:24:56.708735


In [46]:
# Save the wrangled dataframe to disk as a pickle file.
timeseriesdf.to_pickle(path="timeseriesdf.pkl")

## Graphs and Analysis

Here is the graph for Wales illustrating the current number of cases, hospitalisations and deaths. You can view this graph on either a linear or a logarithmic scale.

In [47]:
# Multi-select list of the statistics to plot; all three are selected at the start.
series = wdg.SelectMultiple(
    description='Stats:',
    options=['cases', 'hospital', 'deaths'],
    value=['cases', 'hospital', 'deaths'],
    rows=3,
    disabled=False,
)

# Radio buttons that switch the y-axis between a linear and a logarithmic scale.
scale = wdg.RadioButtons(
    description='Scale:',
    options=['linear', 'log'],
    disabled=False,
)

# Place the two controls side by side (a VBox would stack them instead).
controls = wdg.HBox(children=[series, scale])

def timeseries_graph(gcols, gscale):
    """ Plot the selected columns of timeseriesdf.
    gcols - the column names to plot; if it is empty, a hint is printed instead.
    gscale - 'linear' for a linear y-axis; any other value gives a logarithmic one. """
    if len(gcols) == 0:
        # nothing selected: explain how to pick something rather than plotting
        print("Click to select data for graph")
        print("(CTRL-Click to select more than one category)")
        return
    use_log_axis = gscale != 'linear'
    timeseriesdf[list(gcols)].plot(logy=use_log_axis)

# Re-run timeseries_graph with the current values of the two widgets each time
# one of them changes, capturing whatever it draws or prints in `graph`.
graph = wdg.interactive_output(timeseries_graph, dict(gcols=series, gscale=scale))

display(controls, graph)

HBox(children=(SelectMultiple(description='Stats:', index=(0, 1, 2), options=('cases', 'hospital', 'deaths'), …

Output()

In [56]:
# Vertical stack: the statistics selector above the scale selector.
ctrls = wdg.VBox(children=[series, scale])
# Horizontal row: the graph on the left, the stacked controls on the right.
form = wdg.HBox(children=[graph, ctrls])