In [1]:
from IPython.display import clear_output
import ipywidgets as wdg
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
from uk_covid19 import Cov19API

In [2]:
%matplotlib inline
# Set matplotlib's default figure resolution to 100 dots per inch so that
# figures are rendered larger (applies to every figure created afterwards).
plt.rcParams['figure.dpi'] = 100

# New Cases & Testing in the COVID-19 Pandemic

This site makes use of data from Public Health England (PHE) in relation to the COVID-19 pandemic, specifically data relating to testing. The data covers the period 03-01-2020 to 29-11-2023 but may be refreshed to reflect the most recent data. Over this interval we track, for each day, the number of new cases and the number of new lateral flow device (LFD) and polymerase chain reaction (PCR) test results.

In [3]:
# Load the JSON file and keep the raw data in `jsondata`. Edit the filename as appropriate.
# The encoding is stated explicitly so the file is decoded the same way on
# every platform instead of depending on the locale's default encoding.
with open("test_cases.json", "rt", encoding="utf-8") as INFILE:
    jsondata = json.load(INFILE)

In [4]:
def wrangle_data(rawdata):
    """Turn a raw JSON payload into a gap-free, daily, numeric DataFrame.

    Parameters
    ----------
    rawdata : dict
        Payload whose 'data' entry is a list of dictionaries. Each dictionary
        holds a 'date' string in YYYY-MM-DD format plus one value per metric.

    Returns
    -------
    pandas.DataFrame
        float64 frame indexed by every calendar day from the earliest to the
        latest date present. The columns are the non-'date' keys of the first
        entry, in their original order. Days absent from the data and values
        that are missing (None) are recorded as 0.0. When a date occurs more
        than once, the first occurrence in the list is kept.

    Raises
    ------
    ValueError
        If the payload contains no entries.
    """
    rawlist = rawdata['data']  # isolate list of actual data
    if not rawlist:
        raise ValueError("no data entries to wrangle")

    # Select metric columns by name rather than by position, so the result
    # does not depend on 'date' being the first key of each entry.
    columns = [key for key in rawlist[0] if key != 'date']

    frame = pd.DataFrame(rawlist, columns=['date'] + columns)
    frame['date'] = pd.to_datetime(frame['date'], format="%Y-%m-%d")
    for column in columns:
        # Force a real float dtype (None becomes NaN here); leaving the
        # columns as object dtype would make later plotting unreliable.
        frame[column] = pd.to_numeric(frame[column]).astype(float)

    # First entry wins for duplicated dates.
    frame = frame.drop_duplicates(subset='date', keep='first').set_index('date')

    # A complete daily range gives a reliable, evenly spaced time axis.
    index = pd.date_range(frame.index.min(), frame.index.max(), freq='D')
    result = frame.reindex(index).fillna(0.0)  # absent days / values -> 0.0
    result.index.name = None
    return result

# Initial dataframe built from the JSON file loaded above; the button
# callback later overwrites this global with freshly fetched data.
df = wrangle_data(jsondata)

In [5]:
# API access code. Do not call this function directly; it will be called by the button callback.

def access_api():
    """Query the Cov19API for England's daily case and test figures.

    The request is restricted to the nation-level area named England and asks
    for the date plus three metrics, exposed under the short names 'cases',
    'LFDs' and 'PCRs' -- the same data as was used for the original JSON file.

    Returns
    -------
    The JSON structure produced by ``Cov19API.get_json()``.
    """
    england_only = ['areaType=nation', 'areaName=England']
    requested_fields = dict(
        date='date',
        cases='newCasesBySpecimenDate',
        LFDs='newLFDTestsBySpecimenDate',
        PCRs='newPCRTestsBySpecimenDate',
    )
    client = Cov19API(filters=england_only, structure=requested_fields)
    return client.get_json()  # data read from the API

Click the 'Fetch Data' button to retrieve the latest data from PHE. This will automatically refresh the graph.
Choose which data to display and whether to represent it on a linear or logarithmic scale.

In [6]:
# Printout from this function will be lost in Voila unless captured in an
# output widget - therefore, we give feedback to the user by changing the 
# appearance of the button
def api_button_callback(button):
    """Button callback: fetch fresh data, rebuild the dataframe and redraw.

    Accesses the API, wrangles the result, replaces the global ``df`` used for
    plotting and forces the graph to refresh. Feedback is given through the
    button's appearance, because printed output is not visible here.

    Parameters
    ----------
    button
        The clicked button. Required by the callback protocol; unused, since
        the module-level ``apibutton`` is updated directly.
    """
    global df  # the plotting function reads this module-level dataframe
    try:
        apidata = access_api()  # get fresh data from the API
        df = wrangle_data(apidata)  # overwrite the dataframe used for plotting
        refresh_graph()  # defined later - redraws the graph with the new data
    except Exception as error:
        # Only ordinary errors are handled; KeyboardInterrupt and SystemExit
        # still propagate. The reason is surfaced in the tooltip because
        # nothing can be printed from here.
        apibutton.description = "Unavailable"  # the data could not be updated
        apibutton.icon = "unlink"
        apibutton.tooltip = f"Fetch failed: {type(error).__name__}: {error}"
    else:
        # Restore the normal label so that a success after an earlier failure
        # is not left showing "Unavailable".
        apibutton.description = "Fetch Data"
        apibutton.tooltip = "Fetch updated data from PHE"
        apibutton.icon = "check"  # lets user know the data has been updated
        apibutton.disabled = False  # allows user to refresh data multiple times
    
# The button that triggers a data refresh; its icon and label double as
# status feedback for the user.
apibutton = wdg.Button(
    icon="refresh",
    tooltip="Fetch updated data from PHE",
    button_style='primary',
    description='Fetch Data',
    disabled=False,
)

# Hook the callback up to the button so that a click runs it.
apibutton.on_click(api_button_callback)

display(apibutton)

# Note: run all cells before clicking on this button.

Button(button_style='primary', description='Fetch Data', icon='refresh', style=ButtonStyle(), tooltip='Fetch u…

In [7]:
# Multi-select list: the user picks which columns are drawn.
# All three series are selected initially.
tests_series = wdg.SelectMultiple(
    description='Data:',
    options=['cases', 'LFDs', 'PCRs'],  # data to choose from
    value=['cases', 'LFDs', 'PCRs'],
    rows=3,
    disabled=False,  # so that the user can keep changing their choices
)

# Radio buttons: linear or logarithmic vertical axis.
tests_scale = wdg.RadioButtons(
    description='Scale:',
    options=['linear', 'log'],
    disabled=False,
)

# Lay the two controls out side by side.
tests_controls = wdg.HBox(children=[tests_series, tests_scale])

def tests_graph(cols, scale):
    if scale == 'linear':
        logscale = False
    else:
        logscale = True # updates graph to reflect user's choice of scale
    ncols = len(cols)
    if ncols == 0: # i.e. if no data has been selected to be visualised by the user
        print("Click to select data for graph")
        print("(cmd/CTRL-Click to select more than one category)")
    else: # i.e. if data has been selected by the user
        # plot the graph according to the data chosen for visualisation by the user:
        df[list(cols)].plot.line(logy = logscale, color={"cases": "mediumseagreen", "LFDs": "cornflowerblue", "PCRs": "mediumpurple"})
        # (added pretty colours)
        plt.xlabel("Time")
        plt.ylabel("Amount")
        plt.legend() # needed to reflect colour changes
        plt.show()

def refresh_graph():
    """Force the graph to redraw by briefly changing the selection widget.

    The selection is switched to a value that is guaranteed to differ from the
    current one and then restored, so two change events fire and the graph is
    redrawn with whatever ``df`` now holds. This is useful when the data have
    been updated.
    """
    current = tests_series.value  # tuple of the currently selected options
    # An empty selection differs from every non-empty one; if nothing is
    # selected, selecting everything differs instead. Picking a fixed pair of
    # options would not trigger a redraw when that pair is already selected.
    other = () if current else tuple(tests_series.options)
    tests_series.value = other  # forces the redraw
    tests_series.value = current  # now we can change it back

# Bind the two controls to the plotting function's parameters; the resulting
# output area is shown beneath the controls.
output = wdg.interactive_output(
    tests_graph,
    dict(cols=tests_series, scale=tests_scale),
)

display(tests_controls, output)

HBox(children=(SelectMultiple(description='Data:', index=(0, 1, 2), options=('cases', 'LFDs', 'PCRs'), rows=3,…

Output()

As we would expect, the graph shows a rapid increase in the number of PCR test results, and subsequently of new cases, over the first few months of 2020 at the outbreak of the pandemic. Following this, the number of new cases varies seasonally, showing a general decrease in the summer and an increase in the winter months (e.g. an increase of over two orders of magnitude between May 2020 and January 2021), which is typical of how viruses propagate. However, the number of new test results varies much less and remains relatively stable until approximately January 2022, after which the numbers of new cases and new test results all generally decrease. This could indicate that the tests are reliable, as the number of positive results appears to vary more or less independently of the number of tests taken.