References

https://coronavirus.data.gov.uk/details/download

https://coronavirus.data.gov.uk/details/developers-guide


In [467]:
# import packages
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from requests import get

In [468]:
def get_data(url):
    """Download a JSON response from the given endpoint and return it as a dataframe.

    The response is expected to be a JSON object whose ``body`` entry is a
    list of dictionaries (one per row), each of which has a ``date`` key.

    Args:
        url: full endpoint URL, including the query string.

    Returns:
        A pandas DataFrame with one row per dictionary in ``body`` and one
        column per dictionary key, sorted by ``date`` in ascending order.

    Raises:
        requests.HTTPError: if the server responds with an error status.
        KeyError: if the response has no ``body`` entry.
        ValueError: if ``body`` contains no records.
    """
    # load the data in json format, failing loudly on an HTTP error status
    # rather than trying to parse an error payload
    response = get(url, timeout=10)
    response.raise_for_status()

    # we're only actually interested in the body entry (a list of dictionaries)
    records = response.json()['body']
    if not records:
        raise ValueError('no records returned by ' + url)

    # build the dataframe straight from the dictionaries so that each value is
    # matched to its column by key rather than by its position in the dictionary
    data_df = pd.DataFrame(records)

    # ensure data is in ascending order of date
    return data_df.sort_values('date', ascending=True)

In [469]:
# delete rows from the end of the dataframe (i.e. most recent date) until reaching a row where the specified field is not null or zero
def clean_data(df, col_name):
    """Trim trailing rows whose value in ``col_name`` is null or zero.

    Rows are considered in their existing order; only the run of null/zero
    rows at the end is removed, zeros earlier in the frame are kept.

    Args:
        df: dataframe with a ``date`` column and a ``col_name`` column.
        col_name: name of the column whose trailing null/zero rows are dropped.

    Returns:
        A tuple ``(df, latest_date)`` where ``df`` is the trimmed copy (with
        nulls in every column replaced by zero) and ``latest_date`` is the
        largest value left in its ``date`` column. If the column holds
        nothing but nulls/zeros the frame is empty and ``latest_date`` is
        ``None``.
    """
    # replace nulls with zeros (this applies to every column, not just col_name)
    df = df.fillna(0)

    # positions of the rows that hold a non-zero value in the specified column
    nonzero_positions = np.flatnonzero((df[col_name] != 0).to_numpy())

    # nothing but nulls/zeros: return an empty frame instead of indexing past
    # the start of the data
    if nonzero_positions.size == 0:
        return df.iloc[0:0], None

    # keep everything up to and including the last non-zero row
    df = df.iloc[:nonzero_positions[-1] + 1]

    # determine the latest date remaining in the trimmed data
    latest_date = df.date.max()

    # return the dataframe and latest date
    return df, latest_date

In [470]:
# Converts a date string from yyyy-mm-dd to dd-mm-yyyy
def format_date( date ):
    """Return ``date`` with its hyphen-separated parts in reverse order.

    Args:
        date: date string whose parts are separated by hyphens, e.g. '2021-01-05'.

    Returns:
        The same parts joined by hyphens in the opposite order, e.g. '05-01-2021'.
    """
    parts = date.split("-")
    return "-".join(parts[::-1])

In [471]:
def get_nations_data():
    """Download the data for each nation.

    One request is made per nation via get_data, in the order England,
    Wales, Scotland, Northern Ireland. Every request uses the same endpoint
    and metrics and differs only in the area code.

    Returns:
        A tuple ``(nations_list, nations_labels)``: the dataframes returned by
        get_data and the matching nation names, in the same order.
    """
    # the endpoint is identical for every nation apart from the area code
    endpoint_template = (
        'https://api.coronavirus.data.gov.uk/v2/data?areaType=nation&areaCode={area_code}'
        '&metric=cumDeaths28DaysByDeathDate'
        '&metric=cumDeaths28DaysByDeathDateRate'
        '&metric=newDeaths28DaysByDeathDateRollingRate'
        '&metric=newDeaths28DaysByDeathDateRollingSum'
        '&format=json'
    )

    # (label, area code) for each nation, in plotting order
    nations = [
        ('England', 'E92000001'),
        ('Wales', 'W92000004'),
        ('Scotland', 'S92000003'),
        ('Northern Ireland', 'N92000002'),
    ]

    # initialise lists
    nations_list = []
    nations_labels = []

    # import data for each nation
    for label, area_code in nations:
        nations_list.append(get_data(endpoint_template.format(area_code=area_code)))
        nations_labels.append(label)

    return nations_list, nations_labels

In [472]:
def get_regions_data():
    """Download the data for each region.

    One request is made per region via get_data, in the order listed below.
    Every request uses the same endpoint and metrics and differs only in the
    area code.

    Returns:
        A tuple ``(regions_list, regions_labels)``: the dataframes returned by
        get_data and the matching region names, in the same order.
    """
    # the endpoint is identical for every region apart from the area code
    endpoint_template = (
        'https://api.coronavirus.data.gov.uk/v2/data?areaType=region&areaCode={area_code}'
        '&metric=cumDeaths28DaysByDeathDate'
        '&metric=cumDeaths28DaysByDeathDateRate'
        '&metric=newDeaths28DaysByDeathDateRollingRate'
        '&metric=newDeaths28DaysByDeathDateRollingSum'
        '&format=json'
    )

    # (label, area code) for each region, in plotting order
    regions = [
        ('West Midlands', 'E12000005'),
        ('East Midlands', 'E12000004'),
        ('Yorkshire and the Humber', 'E12000003'),
        ('East of England', 'E12000006'),
        ('London', 'E12000007'),
        ('North East', 'E12000001'),
        ('North West', 'E12000002'),
        ('South East', 'E12000008'),
        ('South West', 'E12000009'),
    ]

    # initialise lists
    regions_list = []
    regions_labels = []

    # import data for each region
    for label, area_code in regions:
        regions_list.append(get_data(endpoint_template.format(area_code=area_code)))
        regions_labels.append(label)

    return regions_list, regions_labels

In [473]:
def get_local_authority_data():
    """Download the data for each local authority of interest.

    One request is made per local authority via get_data, in the order
    Birmingham, Herefordshire, Sheffield. Every request uses the same
    endpoint and metrics and differs only in the area code.

    Returns:
        A tuple ``(locauth_list, locauth_labels)``: the dataframes returned by
        get_data and the matching local authority names, in the same order.
    """
    # the endpoint is identical for every local authority apart from the area code
    endpoint_template = (
        'https://api.coronavirus.data.gov.uk/v2/data?areaType=utla&areaCode={area_code}'
        '&metric=cumDeaths28DaysByDeathDate'
        '&metric=cumDeaths28DaysByDeathDateRate'
        '&metric=newDeaths28DaysByDeathDateRollingRate'
        '&metric=newDeaths28DaysByDeathDateRollingSum'
        '&format=json'
    )

    # (label, area code) for each local authority, in plotting order
    local_authorities = [
        ('Birmingham', 'E08000025'),
        ('Herefordshire', 'E06000019'),
        ('Sheffield', 'E08000019'),
    ]

    # initialise lists
    locauth_list = []
    locauth_labels = []

    # import data for each local authority
    for label, area_code in local_authorities:
        locauth_list.append(get_data(endpoint_template.format(area_code=area_code)))
        locauth_labels.append(label)

    return locauth_list, locauth_labels

In [474]:
def clean_combined_data( df_list, col_name ):
    """Clean every dataframe in a list and report the latest date across them.

    Each dataframe is passed through clean_data; the largest of the dates it
    reports is then reformatted with format_date.

    Args:
        df_list: list of dataframes to clean.
        col_name: name of the column passed to clean_data for each dataframe.

    Returns:
        A tuple ``(clean_list, max_date)``: the cleaned dataframes in their
        original order, and the overall latest date as formatted by
        format_date.

    Raises:
        ValueError: if ``df_list`` is empty or no dataframe reports a date,
            since there is then no latest date to return.
    """
    clean_list = []
    latest_dates = []

    # clean each dataframe in the specified list, collecting the latest date of each
    for df in df_list:
        df, max_df_date = clean_data(df, col_name)
        clean_list.append(df)
        # a dataframe without a reported date cannot contribute to the maximum
        if max_df_date is not None:
            latest_dates.append(max_df_date)

    if not latest_dates:
        raise ValueError('no dates available to determine the latest date')

    # the overall max date is simply the largest of the per-dataframe dates
    max_date = format_date(max(latest_dates))
    return clean_list, max_date

In [475]:
# add a single line to the graph
def plot_line(fig, df, data_field, label):
    """Add one line trace to ``fig`` from the given dataframe.

    Args:
        fig: figure to which the trace is added via ``add_trace``.
        df: dataframe providing the ``date`` column (x axis) and the
            ``data_field`` column (y axis).
        data_field: name of the column to plot on the y axis.
        label: name given to the trace.

    Nothing is added when the dataframe has no rows.
    """
    dates = np.asarray(df['date'])
    values = np.asarray(df[data_field])

    # nothing to draw for an empty series
    if len(dates) == 0:
        return

    trace = go.Scatter(x=dates, y=values, mode="lines", name=label, connectgaps=True)
    fig.add_trace(trace)

In [476]:
# plot graph
def plot_graph(df_list, labels, title, col_name, y_label):
    """Plot one line per dataframe on a single figure and display it.

    Args:
        df_list: list of dataframes, each plotted as one line via plot_line.
        labels: trace names; ``labels[i]`` is used for ``df_list[i]``.
        title: title shown on the figure.
        col_name: name of the column plotted on the y axis.
        y_label: y axis title.
    """
    fig = go.Figure()
    for i, df in enumerate(df_list):
        plot_line(fig, df, col_name, labels[i])
    # use the title argument here: reading the module-level graph_title instead
    # would ignore whatever the caller passed in
    fig.update_layout(title=title, xaxis_title="Date", yaxis_title=y_label, width=1000)
    fig.show()

In [477]:
# restrict data to second wave i.e. deaths from 1 Sep 2020 onwards
def second_wave(df, start_date='2020-09-01'):
    """Return the rows of ``df`` dated on or after ``start_date``.

    Args:
        df: dataframe with a ``date`` column.
        start_date: earliest date to keep, compared directly against the
            ``date`` column. Defaults to '2020-09-01'.

    Returns:
        The rows of ``df`` whose ``date`` is greater than or equal to
        ``start_date``; ``df`` itself is not modified.
    """
    return df[df.date >= start_date]

In [478]:
# get the nations data: one dataframe per nation plus the matching display labels
nations_list, nations_labels = get_nations_data()

# remove rows from the end (i.e. latest date) where the column we're plotting is null or zero,
# and keep the latest remaining date (as formatted by format_date) for use in the graph title
col_name = 'newDeaths28DaysByDeathDateRollingRate'
nations_list, max_nations_date = clean_combined_data(nations_list, col_name)

In [479]:
# get the regions data: one dataframe per region plus the matching display labels
regions_list, regions_labels = get_regions_data()

# remove rows from the end (i.e. latest date) where the column we're plotting is null or zero,
# and keep the latest remaining date (as formatted by format_date) for use in the graph title
col_name = 'newDeaths28DaysByDeathDateRollingRate'
regions_list, max_regions_date = clean_combined_data(regions_list, col_name)

# restrict data to second wave (same order as regions_list, so regions_labels still lines up)
second_wave_list = []
for df in regions_list:
    second_wave_list.append(second_wave(df))

In [480]:
# get the local authorities data: one dataframe per local authority plus the matching display labels
locauth_list, locauth_labels = get_local_authority_data()

# remove rows from the end (i.e. latest date) where the column we're plotting is null or zero,
# and keep the latest remaining date (as formatted by format_date) for use in the graph title
col_name = 'newDeaths28DaysByDeathDateRollingRate'
locauth_list, max_locauth_date = clean_combined_data(locauth_list, col_name)

In [481]:
# plot nations data: one line per nation, with the latest nations date in the title
graph_title = 'New Deaths per 100,000 within 28 Days of Positive Test to ' + max_nations_date
col_name = 'newDeaths28DaysByDeathDateRollingRate'
y_label = 'Rolling Rate'
plot_graph( nations_list, nations_labels, graph_title, col_name, y_label)

In [482]:
# plot regional data for second wave: one line per region, with the latest regions date in the title
col_name = 'newDeaths28DaysByDeathDateRollingRate'
graph_title = 'New Deaths per 100,000 within 28 Days of Positive Test, to ' + max_regions_date
y_label = 'Rolling Rate'
plot_graph( second_wave_list, regions_labels, graph_title, col_name, y_label)

In [483]:
# plot local authority data
# the date in the title is taken from the local authority data itself (max_locauth_date),
# since it is cleaned separately from the nations data and may end on a different date
graph_title = 'New Deaths per 100,000 within 28 Days of Positive Test to ' + max_locauth_date
col_name = 'newDeaths28DaysByDeathDateRollingRate'
y_label = 'Rolling Rate'
plot_graph( locauth_list, locauth_labels, graph_title, col_name, y_label)

In [484]:
# restrict local authority data to second wave (same order as locauth_list, so locauth_labels still lines up)
second_wave_locauth_list = []
for df in locauth_list:
    second_wave_locauth_list.append(second_wave(df))

# plot local authority second wave data
# the date in the title is taken from the local authority data itself (max_locauth_date),
# since it is cleaned separately from the nations data and may end on a different date
graph_title = 'New Deaths per 100,000 within 28 Days of Positive Test to ' + max_locauth_date
col_name = 'newDeaths28DaysByDeathDateRollingRate'
y_label = 'Rolling Rate'
plot_graph( second_wave_locauth_list, locauth_labels, graph_title, col_name, y_label)