# England vs Scotland

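This dashboard compares the proportion of each age group that is fully vaccinated in England and in Scotland. The *Difference* column is Scotland's percentage minus England's, so a positive value means Scotland has fully vaccinated a larger share of that age group than England.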


In [1]:
from IPython.display import clear_output
import ipywidgets as wdg
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
from uk_covid19 import Cov19API

ModuleNotFoundError: No module named 'uk_covid19'

In [None]:
# Render matplotlib figures inline in the cell output
%matplotlib inline
# make figures larger (sets the figure resolution to 100 dots per inch)
plt.rcParams['figure.dpi'] = 100

In [None]:
# Load the saved JSON snapshots so the graph has default data to show before
# the user requests fresh data from the API.
jsondata={}
# JSON is UTF-8 by specification; be explicit so loading does not depend on
# the platform's default text encoding.
with open("eng.json", "rt", encoding="utf-8") as IFILE:
    eng=json.load(IFILE)
engjson = eng['data'][0] # first record of the file's 'data' list (England)
with open("scot.json", "rt", encoding="utf-8") as IFILE:
    scot=json.load(IFILE)
scotjson = scot['data'][0] # first record of the file's 'data' list (Scotland)

In [None]:
def wrangle_data(datascot,dataeng):
    """Build the England-vs-Scotland full-vaccination coverage table.

    Parameters:
        datascot: dict for Scotland with a 'vaccinated' list holding one dict per
            age band. Each entry's 'age' and
            'cumVaccinationCompleteCoverageByVaccinationDatePercentage' are read.
        dataeng: dict for England with the same layout. Each entry's 'age',
            'cumPeopleVaccinatedCompleteByVaccinationDate' and
            'VaccineRegisterPopulationByVaccinationDate' are read.

    Returns:
        A float DataFrame indexed by Scotland's age bands (sorted by lower age
        bound) with columns 'England', 'Scotland' and 'Difference'
        (Scotland minus England). Cells with no usable data are NaN.

    England's narrower bands are merged to match Scotland's wider ones by
    summing the people counts and the populations of every band in the group,
    and the percentage is recomputed from those totals (percentages themselves
    cannot be added). The input dictionaries are not modified.
    """
    complete_key = 'cumPeopleVaccinatedCompleteByVaccinationDate'
    population_key = 'VaccineRegisterPopulationByVaccinationDate'
    scot_percent_key = 'cumVaccinationCompleteCoverageByVaccinationDatePercentage'
    # England bands that must be combined to line up with Scotland's bands
    merge_groups = (
        ('18_24','25_29'),
        ('30_34','35_39'),
        ('40_44','45_49'),
        ('80_84','85_89','90+'),
    )

    def min_age(agerange):
        """Lower bound of an age band such as '18_29' or '90+'."""
        return int(agerange.replace('+','').split('_')[0])

    def merged_label(ages):
        """Label covering every band in `ages`: 'low_high', or 'low+' when open-ended at 90."""
        bounds = sorted(
            int(token)
            for band in ages
            for token in band.replace('_',' ').replace('+',' ').split()
        )
        if bounds[-1]!=90:
            return f'{bounds[0]}_{bounds[-1]}'
        return f'{bounds[0]}+'

    # Map each England band that needs merging to the label of its merged band
    target_band = {
        band: merged_label(group)
        for group in merge_groups
        for band in group
    }

    # Sum [fully vaccinated, population] per target band; missing/None counts as 0.
    # Accumulating into a fresh dict (rather than editing the list of entries in
    # place while looping over it) guarantees every band in a group is counted.
    totals = {}
    for entry in dataeng['vaccinated']:
        band = target_band.get(entry['age'], entry['age'])
        running = totals.setdefault(band, [0.0, 0.0])
        running[0] += float(entry.get(complete_key) or 0)
        running[1] += float(entry.get(population_key) or 0)

    agesscot = sorted((entry['age'] for entry in datascot['vaccinated']), key=min_age)
    age_df = pd.DataFrame(index=agesscot, columns=['England','Scotland','Difference'], dtype=float)

    for entry in datascot['vaccinated']: # each age band is a dictionary
        percent = entry[scot_percent_key]
        age_df.loc[entry['age'], 'Scotland'] = float('nan') if percent is None else float(percent)

    for band, (complete, population) in totals.items():
        # Only bands Scotland also reports are shown; skip empty populations
        # instead of dividing by zero (the cell stays NaN).
        if band in age_df.index and population:
            age_df.loc[band, 'England'] = round((complete/population)*100,1)

    age_df['Difference']=age_df['Scotland']-age_df['England']
    return age_df

# Wrangle the saved JSON snapshots so there is data to plot at dashboard startup;
# `df` is the module-level frame that the graph reads and the refresh button overwrites.
df = wrangle_data(scotjson,engjson)

## Download current data

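Click the **Refresh data** button below to retrieve the most up-to-date data from the PHE API. After a successful download the button shows a tick and is disabled, to avoid making repeated API calls.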

In [None]:
"""Callback functions to be called by refresh data button. Two API calls to retrieve data for England and Scotland"""
def _access_nation(area_name):
    """Download the age-banded vaccination data for one nation from the API.

    Parameters:
        area_name: value used in the API's 'areaName' filter, e.g. 'england'.

    Returns:
        The first record of the response's 'data' list: a dict with the keys
        'nation' and 'vaccinated', i.e. the same layout as the records taken
        from the saved JSON files.

    This helper deliberately does not touch the refresh button; the button
    callback updates it only once the whole refresh has succeeded.
    """
    filters = [
        'areaType=nation',
        f'areaName={area_name}'
    ]
    structure = {
        'nation':'areaName',
        "vaccinated":"vaccinationsAgeDemographics"
    }
    api = Cov19API(filters=filters, structure=structure)
    response = api.get_json()
    return response['data'][0] #formatting the same as JSON files

def access_eng():
    """Return the current vaccination-by-age record for England read from the API."""
    return _access_nation('england')

def access_scot():
    """Return the current vaccination-by-age record for Scotland read from the API."""
    return _access_nation('scotland')


In [None]:
# Printout from this function will be lost in Voila unless captured in an
# output widget - therefore, we give feedback to the user by changing the 
# appearance of the button
def api_button_callback(button):
    """Button callback: download fresh data for both nations and redraw the graph.

    Parameters:
        button: the widget that was clicked (supplied by ipywidgets; unused).

    On success the module-level dataframe `df` is replaced, the graph is
    refreshed, and the button shows a tick and is disabled to avoid too many
    API calls by the end user. On failure `df` is left untouched and the button
    shows a warning icon and stays enabled so the user can retry.
    """
    global df
    try:
        apiscot=access_scot()
        apieng= access_eng()
        # Build the new frame fully before replacing `df`, so a failure part-way
        # through never leaves the dashboard with half-updated data.
        fresh_df=wrangle_data(apiscot,apieng)
    except Exception as error:
        # Deliberately broad: anything raised here (network, API or data-shape
        # problems) would otherwise be invisible to a Voila user, so report it
        # through the button's appearance instead.
        apibutton.icon="exclamation-triangle"
        apibutton.tooltip=f'Download failed ({error}). Click to try again.'
        apibutton.disabled=False
        return
    df=fresh_df
    # this function simulates the interaction with widget to refresh the graph.
    refresh_graph()
    #changing the button to 'check' and disabling the button to avoid too many API calls by end user.
    apibutton.icon="check"
    apibutton.disabled=True

    
# The 'Refresh data' button; its icon and disabled state are changed by the
# callback code to give the user feedback.
apibutton=wdg.Button(
    description='Refresh data',
    disabled=False,
    button_style='', 
    tooltip='Click to download current Public Health England data',
    icon='download' # (FontAwesome names without the `fa-` prefix)
)

apibutton.on_click(api_button_callback) # registers the button callback function with the button

# Show the button in the cell output
display(apibutton)


Button(description='Refresh data', icon='download', style=ButtonStyle(), tooltip='Click to download current Pu…

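## Vaccination coverage by age group

The bar chart below shows, for each age group, the percentage of people who are fully vaccinated in England and in Scotland, together with the difference between the two (Scotland minus England). Use the selection box to choose which columns to plot; CTRL-click to select more than one.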

In [None]:
def age_graph(graphcolumns):
    """Widget callback: draw a bar chart of the selected columns of `df`.

    Parameters:
        graphcolumns: tuple of column names chosen in the selection widget
            (any of 'England', 'Scotland', 'Difference').

    Reads the module-level dataframe `df`, whose index holds the age bands.
    When nothing is selected a short hint is printed instead of a chart.
    """
    ncols=len(graphcolumns)
    if ncols>0:
        # DataFrame.plot returns the matplotlib Axes, which is used to label the
        # chart so it can be understood on its own.
        ax = df.plot(kind='bar', y=list(graphcolumns)) # changing graphcolumns tuple to a list
        ax.set_title('Full vaccination coverage by age group')
        ax.set_xlabel('Age group')
        ax.set_ylabel('Fully vaccinated (% of age group)')
        plt.show()
    else:
        # if the user has not selected any column, print a message instead
        print("Click to select data for graph")
        print("(CTRL-Click to select more than one category)")

# Multiple-selection box controlling which dataframe columns are plotted;
# all three columns are selected when the dashboard starts.
agecols=wdg.SelectMultiple(
    options=['England','Scotland','Difference'], # options available
    value=['England','Scotland','Difference'], # default value
    rows=3, # rows of the selection box
    description='Nation',
    disabled=False
)

def refresh_graph():
    """Force the graph to redraw, e.g. after the underlying data has been updated.

    The selection widget's value is briefly switched to a different
    single-column selection and then restored, which makes the widget fire
    its change events again.
    """
    selected=agecols.value # the current selection, as a tuple
    # Pick a temporary selection that is guaranteed to differ from the current one
    placeholder=(agecols.options[1],) if selected==(agecols.options[0],) else (agecols.options[0],)
    agecols.value=placeholder # triggers the redraw
    agecols.value=selected # restore the user's selection
    
# keep calling age_graph(graphcolumns=value_of_agecols); capture output in widget output    
# (the dict maps the callback's parameter name to the widget that supplies its value)
output=wdg.interactive_output(age_graph, {'graphcolumns': agecols})

# Show the selection box followed by the captured graph output
display(agecols, output)

SelectMultiple(description='Nation', index=(0, 1, 2), options=('England', 'Scotland', 'Difference'), rows=3, v…

Output()

**Author and Copyright Notice** Ellen Fitzgerald, 2022. *Based on UK Government [data](https://coronavirus.data.gov.uk/) published by [Public Health England](https://www.gov.uk/government/organisations/public-health-england).*
