In [1]:
# Setup

In [2]:
from IPython.display import clear_output
import ipywidgets as wdg
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests
import time
import json

In [3]:
# Render matplotlib figures inline, in the output area of the cell that draws them
%matplotlib inline
# make figures larger: every figure in the notebook is rendered at 100 dots per inch
plt.rcParams['figure.dpi'] = 100

In [4]:
# Loading Static Data

In [5]:
# Static ("canned") data shipped with the notebook, keyed by dataset name
jsondata = {}

# Each dataset lives in its own JSON file; load them one after the other
for dataset_name, json_path in (("region", "location_data.json"),
                                ("age", "age_data.json")):
    # Start from an empty placeholder, then replace it with the file contents
    jsondata[dataset_name] = {}
    with open(json_path, "rt") as f:
        jsondata[dataset_name] = json.load(f)


In [6]:
# Updating Data Logic

In [7]:
# APIwrapper code: originally copied verbatim, since hardened (see the class docstring)
class APIwrapper:
    """ Small paginated client for the UKHSA dashboard API.

    Changes with respect to the original copy:
    - no mutable default arguments; filters=None means "no filters";
    - the pagination state is fully initialised in __init__;
    - the rate-limit timestamp is taken after any sleep, i.e. when the
      request is actually sent;
    - HTTP errors raise (requests' HTTPError) instead of surfacing later as a
      KeyError on the decoded body, and requests time out instead of hanging;
    - a private copy of the filters is kept, so a caller editing its filter
      dictionary in place is detected as a change of filters;
    - get_all_pages always starts from the first page, so calling it twice
      returns the full data twice instead of an empty list the second time. """
    # class variables shared among all instances
    _access_point="https://api.ukhsa-dashboard.data.gov.uk"
    _last_access=0.0 # time of last api access
    _min_interval=0.33 # minimum seconds between requests (max 3 requests/second)
    _timeout=30 # seconds to wait for the server before giving up

    def __init__(self, theme, sub_theme, topic, geography_type, geography, metric):
        """ Init the APIwrapper object, constructing the endpoint from the structure
        parameters """
        # build the path with all the required structure parameters; the
        # placeholders are replaced by the values passed to the constructor
        url_path=(f"/themes/{theme}/sub_themes/{sub_theme}/topics/{topic}/geography_types/" +
                  f"{geography_type}/geographies/{geography}/metrics/{metric}")
        # our starting API endpoint
        self._start_url=APIwrapper._access_point+url_path
        # pagination state: filters/page size of the query in progress and
        # the URL of the next page to fetch (None once the last page is read)
        self._filters=None
        self._page_size=-1
        self._next_url=self._start_url
        # will contain the number of items
        self.count=None

    def get_page(self, filters=None, page_size=5):
        """ Access the API and download the next page of data. Sets the count
        attribute to the total number of items available for this query. Changing
        filters or page_size will cause get_page to restart from page 1. Rate
        limited to three request per second. The page_size parameter sets the number
        of data points in one response page (maximum 365); use the default value
        for debugging your structure and filters.

        Returns the list of results of the page, or [] when the last page has
        already been fetched. Raises ValueError for a page_size outside 1..365
        and requests' exceptions for network or HTTP errors. """
        # Check page size is within range
        if page_size>365:
            raise ValueError("Max supported page size is 365")
        if page_size<1:
            raise ValueError("Min supported page size is 1")
        if filters is None:
            filters={}
        # restart from first page if page or filters have changed
        if filters!=self._filters or page_size!=self._page_size:
            # keep a copy: later in-place edits by the caller must not go unnoticed
            self._filters=dict(filters)
            self._page_size=page_size
            self._next_url=self._start_url
        # signal the end of data condition
        if self._next_url is None:
            return [] # we already fetched the last page
        # simple rate limiting to avoid bans
        deltat=time.time()-APIwrapper._last_access
        if deltat<APIwrapper._min_interval:
            time.sleep(APIwrapper._min_interval-deltat)
        # record the moment the request is really sent, i.e. after the sleep
        APIwrapper._last_access=time.time()
        # build parameter dictionary by removing all the None
        # values from filters and adding page_size
        parameters={x: y for x, y in filters.items() if y is not None}
        parameters['page_size']=page_size
        # the page parameter is already included in _next_url.
        # This is the API access. Response is a dictionary with various keys.
        response=requests.get(self._next_url, params=parameters,
                              timeout=APIwrapper._timeout)
        # fail loudly on 4xx/5xx rather than on a missing key further down
        response.raise_for_status()
        # the .json() method decodes the response into Python object (dictionaries,
        # lists; 'null' values are translated as None).
        payload=response.json()
        # update url so we'll fetch the next page
        self._next_url=payload['next']
        self.count=payload['count']
        # data are in the nested 'results' list
        return payload['results']

    def get_all_pages(self, filters=None, page_size=365):
        """ Access the API and download all available data pages of data. Sets the count
        attribute to the total number of items available for this query. API access rate
        limited to three request per second. The page_size parameter sets the number
        of data points in one response page (maximum 365), and controls the trade-off
        between time to load a page and number of pages; the default should work well
        in most cases. The number of items returned should in any case be equal to
        the count attribute. Always starts from the first page. """
        # forget any query in progress so that get_page restarts from page 1
        self._filters=None
        data=[] # build up all data here
        while True:
            # use get_page to do the job, including the pacing
            next_page=self.get_page(filters, page_size)
            if next_page==[]:
                break # we are done
            data.extend(next_page)
        return data

In [8]:
# Path parameters identifying the metric to query. The two geography entries
# are left unset here; update_geo / update_age supply them for each request.
structure = dict(
    theme="infectious_disease",
    sub_theme="vaccine_preventable",
    topic="Measles",
    geography_type=None,
    geography=None,
    metric="measles_cases_casesByOnsetWeek",
)

# Query filters. APIwrapper.get_page drops the entries that are None before
# sending the request, so only the year is actually filtered on.
filters = dict(
    stratum=None,
    age=None,
    sex=None,
    year=2024,
    month=None,
    epiweek=None,
    date=None,
    in_reporting_delay_period=None,
)

In [9]:
def update_geo():
    """Requests and returns the new regional data from the API based on a pre-determined list of locations.

    Returns a dictionary mapping each location name ("London", "West Midlands",
    "Yorkshire and Humber") to the list of records returned by
    APIwrapper.get_all_pages for that location."""

    location_list = ["London", "West Midlands", "Yorkshire and Humber"]

    data = {}

    # The for loop makes a separate request for each location. The path
    # parameters are built on a copy, so the shared module-level `structure`
    # dictionary is left untouched.
    for location in location_list:
        region_structure = {**structure,
                            "geography_type": "UKHSA Region",
                            "geography": location}
        api = APIwrapper(**region_structure)
        data[location] = api.get_all_pages(filters)

    return data

In [10]:
def update_age():
    """Requests and returns the new age data from the API.

    The query covers England as a whole ("Nation" geography type). Returns the
    flat list of records given by APIwrapper.get_all_pages; grouping by age is
    left to wrangle_age_data."""

    # Build the path parameters on a copy, so the shared module-level
    # `structure` dictionary is left untouched.
    nation_structure = {**structure,
                        "geography_type": "Nation",
                        "geography": "England"}
    api = APIwrapper(**nation_structure)
    return api.get_all_pages(filters)

In [11]:
def update_all():
    """Runs both update functions (regional first, then age) and returns their
    results in one dictionary, under the keys 'region' and 'age'."""
    return {
        'region': update_geo(),
        'age': update_age(),
    }

In [12]:
# Data Wrangling

In [13]:
def fillDataDate(storage, dataset, key_name):
    """Merges one dataset into a date-indexed nested dictionary, in place.

    For every record of `dataset`, the record's 'metric_value' is stored as
    storage[<record's 'date'>][key_name]. The inner dictionary of a date is
    created the first time that date is met; a value already stored for the
    same date and key name is overwritten. Nothing is returned."""
    for record in dataset:
        when, amount = record['date'], record['metric_value']
        per_date = storage.setdefault(when, {})
        per_date[key_name] = amount

In [14]:
# parse_date helper (originally copied verbatim; only reformatted here)
def parse_date(datestring):
    """ Turn a 'YYYY-MM-DD' date string into a pandas datetime object """
    iso_day_format = "%Y-%m-%d"
    return pd.to_datetime(datestring, format=iso_day_format)

In [15]:
def wrangle_region_data(rawdata):
    """ Parameters: rawdata - data from json file or API call. Returns a dataframe.

    rawdata["region"] maps a region name to its list of records, each holding
    a 'date' ('YYYY-MM-DD' string) and a 'metric_value'. The returned dataframe
    has one float column per region ('London', 'Yorkshire & Humber',
    'West Midlands') and a datetime index with one row every 7 days from the
    first to the last date found in the data, plus a row for any date that
    does not fall on that grid. Missing values - including a region absent
    from rawdata and null metric values - are reported as 0.0. With no data at
    all, an empty dataframe with the same columns is returned. """

    # Dataframe column label -> region name used in the raw data
    locations = {'London': 'London',
                 'Yorkshire & Humber': 'Yorkshire and Humber',
                 'West Midlands': 'West Midlands'
                }
    columns = list(locations)

    # Collecting the values by date: {date string: {region name: value}}
    data = {}
    for location_name in locations.values():
        fillDataDate(data, rawdata["region"].get(location_name, []), location_name)

    if not data:
        return pd.DataFrame(0.0, index=pd.DatetimeIndex([]), columns=columns)

    # Weekly grid anchored on the first date actually present, so that it
    # lines up with the data whatever weekday the data is reported on.
    timestamps = [parse_date(date) for date in data]
    weekly = pd.date_range(min(timestamps), max(timestamps), freq='7D')
    # Adding the data dates themselves guarantees that every value has a row
    index = pd.DatetimeIndex(sorted(set(weekly).union(timestamps)))

    # Creating the frame already filled with float zeros: the "holes" left by
    # missing dates need no later filling and the columns stay numeric.
    locationseriesdf = pd.DataFrame(0.0, index=index, columns=columns)

    # Filling in the data series
    for date, entry in data.items():
        pd_date = parse_date(date)
        for column, location_name in locations.items():
            value = entry.get(location_name)
            if value is not None:
                locationseriesdf.loc[pd_date, column] = float(value)

    return locationseriesdf


# Initial regional dataframe, built from the data loaded from the JSON files
location_df = wrangle_region_data(jsondata)

In [16]:
def wrangle_age_data(rawdata):
    """ Parameters: rawdata - data from json file or API call. Returns a dataframe.

    rawdata['age'] is a flat list of records, each holding an 'age' group, a
    'date' ('YYYY-MM-DD' string) and a 'metric_value'. The returned dataframe
    has one float column per age group ('00-01', '01-04', '05-10', '11-14',
    '15-24', '25-34', '35+') and a datetime index with one row every 7 days
    from the first to the last date found in the data, plus a row for any date
    that does not fall on that grid. Records of any other age group are
    ignored. Missing values - including an age group with no record at all and
    null metric values - are reported as 0.0. With no data at all, an empty
    dataframe with the same columns is returned. """

    columns = ['00-01', '01-04', '05-10', '11-14', '15-24', '25-34', '35+']

    # Pre-processing the data: organising it by age.
    records_by_age = {}
    for item in rawdata['age']:
        records_by_age.setdefault(item['age'], []).append(item)

    # Collecting the values by date: {date string: {age group: value}}
    data = {}
    for age_name in columns:
        fillDataDate(data, records_by_age.get(age_name, []), age_name)

    if not data:
        return pd.DataFrame(0.0, index=pd.DatetimeIndex([]), columns=columns)

    # Weekly grid anchored on the first date actually present, so that it
    # lines up with the data whatever weekday the data is reported on.
    timestamps = [parse_date(date) for date in data]
    weekly = pd.date_range(min(timestamps), max(timestamps), freq='7D')
    # Adding the data dates themselves guarantees that every value has a row
    index = pd.DatetimeIndex(sorted(set(weekly).union(timestamps)))

    # Creating the frame already filled with float zeros: the "holes" left by
    # missing dates need no later filling and the columns stay numeric.
    ageseriesdf = pd.DataFrame(0.0, index=index, columns=columns)

    # Filling in the data series
    for date, entry in data.items():
        pd_date = parse_date(date)
        for age_name in columns:
            value = entry.get(age_name)
            if value is not None:
                ageseriesdf.loc[pd_date, age_name] = float(value)

    return ageseriesdf


# Initial age dataframe, built from the data loaded from the JSON files
age_df = wrangle_age_data(jsondata)

## Dashboard Description:

##### This dashboard provides an interactive view of measles cases in England throughout 2024. Users can 
##### explore the monthly age distribution of cases and observe case trends across different regions. The
##### data is sourced from the UK Health Security Agency (UKHSA): https://ukhsa-dashboard.data.gov.uk/

## Download Current Data

In [17]:
def access_api():
    """ Downloads the current data from the UKHSA API by delegating to update_all.
    The result is a like-for-like replacement for the "canned" data loaded from the JSON files. """
    return update_all()

In [18]:
def api_button_callback(button):
    """ Button callback - it must take the button as its parameter (unused in this case).
    Accesses API, wrangles data, updates the global dataframes location_df and
    age_df used for plotting, then redraws the graphs.

    On success the button icon becomes 'check'. On any failure the button icon
    becomes 'unlink', the button is disabled and the error message is printed;
    in that case neither global dataframe is modified. """
    global location_df
    global age_df

    # Error handling implementation
    try:
        # Request data
        apidata = access_api()

        # Wrangling both datasets before touching the globals: if either step
        # fails, the graphs keep a consistent pair of dataframes.
        new_location_df = wrangle_region_data(apidata)
        new_age_df = wrangle_age_data(apidata)

        # Overwriting the data
        location_df, age_df = new_location_df, new_age_df

        # Updating the graph
        refresh_graph()
        apibutton.icon = 'check'

    # If the request fails, then disable button and display error message
    except Exception as e:
        apibutton.icon = 'unlink'
        apibutton.disabled = True
        print(f'Could not refresh the data. Error message: {e}')

# Green "Refresh Data" button with a sync icon
apibutton = wdg.Button(description='Refresh Data',
                       tooltip='Refresh Data',
                       icon='sync',
                       button_style='success',
                       disabled=False)

# Registering the click handler, then displaying the button
apibutton.on_click(api_button_callback)
display(apibutton)

Button(button_style='success', description='Refresh Data', icon='sync', style=ButtonStyle(), tooltip='Refresh …

## Graph 1: Measles Distribution across Various Age Groups in England (2024)
### Explanation:
##### - The X-axis displays the different age groups.
##### - The Y-axis displays the number of measles cases in each age group for a given month. 
##### - Each bar represents the total number of cases in that age group.
### Functionality:
##### - The dropdown allows users to select which month's data the chart displays.
##### - Months are arranged in their natural order: 1 = January through 12 = December.
##### - The reset button below the bar chart restores the view to display the data for January.

In [19]:
def refresh_graph():
    """Rerenders the graphs, using their current state. E.g.: if bar chart displays data for February 2024, it will continue doing so, but with fresh data.

    Each graph is redrawn inside its own output widget (age_chart and
    line_graph), replacing the figure shown there. Drawing directly instead
    would send the new figures to the output of whatever triggered the refresh
    and leave the displayed graphs stale."""
    with age_chart:
        # wait=True: keep the old figure until the new one is ready (no flicker)
        clear_output(wait=True)
        draw_age_chart(month.value)
    with line_graph:
        clear_output(wait=True)
        draw_line_graph(series.value, scale.value)

In [20]:
# Month names, indexed by (month number - 1); used for the chart title
months = [
    "January", "February", "March", "April", "May", "June",
    "July", "August", "September", "October", "November", "December"
]

# Month selector. Note: unlike Select, Dropdown has no `rows` option.
month = wdg.Dropdown(
    options = list(range(1, 13)), # Option to choose which month
    value = 1, # Initial value: January
    description = 'Month',
    disabled = False
)

def draw_age_chart(graphmonth):
    """Draws a bar chart of the measles cases per age group for one month.

    Parameters: graphmonth - month number, 1 = January through 12 = December.
    Reads the global dataframe age_df: the rows whose index falls in the
    requested month are summed per column (age group) and plotted as bars."""
    monthdf = age_df[age_df.index.month == graphmonth]
    totals = monthdf.sum(axis = 0) # one total per column (age group)

    # Plotting the chart
    ax = totals.plot(kind = 'bar', stacked = False, cmap = 'tab20')
    # The title follows the month actually plotted (the parameter), not the widget state
    ax.set_title(f'Distribution of Measles Cases in England ({months[graphmonth-1]}, 2024)')
    ax.set_xlabel('Age group')
    ax.set_ylabel('Number of cases')
    plt.show()

# Redraws the chart inside the age_chart output every time the month changes
age_chart = wdg.interactive_output(draw_age_chart, {'graphmonth': month})

display(month, age_chart)

Dropdown(description='Month', options=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), value=1)

Output()

In [21]:
def reset_age_chart():
    """Resets the chart to its default view (January). Note: it does not refresh the data. This functionality was implemented out of curiosity.

    The chart is redrawn inside its own output widget (age_chart), replacing
    the figure shown there, so the reset is visible even when the month was
    already set to January and no widget change is triggered."""
    month.value = 1
    with age_chart:
        clear_output(wait=True)
        draw_age_chart(month.value)

In [22]:
def resetButton1Callback(button):
    """Click handler of the age chart's reset button: resets the chart view. The button parameter is unused."""
    reset_age_chart()

In [23]:
# Green "Reset View" button for the age chart
resetButton1 = wdg.Button(description='Reset View',
                          tooltip="Reset age chart",
                          icon='sync',
                          button_style='success',
                          disabled=False)

# Registering the click handler, then displaying the button
resetButton1.on_click(resetButton1Callback)
display(resetButton1)

Button(button_style='success', description='Reset View', icon='sync', style=ButtonStyle(), tooltip='Reset age …

In [24]:
# Same steps are repeated for Graph 2

## Graph 2: Number of Measles Cases in 2024 across Three Regions in England 
### Explanation:
##### - The X-axis displays the timeline from January to December 2024.
##### - The Y-axis displays the number of cases reported each week.
##### - The legend shows the available locations (London, West Midlands, Yorkshire and Humber).
##### - The plot itself shows how the number of reported cases changes across the year for the chosen region.
### Functionality:
##### - The menu allows users to select which regional data to display.
##### - The scale selector allows users to display data either on a 'linear' scale to see absolute changes or 'log' scale to see relative changes.
##### - The reset button below the line graph restores the default view: data for all regions on a 'linear' scale.

In [25]:
# Region selector: all three regions are selected to begin with
series = wdg.SelectMultiple(
    description='Stats:',
    options=['London', 'Yorkshire & Humber', 'West Midlands'],
    value=['London', 'Yorkshire & Humber', 'West Midlands'],
    rows=3,
    disabled=False,
)

# Choice between a linear and a logarithmic vertical axis
scale = wdg.RadioButtons(
    description='Scale:',
    options=['linear', 'log'],
    layout={'width': 'max-content'},
    disabled=False,
)

# Both controls side by side
controls = wdg.HBox([series, scale])

def draw_line_graph(gcols, gscale):
    """Plots the selected columns of the global location_df as lines.

    Parameters: gcols - the selected column names; gscale - 'linear' for a
    linear vertical axis, anything else for a logarithmic one. With no column
    selected, a short hint is printed instead of a graph."""
    if len(gcols) == 0:
        print("Click to select data for graph")
        print("(CTRL-Click to select more than one category)")
        return
    use_log_axis = gscale != 'linear'
    selected = location_df[list(gcols)]
    ax = selected.plot(logy=use_log_axis)
    ax.set_title('Measles Cases by Region (2024)')
    plt.show()

# Redraws the graph inside the line_graph output whenever a control changes
line_graph = wdg.interactive_output(draw_line_graph, {'gcols': series, 'gscale': scale})

display(controls, line_graph)

HBox(children=(SelectMultiple(description='Stats:', index=(0, 1, 2), options=('London', 'Yorkshire & Humber', …

Output()

In [26]:
def reset_loc_graph():
    """Resets the graph to its default view (all regions, linear scale). Note: it does not refresh the data. This functionality was implemented out of curiosity.

    The graph is redrawn inside its own output widget (line_graph), replacing
    the figure shown there, so the reset is visible even when the controls
    were already at their default values and no widget change is triggered."""
    series.value = ['London', 'Yorkshire & Humber', 'West Midlands']
    scale.value = 'linear'
    with line_graph:
        clear_output(wait=True)
        draw_line_graph(series.value, scale.value)

In [27]:
def resetButton2Callback(button):
    """Click handler of the location graph's reset button: resets the graph view. The button parameter is unused."""
    reset_loc_graph()

In [28]:
# Green "Reset View" button for the location graph
resetButton2 = wdg.Button(description='Reset View',
                          tooltip="Reset location graph",
                          icon='sync',
                          button_style='success',
                          disabled=False)

# Registering the click handler, then displaying the button
resetButton2.on_click(resetButton2Callback)
display(resetButton2)

Button(button_style='success', description='Reset View', icon='sync', style=ButtonStyle(), tooltip='Reset loca…