## Number of weekly confirmed cases of Measles for all ages

In [1]:
from IPython.display import clear_output
import ipywidgets as wdg
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests
import time
import json

In [2]:
%matplotlib inline
# make figures larger
plt.rcParams['figure.dpi'] = 100

In [3]:
# Load JSON files and store the raw data in jsondata
jsondata={}
#three files previously produced to represent the spread of measles per week in three UKHSA areas (london, East Midlands and East of England)
def file_opener():
    with open("East Midlands.json", "rt") as INFILE:
        jsondata['East Midlands']=json.load(INFILE)
    with open("London.json", "rt") as INFILE:
        jsondata['London']=json.load(INFILE)
    with open("East of England.json", "rt") as INFILE:
        jsondata['East of England']=json.load(INFILE) 
    return

file_opener() #here to run the graphs for the first time

In [4]:
'''Wrangling data section'''


def parse_date(datestring):
    """ Convert a date string into a pandas datetime object """
    return pd.to_datetime(datestring, format="%Y-%m-%d")                #Code taken from 2-Visualising_the_data

def wrangle_data(rawdata):
    """ Parameters: rawdata - data from json file or API call. Returns a dataframe.
    Edit to include the code that wrangles the data, creates the dataframe and fills it in. """
    global df
    data={}
    for dataset in rawdata.values():
        for entry in dataset:
            date=entry['date']
            geography=entry['geography']
            value=entry['metric_value']
            if date not in data:
                data[date]={}
            data[date][geography]=value
    dates=list(data.keys())
    dates.sort()
    #creating and sorting a dictionary with the useful information from the data files (taken and adapted from 2-Visualising_the_data)
    
    startdate=parse_date(dates[0])
    enddate=parse_date(dates[-1])
    index=pd.date_range(startdate, enddate, freq='W-MON')
    #converting dates to pandas type so can be used in Dataframe (taken and adapted from 2-Visualising_the_data)
    #data provided about measles provided weekly on for date starting on monday

    geographies=list(jsondata.keys()) #this is a list of my geographies. it is taken from the keys assigned in file_opener. It allows me to iterate for however many APIs I download (tested with 2 and 3)
    
    df=pd.DataFrame(index=index, columns=geographies)
    #making empty dataframe

    for date, entry in data.items():
        pd_date=parse_date(date)
        for column in geographies: 
            location=column               #code adapted so it's working with location
            value= entry.get(location, 0.0)
            df.loc[date, column]=value    #uses loc to assign value the date and value
    df.fillna(0.0, inplace=True)
    df.infer_objects(copy=False) #this was put in from the future warning
    return
    #code taken and adapted from 2-Visualising_the_data

# putting the wrangling code into a function allows you to call it again after refreshing the data through 
# the API. You should call the function directly on the JSON data when the dashboard starts, by including 
# the call in this cell as below:
wrangle_data(jsondata) # here to run the graphs for the first time on load up

  df.fillna(0.0, inplace=True)


In [5]:
'''Downloading the current data'''



#APIwrapper implemented as a class that is called by function access_API. So that when the information is accessed,
#it is immediately wrapped and via the access_API function it is immediately saved (overwrites)
#the JSON file corresponding to each location

#Code left unchanged, taken from 1- Accessing_UKHSA_Data
class APIwrapper:
    # class variables shared among all instances
    _access_point="https://api.ukhsa-dashboard.data.gov.uk"
    _last_access=0.0 # time of last api access

    def __init__(self, theme, sub_theme, topic, geography_type, geography, metric):
        """ Init the APIwrapper object, constructing the endpoint from the structure
        parameters """
        # build the path with all the required structure parameters. You do not need to edit this line,
        # parameters will be replaced by the actual values when you instantiate an object of the class!
        url_path=(f"/themes/{theme}/sub_themes/{sub_theme}/topics/{topic}/geography_types/" +
                    f"{geography_type}/geographies/{geography}/metrics/{metric}")
        # our starting API endpoint
        self._start_url=APIwrapper._access_point+url_path
        self._filters=None
        self._page_size=-1
        # will contain the number of items
        self.count=None

    def get_page(self, filters={}, page_size=5):
        """ Access the API and download the next page of data. Sets the count
        attribute to the total number of items available for this query. Changing
        filters or page_size will cause get_page to restart from page 1. Rate
        limited to three request per second. The page_size parameter sets the number
        of data points in one response page (maximum 365); use the default value 
        for debugging your structure and filters. """
        # Check page size is within range
        if page_size>365:
            raise ValueError("Max supported page size is 365")
        # restart from first page if page or filters have changed
        if filters!=self._filters or page_size!=self._page_size:
            self._filters=filters
            self._page_size=page_size
            self._next_url=self._start_url
        # signal the end of data condition
        if self._next_url==None: 
            return [] # we already fetched the last page
        # simple rate limiting to avoid bans
        curr_time=time.time() # Unix time: number of seconds since the Epoch
        deltat=curr_time-APIwrapper._last_access
        if deltat<0.33: # max 3 requests/second
            time.sleep(0.33-deltat)
        APIwrapper._last_access=curr_time
        # build parameter dictionary by removing all the None
        # values from filters and adding page_size
        parameters={x: y for x, y in filters.items() if y!=None}
        parameters['page_size']=page_size
        # the page parameter is already included in _next_url.
        # This is the API access. Response is a dictionary with various keys.
        # the .json() method decodes the response into Python object (dictionaries,
        # lists; 'null' values are translated as None).
        response = requests.get(self._next_url, params=parameters).json()
        # update url so we'll fetch the next page
        self._next_url=response['next']
        self.count=response['count']
        # data are in the nested 'results' list
        return response['results'] 

    def get_all_pages(self, filters={}, page_size=365):
        """ Access the API and download all available data pages of data. Sets the count
        attribute to the total number of items available for this query. API access rate
        limited to three request per second. The page_size parameter sets the number
        of data points in one response page (maximum 365), and controls the trade-off
        between time to load a page and number of pages; the default should work well 
        in most cases. The number of items returned should in any case be equal to 
        the count attribute. """
        data=[] # build up all data here
        while True:
            # use get_page to do the job, including the pacing
            next_page=self.get_page(filters, page_size)
            if next_page==[]:
                break # we are done
            data.extend(next_page)
        return data





def access_api():
    """ Accesses the UKHSA API. Return data as a like-for-like replacement for the "canned" data loaded from the JSON file. """
                           #Fabrizio Code (from lectures)
    structure={"theme": "infectious_disease", 
               "sub_theme": "vaccine_preventable",
               "topic": "Measles",
               "geography_type": "UKHSA%20Region",
               "metric": "measles_cases_casesByOnsetWeek"}
    #structure put in here for convenience, can be pulled out and changed to be a dictionary that can be changed at peoples whims, but requirement is only one graph.

    #Taken from 1-Accessing_UKHSA_Data, adapted to change structure for geographical location instead of for different metrics
    geographies=list(jsondata.keys()) #this is a list of my geographies. it is taken from the keys assigned in file_opener. It allows me to iterate for however many APIs I download (tested with 2 and 3)
    for geography in geographies:
        structure["geography"]= geography
        api=APIwrapper(**structure)
        cases=api.get_all_pages(page_size=365)                     #pulling page_size 365 to pull all the data faster (not necessary, but just runs faster)
        with open(structure["geography"]+'.json', "wt") as OUTF:
            json.dump(cases, OUTF)                                 #overwrites the JSON file with corresponding name when refreshing



In [6]:
#framework taken from original dashboard and adapted.


def api_button_callback(button):
    with output:
        output.clear_output(wait=True)   #This resets my output so that when my graph refreshes, it doesn't just post a new one
        print("Graph Refreshing")        #Put in my output to allow for people to clearly see when the graph is refreshing. 
    try:
        access_api()
        file_opener()
        df=wrangle_data(jsondata)  
        refresh_graph()
        apibutton.icon = "check"
        #apibutton.disabled=True  -- got rid of this, but left in code. Wanted to be able to continuously refresh the graph if person wanted to.
        with output:
            print('Graphs refreshed')
    except Exception as e:
        with output:
            print("Error:",e)  # Error capturing

#Taken from 3- Adding_interactive_controls. Code worked perfectly so left unchanged
apibutton=wdg.Button(
    description='Refresh data',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    tooltip='Click to download current Public Health England data',
    icon='download' # (FontAwesome names without the `fa-` prefix)
)


#Button is called next to graph, instead of at this box to allow easy access for users.

This graph (Based on UK Government data published by the UK Health Security Agency) shows the number of weekly laboratory confirmed cases of measles, reported to UKHSA, by date of onset of rash (or symptoms), age group, and UKHSA Region from 1 October 2023 onwards.

To interact with this graph you can search through 3 distinct UKHSA areas of the UK to view confirmed measles cases in each area, or click on any combination to show multiple graphs overlayed on each other. 

A refresh button is available, enabling you to update the data directly from the UK Health Security Agency website. If you are highlighting only one graph and refresh, then you will have to swap to another to show the refreshed data.

In [7]:
'''Graph plotting and analysis section'''



def plot_graph(column):                #kept from original dashboard, input had to be a list for multiple plots 
    """ Our sample graph plotting function """
    df[list(column)].plot()
    plt.xlabel("Date")
    plt.ylabel("Number of confirmed measles cases per day")
    plt.show() # important! update won't work properly without this

# a sample widget
column=wdg.SelectMultiple(            #Taken from 3- Adding_interactive_controls and adapted to use the columns of my dataframe as the options and values list
    options=list(df.columns),
    value=list(df.columns),
    rows=len(df.columns),            #Rows = length of the number of columns. i.e in my case 3, but allows for easy access to update with more information. 
    description='Location',
    disabled=False,
)

controls=wdg.HBox([column])


def refresh_graph():
    """ We change the value of the widget in order to force a redraw of the graph;
    this is useful when the data have been updated. This is a bit of a gimmick; it
    needs to be customised for one of your widgets. """
    current=column.value
    if current==column.options[0]:
        other=[column.options[1]]   #Adapted from refresh graph, had to list as a tuple/list
    else:
        other=[column.options[1]]
    column.value=other # forces the redraw
    column.value=current # now we can change it back

In [8]:
apibutton.on_click(api_button_callback) #put here because button is called in this cell,
output = wdg.Output()                   #this is here so that I can print information about the data being refreshed when the button is called
graph=wdg.interactive_output(plot_graph, {'column': column})
display(apibutton,output,column,graph)

Button(description='Refresh data', icon='download', style=ButtonStyle(), tooltip='Click to download current Pu…

Output()

SelectMultiple(description='Location', index=(0, 1, 2), options=('East Midlands', 'London', 'East of England')…

Output()