In [3]:
#Loading all necessary Libraries for analysis

import pprint as pp
import json
import json
import os
import pprint
import plotly.express as px
from itertools import chain
import pandas as pd
import requests
import pprint
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from datetime import datetime


In [4]:
# starting code that loads data, run this cell, but no need to understand it in depth
# this function just loads the data from files
def load_json_file_named(file_name):
    """Load and return the parsed contents of a JSON file.

    Args:
        file_name: Path of the JSON file to read, e.g. "boards.json" or
            "data/boards.json" depending on where your files live.

    Returns:
        The decoded JSON content, or an empty list when the file cannot be
        opened or read (the error is printed rather than raised).
    """
    # Set up the fallback result and the path before the try block so both
    # are always defined when the except handler and the return statement run.
    loaded_data = []
    file_location = f"{file_name}"
    try:
        # JSON text is UTF-8; do not depend on the platform's default encoding.
        with open(file_location, 'r', encoding='utf-8') as file:
            loaded_data = json.load(file)
    except OSError as e:
        print(f"Error. Does the file exist in this folder? {file_location}\n\n {e}")
    return loaded_data

Loading all JSON datasets (we restrict our analysis to the first two datasets)

In [5]:
# Board-level records; each one has 'geographic', 'budget_millions', 'name' and 'people' entries
boards_info = load_json_file_named('nhs_scotland_boards.json')
# Daily COVID records; each one has a 'date' and a per-centre 'locations' mapping
covid_days = load_json_file_named('covid_records_scotland.json')
# The analysis is restricted to the two datasets above; the next two are loaded
# but presumably not used further -- TODO confirm and drop if so
cancer_waiting_times = load_json_file_named('cancer_waiting_times.json')
boards_code_lookup = load_json_file_named('boards_code_lookup.json')

The function below returns either the keys or the values of a dictionary as a list, so that the dictionaries nested inside each record can be flattened into plain lists.

In [6]:
def extract_all_values(dict_temp, dimension):
    """Return the keys or the values of a dictionary as a list.

    Args:
        dict_temp: Dictionary to read from.
        dimension: "key" to get the keys; any other value (callers pass
            "values") returns the values.

    Returns:
        A new list, in the dictionary's iteration order.
    """
    # Build only the list that was asked for instead of both.
    if dimension == "key":
        return list(dict_temp.keys())
    return list(dict_temp.values())

Using the function defined above, we flatten the nested dictionaries of each board into plain lists.

In [7]:
# Budget over 3 years for all 14 NHS zones.
# Budget figures flattened to one entry per (board, year) pair, board by board.
budget_count = list(chain.from_iterable(
    extract_all_values(board['budget_millions'], "values") for board in boards_info
))

# Name of every board, in file order
regions = [board['name'] for board in boards_info]
# Year label for every entry of budget_count (same order, same length)
year_mapping = list(chain.from_iterable(
    extract_all_values(board['budget_millions'], "key") for board in boards_info
))

# Number of staff for every board. Looked up by key rather than by position,
# so the result does not depend on the order of the keys inside 'people'.
staff_count = [board['people']['staff'] for board in boards_info]

# Area in km2 for every board, again looked up by key
area_in_km2 = [board['geographic']['area_km_2'] for board in boards_info]

# Staff per km2 for every board, rounded to two decimals
staff_per_area = [round(staff/area,2) for staff,area in zip(staff_count, area_in_km2)]

To visualise how the budget is distributed across regions and over the years, a sunburst chart is used. It lets us see the hierarchical split of each region's budget by year.

In [18]:
# budget_count and year_mapping hold one entry per (board, year) pair, so each
# board name is repeated once per budget year of that board. Deriving the
# repeat count from the data keeps the three columns aligned instead of
# assuming every board reports exactly three years.
data_sunburst = dict(
    budget_count = budget_count,
    regions = [board['name'] for board in boards_info for _ in board['budget_millions']],
    years = year_mapping
)
fig = px.sunburst(
    data_sunburst,
    path=['regions', 'years'],
    values='budget_count',
    color='budget_count',
    title="SunBurst Chart of Accumulated Budgets over years for different NHS zones",
)
fig.show()

Up next, we wish to represent the staff per km2 of area for the different zones by plotting a density graph onto a map. The relevant geographical data are obtained by geocoding the names of the NHS zones with a third-party API (Geoapify). This module performs the geocoding and returns a list of latitude and longitude values.

Now, for the covid days dataset, we create a function that plots time-series graphs for each of the NHS centers. Four graphs are plotted for each center: the number of new positive cases, the number of new deaths, the number of first infections and, lastly, the number of reinfections.

In [31]:
from datetime import datetime
from plotly.subplots import make_subplots
import plotly.graph_objects as go

def plot_time_series(input_dictionary, name_nhs_center,budget_count,regions,year_mapping):
    """Plot four stacked time-series charts for one NHS center.

    The charts show, from top to bottom, new positive cases, new deaths,
    first infections and reinfections over time.

    Args:
        input_dictionary: Iterable of daily records; each record has a 'date'
            (parsed as YYYYMMDD) and a 'locations' mapping keyed by center name.
        name_nhs_center: Key of the center to plot inside each 'locations' mapping.
        budget_count: Not used by the plot; kept so existing callers keep working.
        regions: Not used by the plot; kept so existing callers keep working.
        year_mapping: Not used by the plot; kept so existing callers keep working.

    Raises:
        KeyError: If a daily record has no entry for ``name_nhs_center``, or a
            non-empty entry lacks one of the four plotted fields.
    """
    # (field in the daily record, legend label), one pair per subplot row
    metrics = [
        ('new_positive', "Number of New Positive Cases"),
        ('new_deaths', "Number Of New Deaths"),
        ('first_infections', "Number of First Infections Cases"),
        ('reinfections', "Number of Reinfections"),
    ]
    series = {field: [] for field, _ in metrics}
    dates = []

    for part_dictionary in input_dictionary:
        # Look the center up once per day instead of once per field
        center_record = part_dictionary['locations'][name_nhs_center]
        for field in series:
            # A day with an empty record for this center is plotted as zero
            series[field].append(center_record[field] if center_record else 0)
        dates.append(datetime.strptime(str(part_dictionary['date']), "%Y%m%d"))

    # One row per metric, so no empty panel is left at the bottom of the figure
    fig = make_subplots(rows=len(metrics), cols=1, vertical_spacing=0.1)
    for row, (field, label) in enumerate(metrics, start=1):
        fig.add_trace(go.Scatter(x=dates, y=series[field], name=label), row=row, col=1)
        fig.update_yaxes(title_text="Number of Cases", row=row, col=1)
        fig.update_xaxes(title_text="Date", row=row, col=1)

    fig.update_layout(height=800, width=1000, title_text=str(name_nhs_center) + ": Time series graphs")
    fig.show()

Below is an interactive dropdown that calls our plot time series function and plots the graphs for the selected center. We provide our covid days JSON as input together with the name of the NHS center, and the graphs are redrawn whenever a different center is chosen.

In [32]:
def update_plot(name_nhs_center):
    """Draw the COVID time-series figure for the given center name."""
    plot_time_series(
        input_dictionary=covid_days,
        name_nhs_center=name_nhs_center,
        budget_count=budget_count,
        regions=regions,
        year_mapping=year_mapping,
    )
# One dropdown entry per center key found in the first daily record
dropdown_values = [center_name for center_name in covid_days[0]['locations']]
interact(update_plot, name_nhs_center=dropdown_values)

interactive(children=(Dropdown(description='name_nhs_center', options=('NHS Ayrshire and Arran', 'NHS Borders'…

<function __main__.update_plot(name_nhs_center)>

In [34]:
# Display the raw board records to inspect their structure
# (each record has 'geographic', 'budget_millions', 'name' and 'people' entries)
boards_info

[{'geographic': {'area_km_2': 2924,
   'areas_covered': 'East Ayrshire, North Ayrshire, South Ayrshire'},
  'budget_millions': {'2020': 762, '2021': 775, '2022': 807},
  'name': 'NHS Ayrshire and Arran',
  'people': {'patients': 369670, 'staff': 9491}},
 {'geographic': {'area_km_2': 4732, 'areas_covered': 'Scottish Borders'},
  'budget_millions': {'2020': 220, '2021': 223, '2022': 235},
  'name': 'NHS Borders',
  'people': {'patients': 115270, 'staff': 2627}},
 {'geographic': {'area_km_2': 6216, 'areas_covered': 'Dumfries and Galloway'},
  'budget_millions': {'2020': 316, '2021': 321, '2022': 334},
  'name': 'NHS Dumfries and Galloway',
  'people': {'patients': 148790, 'staff': 3832}},
 {'geographic': {'area_km_2': 1235, 'areas_covered': 'Fife'},
  'budget_millions': {'2020': 702, '2021': 713, '2022': 749},
  'name': 'NHS Fife',
  'people': {'patients': 371910, 'staff': 7683}},
 {'geographic': {'area_km_2': 2643,
   'areas_covered': 'Clackmannanshire, Falkirk, Stirling'},
  'budget_mil