# Imports

In [36]:
import json
import requests
import numpy as np
import pandas as pd
from typing import Iterable, Dict, Union, List

# Activity 1

## 1.1 Sub-activity: Open Data COVID-19 API

### Task 1

In [37]:
# Adapted from the UK Coronavirus Dashboard developers' guide:
# https://coronavirus.data.gov.uk/details/developers-guide/main-api

StructureType = Dict[str, Union[dict, str]]
# Filters are accepted either as a mapping ({"areaType": "nation"}) or as
# ready-made "metric=value" strings.
FiltersType = Union[Dict[str, str], Iterable[str]]


def get_API_data(filters: FiltersType, structure: StructureType) -> List[dict]:
    """Download every page of results for one query against the COVID-19 API.

    Parameters
    ----------
    filters
        Either a dict such as ``{"areaType": "nation"}`` (each item becomes a
        ``key=value`` filter), an iterable of ``"key=value"`` strings, or a
        single pre-built filter string. Multiple filters are joined with ``;``.
    structure
        Mapping of output field name -> API metric name; it is sent to the API
        JSON-encoded.

    Returns
    -------
    list of dict
        The concatenated ``"data"`` records of all pages, in the order the
        pages were received. An empty list is returned when the API answers
        with "204 No Content".

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status code.

    Notes
    -----
    When a record contains a ``"vaccination_age"`` field whose value is empty,
    the value is replaced by ``np.nan`` so that pandas counts it as missing.
    Records without that field are left untouched.
    """
    # API URL
    endpoint = "https://api.coronavirus.data.gov.uk/v1/data"
    http_no_content = 204

    # Build the ";"-separated filter expression expected by the API.
    if isinstance(filters, dict):
        filter_query = ";".join(
            f"{metric}={value}" for metric, value in filters.items()
        )
    elif isinstance(filters, str):
        # A plain string is already a complete filter expression; joining it
        # would split it into single characters.
        filter_query = filters
    else:
        filter_query = ";".join(filters)

    api_params = {
        "filters": filter_query,
        "structure": json.dumps(structure),
    }

    data: List[dict] = []
    page_number = 1

    while True:
        # Adding page number to query params
        api_params["page"] = page_number

        # A timeout stops the notebook from hanging forever on a stalled
        # connection.
        response = requests.get(endpoint, params=api_params, timeout=30)

        # 204 carries no body to parse, so stop paging here.
        if response.status_code == http_no_content:
            break
        # Fail loudly on 4xx/5xx instead of crashing later on a missing key.
        response.raise_for_status()

        current_data = response.json()
        page_data = current_data["data"]

        # If there is no data in vaccination_age, set the value to NaN for
        # pandas. The membership test keeps the function usable with
        # structures that do not request this field.
        for record in page_data:
            if "vaccination_age" in record and not record["vaccination_age"]:
                record["vaccination_age"] = np.nan

        data.extend(page_data)

        # The "next" attribute in "pagination" will be `None`
        # when we reach the end.
        if current_data["pagination"]["next"] is None:
            break

        page_number += 1

    return data

### Task 2

In [38]:
# One filter set per geography level requested from the API.
nation_filters = dict(areaType="nation")
region_filters = dict(areaType="region")

# Keys are the field names wanted in the returned records; values are the
# metric names sent to the API.
structure = dict(
    date="date",
    name="areaName",
    daily_cases="newCasesBySpecimenDate",
    cumulative_cases="cumCasesBySpecimenDate",
    daily_deaths="newDeaths28DaysByPublishDate",
    cumulative_deaths="cumDeaths28DaysByPublishDate",
    cumulative_vaccinated="cumPeopleVaccinatedCompleteByVaccinationDate",
    vaccination_age="vaccinationsAgeDemographics",
)

# Same structure for both queries: national first, then regional.
results_json_national, results_json_regional = (
    get_API_data(area_filters, structure)
    for area_filters in (nation_filters, region_filters)
)

## 1.2 Sub-activity: Shaping the COVID data into different dataframes

### Task 3

In [39]:
# National records first, followed by the regional ones, in a single list.
covid_data_list = [*results_json_national, *results_json_regional]

### Task 4

In [40]:
# Build the working dataframe from the combined list of API records.
covid_data = pd.DataFrame(data=covid_data_list)

### Task 5

In [41]:
# Keep every row whose name is not "England", then renumber the index from 0.
is_england = covid_data["name"] == "England"
covid_data = covid_data[~is_england].reset_index(drop=True)

### Task 6

In [42]:
# The "name" column is called "area" from here on.
covid_data = covid_data.rename(columns={"name": "area"})

### Task 7

In [43]:
# Convert the "date" column to pandas datetimes.
covid_data = covid_data.assign(date=pd.to_datetime(covid_data["date"]))

### Task 8

In [44]:
# The API returns an empty list for vaccination_age when no data is available.
# get_API_data replaces such empty values with NumPy's np.nan, so pandas
# counts them as missing data alongside every other NaN in the dataframe.
missing_per_column = covid_data.isna().sum()
nan = missing_per_column.sum()
print("The amount of missing data is {}.".format(nan))

The amount of missing data is 13825.


### Task 9

In [52]:
# Display the London rows with every missing value filled from the next
# non-missing value further down the same column (back-fill). Gaps with no
# value below them stay NaN.
# `DataFrame.bfill()` replaces the deprecated `fillna(method='bfill')` spelling.
# NOTE(review): the filled frame is only displayed; covid_data itself is not
# modified by this cell.
covid_data[covid_data["area"] == "London"].bfill()



Unnamed: 0,date,area,daily_cases,cumulative_cases,daily_deaths,cumulative_deaths,cumulative_vaccinated,vaccination_age
8608,2022-12-01,London,248.0,3101083.0,41.0,21822.0,6090340.0,"[{'age': '05_11', 'VaccineRegisterPopulationBy..."
8609,2022-11-30,London,248.0,3101083.0,0.0,21781.0,6090340.0,"[{'age': '05_11', 'VaccineRegisterPopulationBy..."
8610,2022-11-29,London,453.0,3100835.0,0.0,21781.0,6090184.0,"[{'age': '05_11', 'VaccineRegisterPopulationBy..."
8611,2022-11-28,London,469.0,3100382.0,0.0,21781.0,6090018.0,"[{'age': '05_11', 'VaccineRegisterPopulationBy..."
8612,2022-11-27,London,377.0,3099913.0,0.0,21781.0,6089862.0,"[{'age': '05_11', 'VaccineRegisterPopulationBy..."
...,...,...,...,...,...,...,...,...
9636,2020-02-07,London,0.0,1.0,,,,
9637,2020-02-06,London,0.0,1.0,,,,
9638,2020-02-05,London,0.0,1.0,,,,
9639,2020-02-04,London,0.0,1.0,,,,


### Task 10

### Task 11

### Task 12

### Task 13

### Task 14

## 1.3 Sub-activity: Aggregating, plotting, and analysing

### Task 15

### Task 16

### Task 17

### Task 18

### Task 19

### Task 20

### Task 21

### Task 22

### Task 23

### Task 24

### Task 25

### Task 26

### Task 27

### Task 28