# Imports

In [13]:
import json
import requests
import numpy as np
import pandas as pd
from typing import Iterable, Dict, Union, List

# Activity 1

## 1.1 Sub-activity: Open Data COVID-19 API

### Task 1

In [14]:
# The function below is adapted from the example in the official developers guide:
# https://coronavirus.data.gov.uk/details/developers-guide/main-api

StructureType = Dict[str, Union[dict, str]]
# Filters may be given as a mapping such as {"areaType": "nation"} or as an
# iterable of ready-made "metric=value" strings.
FiltersType = Union[Dict[str, str], Iterable[str]]


def _format_filters(filters: FiltersType) -> str:
    """Return *filters* as a single ``;``-separated ``metric=value`` string."""
    if isinstance(filters, str):
        # Already formatted; joining it would split it into single characters.
        return filters
    if isinstance(filters, dict):
        return ";".join(f"{metric}={value}" for metric, value in filters.items())
    return ";".join(filters)


def get_API_data(filters: FiltersType, structure: StructureType, timeout: float = 10):
    """Download every page of results for one query to the COVID-19 data API.

    Args:
        filters: Either a mapping of metric name to value (for example
            ``{"areaType": "nation"}``) or an iterable of ``"metric=value"``
            strings. All filters are sent, joined with ``;``.
        structure: Mapping describing the fields wanted in each record; it is
            sent JSON-encoded as the ``structure`` query parameter.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list with the records from the ``"data"`` array of every page, in
        the order the pages were received. An empty (falsy)
        ``"vaccination_age"`` value is replaced with ``numpy.nan`` so that
        pandas treats it as missing data.

    Raises:
        requests.exceptions.HTTPError: If a request returns a 4xx/5xx status.
        requests.exceptions.RequestException: For connection problems and
            timeouts raised by ``requests``.
    """
    # API URL
    endpoint = "https://api.coronavirus.data.gov.uk/v1/data"

    api_params = {
        "filters": _format_filters(filters),
        "structure": json.dumps(structure),
    }

    data: List[StructureType] = []
    page_number = 1

    while True:
        # Adding page number to query params
        api_params["page"] = page_number

        response = requests.get(endpoint, params=api_params, timeout=timeout)

        # Fail loudly on HTTP errors instead of hitting a confusing KeyError
        # or JSON decoding error further down.
        response.raise_for_status()

        # A 204 (No Content) response has no JSON body to parse, so there is
        # nothing more to collect.
        if response.status_code == 204:
            break

        current_data = response.json()
        page_data: List[StructureType] = current_data["data"]

        # If there is no data in vaccination_age, set the value to NaN for
        # Pandas' DataFrame. Records without that key (the requested structure
        # does not have to define it) are left untouched.
        for record in page_data:
            if "vaccination_age" in record and not record["vaccination_age"]:
                record["vaccination_age"] = np.nan

        data.extend(page_data)

        # The "next" attribute in "pagination" will be `None`
        # when we reach the end.
        if current_data["pagination"]["next"] is None:
            break

        page_number += 1

    return data

### Task 2

In [15]:
# Area-type filters for the two queries.
nation_filters = dict(areaType="nation")
region_filters = dict(areaType="region")

# Fields requested for every record; sent to the API as the "structure" parameter.
structure = dict(
    date="date",
    name="areaName",
    daily_cases="newCasesBySpecimenDate",
    cumulative_cases="cumCasesBySpecimenDate",
    daily_deaths="newDeaths28DaysByPublishDate",
    cumulative_deaths="cumDeaths28DaysByPublishDate",
    cumulative_vaccinated="cumPeopleVaccinatedCompleteByVaccinationDate",
    vaccination_age="vaccinationsAgeDemographics",
)

# Download the national data first, then the regional data.
results_json_national, results_json_regional = (
    get_API_data(area_filters, structure)
    for area_filters in (nation_filters, region_filters)
)

## 1.2 Sub-activity: Shaping the COVID data into different dataframes

### Task 3

In [16]:
# One combined list: all national records followed by all regional records.
covid_data_list = [*results_json_national, *results_json_regional]

### Task 4

In [17]:
# Build the DataFrame from the combined list of records.
covid_data = pd.DataFrame(data=covid_data_list)

### Task 5

In [18]:
# Drop, in place, every row whose "name" is "England" (selected by index label).
covid_data.drop(
    index=covid_data.index[covid_data["name"] == "England"],
    inplace=True,
)

### Task 6

In [19]:
# Rename the "name" column to "area", modifying the DataFrame in place.
covid_data.rename({"name": "area"}, axis="columns", inplace=True)

### Task 7

In [20]:
# Convert the "date" column to pandas datetime values.
covid_data["date"] = covid_data["date"].pipe(pd.to_datetime)

### Task 8

In [21]:
# vaccination_age comes back as an empty list when no data is available.
# Those empty lists were replaced with NumPy's np.nan while downloading,
# so they are counted as missing data in the DataFrame here.
# Count every missing cell across the whole DataFrame.
nan = covid_data.isna().values.sum()
print("The amount of missing data is {}.".format(nan))

The amount of missing data is 8898.


### Task 9

In [22]:
# Print the value stored in `nan` (the missing-value count of covid_data).
print(nan)

8898


### Task 10

### Task 11

### Task 12

### Task 13

### Task 14

## 1.3 Sub-activity: Aggregating, plotting, and analysing

### Task 15

### Task 16

### Task 17

### Task 18

### Task 19

### Task 20

### Task 21

### Task 22

### Task 23

### Task 24

### Task 25

### Task 26

### Task 27

### Task 28