In [1]:
import requests 
import pandas as pd 
import time
import numpy as np
import os


In [2]:
# HTTP headers sent with every request; the user-agent string is read from
# the ONS-USER-AGENT environment variable (None when it is not set)
headers = {"user-agent": os.getenv("ONS-USER-AGENT")}

# define requests workflow
def requests_get(url):
    """Send a GET request to `url` using the notebook-level `headers`.

    Parameters
    ----------
    url : str
        Address to request.

    Returns
    -------
    requests.Response or None
        The response when the server answers with status 200; ``None`` when
        any other status code is returned or when the request itself fails
        (connection error, timeout, invalid URL, ...). A message is printed
        in both failure cases.
    """
    print(f"requesting {url}")
    try:
        # a timeout keeps a stalled connection from blocking the notebook forever
        response = requests.get(url, headers=headers, timeout=30)
    except requests.exceptions.RequestException as err:
        # only network/HTTP-layer failures are handled here, so that
        # KeyboardInterrupt and programming errors still surface
        print(f"System error with the `requests` module: {err}")
        return None

    if response.status_code != 200:
        print(f"An error occured parsing content in the url: error {response.status_code}")
        return None
    return response
    

## 1. Explore the data set available

In [3]:
# loading the datasets available
url = "https://api.beta.ons.gov.uk/v1/datasets"
# go through the shared helper so the user-agent header and the status check
# are applied; the endpoint returns JSON, so no parser argument is needed
res = requests_get(url)
if res is None:
    raise RuntimeError(f"could not retrieve the dataset catalogue from {url}")

# parse the result as a dataframe
df = pd.json_normalize(res.json()['items'], max_level=10)
df.columns

Index(['contacts', 'description', 'id', 'keywords', 'methodologies',
       'national_statistic', 'next_release', 'related_datasets',
       'release_frequency', 'state', 'title', 'unit_of_measure',
       'links.editions.href', 'links.latest_version.href',
       'links.latest_version.id', 'links.self.href', 'links.taxonomy.href',
       'qmi.href', 'type', 'publications'],
      dtype='object')

In [4]:
# collect the dataset titles as a plain Python list
thematics = df["title"].tolist()
thematics

['Quarterly personal well-being estimates',
 'Personal well-being estimates by local authority',
 'Deaths registered weekly in England and Wales by region',
 'Death registrations and occurrences by local authority and place of death',
 'Death registrations and occurrences by health board and place of death',
 'Deaths registered weekly in England and Wales by age and sex',
 'UK spending on credit and debit cards',
 'UK Business: Activity, Size and Location',
 'Traffic Camera Activity',
 'Trade in goods: country by commodity',
 'Effects of Taxes and Benefits on Household Income',
 'Suicide registrations in England and Wales by local authority',
 'Sexual orientation by English regions and UK countries',
 'Sexual orientation by age and sex',
 'Retail sales index - large and small businesses',
 'Retail sales index - all businesses',
 'Retail sales index',
 'Annual GDP for England, Wales and the English regions',
 'Quarterly GDP for England, Wales and the English regions',
 'Local authority 

## 2. GDP Data Set
In this section, we try to look into the **Regional GDP** dataset.

While the following code attempts to retrieve the full dataset using the API developed by the ONS, this is a laborious process because of the API's limitations. If your intention is to acquire the full dataset, it is recommended that you download the `.csv` file directly.
For this particular dataset, the link to the `.csv` file is enclosed in the API response of:
https://api.beta.ons.gov.uk/v1/datasets/regional-gdp-by-year/editions/time-series/versions/6.
Under the key `download`, it lists three files that are available for download:
* Observation dataset in `.csv` format
* Observation dataset in `.xls` format
* Meta data in `.csv` format

### 2.1 Exploration of Structure
Before writing a valid query string, we need to understand the structure of the API query.

In [306]:
# keep only the datasets whose title mentions GDP, renumbered from 0
is_gdp_title = df["title"].str.contains("GDP")
gdp = df.loc[is_gdp_title].reset_index(drop=True)
gdp["links.editions.href"].values

array(['https://api.beta.ons.gov.uk/v1/datasets/regional-gdp-by-year/editions',
       'https://api.beta.ons.gov.uk/v1/datasets/regional-gdp-by-quarter/editions'],
      dtype=object)

In [307]:
# request the annual gdp meta data (row 0 of `gdp` is the by-year dataset)
url = gdp.loc[0, "links.editions.href"]
res_wb = requests_get(url)

# parse meta data as a pd DataFrame
annual_gdp_links = pd.json_normalize(res_wb.json()['items'], max_level=2)

# extract and inspect the href columns; a substring test is used so that a
# column whose name begins with "href" is kept as well.
# NOTE: later cells pick entries from `href` by position (href[1]).
href = [col for col in annual_gdp_links.columns if "href" in col]
for col in href:
    print(annual_gdp_links[col].values)

requesting https://api.beta.ons.gov.uk/v1/datasets/regional-gdp-by-year/editions
['https://api.beta.ons.gov.uk/v1/datasets/regional-gdp-by-year']
['https://api.beta.ons.gov.uk/v1/datasets/regional-gdp-by-year/editions/time-series/versions/6']
['https://api.beta.ons.gov.uk/v1/datasets/regional-gdp-by-year/editions/time-series']
['https://api.beta.ons.gov.uk/v1/datasets/regional-gdp-by-year/editions/time-series/versions']


In [308]:
# build the observations endpoint from the version link and try requesting it
annual_gdp_df = f"{annual_gdp_links.loc[0, href[1]]}/observations"
annual_gdp = requests_get(annual_gdp_df)

requesting https://api.beta.ons.gov.uk/v1/datasets/regional-gdp-by-year/editions/time-series/versions/6/observations
An error occured parsing content in the url: error 400


Loading the above url with a browser, the API returns an error message:
>missing query parameters for the following dimensions: 
>* time 
>* geography 
>* unofficialstandardindustrialclassification 
>* prices 
>* growthrate

Hence, we need to take a step back and request the list of values that are accepted for each parameter.
* Alternatively, following the version link should return a result that contains a download link for the `.csv` file.

In [309]:
# fetch the version meta data
annual_gdp_meta = requests_get(annual_gdp_links.loc[0, href[1]])
# tabulate the query dimensions (with their code-list href), keyed by label
params = (
    pd.json_normalize(annual_gdp_meta.json()["dimensions"])
    .set_index("label")
)
# show the table
params

requesting https://api.beta.ons.gov.uk/v1/datasets/regional-gdp-by-year/editions/time-series/versions/6


Unnamed: 0_level_0,href,id,name
label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Time,https://api.beta.ons.gov.uk/v1/code-lists/cale...,calendar-years,time
Geography,https://api.beta.ons.gov.uk/v1/code-lists/nuts,nuts,geography
Standard Industrial Classification,https://api.beta.ons.gov.uk/v1/code-lists/sic-...,sic-unofficial,unofficialstandardindustrialclassification
Prices,https://api.beta.ons.gov.uk/v1/code-lists/type...,type-of-prices,prices
Measure,https://api.beta.ons.gov.uk/v1/code-lists/quar...,quarterly-index-and-growth-rate,growthrate


In [310]:
# Code-list endpoints for four of the query dimensions:
# geography, prices, measure (growth rate) and industrial classification
param_geocode, param_price, param_rate, param_class = (
    "https://api.beta.ons.gov.uk/v1/code-lists/nuts/editions/one-off/codes",
    "https://api.beta.ons.gov.uk/v1/code-lists/type-of-prices/editions/one-off/codes",
    "https://api.beta.ons.gov.uk/v1/code-lists/quarterly-index-and-growth-rate/editions/one-off/codes",
    "https://api.beta.ons.gov.uk/v1/code-lists/sic-unofficial/editions/one-off/codes",
)

In [311]:
def _load_code_list(codes_url):
    """Request a code-list endpoint and return its `items` as a DataFrame."""
    return pd.json_normalize(requests_get(codes_url).json()["items"])


# one DataFrame of accepted codes per query dimension
geocode = _load_code_list(param_geocode)    # geography
pricecode = _load_code_list(param_price)    # type of prices
ratecode = _load_code_list(param_rate)      # index / growth-rate measure
classcode = _load_code_list(param_class)    # industrial classification

requesting https://api.beta.ons.gov.uk/v1/code-lists/nuts/editions/one-off/codes


requesting https://api.beta.ons.gov.uk/v1/code-lists/type-of-prices/editions/one-off/codes
requesting https://api.beta.ons.gov.uk/v1/code-lists/quarterly-index-and-growth-rate/editions/one-off/codes
requesting https://api.beta.ons.gov.uk/v1/code-lists/sic-unofficial/editions/one-off/codes


In [312]:
# Download the annual regional GDP observations one query at a time.
# Every combination of year, geography, price type and measure is requested,
# with the industry dimension left as the `*` wildcard; the per-query results
# are collected in a list and combined into `df` once at the end.
OBSERVATIONS_URL = (
    "https://api.beta.ons.gov.uk/v1/datasets/regional-gdp-by-year/editions/"
    "time-series/versions/6/observations"
)
INDUSTRY_ID_COL = 'dimensions.UnofficialStandardIndustrialClassification.id'
OUTPUT_COLUMNS = ['year', 'area', 'price', 'rate', 'industry', 'value']

frames = []
for year in range(2012, 2024):
    for geo in geocode.code:
        for price in pricecode.code:
            for rate in ratecode.code:
                url = (
                    f"{OBSERVATIONS_URL}?time={year}&geography={geo}"
                    "&unofficialstandardindustrialclassification=*"
                    f"&prices={price}&growthrate={rate}"
                )
                response = requests_get(url)
                # pause between calls, whether or not the request succeeded
                time.sleep(0.5)
                if response is None:
                    # requests_get has already printed the reason
                    continue

                try:
                    observe = response.json()['observations']
                    if not observe:
                        print("0 observation under this parameterization")
                        continue
                    new_data = pd.json_normalize(observe)[['observation', INDUSTRY_ID_COL]]
                except (ValueError, KeyError, TypeError) as err:
                    # skip this combination instead of falling through with a
                    # missing or stale `new_data`
                    print(f'Error occured when parsing the API result to a pd DataFrame: {err}')
                    continue

                new_data = (
                    new_data
                    .rename(columns={'observation': 'value', INDUSTRY_ID_COL: 'industry'})
                    # treat empty strings as missing values
                    .replace("", pd.NA)
                    # label every row with the parameters of this query
                    .assign(year=str(year), area=geo, price=price, rate=rate)
                )

                frames.append(new_data)
                print(f"{new_data['value'].count()} data points added")

# combine once; concatenating inside the loop re-copies all earlier rows each time
if frames:
    df = pd.concat(frames, ignore_index=True)[OUTPUT_COLUMNS]
else:
    df = pd.DataFrame(columns=OUTPUT_COLUMNS)
                


requesting https://api.beta.ons.gov.uk/v1/datasets/regional-gdp-by-year/editions/time-series/versions/6/observations?time=2012&geography=UK&unofficialstandardindustrialclassification=*&prices=2017-prices&growthrate=aix
0 observation under this parameterization
requesting https://api.beta.ons.gov.uk/v1/datasets/regional-gdp-by-year/editions/time-series/versions/6/observations?time=2012&geography=UK&unofficialstandardindustrialclassification=*&prices=2017-prices&growthrate=gra
0 observation under this parameterization
requesting https://api.beta.ons.gov.uk/v1/datasets/regional-gdp-by-year/editions/time-series/versions/6/observations?time=2012&geography=UK&unofficialstandardindustrialclassification=*&prices=2017-prices&growthrate=grq
0 observation under this parameterization
requesting https://api.beta.ons.gov.uk/v1/datasets/regional-gdp-by-year/editions/time-series/versions/6/observations?time=2012&geography=UK&unofficialstandardindustrialclassification=*&prices=2017-prices&growthrate=gr