In [28]:
import warnings
from pathlib import Path

import pandas as pd
import requests

In [29]:
# Endpoint prefix for the ONS beta API. fetch_ons_data() appends a series
# URI path directly after the trailing "uri=" query parameter.
base_url = "https://api.beta.ons.gov.uk/v1/data?uri="

In [30]:
# Mapping of human-readable series label -> ONS time-series URI path.
# Each path is appended to `base_url` when the series is downloaded, and the
# label is stored verbatim in the `series` column of the combined frame.
# NOTE(review): the key "Unpemployment rate" looks like a typo for
# "Unemployment rate". It is left unchanged here because the label ends up in
# the exported CSV; rename it together with any consumers of that file.
uris = {
    "Total vacancies":"/employmentandlabourmarket/peopleinwork/employmentandemployeetypes/timeseries/ap2y/lms",
    "Vacancies per 100 jobs":"/employmentandlabourmarket/peopleinwork/employmentandemployeetypes/timeseries/ap2z/lms",
    "Unpemployment rate":"/employmentandlabourmarket/peoplenotinwork/unemployment/timeseries/mgsx/lms",
    "Employment rate":"/employmentandlabourmarket/peopleinwork/employmentandemployeetypes/timeseries/lf24/lms",
    "Female employment rate":"/employmentandlabourmarket/peopleinwork/employmentandemployeetypes/timeseries/lf25/lms",
    "Male employment rate":"/employmentandlabourmarket/peopleinwork/employmentandemployeetypes/timeseries/mgsv/lms",
    "Avg weekly earnings":"/employmentandlabourmarket/peopleinwork/earningsandworkinghours/timeseries/kai7/emp",
    "Working days lost due to strikes":"/employmentandlabourmarket/peopleinwork/employmentandemployeetypes/timeseries/bbfw/lms",
    "Inactivity rate":"/employmentandlabourmarket/peoplenotinwork/economicinactivity/timeseries/lf2s/lms"
}

In [31]:
def fetch_ons_data(uri, timeout=30):
    """Download one ONS time series and return its decoded JSON body.

    Parameters
    ----------
    uri : str
        Series path that is appended verbatim to the module-level
        ``base_url``.
    timeout : float or tuple, optional
        Seconds to wait for the server, forwarded to ``requests.get``.
        Without a timeout ``requests`` waits indefinitely, which would hang
        the notebook on a stalled connection.

    Returns
    -------
    object or None
        The decoded JSON payload when the server answers with HTTP 200,
        otherwise ``None`` (a warning carrying the status code is emitted).

    Raises
    ------
    requests.RequestException
        Network-level failures, including timeouts, are not swallowed.
    """
    response = requests.get(base_url + uri, timeout=timeout)
    if response.status_code != 200:
        # Keep the "return None" contract callers rely on, but make the
        # failure visible instead of dropping the series silently.
        warnings.warn(
            f"ONS request for {uri!r} returned HTTP {response.status_code}",
            stacklevel=2,
        )
        return None
    return response.json()

In [32]:
# Download every configured series and stack them into one long-format frame,
# tagging each row with the label it was requested under.
data_frames = {}
failed_series = []
for series_name, uri in uris.items():
    data = fetch_ons_data(uri)
    if not data:
        # fetch_ons_data returns None on a non-200 response; remember the
        # label so the omission is reported rather than silently ignored.
        failed_series.append(series_name)
        continue
    # Only the monthly observations ('months' key of the payload) are used.
    series_df = pd.DataFrame(data['months'])
    series_df['series'] = series_name
    data_frames[series_name] = series_df

if failed_series:
    warnings.warn(
        "No data downloaded for series: " + ", ".join(failed_series)
    )

if not data_frames:
    # pd.concat would raise a generic "No objects to concatenate" here;
    # raise the same exception type with an actionable message instead.
    raise ValueError(
        "None of the configured ONS series could be downloaded; "
        "nothing to combine."
    )

df = pd.concat(data_frames.values(), ignore_index=True)

df

Unnamed: 0,date,value,label,year,month,quarter,sourceDataset,updateDate,series
0,2001 MAY,680,2001 APR-JUN,2001,May,,LMS,2016-03-16T09:30:00.000Z,Total vacancies
1,2001 JUN,674,2001 MAY-JUL,2001,June,,LMS,2023-04-17T23:00:00.000Z,Total vacancies
2,2001 JUL,663,2001 JUN-AUG,2001,July,,LMS,2023-04-17T23:00:00.000Z,Total vacancies
3,2001 AUG,663,2001 JUL-SEP,2001,August,,LMS,2022-04-11T23:00:00.000Z,Total vacancies
4,2001 SEP,639,2001 AUG-OCT,2001,September,,LMS,2022-04-11T23:00:00.000Z,Total vacancies
...,...,...,...,...,...,...,...,...,...
5203,2024 MAY,22.1,2024 APR-JUN,2024,May,,LMS,2024-12-17T00:00:00.000Z,Inactivity rate
5204,2024 JUN,21.9,2024 MAY-JUL,2024,June,,LMS,2024-09-09T23:00:00.000Z,Inactivity rate
5205,2024 JUL,21.8,2024 JUN-AUG,2024,July,,LMS,2024-10-14T23:00:00.000Z,Inactivity rate
5206,2024 AUG,21.7,2024 JUL-SEP,2024,August,,LMS,2024-12-17T00:00:00.000Z,Inactivity rate


In [37]:
# Inspect only the rows that belong to the inactivity-rate series.
# NOTE(review): the saved execution count for this cell (37) is later than
# the date-conversion cell's (34) and the saved output already shows parsed
# dates; on a fresh top-to-bottom run this cell displays the raw frame.
df.loc[df['series'].eq('Inactivity rate')]

Unnamed: 0,date,value,series
4564,1971-02-01,24.9,Inactivity rate
4565,1971-03-01,25.0,Inactivity rate
4566,1971-04-01,25.0,Inactivity rate
4567,1971-05-01,24.9,Inactivity rate
4568,1971-06-01,25.0,Inactivity rate
...,...,...,...
5203,2024-05-01,22.1,Inactivity rate
5204,2024-06-01,21.9,Inactivity rate
5205,2024-07-01,21.8,Inactivity rate
5206,2024-08-01,21.7,Inactivity rate


In [34]:
# Parse the 'YYYY MON' strings (e.g. '2001 MAY') into timestamps, then keep
# only the date, value and series columns.
df = df.assign(
    date=pd.to_datetime(df['date'], format='%Y %b')
).loc[:, ['date', 'value', 'series']]

df

Unnamed: 0,date,value,series
0,2001-05-01,680,Total vacancies
1,2001-06-01,674,Total vacancies
2,2001-07-01,663,Total vacancies
3,2001-08-01,663,Total vacancies
4,2001-09-01,639,Total vacancies
...,...,...,...
5203,2024-05-01,22.1,Inactivity rate
5204,2024-06-01,21.9,Inactivity rate
5205,2024-07-01,21.8,Inactivity rate
5206,2024-08-01,21.7,Inactivity rate


In [35]:
# Earliest observation date available for each series.
df.groupby('series')[['date']].min()

Unnamed: 0_level_0,date
series,Unnamed: 1_level_1
Avg weekly earnings,2000-01-01
Employment rate,1971-02-01
Female employment rate,1971-02-01
Inactivity rate,1971-02-01
Male employment rate,1971-02-01
Total vacancies,2001-05-01
Unpemployment rate,1971-02-01
Vacancies per 100 jobs,2001-05-01
Working days lost due to strikes,1931-01-01


In [36]:
# Persist the combined frame as CSV (row index omitted). The target directory
# is created first so the export also works on a fresh checkout where
# `data/` does not exist yet.
output_path = Path('data/ons_data.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)