In [1]:
import os
import sys
import pandas as pd

# Resolve the directory two levels above the working directory and make sure
# it is importable (presumably the repository root that holds `scripts` —
# confirm against the project layout).
TOP_DIR = os.path.abspath("../../")
if TOP_DIR not in sys.path:
    sys.path.append(TOP_DIR)

In [2]:
from prepare import LMS_EXTRACT, DASHBOARD_DIR, create_table, save_files, summarise, labour_market_status_variables, long_term_unemployed_variables
from scripts.util.metadata import read_meta, extract_dates

Load in the LMS data

In [3]:
# Read the LMS extract CSV, parsing the `lms_period` column as dates.
lms_extract = pd.read_csv(LMS_EXTRACT, parse_dates=['lms_period'])

Extract relevant metrics for labour market status, pivot and save file

In [4]:
# Build the labour market status table from the extract, then write it out.
# The value returned by save_files is kept and displayed below, so save_files
# is expected to hand the table back — confirm in `prepare`.
labour_market_table = create_table(lms_extract, labour_market_status_variables)
labour_market_status = save_files(labour_market_table, 'labour_market_status')
labour_market_status

variable,age_16_to_17_not_in_ft_education_in_employment_sa,age_18_to_24_not_in_ft_education_in_employment_sa,age_16_to_24_not_in_ft_education_in_employment_sa,men_16_to_24_not_in_ft_education_in_employment_sa,women_16_to_24_not_in_ft_education_in_employment_sa,age_16_to_24_in_ft_education_in_employment_sa,age_16_to_17_not_in_ft_education_unemployed_sa,age_18_to_24_not_in_ft_education_unemployed_sa,age_16_to_24_not_in_ft_education_unemployed_sa,men_16_to_24_not_in_ft_education_unemployed_sa,...,men_16_to_24_not_in_ft_education_total_sa,women_16_to_24_not_in_ft_education_total_sa,unemployment_rate_sa,economic_inactivity_rate_sa,age_16_to_24_in_employment_sa,age_16_to_24_unemployed_sa,age_16_to_24_economically_inactive_sa,age_16_to_24_unemployment_rate_sa,quarter_label,quarter_axis_label
lms_period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000-01-01,249.0,2765.0,3013.0,1632.0,1381.0,920.0,79.0,329.0,408.0,243.0,...,2017.0,1966.0,5.9,23.3,3933.0,570.0,1756.0,12.7,Dec-Feb 2000,Dec-Feb\n2000
2000-04-01,246.0,2748.0,2994.0,1625.0,1369.0,946.0,85.0,337.0,422.0,255.0,...,2031.0,1942.0,5.7,23.2,3940.0,579.0,1744.0,12.8,Mar-May 2000,Mar-May\n2000
2000-07-01,240.0,2750.0,2989.0,1627.0,1362.0,951.0,77.0,316.0,393.0,230.0,...,2011.0,1934.0,5.3,23.3,3941.0,534.0,1801.0,11.9,Jun-Aug 2000,Jun-Aug\n2000
2000-10-01,226.0,2749.0,2976.0,1614.0,1361.0,934.0,85.0,317.0,402.0,237.0,...,2013.0,1939.0,5.3,23.5,3910.0,553.0,1842.0,12.4,Sep-Nov 2000,Sep-Nov\n2000
2001-01-01,241.0,2735.0,2976.0,1622.0,1354.0,952.0,82.0,324.0,406.0,251.0,...,2030.0,1928.0,5.2,23.3,3928.0,550.0,1856.0,12.3,Dec-Feb 2001,Dec-Feb\n2001
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-04-01,72.0,2682.0,2753.0,1448.0,1305.0,979.0,37.0,352.0,389.0,263.0,...,2092.0,1819.0,4.5,22.1,3732.0,598.0,2962.0,13.8,Mar-May 2024,Mar-May\n2024
2024-07-01,91.0,2702.0,2794.0,1477.0,1317.0,944.0,26.0,373.0,399.0,256.0,...,2137.0,1866.0,4.2,21.8,3737.0,594.0,2994.0,13.7,Jun-Aug 2024,Jun-Aug\n2024
2024-10-01,78.0,2733.0,2811.0,1496.0,1315.0,901.0,40.0,370.0,409.0,271.0,...,2165.0,1900.0,4.5,21.6,3712.0,626.0,3019.0,14.4,Sep-Nov 2024,Sep-Nov\n2024
2025-01-01,70.0,2737.0,2807.0,1503.0,1304.0,967.0,37.0,393.0,430.0,271.0,...,2164.0,1886.0,4.5,21.4,3774.0,643.0,2971.0,14.6,Dec-Feb 2025,Dec-Feb\n2025


Extract relevant metrics for long-term unemployment, pivot and save file

In [5]:
def construct_16_24_stats(data):
    """Derive combined 16-24 unemployment counts and long-term unemployment rates.

    The 16-17 and 18-24 columns are summed to give 16-24 totals for all
    unemployed, unemployed 6-12 months and unemployed over 12 months. The two
    duration counts are then expressed as a percentage of the 16-24 unemployed
    total, rounded to one decimal place.

    Where the combined over-12-months count is missing (the sum is NaN), the
    18-24 count and the 18-24 rate are used instead, and the row is flagged in
    the boolean `missing_over_12_months_data` column.

    The input frame is not modified; all work is done on a copy.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the `age_16_to_17_*` and `age_18_to_24_*` columns for
        `unemployed_sa`, `unemployed_6_to_12_months_sa` and
        `unemployed_over_12_months_sa`, plus
        `age_18_to_24_unemployed_over_12_months_rate_sa`.

    Returns
    -------
    pandas.DataFrame
        A copy of `data` with the derived `age_16_to_24_*` columns and the
        `missing_over_12_months_data` flag added.
    """
    # Copy first so the caller's frame is left untouched and re-running a
    # notebook cell on the same input gives the same result.
    data = data.copy()

    data['age_16_to_24_unemployed_sa'] = (
        data['age_18_to_24_unemployed_sa'] + data['age_16_to_17_unemployed_sa']
    ).round(0)
    data['age_16_to_24_unemployed_6_to_12_months_sa'] = (
        data['age_18_to_24_unemployed_6_to_12_months_sa']
        + data['age_16_to_17_unemployed_6_to_12_months_sa']
    ).round(0)
    data['age_16_to_24_unemployed_over_12_months_sa'] = (
        data['age_18_to_24_unemployed_over_12_months_sa']
        + data['age_16_to_17_unemployed_over_12_months_sa']
    ).round(0)

    # Rates are computed before the fallback below, so rows with a missing
    # over-12-months count get a NaN rate here and are patched afterwards.
    data['age_16_to_24_unemployed_6_to_12_months_rate_sa'] = (
        data['age_16_to_24_unemployed_6_to_12_months_sa']
        / data['age_16_to_24_unemployed_sa'] * 100
    ).round(1)
    data['age_16_to_24_unemployed_over_12_months_rate_sa'] = (
        data['age_16_to_24_unemployed_over_12_months_sa']
        / data['age_16_to_24_unemployed_sa'] * 100
    ).round(1)

    # Handle missing data by using the 18-24 figure instead
    data['missing_over_12_months_data'] = (
        data['age_16_to_24_unemployed_over_12_months_sa'].isna()
    )
    data['age_16_to_24_unemployed_over_12_months_sa'] = (
        data['age_16_to_24_unemployed_over_12_months_sa'].mask(
            data['missing_over_12_months_data'],
            data['age_18_to_24_unemployed_over_12_months_sa'],
        )
    )
    data['age_16_to_24_unemployed_over_12_months_rate_sa'] = (
        data['age_16_to_24_unemployed_over_12_months_rate_sa'].mask(
            data['missing_over_12_months_data'],
            data['age_18_to_24_unemployed_over_12_months_rate_sa'].round(1),
        )
    )
    return data

def add_axis_labels(data):
    """Add (or overwrite) `quarter_axis_label`, derived from `quarter_label`.

    Every space in `quarter_label` is replaced with the two-character
    sequence backslash + "n". The input frame is not modified; a new frame
    is returned.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a string column `quarter_label`.

    Returns
    -------
    pandas.DataFrame
        A copy of `data` with the `quarter_axis_label` column set.
    """
    # regex=False pins literal replacement regardless of the pandas version's
    # default; under regex handling the replacement text would be read as an
    # escape and become a real newline character.
    axis_labels = data['quarter_label'].str.replace(' ', '\\n', regex=False)
    return data.assign(quarter_axis_label=axis_labels)

In [6]:
# Build the long-term unemployment table: pivot the relevant variables,
# derive the combined 16-24 figures, then attach the axis labels.
long_term_unemployed = (
    lms_extract
    .pipe(create_table, long_term_unemployed_variables)
    .pipe(construct_16_24_stats)
    .pipe(add_axis_labels)
)
# Write the table out; the value returned by save_files is the cell output.
save_files(long_term_unemployed, 'long_term_unemployed')

variable,age_16_to_17_unemployed_sa,age_18_to_24_unemployed_sa,age_16_to_17_unemployed_6_to_12_months_sa,age_16_to_17_unemployed_over_12_months_sa,age_16_to_17_unemployed_over_12_months_rate_sa,age_18_to_24_unemployed_6_to_12_months_sa,age_18_to_24_unemployed_over_12_months_sa,age_18_to_24_unemployed_over_12_months_rate_sa,quarter_label,quarter_axis_label,age_16_to_24_unemployed_sa,age_16_to_24_unemployed_6_to_12_months_sa,age_16_to_24_unemployed_over_12_months_sa,age_16_to_24_unemployed_6_to_12_months_rate_sa,age_16_to_24_unemployed_over_12_months_rate_sa,missing_over_12_months_data
lms_period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2000-01-01,172.0,398.0,23.0,10.0,5.6,63.0,56.0,14.2,Dec-Feb 2000,Dec-Feb\n2000,570.0,86.0,66.0,15.1,11.6,False
2000-04-01,177.0,402.0,24.0,9.0,5.2,53.0,65.0,16.2,Mar-May 2000,Mar-May\n2000,579.0,77.0,74.0,13.3,12.8,False
2000-07-01,163.0,371.0,25.0,12.0,7.1,59.0,62.0,16.8,Jun-Aug 2000,Jun-Aug\n2000,534.0,84.0,74.0,15.7,13.9,False
2000-10-01,171.0,382.0,20.0,11.0,6.5,56.0,52.0,13.7,Sep-Nov 2000,Sep-Nov\n2000,553.0,76.0,63.0,13.7,11.4,False
2001-01-01,160.0,390.0,22.0,9.0,5.8,61.0,53.0,13.5,Dec-Feb 2001,Dec-Feb\n2001,550.0,83.0,62.0,15.1,11.3,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-04-01,134.0,464.0,18.0,20.0,14.8,80.0,79.0,16.9,Mar-May 2024,Mar-May\n2024,598.0,98.0,99.0,16.4,16.6,False
2024-07-01,99.0,495.0,16.0,7.0,7.5,64.0,91.0,18.3,Jun-Aug 2024,Jun-Aug\n2024,594.0,80.0,98.0,13.5,16.5,False
2024-10-01,130.0,496.0,29.0,13.0,9.7,81.0,92.0,18.6,Sep-Nov 2024,Sep-Nov\n2024,626.0,110.0,105.0,17.6,16.8,False
2025-01-01,118.0,525.0,26.0,9.0,7.3,70.0,114.0,21.7,Dec-Feb 2025,Dec-Feb\n2025,643.0,96.0,123.0,14.9,19.1,False


Create a summary file

In [7]:
# Headline figures derived from the two tables built above.
headline_stats = summarise(
    long_term_unemployed=long_term_unemployed,
    labour_market_status=labour_market_status,
)
# Date entries for the 'LMS' source, taken from the project metadata.
release_dates = extract_dates(read_meta(), 'LMS')

summary = pd.concat([headline_stats, release_dates])

# NOTE(review): the saved output below shows
# long_term_unemployment_rate_over_12_months = 110.0, which is not a plausible
# percentage — check that summarise reads the rate column rather than a count.
latest_path = os.path.join(DASHBOARD_DIR, 'latest.json')
summary.to_json(latest_path, indent=2, date_format='iso')

In [8]:
# Display the summary that was written to latest.json for a visual check.
summary

unemployment_rate_all_working_age                                      4.7
economic_inactivity_rate_all_working_age                              21.0
unemployment_rate_young_people                                        14.2
economic_inactivity_rate_young_people                                 40.4
unemployment_rate_not_in_full_time_education                          12.6
economic_inactivity_rate_not_in_full_time_education                   20.1
long_term_unemployment_rate_6_to_12_months                            16.4
long_term_unemployment_rate_over_12_months                           110.0
most_recent_lms_period                                        Mar-May 2025
last_update                                            2025-07-17 00:00:00
next_update                                            2025-12-08 00:00:00
dtype: object