# ABS Detailed Labour Force 6291

## Python set-up

In [1]:
# system imports
from functools import cache

# analytic imports
import matplotlib.pyplot as plt
import pandas as pd
import readabs as ra
from readabs import metacol as mc
from mgplot import line_plot_finalise, finalise_plot, summary_plot_finalise

# local imports
from abs_helper import get_abs_data
from henderson import hma

# pandas display settings
# (limits set very high so that long or wide frames are displayed in full)
pd.options.display.max_rows = 99999
pd.options.display.max_columns = 999

# show charts within this notebook
# (passed as the show= argument to every plotting call below)
SHOW = False

## Get data from ABS

In [2]:
# Download the monthly spreadsheets for catalogue 6291.0.55.001.
# The third return value (source) is reused in chart footers further down.
abs_monthly, monthly_meta, source, _ = get_abs_data(
    "6291.0.55.001",
    single_zip_only="p6291_all_monthly_spreadsheets",
    verbose=False,
)
print(f"Monthly: {abs_monthly.keys()}")

Table MRM1 has no 'Index' sheet.
Monthly: dict_keys(['6291002', '6291003', '6291008', '6291009', '6291010', '6291014a', '6291014b', '6291014c', '6291014d', '6291014e', '6291020a', '6291020b', '6291020c', '6291001'])


In [3]:
# NOTE: the ABS has at times published the quarterly zip file with a typo in
# ---- its name (the second 'r' in 'quarterly' missing). The correctly
# ---- spelled name is used below; if this stops working, it might be worth
# ---- checking whether the typo has returned.
abs_qtrly, qtrly_meta, _, _ = get_abs_data(
    "6291.0.55.001",
    single_zip_only="p6291_all_quarterly_spreadsheets",
    verbose=False,
)
print(f"Quarterly: {abs_qtrly.keys()}")

Quarterly: dict_keys(['6291013', '6291017', '6291019', '6291023a', '6291023b', '6291024a', '6291024b', '6291025a', '6291025b', '6291029b', '6291029a', '6291004', '6291005', '6291006', '6291007', '6291011', '6291012'])


## Plot

### Monthly by Age

In [4]:
def monthly_by_age() -> None:
    """Chart monthly participation and unemployment rates by age group.

    For each of the two rates, the "Original" series for every listed age
    group in table 6291001 is looked up in the monthly metadata, gathered
    into a single DataFrame and plotted twice: as published, and as a
    12-period rolling mean.
    """

    table_id = "6291001"
    stype = "Original"
    table_data = abs_monthly[table_id]

    age_groups = (
        "15-24 years",
        "25-34 years",
        "35-44 years",
        "45-54 years",
        "55-64 years",
    )

    # each template holds a placeholder that is swapped for an age group
    placeholder = "By Age"
    templates = (
        f"> {placeholder} ;  Participation rate ;  Persons ;",
        f"> {placeholder} ;  Unemployment rate ;  Persons ;",
    )

    # annotations shared by every chart produced here
    common = {
        "rfooter": f"{source} {table_id}",
        "lfooter": f"Australia. {stype.capitalize()} series. ",
        "show": SHOW,
    }

    for template in templates:
        by_age = {}
        for age_group in age_groups:
            criteria = {
                template.replace(placeholder, age_group): mc.did,
                stype: mc.stype,
                table_id: mc.table,
            }
            # units is overwritten on each pass; the value found for the
            # final age group is the one handed to recalibrate() below
            _, series_id, units = ra.find_abs_id(monthly_meta, criteria)
            by_age[age_group] = table_data[series_id]
        rates, units = ra.recalibrate(pd.DataFrame(by_age), units)

        heading = f"Australia: {template}"
        line_plot_finalise(rates, title=heading, ylabel=units, **common)
        line_plot_finalise(
            rates.rolling(12).mean().dropna(),
            title=f"{heading} (12m rolling mean)",
            ylabel=units,
            **common,
        )


monthly_by_age()

recalibrate(): Units not appropriately calibrated: Percent
recalibrate(): Units not appropriately calibrated: Percent


### Quarterly by sector

Note: The ABS may have discontinued this reporting

In [5]:
# Left-hand footer shared by the quarterly charts. It ends with the last
# index label of quarterly table 6291013 (i.e. the latest period in the data).
# Spelling fixed: "adjisted" -> "adjusted".
Q_FOOTER = (
    "Australia. Seasonally adjusted series. "
    "Quarters ending in Feb, May, Aug, Nov. "
    f"{abs_qtrly['6291013'].index[-1]}"
)

In [6]:
@cache
def get_sector() -> tuple[pd.DataFrame, pd.DataFrame]:
    """Return the seasonally adjusted series of quarterly table 6291004.

    Returns:
        A pair ``(sectors, selection)``. ``selection`` holds the metadata
        rows for table 6291004 whose series type is "Seasonally Adjusted";
        ``sectors`` holds the matching data columns, still labelled by
        series ID. When the table is absent a message is printed and two
        empty DataFrames are returned.

    The result is memoised by ``functools.cache``, so every call hands back
    the same two DataFrame objects.
    """

    table = "6291004"
    if table in abs_qtrly:
        wanted = (qtrly_meta["Series Type"] == "Seasonally Adjusted") & (
            qtrly_meta["Table"] == table
        )
        selection = qtrly_meta[wanted]
        # pick the data columns in metadata order
        return abs_qtrly[table][selection[mc.id]], selection

    print(f"Table {table} not found in abs_qtrly")
    return pd.DataFrame(), pd.DataFrame()

In [7]:
def summary_growth() -> None:
    """Chart employment by sector and its recent quarterly growth.

    Produces one line chart of the number employed in every sector
    (excluding the "Employed total" series), followed by a summary chart
    of percentage growth over one quarter and another over four quarters.
    Does nothing when get_sector() returns no data.
    """

    sectors, selection = get_sector()
    if sectors.empty:
        return

    table = "6291004"
    data = abs_qtrly[table]

    # map the leading part of each description (the sector name) to its
    # series ID; a repeated name keeps its first position and last ID
    id_by_sector = {}
    for description, series_id in zip(selection[mc.did], selection[mc.id]):
        id_by_sector[description.split(" ;")[0]] = series_id

    numbers = (
        data[list(id_by_sector.values())]
        .set_axis(list(id_by_sector), axis=1)
        .drop(columns=["Employed total"])
    )

    # take the unit of the first selected series for the whole frame
    first_id = next(iter(id_by_sector.values()))
    units = qtrly_meta.loc[qtrly_meta[mc.id] == first_id, mc.unit].values[0]
    numbers, units = ra.recalibrate(numbers, units)

    print(numbers.columns)

    # number plots
    line_plot_finalise(
        numbers,
        title="Australia: Number employed by sector",
        ylabel=f"{units} employed",
        rfooter=f"{source} {table}",
        lfooter=Q_FOOTER,
        legend={"loc": "best", "ncol": 2, "fontsize": "xx-small"},
        style=["--", "-.", "-", ":"],
        marker=["o", "<", ">", "^", "v", "s", "D"],
        markersize=2,
        show=SHOW,
    )

    # - summary recent growth plots
    for lag, label in ((1, "$Q/Q_{-1}$"), (4, "$Q/Q_{-4}$")):
        summary_plot_finalise(
            numbers.pct_change(lag) * 100,
            plot_from=pd.Period("1995Q1", freq="Q-NOV"),
            title=f"{label} Growth: % change # employed for {sectors.index[-1]}",
            rfooter=source,
            lfooter=Q_FOOTER,
            show=SHOW,
        )


summary_growth()

Index(['Agriculture, Forestry and Fishing', 'Mining', 'Manufacturing',
       'Electricity, Gas, Water and Waste Services', 'Construction',
       'Wholesale Trade', 'Retail Trade', 'Accommodation and Food Services',
       'Transport, Postal and Warehousing',
       'Information Media and Telecommunications',
       'Financial and Insurance Services',
       'Rental, Hiring and Real Estate Services',
       'Professional, Scientific and Technical Services',
       'Administrative and Support Services',
       'Public Administration and Safety', 'Education and Training',
       'Health Care and Social Assistance', 'Arts and Recreation Services',
       'Other Services'],
      dtype='object')


In [8]:
def diffusion() -> None:
    """Produce sectoral diffusion index timeseries charts.

    A diffusion index is a statistical measure that summarizes
    the proportion of components within a group that are showing
    positive change. Here the components are the industry sectors
    returned by get_sector() (series whose name contains "Employed",
    such as the total, are excluded), and change is measured over
    1, 2 and 4 quarters. The 1-quarter chart also shows a 9-term
    Henderson moving average of the index.

    Does nothing when get_sector() returns no data.
    """

    cached, selection = get_sector()
    if cached.empty:
        return

    # get_sector() is memoised, so the frame it returns is shared by every
    # caller: relabel a new frame rather than assigning to .columns in place.
    names = [x.split(" ;")[0] for x in selection[mc.did]]
    sectors = cached.set_axis(names, axis=1)
    sectors = sectors[[x for x in sectors.columns if "Employed" not in x]]

    henderson_terms = 9
    for title, period in zip(["$Q/Q_{-1}$", "$Q/Q_{-2}$", "$Q/Q_{-4}$"], [1, 2, 4]):
        change = sectors.diff(period).dropna(how="all")
        # Share of sectors that grew, among the sectors for which a change
        # could be computed. Every remaining row has at least one non-missing
        # change, so the denominator is never zero.
        raw = (change > 0).sum(axis=1) / change.notna().sum(axis=1)

        if period == 1:
            # the smoothed line is only drawn on the quarter-on-quarter chart
            d = pd.DataFrame(
                {
                    "Diffusion Index": raw,
                    f"{henderson_terms}-term Henderson Moving Average": hma(
                        raw, henderson_terms
                    ),
                }
            )
        else:
            d = raw

        line_plot_finalise(
            d,
            title=f"Employment Diffusion Index by Industry Sector ({title})",
            ylabel="Proportion of industry sectors",
            rfooter=source,
            lfooter="Australia. Seasonally adjusted series. "
            + f"Proportion of industry sectors showing {title} employment growth. ",
            width=(0.75, 3) if period == 1 else (1, 2),
            color=("blue", "darkorange"),
            axhline={"y": 0.5, "lw": 0.5, "color": "b", "ls": "-"},
            show=SHOW,
        )


diffusion()

In [9]:
def growth_paths() -> None:
    """Chart employment by sector as an index over the most recent periods.

    Each sector (series whose name contains "Employed" are excluded) is
    rebased so that the first row of the window equals 100. The window
    starts 40 index steps before the latest observation.

    Does nothing when get_sector() returns no data.
    """

    cached, selection = get_sector()
    if cached.empty:
        return

    # get_sector() is memoised, so the frame it returns is shared by every
    # caller: relabel a new frame rather than assigning to .columns in place.
    names = [x.split(" ;")[0] for x in selection[mc.did]]
    sectors = cached.set_axis(names, axis=1)
    sectors = sectors[[x for x in sectors.columns if "Employed" not in x]]

    # NOTE(review): subtracting an int from the last index label presumably
    # steps back whole quarters (i.e. the index holds quarterly Periods) --
    # confirm against get_abs_data().
    steps_back = 40
    start = sectors.index[-1] - steps_back
    window = sectors.loc[sectors.index >= start]
    indexed = window.div(window.iloc[0]) * 100

    line_plot_finalise(
        indexed,
        title="Employment Growth Paths by Industry Sector",
        ylabel="Num. Employed Index (start = 100)",
        rfooter=source,
        lfooter=Q_FOOTER,
        legend={"loc": "upper left", "ncol": 2, "fontsize": 7},
        width=1.5,
        style=["-", "--", "-.", ":"],
        show=SHOW,
    )


growth_paths()

In [10]:
def number_employed() -> None:
    """Draw a horizontal bar chart of the latest number employed per sector.

    The last row of the sector data (series whose name contains "Employed"
    are excluded) is divided by 1000, sorted, and plotted with the x-axis
    labelled in millions of persons.

    Does nothing when get_sector() returns no data.
    """

    cached, selection = get_sector()
    if cached.empty:
        return

    # get_sector() is memoised, so the frame it returns is shared by every
    # caller: relabel a new frame rather than assigning to .columns in place.
    names = [x.split(" ;")[0] for x in selection[mc.did]]
    sectors = cached.set_axis(names, axis=1)
    sectors = sectors[[x for x in sectors.columns if "Employed" not in x]]

    latest = sectors.iloc[-1].div(1000).sort_values()

    # Draw on an explicit, fresh Axes (as quarterly_by_sector does) so the
    # bars cannot end up on a figure left open by earlier plotting code.
    _fig, ax = plt.subplots()
    latest.plot.barh(ax=ax)
    finalise_plot(
        ax,
        title="Number Employed by Industry Sector",
        ylabel=None,
        xlabel="Million Persons Employed",
        rfooter=source,
        lfooter=Q_FOOTER,
        show=SHOW,
    )


number_employed()

In [12]:
def quarterly_by_sector() -> None:
    """Produce quarterly by sector charts.

    Three groups of charts are drawn from the sector data returned by
    get_sector() (series whose name contains "Employed" are excluded):

    * one line chart per sector of the number employed;
    * one line chart per sector of its share of the sum across sectors;
    * horizontal bar charts of growth over 1 and 4 quarters, both as a
      percentage and as a difference in levels.

    Does nothing when get_sector() returns no data.
    """

    cached, selection = get_sector()
    if cached.empty:
        return

    # get_sector() is memoised, so the frame it returns is shared by every
    # caller: relabel a new frame rather than assigning to .columns in place.
    names = [x.split(" ;")[0] for x in selection[mc.did]]
    sectors = cached.set_axis(names, axis=1)
    sectors = sectors[[x for x in sectors.columns if "Employed" not in x]]

    # --- individual sector plots for number employed ...
    for col, series in sectors.items():
        scaled, units = ra.recalibrate(series, "Thousand Persons")
        line_plot_finalise(
            scaled,
            title=f"Number Employed: {col}",
            ylabel=units,
            width=2,
            rfooter=source,
            lfooter=Q_FOOTER,
            annotate=True,
            show=SHOW,
        )

    # --- as a percent of total employment
    # (the total is the sum of the sector columns kept above)
    total_emp = sectors.sum(axis=1)
    for col, series in sectors.items():
        percent = (series / total_emp) * 100
        # long sector names go on their own title line
        sep = "\n" if len(col) > 20 else " "
        line_plot_finalise(
            percent,
            title=f"Employment by Sector as % of Total Employed:{sep}{col}",
            ylabel="Per cent",
            width=2,
            rfooter=source,
            lfooter=Q_FOOTER,
            annotate=True,
            show=SHOW,
        )

    # --- horizontal bar charts of recent growth by sector
    for period, text in zip([1, 4], ["$Q/Q_{-1}$", "$Q/Q_{-4}$"]):
        latest = sectors.iloc[-1]
        earlier = sectors.iloc[-1 - period]
        percent = ((latest / earlier - 1) * 100).sort_values()
        numeric = (latest - earlier).sort_values()
        # tag is forwarded to finalise_plot() and also selects the x-axis label
        for s, stem, tag in zip(
            [percent, numeric], [f"{text} %", f"{text}"], ["", "numeric"]
        ):
            _fig, ax = plt.subplots()
            ax.barh(s.index, s)
            finalise_plot(
                ax,
                title=f"{stem} Growth in No. Employed by Sector",
                ylabel=None,
                xlabel="Per cent" if tag == "" else "Thousand Employed",
                rfooter=source,
                lfooter=Q_FOOTER,
                x0=True,
                tag=tag,
                show=SHOW,
            )


quarterly_by_sector()

In [13]:
def job_growth_by_market_sector() -> None:
    """Chart job growth split into market and non-market sectors.

    The seasonally adjusted sector series of quarterly table 6291004 are
    summed into two groups: "non-market" (Public Administration and Safety,
    Education and Training, Health Care and Social Assistance) and "market"
    (every other sector). Two stacked bar charts show the change in each
    group over 1 and 4 quarters for the latest 25 quarters, and a line chart
    shows both groups as an index based 25 quarters back.

    Prints a message and returns when the table is not available.
    """

    abs_dict = abs_qtrly
    meta = qtrly_meta

    table = "6291004"
    if table not in abs_dict:
        print(f"Table {table} not found in abs_dict")
        return

    series_type = "Seasonally Adjusted"
    selection = meta[(meta["Series Type"] == series_type) & (meta["Table"] == table)]
    labels = selection[mc.did].str.split(" ; ").str[0]

    # Map series ID -> sector label explicitly, so the renaming below does not
    # depend on how the metadata frame happens to be indexed.
    label_by_id = dict(zip(selection[mc.id], labels))

    # Keep the individual sectors only. Aggregates are recognised by name
    # (as the other sector charts do) rather than by position in the table.
    sector_ids = [sid for sid, label in label_by_id.items() if "Employed" not in label]
    data = abs_dict[table].loc[:, sector_ids].rename(columns=label_by_id)

    # growth by market sector ...
    non_market = [
        "Public Administration and Safety",
        "Education and Training",
        "Health Care and Social Assistance",
    ]
    nm_jobs = data[non_market].sum(axis=1)
    m_jobs = data.loc[:, ~data.columns.isin(non_market)].sum(axis=1)

    n_quarters = 25  # number of most recent quarters shown in each chart

    for p, title in zip([1, 4], ["$Q/Q_{-1}$", "$Q/Q_{-4}$"]):
        growth = pd.DataFrame(
            {
                "Non-market": nm_jobs.diff(p).dropna(),
                "Market": m_jobs.diff(p).dropna(),
            }
        )
        ax = growth.iloc[-n_quarters:].plot.bar(
            stacked=True, width=0.8, color=["blue", "darkorange"]
        )

        finalise_plot(
            ax,
            title=f"{title} Market and Non-market Sector Job Growth",
            ylabel="Change in Employment '000",
            xlabel=None,
            rfooter=source,
            lfooter=Q_FOOTER,
            y0=True,
            show=SHOW,
        )

    # growth paths over the past n_quarters quarters, rebased so that the
    # first plotted quarter equals 100 ...
    indexed = pd.DataFrame(
        {
            "Non-market": nm_jobs / nm_jobs.iloc[-n_quarters] * 100.0,
            "Market": m_jobs / m_jobs.iloc[-n_quarters] * 100.0,
        }
    )
    base_period = indexed.index[-n_quarters]
    line_plot_finalise(
        indexed.iloc[-n_quarters:],
        title=f"Index of Market and Non-market Sector Employment (100={base_period})",
        ylabel=f"Index (100={base_period})",
        rfooter=source,
        lfooter=f"Australia. {series_type.capitalize()} series. ",
        legend={"loc": "upper left", "ncol": 2, "fontsize": 9},
        style=["-", "--", "-.", ":"],
        axhline={"y": 100, "color": "#555555", "linewidth": 0.66, "linestyle": "-"},
        width=1.5,
        show=SHOW,
    )


job_growth_by_market_sector()

### Monthly by duration of unemployment

In [14]:
def monthly_by_ue_duration() -> None:
    """Produce monthly UE duration charts.

    For each category, the series for every unemployment-duration cohort
    is located in the monthly metadata by matching both the category text
    and the cohort text in the data item description. Two line charts are
    drawn from table 6291014a: the series as published, and their 12-period
    rolling mean.

    Prints a message and returns when the table is not available.

    Raises:
        AssertionError: if a category/cohort pair does not match exactly
            one metadata row.
    """

    abs_dict = abs_monthly
    meta = monthly_meta

    table = "6291014a"
    if table not in abs_dict:
        print(f"Table {table} not found in abs_dict")
        return
    df = abs_dict[table]
    series_type = "Original"

    cohorts = [
        "2 years and over",
        "1-2 years",
        "6-12 months",
        "3-6 months",
        "1-3 months",
        "under 1 month",
    ]

    categories = ["Unemployed total ;  Persons ;"]
    period = 12
    descriptions = meta[mc.did]
    for category in categories:
        columns = {}
        units = None
        for cohort in cohorts:
            # plain substring matching: the labels are literal text, not
            # regular expressions
            rows = meta[
                descriptions.str.contains(category, regex=False)
                & descriptions.str.contains(cohort, regex=False)
            ]
            assert len(rows) == 1, (
                f"expected exactly one series for {category!r} / {cohort!r}, "
                f"found {len(rows)}"
            )
            select = rows["Series ID"].values[0]
            # overwritten each pass; the last cohort's unit is used below
            units = rows["Unit"].values[0]
            columns[cohort] = df[select]

        raw = pd.DataFrame(columns)
        trend = raw.rolling(period).mean()

        title = (
            category.replace(" ; ", " ").replace(" ;", " ").replace("  ", " ").title()
        )
        lfooter = f"Australia. {series_type.capitalize()} series. "

        for frame, t in zip([raw, trend], [title, f"{title}, {period} month ave."]):
            # Recalibrate each frame separately and label its chart with the
            # unit returned for that frame. Previously one unit label (the one
            # returned for raw) was used for both charts.
            scaled, scaled_units = ra.recalibrate(frame, units)
            line_plot_finalise(
                scaled,
                title=t,
                ylabel=scaled_units,
                rfooter=f"{source} {table}",
                lfooter=lfooter,
                width=1.5,
                show=SHOW,
            )


monthly_by_ue_duration()

## Finished

In [15]:
# watermark: load the extension, then print the run date/time, the Python and
# machine details, and the versions of the imported packages
%load_ext watermark
%watermark -u -t -d --iversions --watermark --machine --python --conda

Last updated: 2025-12-27 09:48:41

Python implementation: CPython
Python version       : 3.14.0
IPython version      : 9.8.0

conda environment: n/a

Compiler    : Clang 20.1.4 
OS          : Darwin
Release     : 25.2.0
Machine     : arm64
Processor   : arm
CPU cores   : 14
Architecture: 64bit

pandas    : 2.3.3
mgplot    : 0.2.14
readabs   : 0.1.8
matplotlib: 3.10.7

Watermark: 2.5.0



In [16]:
# printed by the last cell, so its presence shows the run reached the end
print("Finished")

Finished
