# ABS Inflation multi-measure

## Python set-up

In [1]:
# system imports
from typing import Callable, Sequence, cast
from functools import cache

# analytic imports
import matplotlib.pyplot as plt
import pandas as pd
from pandas import DataFrame
import numpy as np
import readabs as ra
from readabs import metacol as mc

# local imports
from abs_helper import ANNUAL_CPI_TARGET_RANGE
from plotting import clear_chart_dir, line_plot, set_chart_dir, finalise_plot

In [2]:
# --- notebook-wide constants
# directory handed to the chart helpers below
CHART_DIR = "./CHARTS/Inflation/"
# left-hand footer text used on the comparative charts
LFOOTER = "Australia. Orig = Original series. SA = Seasonally adjusted series. "
# passed as show= to the plotting helpers
SHOW = False

# --- pandas display limits
pd.set_option("display.max_rows", 999999)
pd.set_option("display.max_columns", 999)
pd.set_option("display.max_colwidth", 100)

# --- register the chart directory with the plotting module, then clear it
# (presumably this removes charts left by an earlier run -- see plotting.py)
set_chart_dir(CHART_DIR)
clear_chart_dir(CHART_DIR)

## Main comparative charts

### Download key ABS data

In [3]:
def download_abs(
    wanted: dict,
) -> tuple[pd.DataFrame, pd.DataFrame, dict[str, str], str]:
    """Fetch every series named in the wanted dictionary from the ABS.

    Arguments:
    wanted maps an ABS series ID to a four-item sequence of
        (category ID, single-excel-only table, short title, abbreviation).

    Returns a four-part tuple:
    - a DataFrame of the series, one column per short title,
    - a DataFrame of the matching meta data, one row per short title,
    - a dictionary mapping each short title to its abbreviation, and
    - a source string listing the distinct category IDs that were used."""

    columns: dict = {}
    meta_rows: dict = {}
    short_names: dict[str, str] = {}
    catalogues: set = set()

    for series_id, spec in wanted.items():
        category_id, seo, title, abrev = spec
        table, table_meta = ra.read_abs_series(
            category_id, series_id, single_excel_only=seo
        )
        selected = table[series_id]

        # abbreviations containing "IPD" are turned into a 4-period percentage change
        if "IPD" in abrev:
            selected = selected.pct_change(4, fill_method=None) * 100

        # series whose frequency meta data contains "Q" go through qtly_to_monthly()
        if "Q" in table_meta[mc.freq][series_id]:
            selected = ra.qtly_to_monthly(selected, interpolate=False)

        columns[title] = selected
        meta_rows[title] = table_meta.loc[series_id]
        short_names[title] = abrev
        catalogues.add(category_id)

    source = "ABS: " + ", ".join(sorted(catalogues))
    return pd.DataFrame(columns), pd.DataFrame(meta_rows).T, short_names, source

### Multi-indicator plot

In [4]:
@cache
def get_headline_data() -> tuple[pd.DataFrame, pd.DataFrame, dict[str, str], str]:
    """Download the headline inflation measures from the ABS.

    Returns whatever download_abs() returns for the series listed below:
    (data, meta data, title-to-abbreviation map, source string). The
    function is cached, so repeat calls return the same objects."""

    # (category ID, single-excel-only table) shared by several series
    monthly_cpi = ("6484.0", "648401")
    qtly_cpi = ("6401.0", "640106")
    deflators = ("5206.0", "5206005_Expenditure_Implicit_Price_Deflators")

    wanted = {
        # "Series ID": (Category ID, single-excel-only, Short Series Title, abbreviation)
        "A128478318V": (*monthly_cpi, "Monthly CPI (Orig)", "MCPI"),
        "A128481588C": (*monthly_cpi, "Monthly CPI (SA)", "MCPISA"),
        "A130184497K": (*monthly_cpi, "Monthly Trimmed Mean CPI (Orig)", "MCPITM"),
        "A2325847F": (*qtly_cpi, "Qtly CPI (Orig)", "CPI"),
        "A3604508K": (*qtly_cpi, "Qtly CPI (SA)", "CPISA"),
        "A3604511X": (*qtly_cpi, "Qtly Trimmed Mean CPI (SA)", "CPITM"),
        "A3604505C": (*qtly_cpi, "Qtly Weighted Median CPI (SA)", "CPIWM"),
        "A2314867K": ("6427.0", "642701", "Producer Price Index (Orig)", "PPI"),
        "A83895396W": (
            "6345.0",
            "634501",
            "Wage Price Index (All sectors) (SA)",
            "WPI",
        ),
        # abbreviations containing "IPD" are converted to percentage change
        # inside download_abs()
        "A2303940R": (*deflators, "Household implicit price deflator (SA)", "HHIPD"),
        "A2303727C": (*deflators, "GNE implicit price deflator (SA)", "GNEIPD"),
    }

    return download_abs(wanted)

In [5]:
def plot_multi(
    starts: Sequence = (None, "2020-09-01", "2022-12-01"),
):
    """Chart all of the headline inflation indicators together.

    Arguments:
    starts holds the starting point for each chart; None means the data
        is not trimmed. At most three charts are drawn: the first is
        untagged with no markers or line styles given, the second and
        third are tagged " (recent)" and " (latest)" and use mixed line
        styles and markers."""

    data, _meta, abbr, source = get_headline_data()

    # left header: the last available reading for each measure, keyed by abbreviation
    latest = pd.Series(data.ffill().iloc[-1]).rename(index=abbr)
    latest_text = ", ".join(f"{name}={value:0.1f}" for name, value in latest.items())

    line_styles = ["solid", "dotted", "dashed"] * 5
    marker_shapes = list("ov^<>8sp*HhdPX")
    # (marker, style, title tag) used for each start, in order
    variants = (
        (None, None, ""),
        (marker_shapes, line_styles, " (recent)"),
        (marker_shapes, line_styles, " (latest)"),
    )

    for start, (marker, style, tag) in zip(starts, variants):
        line_plot(
            data.loc[start:],
            title=f"Inflation Indicators{tag}",
            ylabel="Per cent (through the year)",
            axhspan=ANNUAL_CPI_TARGET_RANGE,
            y0=True,
            rfooter=source,
            lheader=latest_text,
            legend={"loc": "best", "fontsize": "8", "ncol": 2},
            lfooter=LFOOTER,
            show=SHOW,
            marker=marker,
            markersize=4,
            style=style,
            dropna=True,
        )


plot_multi()

### Quarterly/Monthly CPI pairs

In [6]:
def plot_pairs(
    function: Callable,
    pairs: dict[str, tuple[str, str]],
    start: str = "2019-12-01",
    title_stem: str = "Monthly vs Quarterly",
) -> None:
    """Draw one two-line chart for each pair of inflation indicators.

    Arguments:
    function is called with no arguments and must return a four-part
        tuple of (data, meta data, abbreviations, source); only the data
        and the source string are used here.
    pairs maps a chart label to the two column names to plot together.
    start is the earliest index value kept in the charts.
    title_stem is appended to each label to form the chart title."""

    frame, _meta, _abbr, source = function()
    recent = frame.loc[frame.index >= start]

    for label, columns in pairs.items():
        line_plot(
            recent[list(columns)],
            title=f"{label}: {title_stem}",
            ylabel="Per cent (through the year)",
            axhspan=ANNUAL_CPI_TARGET_RANGE,
            y0=True,
            rfooter=source,
            legend={"loc": "best", "fontsize": "8"},
            lfooter=LFOOTER,
            width=[1, 2],
            show=SHOW,
            dropna=True,
        )


# Column pairs from get_headline_data() that are charted against each other.
# Each value is (quarterly column title, monthly column title).
# NOTE(review): the "Weighted Median CPI" chart uses the monthly trimmed mean
# as its monthly series; get_headline_data() downloads no monthly weighted
# median. Confirm that this pairing is intended.
headline_pairs = {
    # "label": ("quarterly series", "monthly series")
    "Headline CPI (Orig)": ("Qtly CPI (Orig)", "Monthly CPI (Orig)"),
    "Headline CPI (SA)": ("Qtly CPI (SA)", "Monthly CPI (SA)"),
    "Trimmed Mean CPI": (
        "Qtly Trimmed Mean CPI (SA)",
        "Monthly Trimmed Mean CPI (Orig)",
    ),
    "Weighted Median CPI": (
        "Qtly Weighted Median CPI (SA)",
        "Monthly Trimmed Mean CPI (Orig)",
    ),
}

# one chart per pair, using the default start date and title stem of plot_pairs()
plot_pairs(get_headline_data, headline_pairs)

In [7]:
@cache
def get_analytic_data() -> tuple[pd.DataFrame, pd.DataFrame, dict[str, str], str]:
    """Download the analytic CPI component series from the ABS.

    Returns whatever download_abs() returns for the series listed below:
    (data, meta data, title-to-abbreviation map, source string). Every
    abbreviation is left blank. The function is cached, so repeat calls
    return the same objects."""

    # (category ID, single-excel-only table) for the two CPI publications
    monthly_cpi = ("6484.0", "648401")
    qtly_cpi = ("6401.0", "640106")

    wanted = {
        # "Series ID": (Category ID, single-excel-only, Short Series Title, abbreviation)
        "A128483462J": (*monthly_cpi, "Monthly CPI Goods Component (Orig)", ""),
        "A128481640A": (*monthly_cpi, "Monthly CPI Services Component (Orig)", ""),
        "A128476506T": (*monthly_cpi, "Monthly CPI Tradables (Orig)", ""),
        "A128480134T": (*monthly_cpi, "Monthly CPI Non-tradables (Orig)", ""),
        "A130184498L": (*monthly_cpi, "Monthly CPI Excluding Volatile (SA)", ""),
        "A2330617V": (*qtly_cpi, "Qtly CPI Goods Component (Orig)", ""),
        "A2330707X": (*qtly_cpi, "Qtly CPI Services Component (Orig)", ""),
        "A2330527R": (*qtly_cpi, "Qtly CPI Tradables (Orig)", ""),
        "A2330572A": (*qtly_cpi, "Qtly CPI Non-tradables (Orig)", ""),
        "A2330842R": (*qtly_cpi, "Qtly CPI Excluding Volatile (Orig)", ""),
    }
    return download_abs(wanted)

In [8]:
# Column pairs from get_analytic_data() that are charted against each other.
# Each value is (quarterly column title, monthly column title).
# NOTE(review): "CPI Excluding Volatile" compares an original quarterly series
# with a seasonally adjusted monthly series, going by the column titles.
# Confirm that this is intended.
analytic_pairs = {
    # "label": ("quarterly series", "monthly series")
    "CPI Goods Component": (
        "Qtly CPI Goods Component (Orig)",
        "Monthly CPI Goods Component (Orig)",
    ),
    "CPI Services": (
        "Qtly CPI Services Component (Orig)",
        "Monthly CPI Services Component (Orig)",
    ),
    "CPI Tradables": ("Qtly CPI Tradables (Orig)", "Monthly CPI Tradables (Orig)"),
    "CPI Non-tradables": (
        "Qtly CPI Non-tradables (Orig)",
        "Monthly CPI Non-tradables (Orig)",
    ),
    "CPI Excluding Volatile": (
        "Qtly CPI Excluding Volatile (Orig)",
        "Monthly CPI Excluding Volatile (SA)",
    ),
}

# one chart per pair, using the default start date and title stem of plot_pairs()
plot_pairs(get_analytic_data, analytic_pairs)

## Annualised monthly/quarterly

***But note***: there is a whole host of problems with annualising the monthly series,
and the data does not exist to annualise the monthly trimmed mean CPI.

In [9]:
@cache
def get_index_data() -> pd.DataFrame:
    """Download the CPI series used for the annualised growth charts.

    Returns only the data frame from download_abs(), with one column per
    short series title; the meta data, abbreviations and source string
    are discarded. The function is cached, so repeat calls return the
    same DataFrame object."""

    # (category ID, single-excel-only table) for the two CPI publications
    qtly_cpi = ("6401.0", "640106")
    monthly_cpi = ("6484.0", "648401")

    wanted = {
        # "Series ID": (Category ID, single-excel-only, Short Series Title, abbreviation)
        "A2325846C": (*qtly_cpi, "Qtly CPI (Orig)", ""),
        # this title is looked up verbatim by annualise_plot(); keep the spelling in sync
        "A3604506F": (*qtly_cpi, "Quartely CPI (SA)", ""),
        "A2330841L": (*qtly_cpi, "Qtly CPI Excluding Volatile (Orig)", ""),
        "A2330616T": (*qtly_cpi, "Qtly CPI Goods Component (Orig)", ""),
        "A2330706W": (*qtly_cpi, "Qtly CPI Services Component (Orig)", ""),
        "A128478317T": (*monthly_cpi, "Monthly CPI (Orig)", ""),
        "A128481587A": (*monthly_cpi, "Monthly CPI (SA)", ""),
        "A128473239F": (*monthly_cpi, "Monthly CPI Excluding Volatile (Orig)", ""),
        "A128483461F": (*monthly_cpi, "Monthly CPI Goods Component (Orig)", ""),
        "A128481639T": (*monthly_cpi, "Monthly CPI Services Component (Orig)", ""),
    }

    data, *_unused = download_abs(wanted)
    return data

In [10]:
def annualise_plot() -> None:
    """Plot annualised 3-month and 6-month growth for paired CPI series.

    The data from get_index_data() is converted to percentage changes
    over 3 and then 6 periods, annualised, and passed to plot_pairs(),
    which draws one chart per pair with the title stem
    "<months> months annualised"."""

    # pairs we want to plot
    data: DataFrame = get_index_data()
    pairs = {
        # "label": ("quarterly series", "monthly series")
        "Headline CPI": ("Qtly CPI (Orig)", "Monthly CPI (Orig)"),
        "Headline CPI (SA)": ("Quartely CPI (SA)", "Monthly CPI (SA)"),
        "CPI Excluding Volatile": (
            "Qtly CPI Excluding Volatile (Orig)",
            "Monthly CPI Excluding Volatile (Orig)",
        ),
        "CPI Goods Component": (
            "Qtly CPI Goods Component (Orig)",
            "Monthly CPI Goods Component (Orig)",
        ),
        "CPI Services Component": (
            "Qtly CPI Services Component (Orig)",
            "Monthly CPI Services Component (Orig)",
        ),
    }
    source = "ABS: 6401.0, 6484.0"

    # calculate annualised percentage changes and plot
    months_in_year = 12
    for months in 3, 6:
        pct_d = ra.percent_change(data, n_periods=months)
        pct_d = ra.annualise_percentages(pct_d, months_in_year / months)

        def fake_capture(dataset=pct_d):
            # plot_pairs() unpacks (data, meta, abbreviations, source), so the
            # source string must be the fourth item; it was previously third,
            # which left the right footer blank on these charts.
            return dataset, DataFrame(), {}, source

        plot_pairs(
            fake_capture,
            pairs,
            title_stem=f"{months} months annualised",
        )


annualise_plot()

## Recent Phillips Curve

In [11]:
def xy_plot(
    frame: DataFrame,
    line_label: str,
    point_labels: Sequence[int] | None = None,
) -> plt.Axes:
    """Plot the second column of frame against the first as a labelled line.

    Arguments:
    frame is a two column DataFrame; the first column holds the x values
        and the second column holds the y values.
    line_label is the legend label for the line.
    point_labels holds the row positions (as used by .iloc) of the points
        to annotate with their index value; None means annotate nothing.

    Returns:
    The Axes object the line was drawn on."""

    x_name, y_name = frame.columns[0], frame.columns[1]
    ax = frame.plot(x=x_name, y=y_name, lw=2, label=line_label)

    positions = [] if point_labels is None else point_labels
    for position in positions:
        # write the index value just to the left of the chosen point
        ax.text(
            frame[x_name].iloc[position],
            frame[y_name].iloc[position],
            f"{frame.index[position]} ",
            fontsize="x-small",
            ha="right",
        )
    return ax

In [12]:
def add_regression(
    ax,
    frame: DataFrame,
    line_label: str,
    degree: int = 1,
    color: str = "darkred",
    linestyle: str = "--",
) -> None:
    """Overlay a least-squares polynomial fit on an existing chart.

    Arguments:
    ax is the object whose plot() method draws the fitted line.
    frame is a DataFrame; the first column holds the x values and the
        second column holds the y values.
    line_label is the legend label for the fitted line.
    degree is the degree of the fitted polynomial.
    color and linestyle set the appearance of the fitted line."""

    x_values = frame[frame.columns[0]]
    y_values = frame[frame.columns[1]]

    coefficients = np.polyfit(x_values, y_values, degree)
    # evaluate the fit at 50 evenly spaced points across the observed x range
    grid = np.linspace(x_values.min(), x_values.max(), 50)

    ax.plot(
        grid,
        np.polyval(coefficients, grid),
        color=color,
        linestyle=linestyle,
        lw=0.75,
        label=line_label,
    )

In [13]:
def phillips_curve() -> None:
    """Chart a recent Phillips curve: unemployment rate against inflation.

    Inflation is the 4-quarter percentage change in the trimmed mean CPI
    series; the unemployment rate is derived from two labour force series
    and converted to quarterly. The curve is drawn from 2021Q1, with a
    cubic fit and a vertical line at 2.5 per cent inflation. When the final
    quarter has a missing value, its unemployment rate (if positive) is
    drawn as a horizontal line."""

    # --- Gather the inputs
    # trimmed mean annual inflation rate (seasonally adjusted)
    cpi_id = "A3604509L"
    cpi_table, _ = ra.read_abs_series("6401.0", cpi_id, single_excel_only="640106")
    inflation = cpi_table[cpi_id].pct_change(periods=4) * 100

    # seasonally adjusted unemployment rate
    lfs, _ = ra.read_abs_series(
        "6202.0", ["A84423043C", "A84423047L"], single_excel_only="6202001"
    )
    monthly_rate = 100 - (lfs["A84423043C"] / lfs["A84423047L"] * 100)
    ue_rate = ra.monthly_to_qtly(monthly_rate)

    combined = pd.DataFrame(
        {"Trimmed Mean CPI": inflation, "_Unemployment Rate": ue_rate}
    )

    # remember the final unemployment reading when the final row is incomplete;
    # that row is removed by dropna() below and would otherwise not be shown
    final_row_incomplete = combined.iloc[-1].isna().any()
    last: float = cast(float, combined.iloc[-1, 1]) if final_row_incomplete else 0.0
    last_date = combined.index[-1]

    start = "2021Q1"  # an alternative start is "2009Q4"
    frame = combined.loc[combined.index >= start].dropna()

    # --- Draw the chart
    ax = xy_plot(frame, "Phillips curve", point_labels=(0, -1))
    add_regression(ax, frame, "Stylised Phillips curve", degree=3)

    ax.axvline(
        2.5, color="darkblue", linestyle=":", lw=0.75, label="2.5% Inflation target"
    )
    if last > 0.0:
        ax.axhline(
            last,
            color="darkgreen",
            linestyle="-.",
            lw=0.75,
            label=f"Unemployment rate {last_date}",
        )

    finalise_plot(
        ax,
        title="Phillips Curve: Inflation vs Unemployment Rate",
        ylabel="Unemployment Rate (%)",
        xlabel="Trimmed Mean CPI Annual Growth Rate (%)",
        lfooter="Australia, Seasonally adjusted. Unemployment rate is quarterly mean. ",
        rfooter="Source: ABS 6202, 6401",
        legend={"fontsize": "x-small", "loc": "upper right"},
        show=SHOW,
    )


phillips_curve()

## Beveridge curve

In [14]:
def beveridge_curve() -> None:
    """Chart a Beveridge curve: job vacancy rate against unemployment rate.

    Both rates use quarters ending in November (and so February, May and
    August). The curve is drawn from 2010Q2 with two quadratic fits: one
    over the quarters before 2020Q2 together with those from 2022Q4, and
    one over 2020Q3 to 2022Q2."""

    # --- Gather the inputs
    lfs, _ = ra.read_abs_series(
        "6202.0", ["A84423043C", "A84423047L"], single_excel_only="6202001"
    )
    monthly_rate = 100 - (lfs["A84423043C"] / lfs["A84423047L"] * 100)
    ue_rate_q = ra.monthly_to_qtly(monthly_rate, q_ending="NOV")

    la, _ = ra.read_abs_series("6354.0", "A590698F", single_excel_only="6354001")
    denominator_q = ra.monthly_to_qtly(lfs["A84423047L"], q_ending="NOV")
    vr = la["A590698F"] / denominator_q * 100

    start = "2010Q2"
    frame = pd.DataFrame({"Unemployment Rate": ue_rate_q, "Job Vacancy Rate": vr})
    frame = frame.loc[frame.index >= start].dropna()

    # --- Draw the chart
    # row positions to annotate with their quarter
    labelled_rows = [0, -1, -5, -9, -13, -17]  # Will need to update from time to time
    ax = xy_plot(frame, "Beveridge curve", point_labels=labelled_rows)

    # first fit: everything before 2020Q2 plus everything from 2022Q4
    before = frame.loc[frame.index < "2020Q2"].index
    after = frame.loc[frame.index >= "2022Q4"].index
    add_regression(
        ax,
        frame.loc[before.union(after)],
        "Pre-COVID Beveridge curve (stylised)",
        degree=2,
    )

    # second fit: 2020Q3 to 2022Q2 inclusive
    in_window = (frame.index >= "2020Q3") & (frame.index <= "2022Q2")
    add_regression(
        ax,
        frame.loc[in_window],
        "Post-COVID Beveridge curve (stylised)",
        degree=2,
        color="darkgreen",
        linestyle="-.",
    )

    finalise_plot(
        ax,
        title="Beveridge Curve: Unemployment Rate vs Job Vacancy Rate",
        ylabel="Job Vacancy Rate (%)",
        xlabel="Unemployment Rate (%)",
        lfooter="Australia, Seasonally adjusted. Unemployment rate is quarterly mean "
        + "(quarters ending Feb, May, Aug, Nov).",
        rfooter="Source: ABS 6202.0, 6354.0",
        legend={"fontsize": "x-small", "loc": "upper right"},
        show=SHOW,
    )


beveridge_curve()

### Okun's curve

Note: this is a fairly weak relationship.

In [15]:
def okun() -> None:
    """Plot Okun's curve: change in unemployment rate against GDP growth.

    Two scatter charts are produced, one using quarter-on-quarter (Q/Q)
    changes and one using through-the-year (Y/Y) changes. Each carries a
    regression line over all observations plus one regression line per
    decade from the 1960s to the 2020s. For the Y/Y case only, a further
    chart traces the observations since 2021Q3 with a linear fit.

    Decades with fewer than two observations are skipped, because
    np.polyfit cannot fit a straight line to them (it raises on empty
    input)."""

    # seasonally adjusted unemployment rate - post 1978
    lfs, _ = ra.read_abs_series(
        "6202.0", ["A84423043C", "A84423047L"], single_excel_only="6202001"
    )
    ue_rate = ra.monthly_to_qtly(100 - (lfs["A84423043C"] / lfs["A84423047L"] * 100))

    # get historic unemployment - quarterly data
    old, _old_meta = ra.read_abs_series(
        cat="1364.0.15.003", series_id=["A2454521V", "A2454517C"]
    )
    uemployed, labour_force = old["A2454521V"], old["A2454517C"]
    ue_hist = ((uemployed / labour_force) * 100).dropna()

    # combine into a single quarterly unemployment rate series:
    # prefer ue_rate, and fall back to ue_hist where ue_rate is missing
    u = (
        ue_rate.reindex(
            pd.period_range(start=ue_hist.index.min(), end=ue_rate.index.max())
        )
        .sort_index()
        .pipe(lambda x: x.where(x.notnull(), ue_hist))
    )

    # GDP - seasonally adjusted chain volume measure
    na, _ = ra.read_abs_series(
        "5206.0", "A2304402X", single_excel_only="5206001_Key_Aggregates"
    )
    gdp = na["A2304402X"]

    # one colour per decade (1960s to 2020s); line styles cycle
    colors = [
        "grey",
        "darkorange",
        "darkorchid",
        "maroon",
        "navy",
        "green",
        "hotpink",
    ]
    styles = ["--", "-.", ":"] * 3
    min_points = 2  # fewest observations needed to fit a straight line

    for n, n_desc in zip((1, 4), ("Q/Q", "Y/Y")):
        du = u.diff(n).dropna()

        d_gdp = gdp.pct_change(n) * 100

        # join the two series and plot
        x, y = f"Change Unemployment Rate {n_desc}", f"GDP Growth Rate {n_desc}"
        data = pd.DataFrame({x: du, y: d_gdp}).dropna()
        ax = data.plot.scatter(x=x, y=y, s=10, alpha=0.5)

        # regression line across all observations
        m, b = np.polyfit(data[x].to_numpy(), data[y].to_numpy(), 1)
        xx = np.linspace(data[x].min(), data[x].max(), 2)
        yy = m * xx + b
        ax.plot(xx, yy, color="red", lw=2, label=f"Regression line (m={m:0.2f})")

        # one regression line per decade
        for i, decade in enumerate(range(1960, 2030, 10)):
            subset = data[data.index.year // 10 == decade // 10]
            if len(subset) < min_points:
                # nothing sensible to fit; the decade keeps its colour slot
                # so the other decades look the same as before
                continue
            m, b = np.polyfit(subset[x].to_numpy(), subset[y].to_numpy(), 1)
            xx = np.linspace(subset[x].min(), subset[x].max(), 2)
            yy = m * xx + b
            ax.plot(
                xx,
                yy,
                color=colors[i],
                ls=styles[i],
                lw=1.5,
                label=f"Regression line for {decade}s (m={m:0.2f})",
            )

        finalise_plot(
            ax,
            title=f"Okun Curve: Change in Unemployment Rate vs GDP Growth {n_desc}",
            ylabel=y + " (%)",
            xlabel=x + " (pp)",
            lfooter="Australia. Unemployment rate is quarterly mean. "
            + "In the legend, m is the slope of the regression line.",
            rfooter="ABS: 6202.0, 5206.0, 1364.0.15.003",
            legend={"fontsize": 9, "loc": "best"},
            x0=True,
            y0=True,
            show=SHOW,
        )

        if n == 1:
            # quarterly data is just too noisy to be useful
            continue

        # recent data
        recent_date = "2021Q3"
        recent_data = data[recent_date:]
        ax = xy_plot(
            recent_data,
            "Okun Curve",
            point_labels=(0, -1),
        )
        add_regression(
            ax,
            recent_data,
            "Post-COVID Okun curve (stylised)",
            degree=1,
            color="darkgreen",
            linestyle="-.",
        )

        finalise_plot(
            ax,
            title=f"{n_desc} Okun Curve: since {recent_date}",
            ylabel=y + " (%)",
            xlabel=x + " (pp)",
            lfooter="Australia. Unemployment rate is quarterly mean. ",
            rfooter="ABS: 6202.0, 5206.0",
            legend={"fontsize": 9, "loc": "best"},
            x0=True,
            y0=True,
            show=SHOW,
        )


okun()

## Finished

In [16]:
# watermark
%load_ext watermark
%watermark -u -n -t -v -iv -w

Last updated: Thu Aug 01 2024 13:29:09

Python implementation: CPython
Python version       : 3.12.4
IPython version      : 8.26.0

readabs   : 0.0.17
matplotlib: 3.9.1
pandas    : 2.2.2
numpy     : 1.26.4

Watermark: 2.4.3



In [17]:
print("Finished")

Finished
