# RBA SOMP Forecasts - Historical Performance

## Set-up

In [1]:
# system imports
from pathlib import Path

import abs_data_capture

# local imports
import common

# analytic imports
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from plotting import clear_chart_dir, finalise_plot, set_chart_dir

# plotting constants
chart_dir = "./CHARTS/SOMP"
Path(chart_dir).mkdir(parents=True, exist_ok=True)
clear_chart_dir(chart_dir)
set_chart_dir(chart_dir)
plt.style.use("fivethirtyeight")
SHOW = False

## Collect SOMP data

### Get raw SOMP tables

In [2]:
Q = {"feb": 1, "may": 2, "aug": 3, "nov": 4}
START_YEAR = 2018
STOP_YEAR = 2026


def collect_somp_data() -> dict[str, list[pd.DataFrame]]:
    """Collect raw SOMP data."""

    cache_dir = "./RBA_SOMP_CACHE"
    cache = Path(cache_dir)
    cache.mkdir(parents=True, exist_ok=True)

    pool = {}
    for year in range(START_YEAR, STOP_YEAR):
        for qtr in Q:
            url = (
                "https://www.rba.gov.au/publications"
                f"/smp/{year}/{qtr}/forecasts.html"
            )
            try:
                file = common.get_file(url, cache)
            except common.HttpError as e:
                print(f"Did not get: {year}-{qtr}")
                continue

            frame = pd.read_html(file, index_col=0)
            q = Q[qtr]
            index = f"{year}-Q{q}"
            pool[index] = frame

    return pool


raw_somp = collect_somp_data()

Did not get: 2018-feb
Did not get: 2018-may
Did not get: 2018-aug
Retrieving data from cache: RBA_SOMP_CACHE/7c7ac37276deacd664cc0ada1d6a1707--forecasts.html
Retrieving data from cache: RBA_SOMP_CACHE/a1379dd0a92bd4626c9f86a01864ca7a--forecasts.html
Retrieving data from cache: RBA_SOMP_CACHE/73d98da9e9b4cb61f9a2b7ec484666c8--forecasts.html
Retrieving data from cache: RBA_SOMP_CACHE/dbc5bd06d495148c7ec5d9f423911d09--forecasts.html
Retrieving data from cache: RBA_SOMP_CACHE/9a5ae921c14572463601b945bf77e2ac--forecasts.html
Retrieving data from cache: RBA_SOMP_CACHE/67549187a6a3d7fffe7a792631b7b08d--forecasts.html
Retrieving data from cache: RBA_SOMP_CACHE/b37e01367aeff06d7982501f07bb0dbe--forecasts.html
Retrieving data from cache: RBA_SOMP_CACHE/6c8ad34d268f122e9d7f2dc5231e1fdc--forecasts.html
Retrieving data from cache: RBA_SOMP_CACHE/bab514a8e41482bad760e1d86696c366--forecasts.html
Retrieving data from cache: RBA_SOMP_CACHE/e8497feee2f9cc80ff8502b518709a2c--forecasts.html
Retrieving dat

### Reorganise SOMP data

In [3]:
def reorganise_somp(raw_somp) -> dict[str, pd.DataFrame]:
    """Reorganise the SOMP data into domains."""

    pool = {}
    for index, table in raw_somp.items():
        p = pd.Period(index, freq="Q-NOV")
        frame = table[0]
        for item in frame.index:
            if item[0] == "(":
                continue  # ignore footnotes
            row = frame.loc[item]
            row.name = index
            if item not in pool:
                pool[item] = pd.DataFrame(row)
            else:
                pool[item] = pd.concat([pool[item], pd.DataFrame(row)], axis=1)

    for item in pool:
        pool[item].columns = pd.PeriodIndex(pool[item].columns, freq="Q-NOV")
        pool[item].index = pd.PeriodIndex(pool[item].index, freq="Q-Dec")
        for col in pool[item].columns:
            pool[item][col] = (
                pool[item][col]
                .str.replace("¼", ".25")
                .str.replace("½", ".5")
                .str.replace("¾", ".75")
                .str.replace("−", "-")
                .astype(float)
            )

    return pool


somp = reorganise_somp(raw_somp)

In [4]:
somp.keys()

dict_keys(['Gross domestic product', 'Household consumption', 'Dwelling investment', 'Business investment', 'Public demand', 'Gross national expenditure', 'Imports', 'Exports', 'Real household disposable income', 'Terms of trade', 'Major trading partner (export-weighted) GDP', 'Unemployment rate (quarterly,\xa0%)', 'Employment', 'Wage price index', 'Nominal (non-farm) average earnings per hour', 'Trimmed mean inflation', 'Consumer price index', 'Unemployment rate (quarterly, %)'])

## Inflation

In [5]:
inflation_measures = {
    "Wage price index": [
        "6345",
        "1",
        "SA",
        "Percentage Change From Corresponding Quarter of Previous Year",
        "Australia ;  Total hourly rates of pay excluding bonuses ;  "
        + "Private and Public ;  All industries ;",
    ],
    "Trimmed mean inflation": [
        "6401",
        "8",
        "SA",
        "Percentage Change from Corresponding Quarter of Previous Year",
        "Trimmed Mean ;  Australia ;",
    ],
    "Consumer price index": [
        "6401",
        "2",
        "Orig",
        "Percentage Change from Corresponding Quarter of Previous Year",
        "All groups CPI ;  Australia ;",
    ],
}


def get_abs(measure_set):
    pool = {}
    (
        did_col,
        id_col,
        table_col,
        type_col,
        unit_col,
        tdesc_col,
    ) = abs_data_capture.get_meta_constants()

    for m, (cat, table, season, did1, did2) in measure_set.items():
        print(m)

        # get the ABS data
        data_dict = abs_data_capture.get_abs_meta_and_data(cat)
        _, _, meta_data = abs_data_capture.get_fs_constants(cat)
        meta = data_dict[meta_data]
        data = data_dict[table]

        # get the specific series we want to plot
        search_terms = {
            table: table_col,
            {"SA": "Seasonally Adjusted", "Orig": "Original"}[season]: type_col,
            did1: did_col,
            did2: did_col,
        }
        series_id, _ = abs_data_capture.find_id(meta, search_terms, verbose=True)
        series = data[series_id]
        pool[m] = series

    return pool


abs_inflation = get_abs(inflation_measures)

Wage price index
Found URL for a ZIP file on ABS web page
Retrieving data from cache: ABS_CACHE/6052268df887aaf3a0774941b419dc9b--Time-series-spreadsheets-all.zip
Extracting DataFrames from the zip-file ...
Trimmed mean inflation
Found URL for a ZIP file on ABS web page
Retrieving data from cache: ABS_CACHE/5f6bd6bc84c24ad0c445fc096bd08fbd--All-Time-Series-Spreadsheets.zip
Extracting DataFrames from the zip-file ...
Consumer price index


In [6]:
def plot_inflation():
    
    for m, series in abs_inflation.items():
        (cat, table, season, did1, did2) = inflation_measures[m]
        ax = series[series.index.year >= START_YEAR].plot(
            lw=3, color="#dd0000", label=f"ABS"
        )
        
        df = somp[m]
        n_colours = len(df.columns)
        colors = plt.cm.viridis(np.linspace(0, 1, n_colours))
        for col, color in zip(df.columns, colors):
            s = df[col].dropna()
            s.plot(ax=ax, lw=1.5, c=color, label=col)
            ax.text(x=s.index[-1], y=s.iloc[-1], s=str(col)[2:], fontsize=6)
            
        finalise_plot(
            ax,
            title=f"{m} (with Qrtly RBA SOMP forecasts)",
            ylabel="Per cent",
            lfooter=f"Australia. {season} series.",
            rfooter=f"RBA SOMP. ABS {cat} {table}.",
            legend={"ncols": 3, "fontsize": "xx-small"},
            show=SHOW,
        )


plot_inflation()