# RBA SOMP Forecasts

## Set-up

In [1]:
# system imports
from pathlib import Path

import abs_data_capture

# local imports
import common

# analytic imports
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from plotting import clear_chart_dir, finalise_plot, set_chart_dir

# plotting constants
# All charts from this notebook go into one directory, created on demand.
chart_dir = "./CHARTS/SOMP"
Path(chart_dir).mkdir(parents=True, exist_ok=True)
# NOTE(review): presumably removes charts left over from earlier runs and then
# makes chart_dir the default output location - confirm in plotting.py
clear_chart_dir(chart_dir)
set_chart_dir(chart_dir)
plt.style.use("fivethirtyeight")
# Passed as finalise_plot(show=...) by the plotting functions in this notebook.
SHOW = False

## Collect SOMP data

### Get raw SOMP tables

In [2]:
# Month slug used in the SOMP URL -> quarter number used in the "<year>-Q<n>" labels.
Q = {"feb": 1, "may": 2, "aug": 3, "nov": 4}
# First SOMP year to collect; also used as the lower cut-off when plotting.
START_YEAR = 2019
# Used as the (exclusive) end of range(START_YEAR, STOP_YEAR), so the last
# year collected is STOP_YEAR - 1.
STOP_YEAR = 2025  # - this will need updating from year to year


def collect_somp_data() -> dict[str, list[pd.DataFrame]]:
    """Collect raw SOMP data.

    Walks every year in range(START_YEAR, STOP_YEAR) and every release
    month in Q, fetches the matching RBA forecast page through
    common.get_file() (which is handed the local cache directory), and
    parses the page's HTML tables with pd.read_html().

    Returns a dict keyed by "<year>-Q<n>"; each value is the list of
    DataFrames found on that page. Pages that raise common.HttpError are
    reported on stdout and left out of the result.
    """

    cache = Path("./RBA_SOMP_CACHE")
    cache.mkdir(parents=True, exist_ok=True)

    root = "https://www.rba.gov.au/publications"
    tables = {}
    for year in range(START_YEAR, STOP_YEAR):
        for month, quarter in Q.items():
            if year < 2024:
                url = root + f"/smp/{year}/{month}/forecasts.html"
            else:
                # Change of RBA data location in Feb 2024 ...
                url = root + f"/smp/{year}/{month}/outlook.html"

            try:
                page = common.get_file(url, cache)
            except common.HttpError:
                print(f"Did not get: {year}-{month}")
                continue

            # the first column of each table holds the row labels
            tables[f"{year}-Q{quarter}"] = pd.read_html(page, index_col=0)

    return tables

# Fetch (or re-use cached copies of) every SOMP forecast page, keyed "<year>-Q<n>".
raw_somp = collect_somp_data()

Retrieving data from cache: RBA_SOMP_CACHE/a1379dd0a92bd4626c9f86a01864ca7a--forecasts.html
Retrieving data from cache: RBA_SOMP_CACHE/73d98da9e9b4cb61f9a2b7ec484666c8--forecasts.html
Retrieving data from cache: RBA_SOMP_CACHE/dbc5bd06d495148c7ec5d9f423911d09--forecasts.html
Retrieving data from cache: RBA_SOMP_CACHE/9a5ae921c14572463601b945bf77e2ac--forecasts.html
Retrieving data from cache: RBA_SOMP_CACHE/67549187a6a3d7fffe7a792631b7b08d--forecasts.html
Retrieving data from cache: RBA_SOMP_CACHE/b37e01367aeff06d7982501f07bb0dbe--forecasts.html
Retrieving data from cache: RBA_SOMP_CACHE/6c8ad34d268f122e9d7f2dc5231e1fdc--forecasts.html
Retrieving data from cache: RBA_SOMP_CACHE/bab514a8e41482bad760e1d86696c366--forecasts.html
Retrieving data from cache: RBA_SOMP_CACHE/e8497feee2f9cc80ff8502b518709a2c--forecasts.html
Retrieving data from cache: RBA_SOMP_CACHE/889b66a1af4e66f440c092d128094cf3--forecasts.html
Retrieving data from cache: RBA_SOMP_CACHE/935401f104a40cf7282a69bdc3c647fd--for

### Reorganise SOMP data

In [3]:
def stage1_reorg(raw_somp: dict[str, list[pd.DataFrame]]) -> dict[str, pd.DataFrame]:
    """Reorganise the SOMP data into domains.

    Parameters
    ----------
    raw_somp
        Maps a SOMP label (e.g. "2024-Q1") to the list of tables parsed
        from that SOMP's page. Only the first table of each list is used,
        and the input tables are not modified.

    Returns
    -------
    dict[str, pd.DataFrame]
        One frame per forecast item, keyed by the item's row label in
        Title Case (any label containing "Unemployment Rate" is filed
        under exactly that key). Each column is one SOMP, named by its
        label; the rows are the column labels of the source table.
    """

    ue = "Unemployment Rate"
    collected: dict[str, list[pd.Series]] = {}
    for somp_label, tables in raw_somp.items():
        # work on a copy so the caller's raw tables stay untouched
        frame = tables[0].copy()

        for col in frame.columns:
            # Fix the funny fractions thing here (and the unicode minus sign)
            text = (
                frame[col]
                .astype(str)
                .str.replace("¼", ".25", regex=False)
                .str.replace("½", ".5", regex=False)
                .str.replace("¾", ".75", regex=False)
                .str.replace("−", "-", regex=False)
            )
            # force to numeric; anything that is not a number becomes NaN
            frame[col] = pd.to_numeric(text, errors="coerce")

        # delete rows that are just text (now all NaN) - introduced Feb 2024
        frame = frame.dropna(how="all")

        # standardise index text (to Title Case)
        # to fix table mismatches pre-2024
        frame.index = frame.index.str.title()

        # remove odd footnote markers, e.g. "(A)", introduced in Feb 2024
        frame.index = frame.index.str.replace(r"\([A-Z]\)$", "", regex=True)

        for item, row in frame.iterrows():
            if not isinstance(item, str) or not item:
                continue  # no usable label for this row
            if item.startswith("("):
                continue  # ignore footnotes
            key = ue if ue in item else item
            collected.setdefault(key, []).append(row.rename(somp_label))

    # one concat per item rather than growing each frame column by column
    return {key: pd.concat(rows, axis=1) for key, rows in collected.items()}


# Regroup the raw per-SOMP tables into one frame per forecast item.
somp_partial = stage1_reorg(raw_somp)

In [4]:
# Show which forecast items were found across the collected SOMPs.
somp_partial.keys()

dict_keys(['Gross Domestic Product', 'Household Consumption', 'Dwelling Investment', 'Business Investment', 'Public Demand', 'Gross National Expenditure', 'Imports', 'Exports', 'Real Household Disposable Income', 'Terms Of Trade', 'Major Trading Partner (Export-Weighted) Gdp', 'Unemployment Rate', 'Employment', 'Wage Price Index', 'Nominal (Non-Farm) Average Earnings Per Hour', 'Trimmed Mean Inflation', 'Consumer Price Index', 'Hours-Based Underutilisation Rate (Quarterly, %)', 'Nominal Average Earnings Per Hour (Non-Farm)', 'Cash Rate (%)', 'Trade-Weighted Index (Index)', 'Brent Crude Oil Price (Us$/Bbl)', 'Estimated Resident Population', 'Labour Productivity', 'Household Savings Rate (%)', 'Real Wage Price Index', 'Real Average Earnings Per Hour (Non-Farm)'])

In [5]:
def reorganise_somp(pool: dict[str, pd.DataFrame]) -> dict[str, pd.DataFrame]:
    """Convert the text labels of every SOMP frame into quarterly periods.

    Parameters
    ----------
    pool
        Maps a forecast item to a frame whose column labels are SOMP
        labels and whose row labels are forecast-period labels, both as
        text. The frames in pool are not modified.

    Returns
    -------
    dict[str, pd.DataFrame]
        A new dict with the same keys, holding copies of the frames whose
        columns are a PeriodIndex with freq "Q-NOV" and whose index is a
        PeriodIndex with freq "Q-DEC".
    """
    converted = {}
    for item, frame in pool.items():
        frame = frame.copy()  # leave the caller's frame untouched
        frame.columns = pd.PeriodIndex(frame.columns, freq="Q-NOV")
        frame.index = pd.PeriodIndex(frame.index, freq="Q-DEC")
        converted[item] = frame
    return converted


# Final SOMP data set: the plotting code below reads forecasts from `somp`.
somp = reorganise_somp(somp_partial)

In [6]:
# Show the forecast items available for plotting.
somp.keys()

dict_keys(['Gross Domestic Product', 'Household Consumption', 'Dwelling Investment', 'Business Investment', 'Public Demand', 'Gross National Expenditure', 'Imports', 'Exports', 'Real Household Disposable Income', 'Terms Of Trade', 'Major Trading Partner (Export-Weighted) Gdp', 'Unemployment Rate', 'Employment', 'Wage Price Index', 'Nominal (Non-Farm) Average Earnings Per Hour', 'Trimmed Mean Inflation', 'Consumer Price Index', 'Hours-Based Underutilisation Rate (Quarterly, %)', 'Nominal Average Earnings Per Hour (Non-Farm)', 'Cash Rate (%)', 'Trade-Weighted Index (Index)', 'Brent Crude Oil Price (Us$/Bbl)', 'Estimated Resident Population', 'Labour Productivity', 'Household Savings Rate (%)', 'Real Wage Price Index', 'Real Average Earnings Per Hour (Non-Farm)'])

## Annual Growth Measures

In [7]:
# ABS series to chart against the SOMP forecasts of the same name.
# Each key must also be a key of `somp`. Each value is a six-item list that
# get_abs() unpacks as (cat, table, season, did1, did2, calc):
#   cat    - ABS catalogue identifier handed to abs_data_capture
#   table  - table to take the series from (also used as a search term)
#   season - "SA" (Seasonally Adjusted) or "Orig" (Original)
#   did1   - first search term matched against the data item description
#   did2   - second search term matched against the data item description
#   calc   - True: convert the series to percentage change on the value four
#            periods earlier; False: use the series as published
growth_measures = {
    "Gross Domestic Product": [
        "5206",
        "1",
        "SA",
        "Chain volume measures ;",
        "Gross domestic product:",
        True,
    ],
    "Household Consumption": [
        "5206",
        "8",
        "SA",
        "Chain volume measures ;",
        "FINAL CONSUMPTION EXPENDITURE:",
        True,
    ],
    "Dwelling Investment": [
        "5206",
        "2",
        "SA",
        "Private ;  Gross fixed capital formation - Dwellings - Total ;",
        "Private ;  Gross fixed capital formation - Dwellings - Total ;",
        True,
    ],
    #"Business Investment": [
    #
    #    # -- Needs work - ‘Business investment’ and ‘Public demand’ have been
    #    # adjusted to exclude the effects of net sales of existing capital
    #    # assets between the private and other (public and external) sectors.
    #
    #    "5206",
    #    "2",
    #    "SA",
    #    "Private ;  Gross fixed capital formation ;",
    #    "Private ;  Gross fixed capital formation ;",
    #    True,
    #],
    #"Public Demand": [
    #
    #    # Needs work - see above
    #
    #    "5206",
    #    "24",
    #    "SA",
    #    "Public ;  Final demand: Chain volume measures ;",
    #    "Public ;  Final demand: Chain volume measures ;",
    #    True,
    #],
    "Gross National Expenditure": [
        "5206",
        "2",
        "SA",
        "Gross national expenditure ;",
        "Gross national expenditure ;",
        True,
    ],
    "Imports": [
        "5206",
        "2",
        "SA",
        "Imports of goods and services ;",
        "Imports of goods and services ;",
        True,
    ],
    "Exports": [
        "5206",
        "2",
        "SA",
        "Exports of goods and services ;",
        "Exports of goods and services ;",
        True,
    ],
    # The three price/wage series below are taken as published annual
    # percentage changes, so calc is False.
    "Wage Price Index": [
        "6345",
        "1",
        "SA",
        "Percentage Change From Corresponding Quarter of Previous Year",
        "Australia ;  Total hourly rates of pay excluding bonuses ;  "
        + "Private and Public ;  All industries ;",
        False,
    ],
    "Trimmed Mean Inflation": [
        "6401",
        "8",
        "SA",
        "Percentage Change from Corresponding Quarter of Previous Year",
        "Trimmed Mean ;  Australia ;",
        False,
    ],
    "Consumer Price Index": [
        "6401",
        "2",
        "Orig",
        "Percentage Change from Corresponding Quarter of Previous Year",
        "All groups CPI ;  Australia ;",
        False,
    ],

    # --- this chart looks completely wrong - need to think about it
    #"Nominal (Non-Farm) Average Earnings Per Hour": [
    #    "5206",
    #    "24",
    #    "SA",
    #    "Non-farm compensation of employees per hour: Current prices ;",
    #    "Non-farm compensation of employees per hour: Current prices ;",
    #    True,
    #],
}


def get_abs(measure_set):
    """Fetch one ABS series for each entry of measure_set.

    measure_set maps a measure name to a six-item sequence
    (cat, table, season, did1, did2, calc). For each entry the ABS data
    for catalogue `cat` is obtained through abs_data_capture, and the
    series in `table` whose meta data matches the table, the series type
    for `season` ("SA" or "Orig") and both `did1` and `did2` is selected.
    When `calc` is true the series is converted to percentage change on
    the value four periods earlier.

    Returns a dict of measure name -> selected series. Each measure name
    is printed as it is processed.
    """
    # only three of the six meta-data column names are needed here
    did_col, _, table_col, type_col, _, _ = abs_data_capture.get_meta_constants()

    selected = {}
    for name, spec in measure_set.items():
        cat, table, season, did1, did2, calc = spec
        print(name)

        # get the ABS data
        abs_dict = abs_data_capture.get_abs_meta_and_data(cat)
        meta_key = abs_data_capture.get_fs_constants(cat)[2]
        meta = abs_dict[meta_key]
        data = abs_dict[table]

        # get the specific series we want to plot
        series_type = {"SA": "Seasonally Adjusted", "Orig": "Original"}[season]
        search_terms = {
            table: table_col,
            series_type: type_col,
            did1: did_col,
            did2: did_col,
        }
        found = abs_data_capture.find_id(meta, search_terms, verbose=True)
        series = data[found[0]]

        if calc:
            year_ago = series.shift(4)
            series = (series / year_ago - 1) * 100.0
        selected[name] = series

    return selected


# ABS annual-growth series for every measure listed in growth_measures.
abs_annual_growth = get_abs(growth_measures)

Gross Domestic Product
Found URL for a ZIP file on ABS web page
Retrieving data from cache: ABS_CACHE/5017be8287dea982b865689c39593eff--All_time_series_workbooks.zip
Extracting DataFrames from the zip-file ...
Household Consumption
Dwelling Investment
Gross National Expenditure
Imports
Exports
Wage Price Index
Found URL for a ZIP file on ABS web page
Retrieving data from cache: ABS_CACHE/6052268df887aaf3a0774941b419dc9b--Time-series-spreadsheets-all.zip
Extracting DataFrames from the zip-file ...
Trimmed Mean Inflation
Found URL for a ZIP file on ABS web page
Retrieving data from cache: ABS_CACHE/009cab4e5b98e8a0836b649cecbac43a--All-Time-Series-Spreadsheets.zip
Extracting DataFrames from the zip-file ...
Consumer Price Index


In [8]:
def plot_annual_growth(growth_data, measures):
    """Chart each ABS growth series against the RBA SOMP forecasts for it.

    Parameters
    ----------
    growth_data
        Maps a measure name to its ABS growth series (in per cent).
    measures
        Maps the same names to a sequence whose first three items are
        (cat, table, season); they are only used in the chart footers.

    The forecasts are read from the module-level `somp` dict, which must
    hold a frame for every name in growth_data. One chart per measure is
    produced through finalise_plot(); nothing is returned.
    """
    too_long = 55  # titles longer than this get wrapped onto a second line

    for m, series in growth_data.items():
        cat, table, season, *_ = measures[m]
        df = somp[m]
        colors = plt.cm.viridis(np.linspace(0, 1, len(df.columns)))
        last = df.columns[-1]
        end_points = set()

        _, ax = plt.subplots()
        for col, color in zip(df.columns, colors):
            s = df[col].dropna().astype(float)  # kludge
            if s.empty:
                continue  # this SOMP has no numeric forecasts for the measure
            if s.index[0].year < START_YEAR:
                continue
            tag = str(col)[2:]  # SOMP label without its first two characters
            # the most recent SOMP is highlighted
            line_color = "darkred" if col == last else color
            s.plot(ax=ax, lw=1.5, c=line_color, label=tag)
            x, y = s.index[-1], s.iloc[-1]
            # let's minimise over-plotting of text
            va = "top" if (x, y) in end_points else "bottom"
            ax.text(x=x, y=y, s=tag, fontsize=6, va=va)
            end_points.add((x, y))

        series[series.index.year >= START_YEAR - 1].plot(
            ax=ax, lw=3, color="red", alpha=0.6, label="ABS"
        )

        title = f"{m} (with Qrtly RBA SOMP forecasts)"
        if len(title) > too_long:
            # break the line before the last " (" in the title
            title = "\n(".join(title.rsplit(" (", 1))
        finalise_plot(
            ax,
            title=title,
            ylabel="Growth (per cent per year)",
            lfooter=f"Australia. {season} series.",
            rfooter=f"RBA SOMP. ABS {cat} {table}.",
            legend={"ncols": 1, "loc": "center left", "fontsize": "xx-small"},
            y0=True,
            show=SHOW,
        )


# One chart per measure in growth_measures.
plot_annual_growth(abs_annual_growth, growth_measures)

## Real household disposable income
See table H from the RBA Statistical Tables: 'Real household disposable income' is household disposable income after the deduction of interest payments, deflated by the implicit price deflator for household consumption expenditure; includes income from unincorporated enterprises.

In [9]:
# Inputs for real household disposable income, in the six-item layout that
# get_abs() unpacks: (cat, table, season, did1, did2, calc). calc is False
# for both, so the series are used as published.
di_measures = {
    'Disposable income': [
        "5206",
        "20",
        "SA",
        "GROSS DISPOSABLE INCOME ;",
        "GROSS DISPOSABLE INCOME ;",
        False,
     ],
    'HFCE deflator': [
        "5206",
        "5",
        "SA",
        # NOTE(review): a single space as the first search term - presumably a
        # placeholder that matches any description; confirm against find_id()
        " ",
        "Households ;  Final consumption expenditure ;",
        False,
     ],
}

def plot_di():
    """Plot disposable income

    Fetches the two series listed in di_measures, divides disposable
    income by the HFCE deflator (scaled by 1/100), converts the result to
    percentage change on the value four periods earlier, and charts it
    with plot_annual_growth() under the name
    'Real Household Disposable Income'.

    NOTE(review): the series selected is "GROSS DISPOSABLE INCOME ;" used
    as published; no interest payments are deducted here - confirm this
    matches the RBA definition being charted.
    """

    label = 'Real Household Disposable Income'
    abs_series = get_abs(di_measures)

    price_level = abs_series['HFCE deflator'] / 100.0
    real_income = abs_series['Disposable income'] / price_level
    year_ago = real_income.shift(4)
    annual_growth = (real_income / year_ago - 1) * 100.0

    # plot_annual_growth() only uses the catalogue, table and season entries
    # for the chart footers, so describe the derived series with a stand-in
    source_tables = ", ".join(spec[1] for spec in di_measures.values())
    stand_in = {label: ["5206", source_tables, "SA", "-", "-", False]}
    plot_annual_growth({label: annual_growth}, stand_in)


# Chart real household disposable income growth against the SOMP forecasts.
plot_di()

Disposable income
HFCE deflator


## Unemployment rate

## Employment

## Finished

In [10]:
# Record when the notebook was last run and the package versions in use.
%load_ext watermark
%watermark -u -n -t -v -iv -w

Last updated: Tue Feb 06 2024 18:26:54

Python implementation: CPython
Python version       : 3.11.7
IPython version      : 8.21.0

matplotlib: 3.8.2
requests  : 2.31.0
numpy     : 1.26.3
pandas    : 2.2.0

Watermark: 2.4.3



In [11]:
# Marker that every cell above ran to completion.
print("Finished.")

Finished.
