# ABS Population Growth multi-measure

## Python set-up

In [1]:
# system imports
from functools import cache
from pathlib import Path

# analytic imports
import pandas as pd
from pandas import DataFrame
import numpy as np
import readabs as ra
from readabs import metacol as mc

from readabs import read_abs_series, recalibrate, qtly_to_monthly

In [2]:
# local imports
from plotting import (
    line_plot, 
    clear_chart_dir, 
    set_chart_dir, 
    plot_covid_recovery, 
    plot_growth_finalise, 
)


In [3]:
# pandas display settings - effectively "show everything" when a frame is displayed
pd.set_option("display.max_rows", 999999)
pd.set_option("display.max_columns", 999)
pd.set_option("display.max_colwidth", 100)

# chart output: make sure the directory exists, empty it, then register it
# with the plotting helpers (order matters: create -> clear -> set)
CHART_DIR = "./CHARTS/Population/"
Path(CHART_DIR).mkdir(parents=True, exist_ok=True)
clear_chart_dir(CHART_DIR)
set_chart_dir(CHART_DIR)

# flags passed to every plotting call below as show= and file_type=
SHOW = False
FILE_TYPE = "png"

# file-name tags for the (whole series, recent period) chart pair
plot_tags = ("", "-recent")


## Download data from the ABS

In [4]:
@cache
def get_data() -> tuple[DataFrame, DataFrame]:
    """Download the ABS series used in this notebook and derive extra measures.

    All population data is in thousands.
    All population growth data is in thousands per year.
    All population growth rate data is in percent per year.

    The result is memoised by functools.cache, so every call hands back the
    same two DataFrame objects; callers should not modify them in place.

    Returns:
    A tuple containing a dataframe of data items (one column per series
    title or derived measure), and a dataframe of metadata (one row per
    downloaded series title)."""

    # --- data capture
    key_aggregates = "5206001_Key_Aggregates"
    erp_table = "310101"
    wanted = (
        # (Series ID, Category ID, single-excel-only table name, Short Series Title)
        (
            "A2133251W",
            "3101.0",
            erp_table,
            "Official Estimated Resident Population (ERP)",
        ),
        ("A2133255F", "3101.0", erp_table, "ERP Change Over Previous Year"),
        (
            "A2133256J",
            "3101.0",
            erp_table,
            "Percentage ERP Change Over Previous Year",
        ),
        (
            "A84423091W",
            "6202.0",
            "6202001",
            "Civilian population aged 15 years and over",
        ),
        (
            "A2302460K",
            "5206.0",
            key_aggregates,
            "GDP per capita: Chain volume measures (Orig)",
        ),
        (
            "A2302459A",
            "5206.0",
            key_aggregates,
            "Gross domestic product: Chain volume measures (Orig)",
        ),
        (
            "A2304402X",
            "5206.0",
            key_aggregates,
            "Gross domestic product: Chain volume measures (SA)",
        ),
        ("A85232568L", "3401.0", None, "Permanent and Long-term arrivals"),
        ("A85232558J", "3401.0", None, "Permanent and Long-term departures"),
    )

    series_by_title = {}
    meta_by_title = {}
    last_period = {}  # category id -> last period of the first series read from it
    for series_id, category_id, table_name, title in wanted:
        frame, table_meta = ra.read_abs_series(
            category_id, series_id, single_excel_only=table_name
        )
        selected = frame[series_id]

        if category_id not in last_period:
            last_period[category_id] = selected.index[-1]

        # quarterly series are moved onto a monthly index, without interpolation
        is_quarterly = "Q" in table_meta[mc.freq][series_id]
        if is_quarterly:
            selected = ra.qtly_to_monthly(selected, interpolate=False)
        series_by_title[title] = selected
        meta_by_title[title] = table_meta.loc[series_id]

    report_lines = [f"{category}: {period}" for category, period in last_period.items()]
    print("Data loaded, last period is:\n" + "\n".join(report_lines))

    # --- further calculations
    # civilian population: a monthly series, so a 12-row lag is one year
    civilian = series_by_title["Civilian population aged 15 years and over"]
    series_by_title["Civilian population (aged 15 years+) growth"] = civilian.diff(12)
    series_by_title["Civilian population (aged 15 years+) growth rate"] = (
        civilian.pct_change(12, fill_method=None)  # type: ignore[arg-type]
        * 100
    )

    # population implied by the national accounts: GDP / GDP per capita
    gdp_orig = series_by_title["Gross domestic product: Chain volume measures (Orig)"]
    gdp_pc_orig = series_by_title["GDP per capita: Chain volume measures (Orig)"]
    implicit = (gdp_orig / gdp_pc_orig) * 1_000
    series_by_title["Implicit population from National Accounts"] = implicit
    # tricky (original author's note): the data only has the active quarters,
    # so a lag of periods=4 rows is the correct one-year lag here
    series_by_title["Implicit population (from National Accounts) growth"] = (
        implicit.diff(periods=4)
    )
    series_by_title["Implicit population (from National Accounts) growth rate"] = (
        implicit.pct_change(periods=4, fill_method=None)  # type: ignore[arg-type]
        * 100
    )

    # net permanent and long-term movements, summed over a rolling 12 rows
    net_movements = (
        series_by_title["Permanent and Long-term arrivals"]
        - series_by_title["Permanent and Long-term departures"]
    )
    series_by_title["12 month net permanent and long-term migration"] = (
        net_movements.rolling(12).sum() / 1_000
    )

    return DataFrame(series_by_title), DataFrame(meta_by_title).T

## Plot

In [5]:
# Row offset from the end of the data used as the second entry of the
# `starts` tuple in the line_plot calls below (i.e. index[-RECENT]).
RECENT = 63  # months - a touch over 5 years

In [6]:
def plot_pop() -> None:
    """Chart the three population level estimates.

    Draws one combined line chart (with a second start point RECENT rows
    from the end), then one COVID-recovery chart per series."""

    wanted_columns = [
        "Official Estimated Resident Population (ERP)",
        "Civilian population aged 15 years and over",
        "Implicit population from National Accounts",
    ]
    all_data, _ = get_data()
    population = all_data[wanted_columns]

    # rescale from "Thousands"; the returned units string labels the y-axis
    population_r, units = recalibrate(population, "Thousands")
    recent_start = population_r.index[-RECENT]
    line_plot(
        population_r,
        starts=(None, recent_start),
        title="Population Estimates",
        ylabel=units,
        dropna=True,
        width=[2.5, 2, 1.5],
        style=["-", "--", "-."],
        lfooter="Australia. ",
        rfooter="ABS Cat. 3101.0, 6202.0, 5206.0",
        show=SHOW,
        file_type=FILE_TYPE,
    )

    # one recovery chart for each population measure
    for column in population_r.columns:
        plot_covid_recovery(
            population_r[column],
            title=column,
            ylabel=units,
            lfooter="Australia. ",
            rfooter="ABS",
            show=SHOW,
            file_type=FILE_TYPE,
        )


plot_pop()

Table 3401013 has no 'Index' sheet.
Table 3401014 has no 'Index' sheet.
Table 3401015 has no 'Index' sheet.
Table 3401016 has no 'Index' sheet.
Table 3401017 has no 'Index' sheet.
Data loaded, last period is:
3101.0: 2024Q2
6202.0: 2024-11
5206.0: 2024Q3
3401.0: 2024-10


In [7]:
def plot_growth() -> None:
    """Chart the annual population growth measures (thousands per year).

    The chart is drawn with two start points: the whole series, and the
    point RECENT rows from the end."""

    growth_columns = [
        "ERP Change Over Previous Year",
        "Civilian population (aged 15 years+) growth",
        "Implicit population (from National Accounts) growth",
        "12 month net permanent and long-term migration",
    ]
    all_data, _ = get_data()
    pop_growth_n = all_data[growth_columns]
    recent_start = pop_growth_n.index[-RECENT]

    line_plot(
        pop_growth_n,
        title="Population Growth",
        starts=(None, recent_start),
        ylabel="Thousands per year",
        dropna=True,
        width=[3, 2.5, 2, 1.5],
        style=["-", "--", "-", ":"],
        y0=True,
        lfooter="Australia. ",
        rfooter="ABS Cat. 3101.0, 3401.0, 5206.0, 6202.0",
        show=SHOW,
        file_type=FILE_TYPE,
    )


plot_growth()

In [8]:
def plot_growth_rate() -> None:
    """Plot the annual population growth rate measures (percent per year).

    Three series are charted - the official ERP growth rate, the civilian
    population (15 years+) growth rate and the growth rate of the population
    implied by the National Accounts. The chart is drawn with two start
    points: the whole series, and the point RECENT rows from the end."""

    pop_growth_pct = get_data()[0][
        [
            "Percentage ERP Change Over Previous Year",
            "Civilian population (aged 15 years+) growth rate",
            "Implicit population (from National Accounts) growth rate",
        ]
    ]
    line_plot(
        pop_growth_pct,
        title="Population Growth Rate",
        starts=(None, pop_growth_pct.index[-RECENT]),
        ylabel="Annual percentage change",
        dropna=True,
        # one width and one style per plotted series (three series); the
        # previous style list carried a stray fourth entry
        width=[2, 1.5, 1],
        style=["-", "--", "-"],
        y0=True,
        lfooter="Australia. ",
        rfooter="ABS Cat. 3101.0, 5206.0, 6202.0",
        show=SHOW,
        file_type=FILE_TYPE,
    )


plot_growth_rate()

## GDP per capita based on projected ERP

### Get the population data on a quarterly basis

In [9]:
# Typically, about a week after GDP, an updated ERP is released (1 Q behind GDP).

def extend_erp(n: int, erp: pd.Series, growth: pd.Series) -> pd.Series:
    """Fill missing values among the last n entries of the ERP series.

    Working from the oldest of the last n entries to the newest, each
    missing value is replaced by the preceding value multiplied by the
    most recent growth factor (growth.iloc[-1]).
    Expect n to be small - say 1 or 2.

    Args:
    n: how many trailing entries to inspect. If n < 1 the input series
       is returned untouched (the same object).
    erp: the ERP series, with missing values where a projection is needed.
       It is not modified; the filled values go into a copy.
    growth: growth factors (i.e. 1 + growth rate); only the last value
       is used.

    Returns:
    The series with the trailing gaps filled."""

    # NOTE(review): the same (latest) growth factor is applied to every
    # filled entry, even when more than one quarter is projected - confirm
    # this is intended rather than growth.iloc[-offset].

    if n < 1:
        return erp

    extended = erp.copy()  # do not mutate the caller's series
    # an entry can only be filled if it has a predecessor, hence len - 1
    fillable = min(n, len(extended) - 1)
    for offset in range(fillable, 0, -1):
        if pd.isna(extended.iloc[-offset]):
            extended.iloc[-offset] = extended.iloc[-offset - 1] * growth.iloc[-1]
    return extended


def get_qrtly_napop() -> pd.Series:
    """Return the population implied by the National Accounts, with
    missing values dropped and the index converted to quarterly periods."""

    all_data, _ = get_data()
    implied = all_data["Implicit population from National Accounts"].dropna()
    quarterly_index = pd.PeriodIndex(implied.index, freq='Q')
    implied.index = quarterly_index
    return implied


def get_qrtly_erp() -> pd.Series:
    """Return the official ERP series, with missing values dropped and
    the index converted to quarterly periods."""

    all_data, _ = get_data()
    official = all_data["Official Estimated Resident Population (ERP)"].dropna()
    quarterly_index = pd.PeriodIndex(official.index, freq='Q')
    official.index = quarterly_index
    return official


def get_qrtly_civpop() -> pd.Series:
    """Return the civilian population (aged 15 years and over) on a
    quarterly basis: only the March, June, September and December
    observations are kept, and the index is converted to quarterly periods."""

    all_data, _ = get_data()
    monthly = all_data["Civilian population aged 15 years and over"]
    in_quarter_end_month = monthly.index.month.isin((3, 6, 9, 12))
    quarter_ends = monthly[in_quarter_end_month]
    quarter_ends.index = pd.PeriodIndex(quarter_ends.index, freq='Q')
    return quarter_ends


def plot_erp_na() -> pd.DataFrame:
    """Plot the ERP against the population implied by the National Accounts.

    Two charts are produced, each over the last 17 quarterly rows: the
    population levels (labelled "Millions") and the quarter-on-quarter
    change (labelled "Thousands / Qtr"). The projected ERP is not charted;
    it is only returned.

    Note: uses the Civilian population growth to extend the ERP series
    over any of the last four quarters for which no ERP is available.

    Returns:
    A dataframe (source values divided by 1,000) with the columns
    "Projected ERP", "Estimated Resident Population (ERP)" and
    "Implied Population from National Accounts"."""

    napop = get_qrtly_napop()
    erp = get_qrtly_erp()
    civpop = get_qrtly_civpop()

    # quarterly growth factor (1 + growth rate) of the civilian population
    civ_q_growth = civpop.pct_change(1, fill_method=None) + 1
    # place the ERP on the civilian population's quarterly index; quarters
    # without an ERP value come through as missing and are then projected
    erp_proj = get_qrtly_erp().reindex(civpop.index, method=None, fill_value=None)
    erp_proj = extend_erp(4, erp_proj, civ_q_growth)

    data = pd.DataFrame(
        {
            "Projected ERP": erp_proj,
            "Estimated Resident Population (ERP)": erp,
            "Implied Population from National Accounts": napop,
        }
    ) / 1_000
    datap = data.drop("Projected ERP", axis=1)

    # local window for these quarterly charts; deliberately not the
    # module-level RECENT, which is used with the monthly data
    recent_quarters = 17
    line_plot(
        datap.iloc[-recent_quarters:],
        title="Population Estimates",
        ylabel="Millions",
        rfooter="ABS Cat 5206.0, 3101.0",
        show=SHOW,
        file_type=FILE_TYPE,
    )

    # undo the division by 1,000 before differencing quarter on quarter
    datap = (datap * 1_000).diff(1)
    line_plot(
        datap.iloc[-recent_quarters:],
        title="Population Growth Estimates",
        ylabel="Thousands / Qtr",
        rfooter="ABS Cat 5206.0, 3101.0",
        show=SHOW,
        file_type=FILE_TYPE,
    )

    return data


data = plot_erp_na()

In [10]:
def plot_gdp_pc(data: pd.DataFrame) -> None:
    """Plot GDP per capita growth using the ERP/projections.

    Seasonally adjusted GDP is divided by the projected ERP, and the
    one-quarter and four-quarter percentage changes of that ratio are
    charted twice: over the whole series, and from 16 quarters before
    the end.

    Args:
    data: a dataframe with a "Projected ERP" column, as returned by
        plot_erp_na(). It is expected to carry a quarterly period index
        so that it aligns with the GDP series - TODO confirm for other
        callers."""

    gdp = get_data()[0]['Gross domestic product: Chain volume measures (SA)'].dropna()
    gdp.index = pd.PeriodIndex(gdp.index, freq='Q')
    gdp_pc = gdp / data["Projected ERP"] * 1_000

    # percentage change over one quarter and over four quarters
    gdp_pc_3mgrowth = (gdp_pc.pct_change(1, fill_method=None) * 100).round(2).dropna()
    gdp_pc_12mgrowth = (gdp_pc.pct_change(4, fill_method=None) * 100).round(2).dropna()

    # (start point, file-name tag) pairs: whole series, then the recent period
    for start, tag in zip((None, gdp_pc_3mgrowth.index[-16]), ("", "-recent")):
        plot_growth_finalise(
            gdp_pc_12mgrowth,
            gdp_pc_3mgrowth,
            from_=start,
            tag=tag,
            title="GDP per capita growth (using ERP/projections)",
            annotate=9,
            rfooter="ABS Cat. 5206.0, 3101.0, 6202.0",
            lfooter="Australia. Seasonally Adjusted GDP Series.",
            show=SHOW,
            file_type=FILE_TYPE,
        )


plot_gdp_pc(data)

## Finished

In [None]:
# watermark
%load_ext watermark
%watermark --python --machine --packages pandas,matplotlib,readabs --watermark

The watermark extension is already loaded. To reload it, use:
  %reload_ext watermark
Python implementation: CPython
Python version       : 3.12.8
IPython version      : 8.30.0

pandas    : 2.2.3
matplotlib: 3.9.3
readabs   : 0.0.17

Compiler    : Clang 18.1.8 
OS          : Darwin
Release     : 24.2.0
Machine     : arm64
Processor   : arm
CPU cores   : 14
Architecture: 64bit

Watermark: 2.5.0



In [12]:
print("Finished")

Finished
