# ABS Quarterly Estimated Resident Population 3101

## Python set-up

In [1]:
# system imports
import textwrap
import re

# analytic imports
import pandas as pd

import readabs as ra
from readabs import recalibrate
from readabs import metacol as mc

In [2]:
# local imports
import decompose
from abs_helper import get_abs_data
from henderson import hma
from mgplot import (
    get_color,
    finalise_plot,
    line_plot_finalise,
    postcovid_plot_finalise,
    series_growth_plot_finalise,
    seastrend_plot_finalise,
    revision_plot_finalise,
    multi_start,
    abbreviate_state,
)

Could not import auto_arima from pmdarima


In [3]:
# pandas display settings -- lift the truncation limits on printed frames
pd.set_option("display.max_rows", 999999)
pd.set_option("display.max_columns", 999)
pd.set_option("display.max_colwidth", 999)

# Display charts in this notebook (handed to every plot call as show=SHOW)
SHOW = False

## Get data from ABS

In [4]:
# Load ABS catalogue 3101.0 through the local helper. Used below as:
#   abs_dict -- mapping of table name -> data table
#   meta     -- metadata table, filtered on the readabs metacol columns
#   source   -- text placed in the chart footers
# The fourth returned value is not used in this notebook.
abs_dict, meta, source, _ = get_abs_data("3101.0")

Table 31010do001_202409 has no 'Index' sheet.
Table 31010do002_202409 has no 'Index' sheet.
Table 31010DO003_200106 has no 'Index' sheet.
Table Regional internal migration estimates, provisional has no 'Index' sheet.


In [5]:
# list of available tables (wrapped into lines for a compact display)
textwrap.wrap(", ".join(abs_dict.keys()))

['310101, 3101016A, 3101016B, 310102, 310104, 3101051, 3101052, 3101053,',
 '3101054, 3101055, 3101056, 3101057, 3101058, 3101059']

## Data revisions

In [6]:
### Data revisions
def data_revisions() -> None:
    """Obtain and plot data revisions.

    Reads table 310101 of ABS catalogue 3101.0 `how_far_back` times, stepping
    from the current print back through earlier ones, and collects each
    series of interest from every print. One revision chart is produced per
    series; the Estimated Resident Population additionally gets a chart of
    its period-on-period change.

    Each print is read once and shared across all series, rather than being
    re-read for every series.
    """

    how_far_back = 6
    dataset = [
        "Estimated Resident Population",
        "Births ;  Australia ;",
        "Deaths ;  Australia ;",
        "Natural Increase ;  Australia ;",
        "Overseas Arrivals ;  Australia ;",
        "Overseas Departures ;  Australia ;",
        "Net Overseas Migration ;  Australia ;",
    ]
    stype = "Original"

    # one frame per series; each ABS print contributes one column to each
    repositories = {series: pd.DataFrame() for series in dataset}
    units = {}
    history = None  # first pass: presumably the current print -- TODO confirm
    for _i in range(how_far_back):
        # from current to historic data
        d, m = ra.read_abs_cat(
            "3101.0", single_excel_only="310101", history=history
        )
        for series in dataset:
            selector = {series: mc.did, stype: mc.stype}
            t, s, u = ra.find_abs_id(m, selector, regex=False, verbose=False)
            last_period = d[t].index[-1]
            date = f"ABS print for {last_period.strftime('%Y-%b')}"
            repository = repositories[series]
            repository[date] = d[t][s]
            units[series] = u
        # the next read is keyed on the period before the last one in this print
        history = (last_period - 1).strftime("%b-%Y").lower()

    for series in dataset:
        repository = repositories[series]
        u = units[series]
        # ABS descriptions are ";"-delimited: keep only the leading item name
        label = re.sub(r"\s*;.*$", "", series)

        revision_plot_finalise(
            data=repository,
            ylabel=u,
            title=f"Data revisions: {label}",
            rfooter=source,
            lfooter=f"Australia. {stype}. ",
            legend={"loc": "best", "fontsize": 9},
            show=SHOW,
        )

        if series == "Estimated Resident Population":
            revision_plot_finalise(
                data=repository.diff(1),
                ylabel=u,
                title=f"Data revisions: {label} Growth",
                rfooter=source,
                lfooter=f"Australia. {stype}. ",
                legend={"loc": "best", "fontsize": 9},
                show=SHOW,
            )


data_revisions()

## Plotting

### Key charts

In [7]:
### NOTE ### -- this code block is very slow -- about 3 minutes


def key_charts():
    """Plot the national population-component series held in table 310101.

    For each component (births, deaths, natural increase, overseas arrivals,
    overseas departures and net overseas migration) this draws:
      * the original series as a line chart;
      * the seasonally adjusted and trend series from the in-house
        decomposition, once per entry in `starts`; and
      * the seasonally adjusted series through the post-COVID plot helper.
    """
    table = "310101"
    series_type = "Original"
    frame = abs_dict[table]

    # component -> last date(s) in continuity, handed to the decomposition as
    # its discontinuity list. The key order is the charting order.
    breaks_by_component = {
        "Births": [pd.Period("2020-Q4", freq="Q")],
        "Deaths": [],
        "Natural Increase": [pd.Period("2020-Q4", freq="Q")],  # births - deaths
        "Overseas Arrivals": [pd.Period("2020-Q1", freq="Q")],
        "Overseas Departures": [pd.Period("2020-Q1", freq="Q")],
        "Net Overseas Migration": [pd.Period("2020-Q1", freq="Q")],
    }
    starts = 0, -15  # quarters

    for component, breaks in breaks_by_component.items():
        # locate the single series for this component
        selector = {
            table: mc.table,
            series_type: mc.stype,
            component: mc.did,
        }
        _, series_id, units = ra.find_abs_id(meta, selector, verbose=False)
        series = frame[series_id]
        series.name = component
        series, units = ra.recalibrate(series, units)

        # keyword arguments common to every chart of this component
        shared = {
            "title": component,
            "y0": True,
            "ylabel": f"{units} / Quarter",
            "rfooter": source,
            "show": SHOW,
        }

        # raw data plot
        line_plot_finalise(
            series,
            lfooter=f"Australia. {series_type} series. ",
            **shared,
        )

        # in-house seasonal decomposition
        decomposed = decompose.decompose(
            series.dropna(),
            constant_seasonal=True,
            arima_extend=True,
            discontinuity_list=breaks,
            ignore_years=(2020, 2021),  # COVID
        )
        multi_start(
            decomposed[["Seasonally Adjusted", "Trend"]],
            function=seastrend_plot_finalise,
            tag="sa-trend",
            lfooter="Australia. Seasonally adjusted using in-house methods. ",
            **shared,
            starts=starts,
        )

        # the post-COVID chart does not take the multiple start points
        postcovid_plot_finalise(
            decomposed["Seasonally Adjusted"],
            tag="covid-recovery",
            lfooter=(
                "Australia. Seasonally adjusted series plotted. "
                "Seasonally adjusted using in-house methods. "
            ),
            **shared,
        )


key_charts()

### Age related

#### Set-up parameters

In [8]:
# table names 3101051 ... 3101059 (presumably one per jurisdiction; the
# jurisdiction is read from each table's description in get_age_data)
tables = tuple(f"31010{suffix}" for suffix in range(51, 60))
groups = ("Female", "Male", "Persons")  # persons should be last
states = {"NSW", "Vic", "Qld", "SA", "WA", "Tas", "NT", "ACT", "Australia"}
state_colors = {name: get_color(name) for name in states}

# four line styles cycled three times -- repeat enough to cover all series
linestyle = {"style": ["-", "-.", "--", ":"] * 3}

#### Utility functions

In [9]:
def get_age_data(table: str, group: str) -> tuple[str, pd.DataFrame]:
    """Return the jurisdiction abbreviation and by-age data for one table.

    Args:
        table: name of a table in `abs_dict` / `meta`.
        group: text that the data item description must contain
            (one of the entries in `groups`).

    Returns:
        A tuple of the abbreviated jurisdiction name (taken from the text
        after the last comma in the table description) and a DataFrame with
        the table's index and one integer-labelled column per age, where
        "100 and over" is labelled 100.
    """
    # metadata rows for this table and group
    in_table = meta[mc.table] == table
    in_group = meta[mc.did].str.contains(group)
    relevant = meta[in_table & in_group]

    # the jurisdiction is the last comma-separated part of the description
    description = relevant["Table Description"].iloc[0]
    abbreviation = abbreviate_state(description.split(",")[-1].strip())

    # the data columns for those rows
    raw = abs_dict[table][relevant[mc.id]]

    # the age is the second-last ";"-separated field of the description
    age_text = relevant[mc.did].str.rsplit(";", n=2).str[-2]
    labels = age_text.str.replace("100 and over", "100").astype(int)

    by_age = pd.DataFrame(raw.to_numpy(), columns=labels, index=raw.index)
    return abbreviation, by_age


# test
# get_age_data("3101051", "Persons")

In [10]:
def calculate_medians(data: pd.DataFrame) -> pd.Series:
    """Return the interpolated median age for every row of `data`.

    Args:
        data: one row per period and one column per age; the column labels
            are integer ages and the values are population counts.

    Returns:
        A Series on the index of `data` holding the fractional median age.

    Each column label k is treated as the age interval [k, k+1), so the
    cumulative share through column k is the share younger than k+1. The
    median therefore lies inside the first column whose cumulative share
    exceeds one half, and is located within it by linear interpolation.
    The youngest column is handled like any other, with nobody below it.
    """
    HALF = 0.5

    # cumulative population share up to and including each age column
    row_total = data.sum(axis=1)
    cumulative = data.div(row_total, axis=0).cumsum(axis=1)

    # share younger than each age column: the running total moved one column
    # to the right, with zero below the youngest age
    younger = cumulative.shift(1, axis=1, fill_value=0.0)

    # the age column that contains the median
    median_label = cumulative.gt(HALF).idxmax(axis=1)

    # cumulative shares at the lower and upper edge of that column
    rows = median_label.index
    low = pd.Series(
        [younger.at[row, age] for row, age in median_label.items()], index=rows
    )
    high = pd.Series(
        [cumulative.at[row, age] for row, age in median_label.items()], index=rows
    )

    # how far through the column the 50% mark falls
    fractional_age = (HALF - low) / (high - low)
    return median_label + fractional_age

#### Age profiles

In [11]:
# second argument to hma() below (presumably the number of terms in the
# Henderson moving average -- TODO confirm)
HMA_SMOOTHER = 5


def state_profiles():
    """Produce state population profiles by age.

    For each group, takes the latest row of every age table, converts it to
    a percentage share by age, smooths it lightly and plots all
    jurisdictions on one chart.
    """
    table_numbers = [int(i) for i in tables]

    for group in groups:
        profiles = {}
        for table in tables:
            # latest observation for this jurisdiction
            state, by_age = get_age_data(table, group)
            period = by_age.index[-1]
            latest = by_age.iloc[-1]

            # percentage share by age, with limited smoothing
            share = (latest / latest.sum()) * 100
            profiles[state] = hma(share, HMA_SMOOTHER)

        # one line per jurisdiction, coloured by jurisdiction
        profiles_df = pd.DataFrame(profiles)
        ax = profiles_df.plot(
            lw=2,
            color=[state_colors[name] for name in profiles_df.columns],
            **linestyle,
        )
        finalise_plot(
            axes=ax,
            title=f"Population distribution by Age and Jurisdiction ({group})",
            ylabel="Kernel Density Estimate (%)",
            xlabel="Age in whole years",
            legend={"loc": "best", "fontsize": "small", "ncols": 3},
            tag=group,
            lfooter=f"Australia. {period}",
            rfooter=f"Calculated from {source} {table_numbers}",
            show=SHOW,
        )


state_profiles()

#### Median Age by state

In [12]:
def median_age_by_state():
    """Plot median age over time for every jurisdiction, one chart per group."""
    table_numbers = [int(i) for i in tables]

    for group in groups:
        # data capture: one column of medians per jurisdiction
        per_table = (get_age_data(table, group) for table in tables)
        medians = pd.DataFrame(
            {state: calculate_medians(by_age) for state, by_age in per_table}
        )

        # plot
        line_plot_finalise(
            medians,
            color=[get_color(name) for name in medians.columns],
            **linestyle,
            title=f"Median Population Age by Jurisdiction ({group})",
            ylabel="Years",
            xlabel=None,
            legend={"loc": "best", "fontsize": "small", "ncols": 3},
            lfooter="Australia. ",
            rfooter=f"Calculated from {source} {table_numbers}",
            width=2,
            show=SHOW,
        )


median_age_by_state()

#### Median Age by Gender

In [13]:
def age_gender_profiles() -> None:
    """Plot female and male median age over time, one chart per table."""
    gender_colors = ["hotpink", "cornflowerblue"]
    genders = groups[:2]  # assumes "Persons" is last group

    for table in tables:
        # data capture: one column of medians per gender
        medians = {}
        for gender in genders:
            # the jurisdiction is the same for both genders in a table
            state, by_age = get_age_data(table, gender)
            medians[gender] = calculate_medians(by_age)

        # plot
        line_plot_finalise(
            pd.DataFrame(medians),
            color=gender_colors,
            title=f"Median Population Age by Gender for {state}",
            ylabel="Years",
            rfooter=f"Calculated from {source} {table}",
            width=2,
            show=SHOW,
        )


age_gender_profiles()

### National and State Populations

In [14]:
def raw_populatons():
    """Plot raw populations by state and territory.

    For every jurisdiction in table 310104 this draws the full Estimated
    Resident Population series, a post-COVID chart, and growth charts from
    two starting points.
    """
    table = "310104"
    erp_df = abs_dict[table]
    erp_phrase = "Estimated Resident Population ;  Persons ;"
    footer = f"{source} {table}"

    # jurisdiction names: what is left of each ERP description once the
    # leading phrase and the " ;" delimiters are removed
    is_erp = (meta[mc.table] == table) & (meta[mc.did].str.contains(erp_phrase))
    jurisdictions = (
        meta.loc[is_erp, mc.did]
        .str.replace(erp_phrase, "")
        .str.replace(" ;", "")
        .str.strip()
        .to_list()
    )

    for jurisdiction in jurisdictions:
        # get relevant data series
        selector = {
            table: mc.table,
            erp_phrase: mc.did,
            f";  {jurisdiction} ;": mc.did,  # Australia, South Australia, etc.
        }
        _, series_id, units = ra.find_abs_id(meta, selector, verbose=False)
        series = erp_df[series_id]
        series.name = "Estimated Resident Population"
        if units == "Persons":
            units = "Number Persons"
        series, units = recalibrate(series, units)

        title = f"Estimated Resident Population: {abbreviate_state(jurisdiction)}"

        # plot complete series
        line_plot_finalise(
            series,
            title=title,
            ylabel=units,
            rfooter=footer,
            width=2,
            show=SHOW,
        )

        # post-COVID view
        postcovid_plot_finalise(
            series,
            title=title,
            ylabel=units,
            tag="-covid",
            rfooter=footer,
            show=SHOW,
        )

        # growth charts from two starting points
        for start in (0, -13):
            series_growth_plot_finalise(
                series,
                plot_from=start,
                tag=f"growth-{start}",
                title=f"Growth in the {title}",
                rfooter=footer,
                show=SHOW,
            )


raw_populatons()

## Finished

In [15]:
# watermark: record when the notebook was last run, together with the
# Python, package and machine details it ran with
%load_ext watermark
%watermark -u -t -d --iversions --watermark --machine --python --conda

Last updated: 2025-06-01 17:42:01

Python implementation: CPython
Python version       : 3.13.3
IPython version      : 9.3.0

conda environment: n/a

Compiler    : Clang 20.1.0 
OS          : Darwin
Release     : 24.5.0
Machine     : arm64
Processor   : arm
CPU cores   : 14
Architecture: 64bit

readabs: 0.0.29
re     : 2.2.1
mgplot : 0.1.4
pandas : 2.2.3

Watermark: 2.5.0



In [16]:
# end-of-run marker: this only prints if every cell above completed
print("Finished")

Finished
