# ABS Population Growth multi-measure

## Python set-up

In [1]:
# system imports
from dataclasses import dataclass
from pathlib import Path
from typing import TypeAlias, cast

# analytic imports
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# local imports
import abs_data_capture
import plotting
import utility

In [3]:
# pandas display settings: effectively remove row/column truncation
pd.set_option("display.max_rows", 999999)
pd.set_option("display.max_columns", 999)
pd.set_option("display.max_colwidth", 100)

# chart styling and output directory
# The directory is created if missing before it is handed to the
# plotting module (presumably clear_chart_dir empties it - TODO confirm).
plt.style.use("fivethirtyeight")
CHART_DIR = "./CHARTS/Population"
Path(CHART_DIR).mkdir(parents=True, exist_ok=True)
plotting.clear_chart_dir(CHART_DIR)
plotting.set_chart_dir(CHART_DIR)
SHOW = False  # forwarded to plotting.line_plot() as its show argument

## Get data from ABS

### Typing information

In [4]:
@dataclass
class Selector:
    """Data used to select and process an ABS series.

    Every field is read by get_abs() when it locates one series."""

    # ABS catalogue identifier handed to the abs_data_capture functions
    catalog_id: str
    # table name: key into the fetched data and also a metadata search term
    table: str
    # series type: "SA" (Seasonally Adjusted) or "Orig" (Original)
    orig_sa: str
    # search text matched against the did_col metadata column
    search1: str
    # second search text for the same column; may repeat search1
    search2: str
    # True: replace the series with its percentage change over 4 periods
    # (quarterly index) or 12 periods (any other index)
    calc_growth: bool


# Aliases for the dictionaries taken and returned by get_abs().
# Note: use NewType in the future when moved to Python 3.12
# NOTE(review): typing.NewType predates Python 3.12; the 3.12 addition is
# the `type` alias statement (PEP 695) - presumably that is the intended
# migration. Confirm before changing.
# series label -> Selector describing which ABS series to fetch
MeasureType: TypeAlias = dict[str, Selector]
# series label -> the fetched pandas Series
AbsDataType: TypeAlias = dict[str, pd.Series]

### Data Retrieval

In [5]:
def get_abs(measure: MeasureType) -> AbsDataType:
    """Fetch one ABS series for each item in the measure dictionary.

    Arguments:
    measure -- maps a series label to the Selector that locates it.

    Returns a dictionary with the same labels, each mapped to the
    selected pandas Series. Where the Selector has calc_growth set,
    the Series is replaced by its percentage change over the previous
    4 periods (quarterly index) or 12 periods (any other index).
    Each label is printed as it is processed."""

    # of the six metadata column names, only three are needed here
    did_col, _, table_col, type_col, _, _ = abs_data_capture.get_meta_constants()
    series_types = {"SA": "Seasonally Adjusted", "Orig": "Original"}

    selected = {}
    for label, selector in measure.items():
        print(label)

        # fetch the tables for this catalogue, including the metadata table
        tables = abs_data_capture.get_abs_meta_and_data(selector.catalog_id)
        _, _, meta_key = abs_data_capture.get_fs_constants(selector.catalog_id)
        meta = tables[meta_key]
        frame = tables[selector.table]

        # search text -> metadata column it must match; identical search
        # texts (search1 == search2) collapse into a single entry
        wanted = {
            selector.table: table_col,
            series_types[selector.orig_sa]: type_col,
            selector.search1: did_col,
            selector.search2: did_col,
        }
        # presumably the first element returned is the series identifier,
        # as it is used here as the column label in the table
        found = abs_data_capture.find_id(meta, wanted, verbose=True)
        series = frame[found[0]]
        period_index = cast(pd.PeriodIndex, series.index)

        if selector.calc_growth:
            # compare each value with the one a year earlier;
            # assumes any non-quarterly index is monthly
            if period_index.freqstr[0] == "Q":
                lag = 4
            else:
                lag = 12
            series = (series / series.shift(lag) - 1) * 100.0

        selected[label] = series

    return selected

### Data sources

In [6]:
# Series to chart. Each key is the label printed by get_abs() and later
# becomes a DataFrame column name in plot_pop_growth().
# search1 and search2 repeat the same text in both entries, so get_abs()
# ends up with a single search term for that column.
measures: MeasureType = {
    "Estimated Resident Population": Selector(
        catalog_id="3101",
        table="1",
        orig_sa="Orig",
        search1="Percentage ERP Change Over Previous Year ;  Australia ;",
        search2="Percentage ERP Change Over Previous Year ;  Australia ;",
        calc_growth=False,  # the selected series is already an annual percentage change
    ),
    "Civilian Population Aged 15+ years": Selector(
        catalog_id="6202",
        table="1",
        orig_sa="Orig",
        search1="Civilian population aged 15 years and over ;  Persons ;",
        search2="Civilian population aged 15 years and over ;  Persons ;",
        calc_growth=True,  # presumably a level series; growth is derived in get_abs()
    ),
}

## Plot

In [7]:
def plot_pop_growth():
    """Chart Australian population growth for the series in measures.

    Fetches every series with get_abs(), passes the Estimated Resident
    Population series through utility.qtly_to_monthly(), and hands the
    combined table to plotting.line_plot() with two start points:
    None and the month eight years before today."""

    erp = "Estimated Resident Population"
    growth = get_abs(measures)
    # presumably aligns the quarterly ERP series with the monthly one
    growth[erp] = utility.qtly_to_monthly(growth[erp])
    frame = pd.DataFrame(growth)

    # right footer cites every source as "<catalogue> T<table>"
    sources = "".join(
        f" {selector.catalog_id} T{selector.table}" for selector in measures.values()
    )
    rfooter = "ABS" + sources

    eight_years_ago = pd.Timestamp("today") - pd.DateOffset(years=8)
    recent = pd.Period(eight_years_ago, freq="M")

    plotting.line_plot(
        frame,
        starts=[None, recent],
        title="Population Growth",
        ylabel="Per cent per year",
        lfooter="Australia. Original series. ",
        rfooter=rfooter,
        show=SHOW,
    )

In [8]:
# fetch the ABS series and produce the population growth charts
plot_pop_growth()

Estimated Resident Population
Found URL for a ZIP file on ABS web page
Retrieving data from cache: ABS_CACHE/319496caf5686125e339585d019fe765--All20time20series20spreadsheets.zip
Extracting DataFrames from the zip-file ...
Civilian Population Aged 15+ years
Found URL for a ZIP file on ABS web page
Retrieving data from cache: ABS_CACHE/842830b693f9b921bbe3abd5bbd8f4d8--p6202_all_monthly_spreadsheets.zip
Extracting DataFrames from the zip-file ...


## Finished

In [9]:
# watermark: record when the notebook last ran, plus the versions of
# Python/IPython (-v), the imported packages (-iv) and watermark itself (-w);
# -u -n -t produce the "Last updated" date and time line
%load_ext watermark
%watermark -u -n -t -v -iv -w

Last updated: Fri Feb 16 2024 14:04:38

Python implementation: CPython
Python version       : 3.11.7
IPython version      : 8.21.0

pandas    : 2.2.0
matplotlib: 3.8.3

Watermark: 2.4.3



In [10]:
# last cell: its output shows that execution reached the end of the notebook
print("Finished")

Finished
