# ASIC Corporate Insolvency Data

1. Make sure you save the latest monthly data in the cache directory and update cell 6 before running this notebook.

2. Because the last couple of months are provisional, you may want to exclude one or both - see EXCLUDE_LATEST in cell 4 below.

## Python set-up

In [1]:
# system imports
from pathlib import Path
from typing import TypeVar
import textwrap

In [2]:
# analytic imports
import matplotlib.pyplot as plt
import pandas as pd
from mgplot import (
    multi_start,
    line_plot_finalise,
    set_chart_dir,
    clear_chart_dir,
    finalise_plot,
    get_color,
    state_names,
    abbreviate_state,
    bar_plot_finalise
)

In [3]:
# local imports
from decompose import decompose

Could not import auto_arima from pmdarima


## Plotting set-up

In [4]:
# ======= ======= ======= =======
# Number of trailing months sliced off each series before it is plotted
# (used as `.iloc[:-EXCLUDE_LATEST]` by the plotting functions below).
# A value of 0 keeps every month.
EXCLUDE_LATEST = 1  # months of provisional data to exclude from the latest plot
# ======= ======= ======= =======

# plotting set-up: text shared by the charts in this notebook
TITLE = "Corporate Insolvencies"  # leading part of chart titles
SOURCE = "ASIC"  # attribution used in the right footer
LFOOTER = (
    # the trailing space is deliberate: callers append "Data to: ..." directly
    "Australia. The first time a company enters external"
    " administration or has a controller appointed. "
)

# Register the output directory with mgplot before clearing it.
# NOTE(review): clear_chart_dir() presumably removes charts left by an
# earlier run of this notebook - confirm against the mgplot documentation.
CHART_DIR = "./CHARTS/ASIC/"
set_chart_dir(CHART_DIR)
clear_chart_dir()

SHOW = False  # passed as show= to every plotting call
FILE_TYPE = "png"  # passed as file_type= to every plotting call

## Data Capture Set-up

In [5]:
# Folder that holds the manually downloaded ASIC spreadsheets; it is
# created here (along with any missing parents) if it does not yet exist.
ASIC_CACHE_DIR = "./ASIC_CACHE/"
Path(ASIC_CACHE_DIR).mkdir(parents=True, exist_ok=True)

In [6]:
# YOU MUST DOWNLOAD THE LATEST DATA FROM THE ASIC WEBSITE
# and save it in the ASIC_CACHE_DIR directory
# Then amend the filename in the next lines of code ...

# Workbook read by get_asic_data() and grab_state_sector_data();
# this is the only filename that changes with each monthly release.
LATEST_DATA_FILE = (
    ASIC_CACHE_DIR
    + "asic-insolvency-statistics-series-1-and-series-2-published-16-june-2025.xlsx"
)

# Workbook read by get_asic_history() for the older monthly history.
HISTORICAL_DATA_FILE_1 = (
    ASIC_CACHE_DIR
    + "asic-insolvency-statistics-series-1-published-8-september-2022.xlsx"
)

# Workbook read by grab_state_sector_data() for the older state-by-sector history.
HISTORICAL_DATA_FILE_1A = (
    ASIC_CACHE_DIR
    + "asic-insolvency-statistics-series-1a-published-8-september-2022.xlsx"
)
# see: https://asic.gov.au/regulatory-resources/find-a-document/statistics/insolvency-statistics/insolvency-statistics-current/

## State and National Insolvency data

In [7]:
def fix_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* with normalised column labels.

    Each label is title-cased, stripped of embedded newlines, has "&"
    spelled out as " and ", has "And" lower-cased to "and", and finally
    has each pair of adjacent spaces reduced to a single space. The
    input frame is left untouched.
    """

    def tidy(label: str) -> str:
        # the order of these steps matters: "&" is expanded before the
        # "And" -> "and" pass, and spaces are squeezed last
        label = label.title().replace("\n", "")
        label = label.replace("&", " and ").replace("And", "and")
        return label.replace("  ", " ")

    return df.rename(columns={old: tidy(old) for old in df.columns})


def get_asic_data(
    sheet: str = "1.3",
    pm: str = "Period(Month)",
    pcy: str = "Period(Calendar Year)",
):
    """Load one sheet of the latest ASIC workbook, indexed by month.

    Args:
        sheet: name of the worksheet to read from LATEST_DATA_FILE.
        pm: column (as named after fix_columns) holding the month label.
        pcy: column (as named after fix_columns) holding the calendar year.

    Returns:
        A DataFrame whose index is a monthly PeriodIndex built from the
        year and month columns, with any "Total" column renamed to
        "Australia".
    """

    raw = pd.read_excel(LATEST_DATA_FILE, sheet_name=sheet, skiprows=10)

    # tidy the headers, then discard columns that are entirely empty
    table = fix_columns(raw).dropna(how="all", axis=1)

    # rows with no month label (the total rows) are dropped
    table = table.loc[table[pm].notna()]

    # "<year>-<month label>" strings are parsed into monthly periods
    year_text = table[pcy].astype(int).astype(str)
    table.index = pd.PeriodIndex(year_text + "-" + table[pm], freq="M")

    return table.rename(columns={"Total": "Australia"})


# Latest ASIC table (default sheet "1.3"), loaded once for the plotting cells below.
current_data = get_asic_data()

In [8]:
def get_asic_history() -> pd.DataFrame:
    """Load the historical monthly ASIC series, indexed by month.

    Sheet "1.2" of HISTORICAL_DATA_FILE_1 is read and its headers tidied.
    Rows whose "Australia" value is missing supply a financial-year label
    (taken from "Period") that is carried down over the rows beneath them.
    The calendar year of each month is then taken from the part of that
    label before the "-" for July to December, and from the part after it
    for every other month.

    Returns:
        A DataFrame with a monthly PeriodIndex and two helper columns,
        "Fin Year" and "Year", added to the sheet's own columns.
    """

    raw = pd.read_excel(HISTORICAL_DATA_FILE_1, sheet_name="1.2", skiprows=4)
    frame = fix_columns(raw)

    # discard blank rows, then the first and last remaining (junk) rows
    frame = frame.dropna(axis=0, how="all").iloc[1:-1]

    # spread the financial-year label down from its heading row
    is_heading = frame["Australia"].isna()
    frame["Fin Year"] = frame.loc[is_heading, "Period"]
    frame["Fin Year"] = frame["Fin Year"].ffill()

    # keep only rows with at least 8 populated cells (drops the heading rows)
    frame = frame.dropna(thresh=8, axis=0)

    # pick the calendar year that each month falls in
    year_parts = frame["Fin Year"].str.split("-")
    second_half_of_year = frame["Period"].isin(
        ("July", "August", "September", "October", "November", "December")
    )
    frame["Year"] = year_parts.str[0].where(
        second_half_of_year, other=year_parts.str[1]
    )

    frame.index = pd.PeriodIndex(frame["Year"] + "-" + frame["Period"], freq="M")
    return frame


# Historical monthly table, loaded once for the plotting cells below.
historic_data = get_asic_history()

In [9]:
Datatype = TypeVar("Datatype", pd.Series, pd.DataFrame)


def combine_data(history: Datatype, current: Datatype) -> Datatype:
    """Splice *history* and *current* into one index-sorted object.

    Both arguments are either Series or DataFrames. Where an index label
    occurs more than once after stacking, only the last occurrence is
    kept, so rows from *current* take precedence over *history*.
    """

    stacked = pd.concat([history, current], axis=0)
    superseded = stacked.index.duplicated(keep="last")
    return stacked[~superseded].sort_index()

In [10]:
def plot_trend():
    """Plot the trend and seasonally adjusted national insolvency series.

    The historic and current "Australia" columns are spliced together with
    combine_data(), the last EXCLUDE_LATEST months are dropped (when that
    setting is non-zero), and the result is passed to decompose() with a
    multiplicative model. The "Trend" and "Seasonally Adjusted" columns of
    the decomposition are then handed to multi_start() for charting.
    """
    combined_series = combine_data(
        historic_data["Australia"], current_data["Australia"]
    )
    if EXCLUDE_LATEST:
        combined_series = combined_series.iloc[:-EXCLUDE_LATEST]

    # describe the exclusion accurately when more than one month is dropped
    if not EXCLUDE_LATEST:
        lheader = None
    elif EXCLUDE_LATEST == 1:
        lheader = "Last month of (provisional) data excluded"
    else:
        lheader = f"Last {EXCLUDE_LATEST} months of (provisional) data excluded"

    decomp = decompose(s=combined_series, model="multiplicative")
    when = decomp.index[-1].strftime("%b-%Y")

    # NOTE(review): multi_start presumably repeats the chart for each entry
    # in `starts` (full history, then from 2019) - confirm in mgplot docs.
    multi_start(
        decomp[["Trend", "Seasonally Adjusted"]],
        function=line_plot_finalise,
        starts=(0, pd.Period("2019-01", freq="M")),
        title=f"{TITLE}: Australia",
        ylabel="First-time Insolvencies/Month",
        width=(2.5, 1),
        rfooter=f"Calculated using data from {SOURCE}",
        lheader=lheader,
        lfooter=LFOOTER + f"Data to: {when}",
        show=SHOW,
        annotate=[True, False],
        file_type=FILE_TYPE,
    )


# produce the national trend / seasonally adjusted charts
plot_trend()

In [11]:
def state_growth():
    """Plot each state's monthly series and a chart of state growth rates.

    The historic and current tables are spliced for every name in
    state_names plus "Australia", and the last EXCLUDE_LATEST months are
    dropped (when non-zero). One line chart is produced per column. Growth
    is then measured as the sum of the final 12 months against the average
    annual total over 2015-2019, in per cent, and drawn as a horizontal
    bar chart.
    """

    states = [*state_names, "Australia"]
    combined = combine_data(historic_data[states], current_data[states])
    if EXCLUDE_LATEST:
        combined = combined.iloc[:-EXCLUDE_LATEST]
    when = combined.index[-1].strftime("%b-%Y")

    # describe the exclusion accurately when more than one month is dropped
    if not EXCLUDE_LATEST:
        lheader = None
    elif EXCLUDE_LATEST == 1:
        lheader = "Last month of (provisional) data excluded"
    else:
        lheader = f"Last {EXCLUDE_LATEST} months of (provisional) data excluded"

    # one chart per state; long titles are broken before the bracketed suffix
    too_long = 58
    for col in combined:
        title = f"{TITLE}: {col} (Original series)"
        separator = "\n" if len(title) > too_long else " "
        title = title.replace(" (", f"{separator}(")
        line_plot_finalise(
            combined[col],
            title=title,
            ylabel="First-time Insolvencies/Month",
            width=1,
            rfooter=SOURCE,
            lheader=lheader,
            lfooter=LFOOTER + f"Data to: {when}",
            tag="states",
            show=SHOW,
            annotate=True,
            file_type=FILE_TYPE,
        )

    # calculate growth rates: latest 12 months versus the base-period average
    latest = combined.iloc[-12:].sum()
    base_start, base_end = 2015, 2019
    years = base_end - base_start + 1
    in_base = (combined.index.year >= base_start) & (
        combined.index.year <= base_end
    )
    base = combined.loc[in_base].sum() / years
    growth = ((latest / base) - 1) * 100

    # plot growth rates
    _fig, ax = plt.subplots()
    growth.sort_values().plot(kind="barh", ax=ax)
    finalise_plot(
        ax,
        title=f"{TITLE} in the 12 months\nto {when} by State over {base_start}-{base_end} Ave",
        xlabel="Growth (%)",
        rfooter=SOURCE,
        lheader=lheader,
        lfooter=LFOOTER,
        show=SHOW,
        file_type=FILE_TYPE,
    )


# produce the per-state charts and the state growth chart
state_growth()

## By Industry Sector

In [12]:
# Sector columns charted by plot_industrial(), in plotting order.
# These must match the column names produced by fix_columns() and
# historical_name_changes(). "Australia" is the all-sector total and is
# dropped from the sector growth chart.
industrial = [
    "Accommodation and Food Services",
    "Administrative and Support Services",
    "Agriculture, Forestry and Fishing",
    "Arts and Recreation Services",
    "Construction",
    "Education and Training",
    "Electricity, Gas, Water and Waste Services",
    "Financial and Insurance Services",
    "Health Care and Social Assistance",
    "Information Media and Telecommunications",
    "Manufacturing",
    "Mining",
    "Other Services",
    "Professional, Scientific and Technical Services",
    "Public Administration and Safety",
    "Rental, Hiring and Real Estate Services",
    "Retail Trade",
    "Transport, Postal and Warehousing",
    "Wholesale Trade",
    "Unknown",
    "Australia",
]

In [13]:
def historical_name_changes(data: pd.DataFrame) -> pd.DataFrame:
    """Align the historical sector columns with the current data's names.

    The seven finance-related columns are summed row-wise (ignoring NaN)
    into a single "Financial and Insurance Services" column, which is
    placed at column position 9. Three other columns are then renamed to
    the labels used by the current data. The input frame is not modified.
    """

    finance = "Financial and Insurance Services"
    finance_parts = [
        finance,
        "Fis–Credit Provider",
        "Fis–Deposit Taking Institutions",
        "Fis–Insurance",
        "Fis–Managed Investments",
        "Fis–Other Financial Services",
        "Fis–Superannuation",
    ]

    # replace the component columns with their row-wise total
    finance_total = data[finance_parts].sum(axis=1, skipna=True)
    data = data.drop(columns=finance_parts)
    data[finance] = finance_total

    # the total was appended last; move it to position 9
    others = list(data.columns[:-1])
    data = data[others[:9] + [finance] + others[9:]]

    # rename columns to match with current data
    return data.rename(
        columns={
            "Other (Business and Personal) Services": "Other Services",
            "Information Media and Tele- Communications": "Information Media and Telecommunications",
            "Total": "Australia",
        }
    )

In [14]:
def grab_state_sector_data() -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Load state-by-sector insolvency counts, historical and current.

    The historical table (sheet "1A.1.2" of HISTORICAL_DATA_FILE_1A) and
    the current table (sheet "1.4.2" of LATEST_DATA_FILE) are each reshaped
    so that rows are indexed by a (month, state) MultiIndex and columns
    are sectors. State names are mapped through abbreviate_state().

    Returns:
        A tuple (historical, current, combined). The combined frame is
        produced by combine_data(), so where a (month, state) pair occurs
        in both tables the current row is kept.
    """

    # --- get historical data
    region = "Period and Region"
    hist_data = pd.read_excel(
        io=HISTORICAL_DATA_FILE_1A, sheet_name="1A.1.2", skiprows=5
    )
    hist_data = fix_columns(hist_data)
    hist_data = historical_name_changes(hist_data)
    hist_data = hist_data[hist_data[region].notna()]

    # discard section headings, sub-total rows, the ASIC footer and
    # financial-year heading rows (each pattern is applied as a regex)
    junk_patterns = (
        "MONTHLY",
        "Total for",
        "Australian Securities & Investments Commission",
        r"\d{4}-\d{4}",
    )
    for junk in junk_patterns:
        hist_data = hist_data.loc[~hist_data[region].str.contains(junk)]

    # rows that look like "<word> <yyyy>" are month headings: copy the label
    # into a "month" column and carry it down over the state rows beneath
    is_month_heading = hist_data[region].str.contains(r"[A-Za-z]*\s\d{4}")
    hist_data.loc[is_month_heading, "month"] = hist_data[region]
    hist_data["month"] = hist_data["month"].ffill()
    # "<month> <year>" -> "<year>-<month>" so it parses as a monthly period
    hist_data["month"] = hist_data["month"].str.split(" ").str[::-1].str.join("-")
    hist_data["month"] = pd.PeriodIndex(hist_data["month"], freq="M")

    hist_data = hist_data.rename(columns={region: "State"})
    state_map = {x: abbreviate_state(x) for x in state_names}
    hist_data["State"] = hist_data["State"].replace(state_map)
    hist_data.index = pd.MultiIndex.from_tuples(
        zip(hist_data["month"], hist_data["State"])
    )
    hist_data = hist_data.drop(columns=["State", "month"])
    # keep only rows with at least 8 populated cells
    hist_data = hist_data.dropna(thresh=8)

    # --- get current data
    place = "Principal Place Of Business (State Or Territory)"
    curr_data = pd.read_excel(io=LATEST_DATA_FILE, sheet_name="1.4.2", skiprows=10)
    curr_data = fix_columns(curr_data)
    curr_data = curr_data.dropna(how="all", axis=1)
    curr_data = curr_data.loc[curr_data[place].notna()]
    curr_data["month"] = pd.PeriodIndex(
        curr_data["Period (Calendar Year)"].astype(int).astype(str)
        + "-"
        + curr_data["Period (Month)"],
        freq="M",
    )
    # the first four columns are dropped; the month is already captured above
    curr_data = curr_data.drop(columns=curr_data.columns[:4])
    curr_data = curr_data.rename(columns={place: "State", "Total": "Australia"})
    curr_data["State"] = curr_data["State"].replace(state_map)
    curr_data.index = pd.MultiIndex.from_tuples(
        zip(curr_data["month"], curr_data["State"])
    )
    curr_data = curr_data.drop(columns=["State", "month"])

    # --- amalgamate historical and current data (current wins on overlap)
    combined_data = combine_data(hist_data, curr_data)

    return hist_data, curr_data, combined_data

In [15]:
def plot_industrial():
    """Plot each industry sector's national series and sector growth rates.

    For every name in `industrial`, the combined state-by-sector table is
    summed across states to give a monthly series, the last EXCLUDE_LATEST
    months are dropped (when non-zero), and a line chart is produced.
    Growth is then measured as the sum of the final 12 months against the
    average annual total over 2015-2019, in per cent. Only sectors whose
    base-period average exceeds `cutoff` per year are charted, and the
    "Australia" total is left out of the bar chart.
    """

    _, _, combined = grab_state_sector_data()

    # describe the exclusion accurately when more than one month is dropped
    if not EXCLUDE_LATEST:
        lheader = None
    elif EXCLUDE_LATEST == 1:
        lheader = "Last month of (provisional) data excluded"
    else:
        lheader = f"Last {EXCLUDE_LATEST} months of (provisional) data excluded"

    sector_series: dict[str, pd.Series] = {}
    for sector in industrial:
        # sum across states to get the sector's monthly total
        series = combined[sector].unstack().sum(axis=1)
        if EXCLUDE_LATEST:
            series = series.iloc[:-EXCLUDE_LATEST]
        when = series.index[-1].strftime("%b-%Y")
        sector_series[sector] = series
        title = "\n".join(textwrap.wrap(f"{TITLE}: {sector} (Original series)", 60))
        line_plot_finalise(
            series,
            title=title,
            ylabel="First-time Insolvencies/Month",
            width=1,
            rfooter=SOURCE,
            lheader=lheader,
            lfooter=f"{LFOOTER}Data to: {when}",
            show=SHOW,
            annotate=True,
            file_type=FILE_TYPE,
        )
    # every series shares the same monthly index, so one frame holds them all
    consolidated = pd.DataFrame(sector_series)

    # calculate growth rates: latest 12 months versus the base-period average
    latest = consolidated.iloc[-12:].sum()
    when = consolidated.index[-1].strftime("%b-%Y")
    base_start, base_end = 2015, 2019
    years = base_end - base_start + 1
    in_base = (consolidated.index.year >= base_start) & (
        consolidated.index.year <= base_end
    )
    base = consolidated.loc[in_base].sum() / years
    growth = ((latest / base) - 1) * 100
    cutoff = 150  # per year
    growth = growth[base > cutoff]  # ignore sectors with too small a base
    growth = growth.drop("Australia")

    # plot growth rates
    _fig, ax = plt.subplots()
    ax = growth.sort_values().plot(kind="barh", ax=ax)
    ax.tick_params(axis="both", which="major", labelsize="x-small")
    finalise_plot(
        ax,
        title=f"{TITLE} in the 12 months\nto {when} by Sector over {base_start}-{str(base_end)[2:]} Ave",
        xlabel="Growth (%)",
        rfooter=SOURCE,
        rheader=f"Industry sectors with more than {cutoff}/year insolvencies in the {base_start}-{base_end} period.",
        lheader=lheader,
        lfooter=LFOOTER,
        show=SHOW,
        file_type=FILE_TYPE,
    )


# produce the per-sector charts and the sector growth chart
plot_industrial()

## Insolvencies by State in Sectors of interest

In [16]:
# Sector columns charted state-by-state by selected_sectors(), in plotting
# order. Each name must be a column of the current state-by-sector table.
sectors_of_interest = [
    "Accommodation and Food Services",
    "Construction",
    "Rental, Hiring and Real Estate Services",
    "Financial and Insurance Services",
    "Manufacturing",
    "Retail Trade",
    "Other Services",
    "Australia",
]

In [17]:
def selected_sectors():
    """Draw a stacked monthly bar chart by state for each sector of interest.

    Only the current state-by-sector table is used. For every name in
    `sectors_of_interest` the sector column is pivoted to one column per
    state, restricted to 2019 onwards, trimmed by EXCLUDE_LATEST months
    (when non-zero), and passed to bar_plot_finalise().
    """

    _history, latest_table, _combined = grab_state_sector_data()
    first_year = 2019
    legend_style = {"loc": "upper left", "fontsize": "x-small", "ncols": 2}

    for sector in sectors_of_interest:
        # months as rows, states as columns
        by_state = latest_table[sector].unstack()
        by_state = by_state[by_state.index.year >= first_year]
        if EXCLUDE_LATEST:
            by_state = by_state.iloc[:-EXCLUDE_LATEST]

        last_month = by_state.index[-1].strftime("%b-%Y")
        wrapped_title = "\n".join(
            textwrap.wrap(f"Monthly Insolvencies by State: {sector}", 60)
        )
        bar_plot_finalise(
            by_state,
            stacked=True,
            color=[get_color(state) for state in by_state.columns],
            title=wrapped_title,
            ylabel="First-time Insolvencies",
            legend=legend_style,
            rfooter=SOURCE,
            lfooter=f"{LFOOTER}Data to: {last_month}",
            show=SHOW,
            file_type=FILE_TYPE,
        )


# produce the state-stacked bar charts for the sectors of interest
selected_sectors()

## Finished

In [18]:
# IPython magics: record the run time, interpreter, machine and the
# versions of the imported packages alongside the notebook output
%load_ext watermark
%watermark -u -t -d --iversions --watermark --machine --python --conda

Last updated: 2025-06-22 12:30:38

Python implementation: CPython
Python version       : 3.13.5
IPython version      : 9.3.0

conda environment: n/a

Compiler    : Clang 20.1.4 
OS          : Darwin
Release     : 24.5.0
Machine     : arm64
Processor   : arm
CPU cores   : 14
Architecture: 64bit

matplotlib: 3.10.3
pathlib   : 1.0.1
typing    : 3.10.0.0
mgplot    : 0.2.5a1
pandas    : 2.3.0

Watermark: 2.5.0

