# ASIC Corporate Insolvency Data

Make sure you save the latest monthly data in the cache directory and update cell 6 before running this notebook.

https://asic.gov.au/regulatory-resources/find-a-document/statistics/insolvency-statistics/insolvency-statistics-current/


## Python set-up

In [1]:
# system imports
from pathlib import Path
import textwrap

In [2]:
# analytic imports
import matplotlib.pyplot as plt
import pandas as pd

In [3]:
# local imports
from plotting import line_plot, set_chart_dir, finalise_plot, state_abbr, state_colors
from decompose import decompose

Could not import auto_arima from pmdarima


In [4]:
# plotting set-up
TITLE = "Corporate Insolvencies"  # common prefix for the chart titles below
SOURCE = "Source: ASIC"  # passed as the right footer of each chart
LFOOTER = "The first time a company enters external administration or has a controller appointed"
plt.style.use("fivethirtyeight")
CHART_DIR = "./CHARTS/ASIC/"
Path(CHART_DIR).mkdir(parents=True, exist_ok=True)
# presumably tells the local plotting module where to save charts - confirm in plotting.py
set_chart_dir(CHART_DIR)
# delete every *.png currently in the chart directory before new charts are produced
for filename in Path(CHART_DIR).glob("*.png"):
    filename.unlink()
SHOW = False  # passed as show= to every plotting call in this notebook

## State/National Data Capture

In [5]:
# directory the ASIC spreadsheets are read from; created here if it does not exist
ASIC_CACHE_DIR = "./ASIC_CACHE/"
Path(ASIC_CACHE_DIR).mkdir(parents=True, exist_ok=True)

In [6]:
# YOU MUST DOWNLOAD THE LATEST DATA FROM THE ASIC WEBSITE
# and save it in the ASIC_CACHE_DIR directory.
# Then amend the filename in the next line of code ...
LATEST_DATA = (
    ASIC_CACHE_DIR
    + "asic-insolvency-statistics-series-1-and-series-2-published-18-april-2024.xlsx"
)

# see: https://asic.gov.au/regulatory-resources/find-a-document/statistics/insolvency-statistics/insolvency-statistics-current/


def fix_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of df with tidied, consistently formatted column names.

    Each name is title-cased, then (in this order) embedded newlines are
    removed, "&" is spelled out as " and ", "And" is lower-cased to "and",
    and double spaces are reduced to single spaces in one pass.
    """

    substitutions = (("\n", ""), ("&", " and "), ("And", "and"), ("  ", " "))

    def tidy(name: str) -> str:
        cleaned = name.title()
        for old, new in substitutions:
            cleaned = cleaned.replace(old, new)
        return cleaned

    return df.rename(columns={name: tidy(name) for name in df.columns})


def get_asic_data(
    sheet: str = "1.3",
    pm: str = "Period(Month)",
    pcy: str = "Period(Calendar Year)",
):
    """Load one sheet of the latest ASIC workbook, indexed by month.

    Arguments:
    sheet -- name of the worksheet to read from LATEST_DATA
    pm -- name (after fix_columns) of the column holding the month
    pcy -- name (after fix_columns) of the column holding the calendar year

    Returns a DataFrame with a monthly PeriodIndex, in which any "Total"
    column has been renamed to "Australia".
    """

    table = fix_columns(pd.read_excel(LATEST_DATA, sheet_name=sheet, skiprows=10))

    # discard columns that are entirely empty
    table = table.dropna(axis=1, how="all")

    # rows without a month value are the sheet's total rows - discard them
    has_month = table[pm].notna()
    table = table.loc[has_month]

    # build a monthly PeriodIndex from the year and month columns
    year_text = table[pcy].astype(int).astype(str)
    table.index = pd.PeriodIndex(year_text + "-" + table[pm], freq="M")

    return table.rename(columns={"Total": "Australia"})


# read the default sheet ("1.3") once; later cells reuse current_data
current_data = get_asic_data()

In [7]:
# eyeball the most recent rows to confirm the latest month has loaded
current_data.tail()

Unnamed: 0,Period (Financial Year),Period(Calendar Year),Period(Quarter),Period(Month),New South Wales,Victoria,Queensland,South Australia,Western Australia,Tasmania,Northern Territory,Australian Capital Territory,Australia
2023-11,2023-2024,2023.0,December,Nov,353,218,206,31,58,5,3,17,891
2023-12,2023-2024,2023.0,December,Dec,357,205,132,35,41,4,7,14,795
2024-01,2023-2024,2024.0,March,Jan,182,176,110,32,43,1,2,9,555
2024-02,2023-2024,2024.0,March,Feb,399,231,195,40,86,7,2,8,968
2024-03,2023-2024,2024.0,March,Mar,476,300,202,54,62,13,2,22,1131


In [8]:
def get_asic_history() -> pd.DataFrame:
    """Load the historical (series 1) monthly ASIC table, indexed by month.

    The sheet gives a financial-year label in the "Period" column on rows
    that have no "Australia" value, followed by rows whose "Period" is a
    month name. The label is carried down to the month rows and converted
    to a calendar year: July to December take the first year of the label,
    all other months take the second.
    """

    workbook = (
        ASIC_CACHE_DIR
        + "asic-insolvency-statistics-series-1-published-8-september-2022.xlsx"
    )
    monthly_sheet = "1.2"
    table = fix_columns(pd.read_excel(workbook, sheet_name=monthly_sheet, skiprows=4))

    # discard the empty rows, then the first and last of the remaining rows
    table = table.dropna(axis=0, how="all").iloc[1:-1]

    # copy the financial-year label onto every row beneath it
    label_rows = table["Australia"].isna()
    table["Fin Year"] = table.loc[label_rows, "Period"]
    table["Fin Year"] = table["Fin Year"].ffill()

    # keep only rows with at least eight populated cells
    table = table.dropna(thresh=8, axis=0)

    # pick the calendar year that each month falls in
    first_half_of_fin_year = (
        "July", "August", "September", "October", "November", "December"
    )
    fin_year_parts = table["Fin Year"].str.split("-")
    table["Year"] = fin_year_parts.str[0].where(
        table["Period"].isin(first_half_of_fin_year),
        other=fin_year_parts.str[1],
    )

    table.index = pd.PeriodIndex(table["Year"] + "-" + table["Period"], freq="M")
    return table


# load the historical table once; the plotting cells below reuse historic_data
historic_data = get_asic_history()

## State/National plots

In [9]:
def combine_data[T: (pd.Series, pd.DataFrame)](history: T, current: T) -> T:
    """Combine historic and current data, which is either a Series or a DataFrame.
    Note: This typing definition requires Python 3.12"""
    combined_data = pd.concat([history, current], axis=0)
    combined_data = combined_data[~combined_data.index.duplicated(keep="last")]
    combined_data = combined_data.sort_index()
    return combined_data

In [10]:
def plot_trend():
    """Chart the seasonally adjusted and trend series for Australia.

    The national history and current series are combined, the final
    (provisional) month is left out, and the remainder is passed to the
    local decompose() helper with a multiplicative model. The
    "Seasonally Adjusted" and "Trend" columns of its result are plotted.
    """
    national = combine_data(historic_data["Australia"], current_data["Australia"])
    without_latest = national.iloc[:-1]
    decomp = decompose(s=without_latest, model="multiplicative")

    plot_columns = ["Seasonally Adjusted", "Trend"]
    line_plot(
        decomp[plot_columns],
        starts=(None, "2019-01"),
        width=(1, 2),
        title=f"{TITLE}: Australia",
        ylabel="First-time Insolvencies/Month",
        lheader="Last month of (provisional) data excluded",
        rheader="Locally calculated seasonal adjustment",
        lfooter=LFOOTER,
        rfooter=SOURCE,
        show=SHOW,
    )


# produce the national seasonally adjusted / trend chart
plot_trend()

In [11]:
def state_growth():
    """Plot each state's original series, then a bar chart of growth.

    Growth compares the total for the latest 12 months of data (the final
    provisional month excluded) with the average annual total over the
    2015-2019 base period, expressed as a percentage change.
    """

    states = [
        "New South Wales",
        "Victoria",
        "Queensland",
        "South Australia",
        "Western Australia",
        "Tasmania",
        "Northern Territory",
        "Australian Capital Territory",
        "Australia",
    ]
    merged = combine_data(historic_data[states], current_data[states])
    combined = merged.iloc[:-1]  # ignore last row

    # one line chart per state; break long titles before the bracketed suffix
    too_long = 58
    for col in combined:
        title = f"{TITLE}: {col} (Original series)"
        if len(title) > too_long:
            separator = "\n"
        else:
            separator = " "
        title = title.replace(" (", f"{separator}(")
        line_plot(
            combined[col],
            title=title,
            width=1,
            ylabel="First-time Insolvencies/Month",
            lheader="Last month of (provisional) data excluded",
            lfooter=LFOOTER,
            rfooter=SOURCE,
            show=SHOW,
        )

    # growth: latest 12 months against the base-period annual average
    state_data = combined[states]
    base_start, base_end = 2015, 2019
    years = base_end - base_start + 1
    index_year = state_data.index.year
    in_base_period = (index_year >= base_start) & (index_year <= base_end)
    base = state_data.loc[in_base_period].sum() / years
    latest = state_data.iloc[-12:].sum()
    growth = ((latest / base) - 1) * 100

    # horizontal bar chart, smallest growth first
    _fig, ax = plt.subplots()
    growth.sort_values().plot(ax=ax, kind="barh")
    finalise_plot(
        ax,
        title=f"{TITLE} in the 12 months\nto {combined.index[-1]} by State over {base_start}-{base_end} Ave",
        xlabel="Growth (%)",
        lheader="Last month of (provisional) data excluded",
        lfooter=LFOOTER,
        rfooter=SOURCE,
        show=SHOW,
    )


# produce the per-state line charts and the state growth bar chart
state_growth()

## By Industry Sector

In [12]:
# Industry-sector column names (as they read after fix_columns and the renames
# in is_history). Used to select the same columns from the historical and the
# current tables, and as the order of the per-sector charts in plot_industrial.
industrial = [
    "Accommodation and Food Services",
    "Administrative and Support Services",
    "Agriculture, Forestry and Fishing",
    "Arts and Recreation Services",
    "Construction",
    "Education and Training",
    "Electricity, Gas, Water and Waste Services",
    "Financial and Insurance Services",
    "Health Care and Social Assistance",
    "Information Media and Telecommunications",
    "Manufacturing",
    "Mining",
    "Other Services",
    "Professional, Scientific and Technical Services",
    "Public Administration and Safety",
    "Rental, Hiring and Real Estate Services",
    "Retail Trade",
    "Transport, Postal and Warehousing",
    "Wholesale Trade",
    "Unknown",
    "Australia",  # national total ("Total" in the sheets, renamed on load)
]

In [13]:
def amalgumate_fis_columns(data: pd.DataFrame) -> pd.DataFrame:
    """Amalgamate the Financial and Insurance Services (FIS) columns.

    The headline "Financial and Insurance Services" column and the six
    "Fis–..." sub-category columns are summed row by row (missing values
    are skipped). All seven columns are removed and the row sums are
    appended as a single, final "Financial and Insurance Services" column.
    The input frame is not modified.
    """

    headline = "Financial and Insurance Services"
    components = [
        headline,
        "Fis–Credit Provider",
        "Fis–Deposit Taking Institutions",
        "Fis–Insurance",
        "Fis–Managed Investments",
        "Fis–Other Financial Services",
        "Fis–Superannuation",
    ]
    row_totals = data[components].sum(axis=1, skipna=True)
    return data.drop(columns=components).assign(**{headline: row_totals})


In [14]:
def is_history() -> pd.DataFrame:
    """Get ASIC insolvency history data by industry.

    Reads sheet "1A.1.2" of the cached series-1A workbook, keeps only the
    "Total for <month> <year>" rows, merges the Financial and Insurance
    Services columns into one, and renames columns so they match the
    current-data sheets.

    Returns a DataFrame indexed by monthly pd.Period values. The
    "Period and Region" column is retained with the "Total for " prefix
    removed.
    """

    file = (
        ASIC_CACHE_DIR
        + "asic-insolvency-statistics-series-1a-published-8-september-2022.xlsx"
    )
    asic_data = pd.read_excel(io=file, sheet_name="1A.1.2", skiprows=5).dropna(
        thresh=8, axis=0
    )
    asic_data = fix_columns(asic_data)

    # na=False: a row with a missing or non-text label is simply not a total
    # row, rather than putting NaN into the boolean mask and raising
    is_monthly_total = asic_data["Period and Region"].str.contains(
        "Total for", na=False
    )
    asic_data = asic_data[is_monthly_total]

    # the amalgamated FIS column is appended last; move it to position 9
    # (presumably its alphabetical slot among the sector columns - confirm)
    asic_data = amalgumate_fis_columns(asic_data)
    cols = list(asic_data.columns)
    cols.insert(9, cols.pop(-1))
    asic_data = asic_data[cols]

    # rename columns to match with current data
    asic_data = asic_data.rename(
        columns={
            "Other (Business and Personal) Services": "Other Services",
            "Information Media and Tele- Communications": "Information Media and Telecommunications",
            "Total": "Australia",
        }
    )

    # get monthly PeriodIndex
    asic_data["Period and Region"] = asic_data["Period and Region"].str.replace(
        "Total for ", ""
    )
    # Note: some data quality issues being addressed here.
    # The original code also reversed the row order of this Series twice
    # ([::-1] on the Series, not on each token list), which cancelled out.
    # Those slices are dropped; pd.Period parses "Month-Year" labels as-is.
    month_labels = (
        asic_data["Period and Region"]
        .str.replace("NOVMEBER", "NOVEMBER", case=False)  # data quality fix
        .str.title()
        .str.split(" ")
        .str.join("-")
        .str.replace("--", "-")  # data quality fix (double space in label)
    )
    asic_data.index = [pd.Period(label, freq="M") for label in month_labels]
    return asic_data


# historical sector table, restricted to the columns named in `industrial`
is_hist = is_history()[industrial]

In [15]:
def get_combined_sectoral_data() -> pd.DataFrame:
    """Return the historical and current industry-sector data as one frame.

    The current data comes from sheet "1.2" of the latest workbook,
    restricted to the `industrial` columns, with its last (provisional)
    row removed before it is combined with `is_hist`.
    """
    current_sectors = get_asic_data(
        sheet="1.2",
        pm="Period (Month)",
        pcy="Period (Calendar Year)",
    )
    # drop last provisional value
    settled = current_sectors[industrial].iloc[:-1]
    return combine_data(is_hist, settled)

In [16]:
def plot_industrial():
    """Plot each industry sector's original series, then sector growth.

    Growth compares the total for the latest 12 months with the average
    annual total over the 2015-2019 base period. Only sectors whose base
    average exceeds the cutoff are charted, and the "Australia" total is
    left out of the bar chart.
    """

    combined = get_combined_sectoral_data()

    # one line chart per sector, titles wrapped at 50 characters
    for col in industrial:
        line_plot(
            combined[col],
            title=textwrap.fill(f"{TITLE}: {col} (Original series)", 50),
            width=1,
            ylabel="First-time Insolvencies/Month",
            lheader="Last month of (provisional) data excluded",
            lfooter=LFOOTER,
            rfooter=SOURCE,
            show=SHOW,
        )

    # growth: latest 12 months against the base-period annual average
    base_start, base_end = 2015, 2019
    years = base_end - base_start + 1
    index_year = combined.index.year
    in_base_period = (index_year >= base_start) & (index_year <= base_end)
    base = combined.loc[in_base_period].sum() / years
    latest = combined.iloc[-12:].sum()
    growth = ((latest / base) - 1) * 100

    cutoff = 150  # per year
    growth = growth[base > cutoff].drop("Australia")

    # horizontal bar chart, smallest growth first
    _fig, ax = plt.subplots()
    ax = growth.sort_values().plot(ax=ax, kind="barh")
    ax.tick_params(axis="both", which="major", labelsize="x-small")
    finalise_plot(
        ax,
        title=f"{TITLE} in the 12 months\nto {combined.index[-1]} by Sector over {base_start}-{str(base_end)[2:]} Ave",
        xlabel="Growth (%)",
        lheader="Last month of (provisional) data excluded",
        rheader=f"Industry sectors with more than {cutoff}/year insolvencies in the {base_start}-{base_end} period.",
        lfooter=LFOOTER,
        rfooter=SOURCE,
        show=SHOW,
    )


# produce the per-sector line charts and the sector growth bar chart
plot_industrial()

## Sectors of interest

In [17]:
# Columns of `current` (from grab_data) to chart by state in the loop below.
# NOTE: grab_data does not rename "Total" to "Australia", so the all-sector
# column is still called "Total" here.
sectors_of_interest = [
    "Accommodation and Food Services",
    "Construction",
    "Rental, Hiring and Real Estate Services",
    "Financial and Insurance Services",
    "Manufacturing",
    "Retail Trade",
    "Other Services",
    "Total",
]

In [18]:
def _historic_sectors_by_state() -> pd.DataFrame:
    """Load the historical sector table, indexed by (month, state)."""

    historic_table = "1A.1.2"
    file = (
        ASIC_CACHE_DIR
        + "asic-insolvency-statistics-series-1a-published-8-september-2022.xlsx"
    )
    hist_data = pd.read_excel(io=file, sheet_name=historic_table, skiprows=5)
    hist_data = fix_columns(hist_data)
    hist_data = amalgumate_fis_columns(hist_data)
    hist_data = hist_data[hist_data["Period and Region"].notna()]

    # discard rows whose label matches any of these patterns (applied in turn):
    # the "MONTHLY" rows, the "Total for ..." rows, the ASIC name row, and
    # rows containing a "YYYY-YYYY" label
    unwanted_labels = (
        "MONTHLY",
        "Total for",
        "Australian Securities & Investments Commission",
        r"\d{4}-\d{4}",
    )
    for pattern in unwanted_labels:
        unwanted = hist_data["Period and Region"].str.contains(pattern)
        hist_data = hist_data.loc[~unwanted]

    # rows labelled "<Month> <Year>" head a group of state rows; carry that
    # label down to the rows beneath it and convert it to a monthly Period
    is_month_heading = hist_data["Period and Region"].str.contains(
        r"[A-Za-z]*\s\d{4}"
    )
    hist_data.loc[is_month_heading, "month"] = hist_data["Period and Region"]
    hist_data["month"] = hist_data["month"].ffill()
    hist_data["month"] = hist_data["month"].str.split(" ").str[::-1].str.join("-")
    hist_data["month"] = pd.PeriodIndex(hist_data["month"], freq="M")

    hist_data = hist_data.rename(columns={"Period and Region": "State"})
    hist_data["State"] = hist_data["State"].replace(state_abbr)
    hist_data.index = pd.MultiIndex.from_tuples(
        zip(hist_data["month"], hist_data["State"])
    )
    hist_data = hist_data.drop(columns=["State", "month"])

    # keep only rows with at least eight populated cells
    return hist_data.dropna(thresh=8)


def _current_sectors_by_state() -> pd.DataFrame:
    """Load the current sector table, indexed by (month, state)."""

    current_table = "1.4.2"
    state_column = "Principal Place Of Business (State Or Territory)"
    curr_data = pd.read_excel(io=LATEST_DATA, sheet_name=current_table, skiprows=10)
    curr_data = fix_columns(curr_data)
    curr_data = curr_data.dropna(how="all", axis=1)
    curr_data = curr_data.loc[curr_data[state_column].notna()]
    curr_data["month"] = pd.PeriodIndex(
        curr_data["Period (Calendar Year)"].astype(int).astype(str)
        + "-"
        + curr_data["Period (Month)"],
        freq="M",
    )

    # the first four columns are no longer needed once "month" exists
    # (presumably the four "Period ..." columns - confirm against the sheet)
    kill = curr_data.columns[:4]
    curr_data = curr_data.drop(columns=kill)

    curr_data = curr_data.rename(columns={state_column: "State"})
    curr_data["State"] = curr_data["State"].replace(state_abbr)
    curr_data.index = pd.MultiIndex.from_tuples(
        zip(curr_data["month"], curr_data["State"])
    )
    return curr_data.drop(columns=["State", "month"])


def grab_data() -> tuple[pd.DataFrame, pd.DataFrame]:
    """Grab the state-by-sector data, historical and current.

    Returns a (historical, current) pair of DataFrames. Each has a two-level
    (month, state) index, with state names passed through `state_abbr`.
    """

    return _historic_sectors_by_state(), _current_sectors_by_state()


# NOTE(review): only `current` is used by the cells visible below
history, current = grab_data()



In [19]:
# some recent, across-period, sector-totals
# unstack the state level into columns, sum over all months, unstack again to
# a sector-by-state table, then sum across the states for one total per sector
current.unstack().sum().unstack().sum(axis=1)

Accommodation and Food Services                     3026.0
Administrative and Support Services                 1018.0
Agriculture, Forestry and Fishing                    253.0
Arts and Recreation Services                         349.0
Construction                                        5639.0
Education and Training                               171.0
Electricity, Gas, Water and Waste Services           277.0
Financial and Insurance Services                     687.0
Health Care and Social Assistance                    421.0
Information Media and Telecommunications             513.0
Manufacturing                                       1040.0
Mining                                               319.0
Other Services                                      2097.0
Professional, Scientific and Technical Services     1121.0
Public Administration and Safety                     118.0
Rental, Hiring and Real Estate Services              789.0
Retail Trade                                        1395

In [20]:
# one stacked bar chart per sector of interest: months along the x-axis,
# one coloured segment per state
for sector in sectors_of_interest:
    ax = current[sector].unstack().plot(kind="bar", stacked=True, color=state_colors)
    title = textwrap.fill(f"{sector}: Monthly Insolvencies by State", 50)
    finalise_plot(
        ax,
        title=title,
        ylabel="First-time Insolvencies",
        legend={"loc": "upper left", "fontsize": "x-small", "ncols": 2},
        lfooter=LFOOTER,
        rfooter=SOURCE,
        show=SHOW,
    )


## Finished

In [21]:
# watermark
# record when the notebook was last run, plus the Python and package versions
%load_ext watermark
%watermark -u -n -t -v -iv -w

Last updated: Thu Apr 18 2024 19:40:28

Python implementation: CPython
Python version       : 3.12.3
IPython version      : 8.22.2

pandas    : 2.2.2
matplotlib: 3.8.4

Watermark: 2.4.3



In [22]:
# final cell: its output shows that the notebook ran to the end
print("done.")

done.
