# ABS Retail Turnover 8501

## Python set-up

In [1]:
# system imports
import sys
import pathlib
import calendar

# analytic imports
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import statsmodels.api as sm
import statsmodels.formula.api as smf

# local imports
from plotting import (
    finalise_plot,
    set_chart_dir,
    recalibrate_series,
    clear_chart_dir,
    plot_covid_recovery,
)    
from abs_data_capture import (
    get_fs_constants,
    get_ABS_meta_and_data,
    get_plot_constants,
    find_id,
    clear_cache,
)

# pandas display settings -- raise the row/column display limits to 999
pd.set_option('display.max_rows', 999)
pd.set_option('display.max_columns', 999)

# plotting stuff -- apply the 'fivethirtyeight' matplotlib style sheet
plt.style.use('fivethirtyeight')

## Get data from the ABS

In [2]:
# --- ABS retail data (catalogue 8501) ---
# The file-system constants (source label, chart directory and the key
# of the meta data table) are derived from the retail catalogue id.
retail_catalogue_id = '8501'
source, CHART_DIR, META_DATA = get_fs_constants(retail_catalogue_id)
if (retail_dict := get_ABS_meta_and_data(retail_catalogue_id)) is None:
    sys.exit(-1)  # nothing to work with if the retail data is missing
retail_meta = retail_dict[META_DATA]

# --- ABS inflation data (catalogue 6401) ---
cpi_catalogue_id = '6401'
if (cpi_dict := get_ABS_meta_and_data(cpi_catalogue_id)) is None:
    sys.exit(-1)  # likewise for the inflation data
cpi_meta = cpi_dict[META_DATA]


Found URL for a ZIP file on ABS web page
Retrieving zip-file from cache ...
Extracting DataFrames from the zip-file ...
Found URL for a ZIP file on ABS web page
Retrieving zip-file from cache ...
Extracting DataFrames from the zip-file ...


## Plot - retail turnover by industry

### Plotting initialisation / constants

In [3]:
# remove the charts left over from earlier runs, then set the chart directory
clear_chart_dir(CHART_DIR)
set_chart_dir(CHART_DIR)

# --- Globals --- NOTE: the data extraction and plotting functions
#                 read these names from the global namespace. This
#                 is a bit ugly and should be refactored.

# a chart title is split over two lines when its label is longer than this
LONG_LABEL = 30 # characters

# meta data column names used when selecting a series with find_id()
table_col, did_col, type_col = 'Table', 'Data Item Description', 'Series Type'

# series are trimmed to start at this month
FROM_DATE = pd.Period('2017-01-01', freq='M')

# NOTE(review): LIN_REGRESS is not referenced by the visible cells -- confirm
# whether it is still needed.
LIN_REGRESS = pd.Period('2020-01-01', freq='M')

# value matched against the series type column when selecting retail series
retail_type = 'Seasonally Adjusted'

### Data extraction and plotting functions

In [4]:
def get_retail_series(retail_label: str) -> tuple[pd.Series, str]:
    """Return the retail turnover series for retail_label, plus its units.

    The series is looked up in the global retail_meta / retail_df using
    the global retail_table and retail_type together with retail_label.
    It is passed through recalibrate_series(), given a monthly
    PeriodIndex and trimmed to observations from FROM_DATE onwards.

    Note: retail_table and retail_df are globals that must be bound
    before this function is called.
    """

    # find_id() is given {value-to-match: meta data column} pairs
    selector = {
        retail_table: table_col,
        retail_label: did_col,
        retail_type: type_col,
    }
    series_id, raw_units = find_id(retail_meta, selector)

    series, units = recalibrate_series(retail_df[series_id], raw_units)
    series.index = pd.PeriodIndex(series.index, freq='M')

    return series[series.index >= FROM_DATE], units

In [5]:
def plot_retail_series(retail_series: pd.Series,
                       retail_units: str,
                       title: str
    ) -> None:
    """Line-plot retail_series and hand the axes to finalise_plot().

    retail_units becomes the y-axis label and title the chart title.
    The footers are built from the globals retail_type, source and
    retail_table.
    """

    left_footer = f'{retail_type}. Current prices. '
    right_footer = f'{source} {retail_table}'
    y_label = f'{retail_units}'

    axes = retail_series.plot(lw=2, c='#0000dd')
    finalise_plot(
        axes,
        title=title,
        ylabel=y_label,
        rfooter=right_footer,
        lfooter=left_footer,
    )

In [6]:
def get_cpi_series(cpi_label: str,
                   retail_series: pd.Series,
    ) -> pd.Series:
    """Return a monthly CPI deflator for cpi_label, rebased to 1 at the
    latest CPI observation.

    The 'Index Numbers' series matching cpi_label is looked up in the
    global cpi_meta / cpi_df (using the global cpi_table), rebased,
    given a quarterly PeriodIndex, upsampled to monthly with linear
    interpolation and trimmed to FROM_DATE onwards.

    retail_series is currently unused (see the TO DO below).
    """

    # find_id() is given {value-to-match: meta data column} pairs
    selector = {
        cpi_table: table_col,
        cpi_label: did_col,
        'Index Numbers': did_col,
    }
    series_id, _ = find_id(cpi_meta, selector)  # units are not needed

    index_numbers = cpi_df[series_id] / 100 # rebase index to 1
    deflator = index_numbers / index_numbers.iloc[-1] # rebase to latest period
    deflator.index = pd.PeriodIndex(deflator.index, freq='Q')

    # quarterly -> monthly
    # NOTE(review): `pad` is not a documented interpolate() argument and is
    # presumably ignored for method='linear' -- confirm before removing it.
    monthly = deflator.resample('M').interpolate(method='linear', pad=None)

    # TO DO: augment CPI monthly series when not aligned with Retail Trade

    return monthly[monthly.index >= FROM_DATE]

### Main plotting loop

In [7]:
# --- frame --- NOTE: retail_table, retail_df, cpi_table and cpi_df
#               are read from the global namespace by the functions
#               defined above. That is a bit ugly, and it is why
#               they are bound here, in the same cell as the
#               plotting loop that relies on them.

# each retail industry is paired with the CPI index used to adjust it
industry_map = {
    # retail turnover: cpi index,
    'Food retailing': 'Food and non-alcoholic beverages',
    'Household goods retailing': 'Furnishings, household equipment and services',
    'Clothing, footwear and personal accessory retailing': 'Clothing and footwear',
    'Department stores': 'All groups CPI',
    'Other retailing': 'All groups CPI',
    'Cafes, restaurants and takeaway food services': 'All groups CPI',
    'Total (Industry)': 'All groups CPI',
}

# tables within the retail and CPI catalogues used for the industry charts
retail_table, cpi_table = '1', '4'
retail_df = retail_dict[retail_table]
cpi_df = cpi_dict[cpi_table]

# --- plotting loop ---
for retail_label, cpi_label in industry_map.items():

    # turnover as published (current prices)
    retail_series, retail_units = get_retail_series(retail_label)
    separator = ' ' if len(retail_label) <= LONG_LABEL else '\n'
    title = f'Retail Turnover:{separator}{retail_label}'
    plot_retail_series(retail_series, retail_units, title)

    # turnover divided by the matching (rebased) CPI index
    cpi_series = get_cpi_series(cpi_label, retail_series)
    adj_retail_series = (
        (retail_series / cpi_series).rename('Retail turnover (CPI Adj)')
    )
    plot_covid_recovery(
        adj_retail_series,
        title=title,
        ylabel=f'{retail_units} CPI Adjusted',
        tag='-CPI-adjusted-covid',
        rfooter=f'{source} {retail_table} {cpi_catalogue_id} {cpi_table}',
        lfooter=f'Seas Adj. CPI price adjust: {cpi_label}. ',
    )

## Plot - retail turnover by state

In [8]:
# --- Note: --- Quite a bit of code duplication in this cell
#               from the previous cell. It should be refactored
#               at some point.
#
#               retail_table, retail_df, cpi_table and cpi_df are
#               rebound here because get_retail_series(),
#               plot_retail_series() and get_cpi_series() read them
#               from the global namespace.

# -- initialisation
# each state/territory is paired with the capital city whose
# 'All groups CPI' index is used to adjust its retail turnover
state_map = {
    'New South Wales': 'Sydney',
    'Victoria': 'Melbourne',
    'Queensland': 'Brisbane',
    'South Australia': 'Adelaide',
    'Western Australia': 'Perth',
    'Tasmania': 'Hobart',
    'Northern Territory': 'Darwin',
    'Australian Capital Territory': 'Canberra',
}

# tables within the retail and CPI catalogues used for the state charts
retail_table = '3'
retail_df = retail_dict[retail_table]

cpi_table = '2'
cpi_df = cpi_dict[cpi_table]


# -- plotting loop
for state_label, city_label in state_map.items():

    # turnover as published (current prices)
    state_series, state_units = get_retail_series(state_label)

    separator = '\n' if len(state_label) > LONG_LABEL else ' '
    title = f'Retail Turnover:{separator}{state_label}'
    plot_retail_series(state_series, state_units, title)

    # the data item description being matched looks like:
    # Index Numbers ;  All groups CPI ;  Sydney ;
    cpi_label = f'All groups CPI ;  {city_label}'
    cpi_series = get_cpi_series(cpi_label, state_series)

    # turnover divided by the capital city's (rebased) CPI index;
    # named the same way as the CPI-adjusted industry series so the
    # two sets of charts are labelled consistently
    adj_state_series = state_series / cpi_series
    adj_state_series.name = 'Retail turnover (CPI Adj)'

    lfooter =  f'Seas Adj. CPI price adjust: {city_label}. '
    plot_covid_recovery(
        adj_state_series,
        title=title,
        ylabel=f'{state_units} CPI Adjusted',
        tag='-CPI-adjusted-covid',
        rfooter=f'{source} {retail_table} {cpi_catalogue_id} {cpi_table}',
        lfooter=lfooter,
        #show=True,
    )

## Finished

In [9]:
# watermark -- record when the notebook was last run, together with the
# Python / IPython versions and the versions of the imported packages
# (see the cell output)
%load_ext watermark
%watermark -u -n -t -v -iv -w

Last updated: Wed Feb 22 2023 07:34:02

Python implementation: CPython
Python version       : 3.11.0
IPython version      : 8.10.0

statsmodels: 0.13.5
pandas     : 1.5.3
matplotlib : 3.7.0
sys        : 3.11.0 | packaged by conda-forge | (main, Jan 15 2023, 05:44:48) [Clang 14.0.6 ]
numpy      : 1.24.2

Watermark: 2.3.1



In [10]:
# signal that the notebook ran through to its last cell
print('Finished')

Finished
