# ABS Retail Turnover 8501

<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Python-set-up" data-toc-modified-id="Python-set-up-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Python set-up</a></span></li><li><span><a href="#Python-environment" data-toc-modified-id="Python-environment-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Python environment</a></span></li><li><span><a href="#Get-data-from-ABS" data-toc-modified-id="Get-data-from-ABS-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Get data from ABS</a></span></li><li><span><a href="#Plot" data-toc-modified-id="Plot-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Plot</a></span><ul class="toc-item"><li><span><a href="#COVID-Index-comparisons" data-toc-modified-id="COVID-Index-comparisons-4.1"><span class="toc-item-num">4.1&nbsp;&nbsp;</span>COVID Index comparisons</a></span></li></ul></li><li><span><a href="#Finished" data-toc-modified-id="Finished-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Finished</a></span></li></ul></div>

## Python set-up

In [1]:
# system imports
from pathlib import Path
import calendar

# analytic imports
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt

# local imports
from python_env import python_env
from finalise_plot import finalise_plot
from abs_common import (get_fs_constants,
                        #get_plot_constants,
                        get_ABS_meta_and_data,
                        get_identifier)

# let pandas show up to 999 rows/columns before truncating a displayed frame
pd.set_option('display.max_rows', 999)
pd.set_option('display.max_columns', 999)

# chart styling: use matplotlib's 'ggplot' style sheet for every plot
plt.style.use('ggplot')

In [2]:
# catalogue number for this notebook (matches the 8501 in the title);
# get_fs_constants() turns it into the cache directory, chart directory
# and the source label used in the chart footers below
cat_num = '8501'
CACHE_DIR, CHART_DIR, source = get_fs_constants(cat_num)

## Python environment

In [3]:
# record the platform and library versions this notebook was run with
# (see the printed report below)
python_env()

--------------------------------------------------
System:     Darwin
Release:    20.1.0
Machine:    x86_64
Processor:  i386
RAM:        64GB
--------------------------------------------------
Python:     3.8.3
Psutil:     5.7.0
Pandas:     1.0.5
Numpy:      1.18.5
Matplotlib: 3.2.2
--------------------------------------------------


## Get data from ABS

In [4]:
# Download (or reuse the cached copy of) the ABS zip file and unpack it
# into a metadata table and a data table.
# NOTE(review): 'MONTH-YEAR' looks like a placeholder that
# get_ABS_meta_and_data() fills in (the output reports "oct-2020") -
# confirm in abs_common.
url_template = '/'.join((
    'https://www.abs.gov.au/statistics/industry',
    'retail-and-wholesale-trade',
    'retail-trade-australia',
    'MONTH-YEAR',
    'All%20time%20series%20spreadsheets.zip',
))

meta, data = get_ABS_meta_and_data(url_template, CACHE_DIR)

File has been cached already
File for oct-2020 of size 3.7 MB


## Plot

### COVID Index comparisons

In [5]:
# Settings shared by every COVID index chart below.
series_type = "Seasonally Adjusted"
index_base = 100                      # value each series takes in the marked month

# charts start at `start`; `marked` is the month every index is re-based to
start = pd.Timestamp('2019-01-01')
marked = pd.Timestamp('2020-02-01')

# label for the marked month, e.g. used in the y-axis text of the charts
m_string = ' '.join([calendar.month_abbr[marked.month], str(marked.year)])

# only keep observations from `start` onwards
df = data.loc[data.index >= start]

In [6]:
# Victoria versus the rest of Australia, both indexed to the marked month
table = '3'

# look up the series identifiers for the national and Victorian totals
plot = 'Turnover ;  X ;  Total (Industry) ;'
national_desc = plot.replace('X', 'Total (State)')
victoria_desc = plot.replace('X', 'Victoria')
national, _ = get_identifier(meta, national_desc, series_type, table)
victoria, _ = get_identifier(meta, victoria_desc, series_type, table)

# the national column is overwritten with national less Victoria,
# i.e. the rest of Australia (ROA)
frame = df[[victoria, national]].copy()
frame[national] = frame[national].sub(frame[victoria])

# re-base both columns so the marked month equals index_base
index_row = frame.loc[marked]
frame = frame.div(index_row).mul(index_base)

# plot - legend labels follow the column order [victoria, national]
ax = frame.plot(lw=2, color=['darkorange', 'blue'])
ax.legend(['Victoria', 'Rest of Australia'], loc='best')
ylabel = f'Index ({m_string} = {index_base})'
finalise_plot(ax, 'Retail Trade Turnover Index', ylabel,
              'Vic-compared', CHART_DIR,
              rfooter=f'{source} {table}',
              lfooter=f'Calculated from {series_type.lower()} series')

In [7]:
# COVID Industry comparison with index
table = '1'

# Every distinct series description in this table. Descriptions containing
# '(Industry)' are skipped in the loop, so the remaining series are charted.
plots = meta[meta['Table'] == table]['Data Item Description'].unique()
industries = []   # series identifiers, in plotting order
names = []        # matching legend labels, same order as `industries`
for plot in plots:
    if '(Industry)' in plot:
        continue
    # called series_id rather than `id` so the builtin id() is not
    # shadowed in the kernel namespace for every later cell
    series_id, _ = get_identifier(meta, plot,
                                  series_type, table)
    industries.append(series_id)
    # strip the common prefix and the ' ;' separators to get a short label
    name = plot.replace('Turnover ;  Total (State) ;  ', '')
    name = name.replace(' ;', '')
    names.append(name)

# re-base every selected series so the marked month equals index_base
frame = df[industries].copy()
index_row = frame.loc[marked]
frame = frame.div(index_row) * index_base

# plot
ax = frame.plot(lw=2.5)
ax.legend(names, loc='best')
title = 'Retail Trade Turnover by Industry (Index)'
# NOTE(review): the fourth argument is '' here but 'Vic-compared' for the
# previous chart - confirm finalise_plot() handles an empty value as intended.
finalise_plot(ax, title,
              f'Index ({m_string} = {index_base})',
              '', CHART_DIR,
              rfooter=f'{source} {table}',
              lfooter=f'Calculated from {series_type.lower()} series')

## Finished

In [8]:
# end-of-run marker: printed once every cell above has executed
print('Finished')

Finished
