# COVID-19 in Australia (from covidlive.com.au)

<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Python-set-up" data-toc-modified-id="Python-set-up-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Python set-up</a></span><ul class="toc-item"><li><span><a href="#other-useful-information" data-toc-modified-id="other-useful-information-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>other useful information</a></span></li><li><span><a href="#functions" data-toc-modified-id="functions-1.2"><span class="toc-item-num">1.2&nbsp;&nbsp;</span>functions</a></span></li></ul></li><li><span><a href="#Total-Cases" data-toc-modified-id="Total-Cases-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Total Cases</a></span></li><li><span><a href="#Active-Cases" data-toc-modified-id="Active-Cases-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Active Cases</a></span></li><li><span><a href="#Source-of-infection" data-toc-modified-id="Source-of-infection-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Source of infection</a></span></li><li><span><a href="#Local-cases---speculative" data-toc-modified-id="Local-cases---speculative-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Local cases - speculative</a></span></li><li><span><a href="#Hospitalised" data-toc-modified-id="Hospitalised-6"><span class="toc-item-num">6&nbsp;&nbsp;</span>Hospitalised</a></span></li><li><span><a href="#Tests" data-toc-modified-id="Tests-7"><span class="toc-item-num">7&nbsp;&nbsp;</span>Tests</a></span></li><li><span><a href="#The-End" data-toc-modified-id="The-End-8"><span class="toc-item-num">8&nbsp;&nbsp;</span>The End</a></span></li></ul></div>

## Python set-up

In [1]:
# imports
import datetime
import sys
from io import StringIO
from pathlib import Path
from typing import Dict, List

import matplotlib as mpl
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import matplotlib.units as munits
import numpy as np
import pandas as pd

#pandas
pd.options.display.max_rows = 999
pd.options.display.max_columns = 999

# scraping
from time import sleep
import requests
from bs4 import BeautifulSoup

# local imports
sys.path.append(r'../bin')
import plotstuff as ps

# plotting
plt.style.use('ggplot')
%matplotlib inline

# matplotlib stuff for date formatting xticklabels:
# register one ConciseDateConverter instance as the unit converter for
# each of the three date-like types listed below
converter = mdates.ConciseDateConverter()
munits.registry[np.datetime64] = converter
munits.registry[datetime.date] = converter
munits.registry[datetime.datetime] = converter

### other useful information

In [2]:
# save location - create the directory if it does not exist yet
CHART_DIRECTORY = '../charts'
Path(CHART_DIRECTORY).mkdir(parents=True, exist_ok=True)
# from here on CHART_DIRECTORY is a path *prefix* (directory + '!AS-'),
# not a bare directory
CHART_DIRECTORY += '/!AS-'

# text passed to the plotting helpers as the right-hand footer (rfooter)
source = 'Source: https://covidlive.com.au/'

# month abbreviations; position 0 is a '-' placeholder so that a calendar
# month number (1..12) can be used directly as the list index
months = ['-', 'Jan', 'Feb', 'Mar', 'Apr',
               'May', 'Jun', 'Jul', 'Aug',
               'Sep', 'Oct', 'Nov', 'Dec',]

# running count of data-quality warnings raised while the notebook runs
warning_count = 0

### functions

In [3]:
def _add_years(labels: pd.Index, start_year: int) -> pd.DatetimeIndex:
    """Turn chronologically ordered day-month labels into full dates.

    The year starts at start_year and advances every time the month
    number falls from one row to the next. Unlike looking for a literal
    '01 Jan' row, this still works when that row is absent or when the
    data spans several years (so that every label occurs more than once).
    """
    text = pd.Series(labels)
    # 2000 is a leap year, so a 29 February label still parses
    month = pd.to_datetime(text + ' 2000').dt.month
    # in chronological order the month number only decreases at a new year
    rollovers = month.diff().lt(0).cumsum()
    years = (start_year + rollovers).astype(str)
    return pd.DatetimeIndex(text + ' ' + years, name=labels.name)


def get_dict_of_frames(url_stem: str, states: List[str],
                       start_year: int = 2020,
                       timeout: float = 30.0) -> Dict[str, pd.DataFrame]:
    """Return a dictionary of scraped dataframes for each state.

    Parameters
    ----------
    url_stem : str
        Report URL up to (and including) the final '/'; the state code
        is appended to it.
    states : list of str
        State codes to fetch. The upper-cased code is the key in the
        returned dictionary.
    start_year : int, default 2020
        Calendar year of the oldest row in each scraped table (the
        tables only carry day and month).
    timeout : float, default 30.0
        Seconds to wait for each HTTP response.

    Returns
    -------
    dict
        Upper-case state code -> DataFrame with an ascending
        DatetimeIndex. If a page holds several top-level tables, the
        last one wins.

    Raises
    ------
    requests.HTTPError
        If a page responds with an HTTP error status.
    ValueError
        If a page has no <div id="content"> element.
    """

    rememberall = {}
    for state in states:
        url = url_stem + state
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'lxml')
        content = soup.find('div', {'id': 'content'})
        if content is None:
            raise ValueError(f'No <div id="content"> found at {url}')

        for table in content.find_all('table'):
            if table.find_parent('table') is not None:
                continue  # ignore tables nested inside another table

            # wrap the markup in StringIO: passing literal HTML text
            # to read_html is deprecated in recent pandas
            df = pd.read_html(StringIO(table.prettify()),
                              header=0, index_col=0,
                              na_values=['', '-'])[0]
            df = df.dropna(axis='columns', how='all')
            df = df.dropna(axis='index', how='all')

            # assume tables are in reverse date order - add years
            df = df[::-1]
            df.index = _add_years(df.index, start_year)

            df = df.sort_index(ascending=True)
            rememberall[state.upper()] = df

        sleep(1) # let's be nice - not overload the servers, etc.

    return rememberall

In [4]:
def get_most_recent(d: Dict[str, pd.DataFrame])-> Dict[str, pd.Series]:
    """Return the latest complete row of each frame, keyed like d.

    "Complete" means the row has no missing value in any column. The
    rows can relate to different days if the state updates are not
    aligned; in that case a warning and the per-state dates are shown
    and the global warning_count is incremented.

    An empty dictionary yields an empty dictionary. A frame with no
    complete row raises IndexError.
    """
    global warning_count

    ret_val = {}
    ret_date = {}
    for name, frame in d.items():
        complete = frame.dropna()  # rows without any missing value
        ret_val[name] = complete.iloc[-1]
        ret_date[name] = complete.index[-1]

    # warn if the data looks mid-update for latest day in frame
    if len(set(ret_date.values())) > 1:
        print('Warning: states updated to different days')
        display(pd.DataFrame(pd.Series(ret_date)))
        # update the global warning counter
        warning_count += 1

    return ret_val

In [5]:
def get_national_col(d: Dict[str, pd.DataFrame], col: str)-> pd.DataFrame:
    """Collect column col from every frame in d into one frame.

    The result has one column per key of d whose frame contains col;
    frames without that column are left out. If the final row of the
    result has a missing value for any state, that row is dropped.
    An empty frame is returned when no frame has the column.
    """
    ret_frame = pd.DataFrame({
        name: frame[col]
        for name, frame in d.items()
        if col in frame.columns
    })

    # drop last row if missing values (nothing to inspect when empty)
    if not ret_frame.empty and ret_frame.iloc[-1].isna().any():
        ret_frame = ret_frame[:-1]

    return ret_frame

## Total Cases

In [6]:
# Scrape the daily-cases report once per state/territory code; the
# result maps the upper-cased code to that state's date-indexed frame
states = ['nsw', 'vic', 'qld', 'wa', 'sa', 'tas', 'act', 'nt']
url_stem = 'https://covidlive.com.au/report/daily-cases/'
cases_all = get_dict_of_frames(url_stem, states)

## Active Cases

In [7]:
# eight line colours (the same count as the eight state/territory codes
# scraped in this notebook), installed as matplotlib's default colour cycle
colors = ['#ef4444', '#faa31b', '#eee000', '#82c341',
          '#009f75', 'dodgerblue', '#394ba0', '#d54799']   # '#88c6ed', 

mpl.rcParams['axes.prop_cycle'] = mpl.cycler(color=colors)

In [8]:
# Scrape the daily-active-cases report for every state, then keep each
# state's latest row that has no missing values
states = ['nsw', 'vic', 'qld', 'wa', 'sa', 'tas', 'act', 'nt']
url_stem = 'https://covidlive.com.au/report/daily-active-cases/'
active_all = get_dict_of_frames(url_stem, states)
active_most_recent = get_most_recent(active_all)

In [9]:
# one column per state holding that state's ACTIVE series
active = get_national_col(active_all, 'ACTIVE')
# show the last seven rows as a quick sanity check
print(active[-7:])
ax = active.plot.line(lw=2.5)
title = 'Active COVID19 Cases by Date - Australian States'
ps.finalise_plot(ax, title=title, xlabel=None, 
             chart_directory=CHART_DIRECTORY,
             ylabel='Number of Active Cases',
             rfooter=source)

              NSW   VIC   QLD    WA    SA  TAS  ACT    NT
DATE                                                     
2020-12-27  134.0  10.0  10.0  13.0   5.0  0.0  1.0  11.0
2020-12-28  139.0  11.0  13.0  15.0   5.0  0.0  1.0   6.0
2020-12-29  142.0   7.0  11.0  14.0   5.0  0.0  1.0   7.0
2020-12-30  160.0   8.0  11.0  12.0   8.0  0.0  1.0   4.0
2020-12-31  170.0  10.0  14.0  14.0   7.0  0.0  1.0   4.0
2021-01-01  173.0  18.0  13.0  16.0  11.0  0.0  1.0   4.0
2021-01-02  180.0  29.0  13.0  19.0  10.0  0.0  1.0  11.0


In [10]:
# NOTE: `active` is rebound here - it now holds one row per state (the
# latest complete figures) rather than the by-date table built earlier
active = pd.DataFrame(active_most_recent).T
display(active)
active = active.astype(float).astype(int) # make sure everything is an integer
title = 'Active COVID19 Cases - Australian States'
# CHART_DIRECTORY is a path prefix, so the file name is prefix + title
ps.plot_barh(active['ACTIVE'].sort_values(ascending=True), 
    title=title,
    xlabel='Currently active cases',
    save_as = f'{CHART_DIRECTORY}{title}.png',
    rfooter=source,
)

Unnamed: 0,ACTIVE,NET
NSW,180.0,7.0
VIC,29.0,11.0
QLD,13.0,0.0
WA,19.0,3.0
SA,10.0,-1.0
TAS,0.0,0.0
ACT,1.0,0.0
NT,11.0,7.0


## Source of infection

In [11]:
# Scrape the daily source-of-infection report for every state, then keep
# each state's latest row that has no missing values
states = ['nsw', 'vic', 'qld', 'wa', 'sa', 'tas', 'act', 'nt']
url_stem = 'https://covidlive.com.au/report/daily-source-of-infection/'
infection_all = get_dict_of_frames(url_stem, states)
infection_most_recent = get_most_recent(infection_all)

In [12]:
# scraped column heading -> readable label used on the charts
columns={
    'CONTACT': 'Contact',
    'I/STATE': 'Interstate',
    'INVES': 'Investigating',
    'O/SEAS': 'Overseas',
    'UNKNOWN': 'Unknown',
}

In [13]:
# one row per state (latest complete figures), columns renamed to the
# readable labels and put into a fixed order for the stacked bar chart
soi = pd.DataFrame(infection_most_recent).T
soi = soi.rename(columns = columns)
soi = soi[['Overseas', 'Interstate', 'Contact', 
           'Unknown', 'Investigating']]

In [14]:
# five colours - one per source-of-infection category in the stacked bars
serious = ['dodgerblue', '#aaaaaa', '#555555', '#dd0000', 'darkorange',]
mpl.rcParams['axes.prop_cycle'] = mpl.cycler(color=serious)
if soi_warn_count := soi.isna().sum().sum():
    print('Warning: there are NaNs in the data. ')
    print('Note: Victoria does not report interstate')
    if soi_warn_count > 1: # ignore Vic interstate
        warning_count += 1 
    display(soi)        
# convert regardless of whether NaNs were found, so the plot call below
# always receives numeric columns
soi = soi.astype(float)
# reverse the rows so the first state ends up at the top of the chart
ax = soi[::-1].plot.barh(stacked=True)

title = 'Source of COVID19 Infection - Australian States'
ps.finalise_plot(ax, title=title, 
             chart_directory=CHART_DIRECTORY,
             xlabel='Cumulative cases', ylabel=None,
             rfooter=source)

Note: Victoria does not report interstate


Unnamed: 0,Overseas,Interstate,Contact,Unknown,Investigating
NSW,2807.0,90.0,1603.0,434.0,13.0
VIC,1010.0,,15601.0,3762.0,12.0
QLD,951.0,23.0,239.0,41.0,0.0
WA,768.0,7.0,78.0,13.0,3.0
SA,392.0,13.0,165.0,9.0,0.0
TAS,85.0,3.0,141.0,5.0,0.0
ACT,89.0,3.0,25.0,1.0,0.0
NT,76.0,4.0,2.0,0.0,0.0


In [15]:
# by-date table (one column per state) of the INVES column, i.e. the
# column this notebook labels 'Investigating'
investigate = get_national_col(infection_all, 'INVES')
investigate = investigate.astype(float)

In [16]:
# switch back to the per-state colour cycle before drawing one line per state
mpl.rcParams['axes.prop_cycle'] = mpl.cycler(color=colors)
ax = investigate.plot.line(lw=2.5)
title = 'Source of COVID19 Infection being Investigated by Date'

ps.finalise_plot(ax, title=title, 
             chart_directory=CHART_DIRECTORY,
             xlabel=None, ylabel='Number under investigation',
             rfooter=source)

In [17]:
mpl.rcParams['axes.prop_cycle'] = mpl.cycler(color=colors)
# Number of most-recent rows to chart. Deliberately not named `recent`:
# a function with that name is defined further down the notebook, and
# re-running this cell would otherwise overwrite it with an integer.
RECENT_DAYS = 42
MARGINS = (0.005, 0.03)

fig, ax = plt.subplots()
ax.xaxis_date()
ax.margins(*MARGINS)
# one line per state, restricted to the last RECENT_DAYS rows
for state in investigate.columns:
    ax.plot(investigate.index[-RECENT_DAYS:].values, 
            investigate[state].iloc[-RECENT_DAYS:], 
            label=state, lw=2.5)
ax.legend(loc='best')

title = 'Source of COVID19 Infection being Investigated by Date (Recent)'
ps.finalise_plot(ax, title=title, 
             chart_directory=CHART_DIRECTORY,
             xlabel=None, ylabel='Number under investigation',
             rfooter=source)

In [18]:
# Historic May, June, July 2020
# Disabled by default: change False to True to draw one chart per month
# with a line for every state.
if False: # switch
    
    YEAR = 2020
    MARGINS = (0.005, 0.03)
    mpl.rcParams['axes.prop_cycle'] = mpl.cycler(color=colors)
    for month in 5, 6, 7:
        # rows of the by-date table that fall in this month of YEAR
        history = investigate.loc[
                            (investigate.index.month == month) &
                            (investigate.index.year == YEAR)
                        ]
        fig, ax = plt.subplots()
        ax.xaxis_date()
        ax.margins(*MARGINS)
        for state in history.columns:
            ax.plot(history.index.values, history[state], label=state, lw=2.5)
        ax.legend(loc='best')

        # months[] is indexed by calendar month number
        title = ('Source of COVID19 Infection - Investigating Source by Date'
                 f' ({months[month]} {YEAR})')
        ps.finalise_plot(ax, title=title, 
                     chart_directory=CHART_DIRECTORY,
                     xlabel=None, ylabel='Number under investigation',
                     rfooter=source)

In [19]:
# Historic - NSW in July, August and September 2020
# Disabled by default: change False to True to draw a single chart of
# one state's series over the listed months.
if False: # switch - turn on/off this code block
    
    state = 'NSW'
    YEAR = 2020
    monthset = [7, 8, 9]
    # e.g. 'Jul, Aug, Sep' - months[] is indexed by calendar month number
    mtext = ', '.join(map(lambda x: months[x], monthset))
    history = investigate.loc[
                                (investigate.index.month.isin(monthset)) &
                                (investigate.index.year == YEAR) 
                             ][state]
    MARGINS = 0.01
    fig, ax = plt.subplots()
    ax.xaxis_date()
    ax.margins(MARGINS) # seems to work here
        
    ax.plot(history.index.values, history, color='#dd0000')
    # a single space between the state and the bracketed period
    # (the two pieces used to contribute one space each)
    title = (f'Source of COVID19 Infection under investigation in {state}'
             f' ({mtext} {YEAR})')

    ps.finalise_plot(ax, 
                     title=title, 
                     chart_directory=CHART_DIRECTORY,
                     xlabel=None,
                     ylabel='Cases under investigation',
                     rfooter=source)

In [20]:
# National picture: for every raw source-of-infection column, add the
# state series together date by date, then switch to readable labels.
national_soi = pd.DataFrame()
for col in columns.keys():
    by_state = get_national_col(infection_all, col)
    national_soi[col] = by_state.sum(axis=1)
national_soi = national_soi.rename(columns=columns)

mpl.rcParams['axes.prop_cycle'] = mpl.cycler(color=serious)

# bottom-to-top order of the stacked areas (also the legend labels)
stack_order = ['Overseas', 'Interstate', 'Contact', 'Unknown', 'Investigating']
days = pd.to_datetime(national_soi.index)
fig, ax = plt.subplots()
ax.stackplot(days,
             *[national_soi[label] for label in stack_order],
             labels=stack_order)
ax.legend(loc='upper left')
ax.margins(0.01)

title = 'Source of COVID-19 Infection by Date - Australia'
ps.finalise_plot(ax, title=title, 
             chart_directory=CHART_DIRECTORY,
             xlabel=None, ylabel='Cumulative infections',
             rfooter=source)

## Local cases - speculative 

In [21]:
def recent(df, mode):
    """Draw one 'recent' chart for each column of df.

    For every column the last 42 rows are drawn as bars, overlaid with
    the 7-row and 14-row rolling means of the full series. Columns whose
    last 42 rows do not sum to more than zero are skipped.

    df   -- date-indexed frame with one column per state
    mode -- word used in the legend, chart title and y-axis label
    """
    RECENT = 42    # rows (days) shown on each chart
    MA1 = 7        # window of the first rolling mean, in days
    MA2 = 14       # window of the second rolling mean, in days
    MARGINS = 0.01
    plt.style.use('ggplot')

    # (window, line colour) for each rolling mean, in drawing order
    averages = ((MA1, 'darkorange'), (MA2, 'cornflowerblue'))

    for col in df.columns:
        series = df[col]
        window = series.iloc[-RECENT:]
        if window.sum() <= 0:
            continue  # nothing to show for this column

        fig, ax = plt.subplots()
        ax.xaxis_date()
        ax.margins(MARGINS) # seems to work here

        ax.bar(window.index.values, window, label=mode, color='#dd0000')
        for span, colour in averages:
            # smooth over the whole series first, then cut to the window
            smoothed = series.rolling(span).mean().iloc[-RECENT:]
            ax.plot(smoothed.index.values, smoothed,
                    label=f'{span} day ave.', color=colour)
        ax.legend(loc='best')

        title = f'New Locally Acquired COVID19 {mode.title()} in {col} - Recent'
        ps.finalise_plot(ax,
                     title=title,
                     chart_directory=CHART_DIRECTORY,
                     xlabel=None,
                     ylabel=f'Daily New {mode.title()}',
                     lfooter='local = all - overseas - interstate',
                     rfooter=source)

In [22]:
# get daily overall and non-local totals
all_cum = pd.DataFrame()
overseas_cum = pd.DataFrame()
for state, frame in infection_all.items():
    all_cum[state] = frame.sum(axis=1, skipna=True)
    overseas_cum[state] = frame['O/SEAS']
    if 'I/STATE' in frame.columns:
        overseas_cum[state] += frame['I/STATE']


In [23]:
# from previous cell, calculate local transmission, 
# and adjust for negatives
# Day-to-day change in (all sources - non-local sources); NaN differences
# (including the first row, which has no predecessor) become 0.
local_daily_raw = ((all_cum - overseas_cum)
                   .diff()
                   .fillna(0))
local_daily_cooked = pd.DataFrame()
for col in local_daily_raw.columns:
    # running total of the daily changes
    fix = local_daily_raw[col].cumsum()
    # keep the running total only where it is below zero (0 elsewhere),
    # then flip its sign so it becomes a non-negative top-up
    fix = fix.where(fix < 0, other=0) * -1
    # add the top-up to the daily figure
    fixed = local_daily_raw[col] + fix
    # NOTE(review): negative_correct_daily lives in plotstuff and is not
    # visible here; the cell output suggests it reports the remaining
    # negative days - confirm how it adjusts them
    local_daily_cooked[col] = (
        ps.negative_correct_daily(fixed))

There are negatives in NSW
DATE
2020-05-04   -3.0
2020-05-11   -1.0
2020-05-20   -2.0
2020-05-23   -1.0
2020-05-27   -3.0
2020-06-04   -1.0
2020-06-07   -1.0
2020-06-12   -4.0
2020-06-23   -2.0
2020-07-07   -3.0
2020-11-13   -2.0
2020-12-10   -1.0
Name: NSW, dtype: float64
There are negatives in VIC
DATE
2020-04-12    -7.0
2020-04-15   -46.0
2020-04-20    -3.0
2020-04-22    -3.0
2020-05-18    -7.0
2020-05-24    -2.0
2020-06-05    -1.0
2020-06-06    -1.0
2020-06-09    -1.0
2020-10-18    -2.0
2020-10-26    -2.0
2020-11-01    -1.0
2020-11-02    -1.0
2020-12-15    -1.0
Name: VIC, dtype: float64
There are negatives in QLD
DATE
2020-04-04   -9.0
2020-04-06   -4.0
2020-04-09   -2.0
2020-04-15   -7.0
2020-04-17   -1.0
2020-04-20   -8.0
2020-04-23   -1.0
2020-04-24   -6.0
2020-04-28   -3.0
2020-04-30   -2.0
2020-05-08   -1.0
2020-05-10   -2.0
2020-05-13   -4.0
2020-05-14   -1.0
2020-05-16   -2.0
2020-05-17   -1.0
2020-05-19   -3.0
2020-05-25   -1.0
2020-06-02   -1.0
2020-06-03   -1.0
2020-06-16

In [24]:
# one chart per state that has a positive total over the recent window;
# "cases" is the word used in each chart's legend, title and y-axis label
recent(local_daily_cooked, "cases")

In [25]:
# Activity over the past week
# (last seven rows of the corrected daily local-case table, all states)
local_daily_cooked.iloc[-7:]

Unnamed: 0_level_0,NSW,VIC,QLD,WA,SA,TAS,ACT,NT
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-12-27,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-12-28,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-12-29,3.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2020-12-30,18.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-12-31,10.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-01-01,3.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-01-02,7.0,7.0,0.0,3.0,0.0,0.0,0.0,0.0


In [26]:
# Compare the source of infection (SOI) totals  
# with total daily cases. These should be equal  
# for every day (where both have been collected). 
# Where they are not equal it indicates data 
# consistency problems between the different 
# collections. I wrote this comparison because
# I have found occasions where the SOI data 
# implies local transmission on days when the SOI
# data totals are inconsistent with the daily 
# case tallies. I occasionally get implied local 
# transmissions on days where states say there  
# have been no local transmissions. 

issues = {}
issues_last_week = {}  # despite the name: the last RECENT days
RECENT = 14
for state, frame in cases_all.items():
    infections = infection_all[state].sum(axis=1)
    # reindex rather than .loc[...]: a date that is in the SOI table but
    # not in the cases table becomes NaN instead of raising KeyError
    cases = frame['CASES'].reindex(infections.index)
    # only dates where a case tally exists can be inconsistent
    # (NaN != number is True, so NaN must be excluded explicitly)
    mismatch = cases.notna() & (cases != infections)
    issues[state] = mismatch.sum()
    issues_last_week[state] = mismatch.iloc[-RECENT:].sum()

report = pd.DataFrame([issues, issues_last_week], 
                      index=['Total inconsistencies', 
                             'Inconsistencies in past '
                             f'{RECENT} days'])

print('Data inconsistencies')
report

Data inconsistencies


Unnamed: 0,NSW,VIC,QLD,WA,SA,TAS,ACT,NT
Total inconsistencies,1,7,3,2,5,36,1,3
Inconsistencies in past 14 days,0,1,1,1,4,0,0,0


In [27]:
# Historic local daily in Vic in May, June and July 2020
# Also NSW in December 2020
# Disabled by default: change False to True to draw one bar chart per
# (state, month) pair listed in `sets`.
if False: # switch - turn on/off this code block

    YEAR = 2020
    sets = [['VIC', [5, 6, 7]],
            ['NSW', [12]]]

    for s in sets:
        state, monthset = s
        for month in monthset:
            # uses local_daily_cooked, the corrected daily table built
            # above (no variable called local_daily exists in this notebook)
            history = local_daily_cooked.loc[
                                      (local_daily_cooked.index.month == month) &
                                      (local_daily_cooked.index.year == YEAR) 
                                     ][state]

            MARGINS = 0.01
            fig, ax = plt.subplots()
            ax.xaxis_date()
            ax.margins(MARGINS) # seems to work here
        
            ax.bar(history.index.values, history, color='#dd0000')
            title = (f'New Locally Acquired COVID19 cases in {state}'
                     f' ({months[month]} {YEAR})')

            ps.finalise_plot(ax, 
                             title=title, 
                             chart_directory=CHART_DIRECTORY,
                             xlabel=None,
                             ylabel='Daily New Cases',
                             lfooter='local = all - overseas - interstate',
                             rfooter=source)

In [28]:
# NSW in July, August and September 2020
# Disabled by default: change False to True to draw a single bar chart
# of one state's daily local cases over the listed months.
if False: # switch - turn on/off this code block
    
    state = 'NSW'
    YEAR = 2020
    monthset = [7, 8, 9]
    # e.g. 'Jul, Aug, Sep' - months[] is indexed by calendar month number
    mtext = ', '.join(map(lambda x: months[x], monthset))
    # uses local_daily_cooked, the corrected daily table built above
    # (no variable called local_daily exists in this notebook)
    history = local_daily_cooked.loc[
                                (local_daily_cooked.index.month.isin(monthset)) &
                                (local_daily_cooked.index.year == YEAR) 
                             ][state]
    MARGINS = 0.01
    fig, ax = plt.subplots()
    ax.xaxis_date()
    ax.margins(MARGINS) # seems to work here
        
    ax.bar(history.index.values, history, color='#dd0000')
    title = (f'New Locally Acquired COVID19 Cases in {state}'
             f' ({mtext} {YEAR})')

    ps.finalise_plot(ax, 
                     title=title, 
                     chart_directory=CHART_DIRECTORY,
                     xlabel=None,
                     ylabel='Daily New Cases',
                     lfooter='local = all - overseas - interstate',
                     rfooter=source)

## Hospitalised

In [29]:
# Scrape the daily-hospitalised report for every state, then keep each
# state's latest row that has no missing values
states = ['nsw', 'vic', 'qld', 'wa', 'sa', 'tas', 'act', 'nt']
url_stem = 'https://covidlive.com.au/report/daily-hospitalised/'
hospital_all = get_dict_of_frames(url_stem, states)
hospital_most_recent = get_most_recent(hospital_all)

In [30]:
# one row per state, one column per scraped measure (latest complete row)
hosp = pd.DataFrame(hospital_most_recent).T
hosp = hosp.astype(float)
display(hosp) # Vic often has missing ventilator data

Unnamed: 0,HOSP,ICU,VENT
NSW,1.0,0.0,0.0
VIC,0.0,0.0,0.0
QLD,13.0,0.0,0.0
WA,0.0,0.0,0.0
SA,0.0,0.0,0.0
TAS,0.0,0.0,0.0
ACT,0.0,0.0,0.0
NT,4.0,0.0,0.0


In [31]:
# horizontal bar chart of the HOSP column of the latest-figures table,
# sorted ascending; the file name is the CHART_DIRECTORY prefix + title
title = 'Hospitalised COVID19 Cases - Australian States'
ps.plot_barh(hosp['HOSP'].astype(int).sort_values(ascending=True), 
    title=title,
    xlabel='Current hospitalised cases',
    save_as = f'{CHART_DIRECTORY}{title}.png',
    rfooter=source,
)

In [32]:
# horizontal bar chart of the ICU column of the latest-figures table,
# sorted ascending; the file name is the CHART_DIRECTORY prefix + title
title = 'ICU COVID19 Cases - Australian States'
ps.plot_barh(hosp['ICU'].astype(int).sort_values(ascending=True), 
    title=title,
    xlabel='Current ICU cases',
    save_as = f'{CHART_DIRECTORY}{title}.png',
    rfooter=source,
)

In [33]:
# By-date table of the HOSP column, one column per state. It gets its
# own name: the cells above read `hosp` as the one-row-per-state table
# of latest figures, and overwriting it here would break them on re-run.
hosp_by_date = get_national_col(hospital_all, 'HOSP')
mpl.rcParams['axes.prop_cycle'] = mpl.cycler(color=colors)
ax = hosp_by_date.plot.line(lw=2.5)

title = 'Hospitalised COVID19 Cases by Date - Australian States'
ps.finalise_plot(ax, title=title, 
             chart_directory=CHART_DIRECTORY,
             xlabel=None, ylabel='Number in Hospital',
             rfooter=source)

## Tests

In [34]:
# resident population by state/territory (persons)
state_pop = {
    # source: https://www.abs.gov.au/ausstats/abs@.nsf/Latestproducts/3101.0Main%20Features3Sep%202019?opendocument&tabname=Summary&prodno=3101.0&issue=Sep%202019&num=&view=
    'NSW': 8_118_000,
    'VIC': 6_629_900,
    'QLD': 5_115_500,
    'SA':  1_756_500,
    'WA':  2_630_600,
    'TAS':   535_500,
    'ACT':   428_100,
    'NT':    245_600,
}
# `power` is also interpolated into the axis labels of the per-capita
# charts, so keep it in step with `factor`
power = 6
factor = 10 ** power
# rebind state_pop as a Series expressed in units of 10**power persons
state_pop = pd.Series(state_pop) / factor

In [35]:
# Scrape the daily-tests report once per state/territory code
states = ['nsw', 'vic', 'qld', 'wa', 'sa', 'tas', 'act', 'nt']
url_stem = 'https://covidlive.com.au/report/daily-tests/'
tests_all = get_dict_of_frames(url_stem, states)

In [36]:
# by-date table of the TESTS column, one column per state
tests = get_national_col(tests_all, 'TESTS')
# NOTE(review): dataframe_correction lives in plotstuff and is not
# visible here. Going by the variable names it returns uncorrected
# daily, corrected daily and corrected cumulative frames - confirm.
tests_daily_u, tests_daily_c, tests_c = ps.dataframe_correction(tests)

There are negatives in VIC
DATE
2020-06-06   -12142.0
2020-08-08   -11984.0
Name: VIC, dtype: float64
Spikes in VIC
DATE     2020-09-03
spike  82309.000000
mean   16328.214286
zeros      0.000000
Spikes in QLD
DATE     2020-03-19  2020-06-22
spike  27064.000000     49207.0
mean     991.571429      4402.5
zeros      7.000000         0.0
Spikes in WA
DATE    2020-03-16
spike  5906.000000
mean    298.714286
zeros     7.000000
There are negatives in TAS
DATE
2020-07-07   -707.0
Name: TAS, dtype: float64
Spikes in TAS
DATE    2020-03-29   2020-05-30
spike  1736.313243  3197.702098
mean     74.093097   609.527144
zeros     9.000000     0.000000
Spikes in ACT
DATE    2020-03-29
spike  4307.000000
mean     74.785714
zeros     7.000000
There are negatives in NT
DATE
2020-07-15   -1022.0
Name: NT, dtype: float64
Spikes in NT
DATE    2020-07-14
spike  2439.429042
mean    303.768568
zeros     0.000000


In [37]:
# if last row all zeros, ignore it
# (the drop is in place, so tests_daily_c itself loses its final row)
if tests_daily_c.iloc[-1].sum() == 0:
    tests_daily_c.drop(tests_daily_c.tail(1).index,inplace=True)

# check if we have test data from every state
# (a zero anywhere in the final row is treated as "not yet reported")
if (tests_daily_c.iloc[-1] == 0).any():
    # Note: WA only reports tests on business days
    print("Warning: it's possible that not all test data is in")
    display(tests_daily_c[-7:])
    warning_count += 1



Unnamed: 0_level_0,NSW,VIC,QLD,WA,SA,TAS,ACT,NT
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-12-27,23933.0,5848.0,1616.0,0.0,3172.0,215.0,470.0,140.0
2020-12-28,15364.0,5880.0,3104.0,0.0,4357.0,234.0,367.0,120.0
2020-12-29,16329.0,6899.0,2413.0,7986.0,4370.0,466.0,479.0,370.0
2020-12-30,17267.0,8731.0,4918.0,2810.0,4929.0,597.0,592.0,469.0
2020-12-31,27894.0,13097.0,4796.0,2762.0,4386.0,596.0,689.0,499.0
2021-01-01,32010.0,13108.0,5195.0,0.0,4533.0,627.0,574.0,389.0
2021-01-02,31864.0,18337.0,3360.0,0.0,5967.0,0.0,538.0,0.0


In [38]:
# Make zeros in the tail into NANs
# we are assuming every state does at least one test every day
cumsum = tests_daily_c.cumsum()
# A cell is blanked when both conditions hold for its column:
#   - the running total already equals the column's final total, and
#   - the running total did not change from the previous row.
# For non-negative daily data that picks out a trailing run of zeros.
mask = ~((cumsum.eq(cumsum.iloc[-1], axis=1)) &
         (cumsum == cumsum.shift(1)))
# keep values where mask is True, write NaN where it is False
tests_daily_c = tests_daily_c.where(mask, other=np.nan)

In [39]:
# remember un_normed tests
# (an independent copy, taken before tests_daily_c is converted to a
# smoothed per-capita rate further down)
un_normed_tests = tests_daily_c.copy()

In [40]:
# One bar chart per state showing the raw (not per-capita) daily test
# counts for the most recent RECENT rows.
RECENT = 42
for state in un_normed_tests.columns:

    test_data = un_normed_tests[state].tail(RECENT)

    fig, ax = plt.subplots()
    ax.xaxis_date()
    ax.margins(0.01, 0.03)
    ax.bar(test_data.index, test_data, color="#dd0000", width=0.8)

    title = f'Tests for COVID19 recently completed in {state}'
    ps.finalise_plot(
        ax,
        title=title,
        xlabel=None,
        chart_directory=CHART_DIRECTORY,
        ylabel='Completed tests',
        lfooter='Extreme outliers have been adjusted before smoothing',
        rfooter=source,
    )

In [41]:
ROLLING = 7
# Tests per 10**power population, smoothed with a ROLLING-day mean.
# Start from un_normed_tests (the untouched copy of the daily counts)
# rather than from tests_daily_c itself, so re-running this cell does
# not divide by population and smooth a second time.
tests_daily_c = (un_normed_tests / state_pop).rolling(ROLLING).mean()
# put the columns in the key order of state_pop
tests_daily_c = tests_daily_c[list(state_pop.keys())]

In [42]:
mpl.rcParams['axes.prop_cycle'] = mpl.cycler(color=colors)
# NOTE: RECENT is negative here because it is used as an iloc start
# offset (the last 42 rows); other cells set it to a positive count
RECENT = -42
MARGINS = 0.005, 0.03
# two charts: the whole series (start at row 0), then the recent rows only
for commencing in [0, RECENT]: 
    fig, ax = plt.subplots()
    ax.xaxis_date()
    ax.margins(*MARGINS)
    for col in tests_daily_c.columns:
        ax.plot(tests_daily_c.iloc[commencing:][col].index,
                tests_daily_c.iloc[commencing:][col],
                label=col, lw=2.5)
    ax.legend(loc='best')
    title = ('Tests per capita for COVID19 '
             f'({ROLLING}-day rolling ave)')
    # the second chart gets a distinct title
    if commencing != 0:
        title = title + ' - recent'

    # `power` is the exponent defined alongside the population table
    ps.finalise_plot(ax, title=title, xlabel=None, 
                 chart_directory=CHART_DIRECTORY,
                 ylabel= 'Completed tests\n'
                        f'per $10^{power}$ population',
                 lfooter='Extreme outliers have been adjusted '
                         'before smoothing',
                 rfooter=source)

In [43]:
# latest daily testing rates per million by state
# (rolling means; a value is NaN when its window contains a missing day)
tests_daily_c.iloc[-7:].round(1)

Unnamed: 0_level_0,NSW,VIC,QLD,SA,WA,TAS,ACT,NT
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-12-27,5595.5,2016.5,1247.0,2445.4,897.0,1072.4,1884.1,1702.0
2020-12-28,5187.0,1846.8,1221.6,2608.8,358.9,992.4,1767.3,1633.3
2020-12-29,4691.8,1661.0,1058.2,2634.5,716.7,874.7,1579.7,1591.4
2020-12-30,4258.9,1354.5,827.9,2522.1,749.4,809.7,1240.4,1445.4
2020-12-31,3690.7,1222.4,670.1,2471.6,736.3,791.2,1191.6,1361.1
2021-01-01,3025.6,1259.9,669.9,2405.9,,958.5,1121.2,1587.4
2021-01-02,2897.6,1549.3,709.4,2579.3,,,1237.7,


In [44]:
# Victorian testing history
# Disabled by default: change False to True to draw one chart per month
# of the smoothed per-capita testing rate for STATE.
if False: # switch for this code block
    
    YEAR = 2020
    STATE = 'VIC'
    MARGINS = 0.005, 0.03

    for month in 5, 6, 7:
    
        # rows that fall in this month of YEAR, STATE column only
        history = tests_daily_c[
                                (tests_daily_c.index.year == YEAR) &
                                (tests_daily_c.index.month == month)
        ][STATE]
    
        fig, ax = plt.subplots()
        ax.xaxis_date()
        ax.margins(*MARGINS)
        ax.plot(history.index, history,
                label=STATE, lw=2.5, color='darkorange')
        ax.legend(loc='best')
        # every piece needs its own f prefix: without it the first
        # piece is a plain string and '{STATE}' is printed literally
        title = (f'Tests for COVID19 in {STATE} '
                 f'({ROLLING} day rolling ave) '
                 f'({months[month]} {YEAR})')

        ps.finalise_plot(ax, title=title,
                     chart_directory=CHART_DIRECTORY,
                     xlabel=None, 
                     ylabel=f'Rate per $10^{power}$ population',
                     lfooter='Extreme outliers have been adjusted '
                             'before smoothing',
                     rfooter=source)

## The End

In [45]:
print('Finished')

# Point the reader back at any data-quality warnings raised on the way.
if warning_count:
    plural = 's' if warning_count > 1 else ''
    print(f'Check {warning_count} warning{plural} above')

Finished
