In [11]:
import pandas as pd
from datetime import date, timedelta
import matplotlib.pyplot as plt
import mpld3

In [12]:
file_path = "https://www.dhhs.vic.gov.au/ncov-covid-cases-by-age-group-csv"
df = pd.read_csv(file_path)

In [13]:
df.tail()

Unnamed: 0,diagnosis_date,agegroup
724642,2022-03-24,20-29
724643,2022-03-24,30-39
724644,2022-03-24,70-79
724645,2022-03-24,10-19_
724646,2022-03-24,40-49


In [14]:
df_pivot = pd.pivot_table(df, index = 'diagnosis_date', columns = 'agegroup', 
                         values = 'agegroup', aggfunc = len, fill_value = 0)

In [15]:
df_pivot.tail()

agegroup,0-9,10-19_,20-29,30-39,40-49,50-59,60-69,70-79,80-89,90+
diagnosis_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2022-03-20,223,354,339,370,322,227,152,90,34,6
2022-03-21,258,377,395,473,416,299,201,118,47,18
2022-03-22,343,563,657,685,548,419,273,159,65,18
2022-03-23,274,449,682,656,550,396,280,217,77,25
2022-03-24,254,392,597,567,495,395,263,166,59,23


In [16]:
# check that the latest date's sum matches the number of cases reported by Vic govt
# the latest row's date is always yesterday

today = date.today()
yesterday = today - timedelta(days = 1)

# the dates in the pivot table are strings
# so use isoformat() to convert date into string before looking it up in the table
df_pivot.loc[yesterday.isoformat()].sum()

3211

In [17]:
# open my csv containing 0-19 case numbers collected so far

df_0_19 = pd.read_csv("../data/cases_0_19.csv", index_col = 0)
df_0_19.tail()

Unnamed: 0,date,cases_0_9,cases_10_19
105,2022-03-20,225,333
106,2022-03-21,220,346
107,2022-03-22,241,354
108,2022-03-23,313,498
109,2022-03-24,256,410


In [18]:
# get the latest date from pivot table, add it to my 0-19 csv, then save my csv
# the latest date is always yesterday, because today's cases were diagnosed yesterday

df_0_19 = df_0_19.append({'date': today.isoformat(), 'cases_0_9': df_pivot.loc[yesterday.isoformat(), '0-9'], 
                          'cases_10_19': df_pivot.loc[yesterday.isoformat(), '10-19_']}, ignore_index = True)

  df_0_19 = df_0_19.append({'date': today.isoformat(), 'cases_0_9': df_pivot.loc[yesterday.isoformat(), '0-9'],


In [19]:
df_0_19.tail()

Unnamed: 0,date,cases_0_9,cases_10_19
106,2022-03-21,220,346
107,2022-03-22,241,354
108,2022-03-23,313,498
109,2022-03-24,256,410
110,2022-03-25,254,392


In [20]:
df_0_19.to_csv("../data/cases_0_19.csv")