In [18]:
import pandas as pd

# Location of the source CSV; read_csv fetches it over the network every time
# this cell is run, so results depend on the file's contents at that moment.
csv_url = "https://health-infobase.canada.ca/src/data/covidLive/covid19.csv"
# Raw table. Later cells read columns such as 'date', 'prname' and 'numactive'.
df = pd.read_csv(csv_url)


In [19]:
#parse government data so it is usable
# Parse the day-first 'date' strings, then keep only the calendar date in 'YMD'
# (plain datetime.date objects, so later lookups must also use date objects).
df['dates'] = pd.to_datetime(df['date'], dayfirst = True)
df['YMD'] = df['dates'].dt.date
# NOTE: this reassigns df without the 'date' column, so the cell cannot be
# re-run without first re-running the cell that loads df.
df = df.drop(['date', 'dates', 'prnameFR', 'pruid'], axis = 1)
#if provinces don't report active cases, assume they have 0, so the data will plot
# Assign the filled column back rather than calling fillna(inplace = True) on
# the df['numactive'] selection: that form acts on an intermediate object,
# triggers a warning in recent pandas, and does not update df at all once
# Copy-on-Write is enabled.
df['numactive'] = df['numactive'].fillna(0.0)

#List of unique provinces, Canada, ordered alphabetically with Canada first
provnames = list(df.prname.unique())
# Discard the last name in order of first appearance, exactly as before.
# NOTE(review): presumably this is a non-provincial entry - confirm against
# the data, since the position of that entry is not guaranteed.
provnames.pop()
# Sort key puts 'Canada' first (False sorts before True) and everything else
# alphabetically. This replaces a hard-coded 14-element index permutation that
# only worked when exactly 14 names were present with 'Canada' at position 2.
provnames = sorted(provnames, key = lambda name: (name != 'Canada', name))
dropnames = ['Canada', 'Nunavut', 'Yukon', 'Northwest Territories']
select_provs = [x for x in provnames if x not in dropnames]

#function to make province-wise dataset
def make_dataset(pnames, data = None):
    """Stack the rows belonging to the requested provinces into one DataFrame.

    Parameters
    ----------
    pnames : iterable of str
        Values of the 'prname' column to keep.
    data : pandas.DataFrame, optional
        Frame to subset; it must contain a 'prname' column. When omitted, the
        notebook-level ``df`` is used, which is what the original
        one-argument call did.

    Returns
    -------
    pandas.DataFrame
        The matching rows, grouped in the order the names appear in
        ``pnames`` and keeping their original row labels. An empty DataFrame
        is returned when ``pnames`` is empty.
    """
    if data is None:
        data = df

    # Collect one subset per province and concatenate once. DataFrame.append
    # no longer exists in pandas 2.x, and growing a frame inside a loop copies
    # the accumulated rows on every iteration.
    subsets = [data[data['prname'] == name] for name in pnames]

    # pd.concat raises on an empty list; keep the old "empty frame" result.
    if not subsets:
        return pd.DataFrame()

    return pd.concat(subsets)

#must pivot datasets for linear plot data so prov names are columns
def format_dataset(df, value):
    """Reshape long-form rows into a wide table with one column per province.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-form data containing 'YMD', 'prname' and the ``value`` column.
    value : str
        Name of the column whose values fill the table.

    Returns
    -------
    pandas.DataFrame
        Table indexed by 'YMD' with one column per 'prname'. Repeated
        (YMD, prname) pairs are combined by pivot_table's default
        aggregation (the mean). The index carries no name.
    """
    wide = df.pivot_table(values = value, index = 'YMD', columns = 'prname')

    # Clear the index label so 'YMD' does not show up as a header row.
    return wide.rename_axis(None)

In [16]:
# Wide table of the 'numtotal_last14' column for the selected provinces:
# one row per date, one column per province.
# NOTE(review): the variable name says "active", but the column pivoted here
# is 'numtotal_last14' - confirm which metric is intended.
two_week_active = format_dataset(make_dataset(select_provs), 'numtotal_last14')
two_week_active
# Debugging aid: uncomment to list the columns available in df.
#print(df.columns)

#pd.to_datetime

Index(['prname', 'update', 'numconf', 'numprob', 'numdeaths', 'numtotal',
       'numtested', 'numtests', 'numrecover', 'percentrecover', 'ratetested',
       'ratetests', 'numtoday', 'percentoday', 'ratetotal', 'ratedeaths',
       'numdeathstoday', 'percentdeath', 'numtestedtoday', 'numteststoday',
       'numrecoveredtoday', 'percentactive', 'numactive', 'rateactive',
       'numtotal_last14', 'ratetotal_last14', 'numdeaths_last14',
       'ratedeaths_last14', 'numtotal_last7', 'ratetotal_last7',
       'numdeaths_last7', 'ratedeaths_last7', 'avgtotal_last7',
       'avgincidence_last7', 'avgdeaths_last7', 'avgratedeaths_last7', 'YMD'],
      dtype='object')


In [14]:
# The pivot index holds plain datetime.date objects (built with .dt.date), so
# compare against a date rather than a Timestamp: from pandas 2.0 a Timestamp
# never compares equal to a date, which would silently yield an empty subset.
target_day = pd.to_datetime('2021-03-12').date()
subset = two_week_active[two_week_active.index == target_day]
subset

prname,Alberta,British Columbia,Manitoba,New Brunswick,Newfoundland and Labrador,Nova Scotia,Ontario,Prince Edward Island,Quebec,Saskatchewan
2021-03-12,4774.0,7527.0,886.0,37.0,36.0,31.0,16322.0,22.0,9984.0,2025.0


In [15]:
# Save `subset` as a UTF-8 CSV.
# NOTE(review): assumes a data/ directory already exists next to the notebook - confirm.
subset.to_csv('data/12-MAR_subset.csv', encoding = 'utf-8')

In [45]:
# Wide table of the 'numtoday' column: one row per date, one column per province.
dataset = format_dataset(make_dataset(select_provs), 'numtoday')
# Days after 2021-01-29 up to and including 2021-02-19. date_range's `closed`
# argument was removed in pandas 2.0, so the first generated day is dropped
# instead, which gives the same half-open range on every pandas version.
# `.date` converts the labels to datetime.date objects so they match the
# table's index, which was built with .dt.date.
index_list = pd.date_range(start = '2021-01-29', end = '2021-02-19')[1:].date
month_period = dataset.loc[index_list, :]
print(month_period)
# Column-wise sum: one total per province over the selected days.
month_total = month_period.sum(axis = 0)
print(month_total)
# NOTE(review): assumes a data/ directory already exists - confirm.
month_total.to_csv('data/29JAN_19FEB_totals.csv', encoding = 'utf8')

prname      Alberta  British Columbia  Manitoba  New Brunswick  \
2021-01-30    383.0             408.0     166.0           12.0   
2021-01-31    461.0             473.0     118.0           26.0   
2021-02-01    355.0             277.0      87.0            8.0   
2021-02-02    268.0             429.0      82.0           24.0   
2021-02-03    259.0             414.0     125.0           14.0   
2021-02-04    582.0             465.0     110.0           16.0   
2021-02-05    396.0             471.0     110.0            7.0   
2021-02-06    348.0             428.0      80.0           12.0   
2021-02-07    351.0             465.0      79.0            7.0   
2021-02-08    269.0             343.0      52.0            2.0   
2021-02-09    195.0             435.0      71.0           15.0   
2021-02-10    339.0             469.0      57.0           14.0   
2021-02-11    351.0             449.0      90.0            2.0   
2021-02-12    314.0             445.0      81.0            5.0   
2021-02-13