# Ontario COVID-19 data visualizations of confirmed cases
> "Maps and graphs of confirmed COVID-19 cases in Ontario, Canada"

- author: <a href=https://github.com/anomal/>Sophiah Ho</a> 
- categories: [ontario, canada, covid-19, province, hotspot, toronto, waterloo, ottawa, peel, york region, halton, testing]
- image: images/ontario_confirmed_map.png
- permalink: /ontario-confirmed-cases-per-region/
- toc: true

In [1]:
#hide
import urllib, json, os, time
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import dateutil.parser
import re
from IPython.display import HTML
from datetime import datetime, timedelta, timezone

url = 'https://data.ontario.ca/dataset/f4112442-bdc8-45d2-be3c-12efae72fb27/resource/455fd63b-603d-4608-8216-7d8647f43350/download/conposcovidloc.csv'  
cachefilename = "data_files/ontario-cases-data.cache.csv"
try:
    origdf = pd.read_csv(url)
    origdf.to_csv(cachefilename, index=False)
    refreshtime = datetime.now()
    cached=False
except:
    cached=True
    origdf = pd.read_csv(cachefilename)

df = origdf[(origdf["Accurate_Episode_Date"] < "2100") & (origdf["Accurate_Episode_Date"] > "2020")]
df

Unnamed: 0,Row_ID,Accurate_Episode_Date,Case_Reported_Date,Test_Reported_Date,Specimen_Date,Age_Group,Client_Gender,Case_AcquisitionInfo,Outcome1,Outbreak_Related,Reporting_PHU,Reporting_PHU_Address,Reporting_PHU_City,Reporting_PHU_Postal_Code,Reporting_PHU_Website,Reporting_PHU_Latitude,Reporting_PHU_Longitude
0,1,2020-01-22,2020-01-23,2020-01-27,2020-01-25,50s,FEMALE,Travel,Resolved,,Toronto Public Health,"277 Victoria Street, 5th Floor",Toronto,M5B 1W2,www.toronto.ca/community-people/health-wellnes...,43.656591,-79.379358
1,2,2020-01-21,2020-01-23,2020-01-27,2020-01-23,50s,MALE,Travel,Resolved,,Toronto Public Health,"277 Victoria Street, 5th Floor",Toronto,M5B 1W2,www.toronto.ca/community-people/health-wellnes...,43.656591,-79.379358
2,3,2020-01-24,2020-01-24,2020-02-03,2020-01-24,20s,FEMALE,Travel,Resolved,,Middlesex-London Health Unit,50 King Street,London,N6A 5L7,www.healthunit.com,42.981468,-81.254016
3,4,2020-02-05,2020-02-21,2020-02-24,2020-02-22,20s,FEMALE,Travel,Resolved,,Toronto Public Health,"277 Victoria Street, 5th Floor",Toronto,M5B 1W2,www.toronto.ca/community-people/health-wellnes...,43.656591,-79.379358
4,5,2020-02-16,2020-02-25,2020-02-25,2020-02-25,60s,FEMALE,Travel,Resolved,,Toronto Public Health,"277 Victoria Street, 5th Floor",Toronto,M5B 1W2,www.toronto.ca/community-people/health-wellnes...,43.656591,-79.379358
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34649,34650,2020-05-07,2020-05-11,2020-05-11,2020-05-11,50s,MALE,No Epi-link,Resolved,,Toronto Public Health,"277 Victoria Street, 5th Floor",Toronto,M5B 1W2,www.toronto.ca/community-people/health-wellnes...,43.656591,-79.379358
34650,34651,2020-05-23,2020-05-25,2020-05-25,2020-05-23,40s,MALE,CC,Resolved,,Toronto Public Health,"277 Victoria Street, 5th Floor",Toronto,M5B 1W2,www.toronto.ca/community-people/health-wellnes...,43.656591,-79.379358
34651,34652,2020-04-16,2020-04-24,2020-04-24,2020-04-22,60s,MALE,OB,Resolved,Yes,Niagara Region Public Health Department,1815 Sir Isaac Brock Way,Thorold,L2V 4T7,www.niagararegion.ca/health,43.116537,-79.241220
34652,34653,2020-05-05,2020-05-09,2020-05-09,2020-05-06,40s,MALE,No Info-Unk,Resolved,,Toronto Public Health,"277 Victoria Street, 5th Floor",Toronto,M5B 1W2,www.toronto.ca/community-people/health-wellnes...,43.656591,-79.379358


In [2]:
#hide
df \
.groupby(["Accurate_Episode_Date"]) \
.count()

Unnamed: 0_level_0,Row_ID,Case_Reported_Date,Test_Reported_Date,Specimen_Date,Age_Group,Client_Gender,Case_AcquisitionInfo,Outcome1,Outbreak_Related,Reporting_PHU,Reporting_PHU_Address,Reporting_PHU_City,Reporting_PHU_Postal_Code,Reporting_PHU_Website,Reporting_PHU_Latitude,Reporting_PHU_Longitude
Accurate_Episode_Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2020-01-01,2,2,2,2,2,2,2,2,0,2,2,2,2,2,2,2
2020-01-10,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1
2020-01-21,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1
2020-01-22,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1
2020-01-24,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-06-23,119,119,119,119,119,119,119,119,18,119,119,119,119,119,119,119
2020-06-24,123,123,122,122,123,123,123,123,20,123,123,123,123,123,123,123
2020-06-25,69,69,68,68,69,69,69,69,4,69,69,69,69,69,69,69
2020-06-26,16,16,13,13,16,16,16,16,0,16,16,16,16,16,16,16


In [3]:
#hide

mindate = dateutil.parser.parse(df["Accurate_Episode_Date"].min()).date()
maxdate = dateutil.parser.parse(df["Accurate_Episode_Date"].max()).date()

In [4]:
#hide_input

if cached:
    errormsg = 'An error occurred retrieving data from <a href="https://data.ontario.ca/en/dataset/confirmed-positive-cases-of-covid-19-in-ontario/resource/455fd63b-603d-4608-8216-7d8647f43350" title="Ontario data">Confirmed positive cases of COVID19 in Ontario</a>. Cached data is used. '
else:
    errormsg = ''

HTML(f"{errormsg}The graphs are from either Ontario status of confirmed cases data, where the date used is the client's symptom onset date, or Ontario testing status data, where the date used is the reported date (rep. date). Data based on Symptom Onset Date is lagging and can be updated retroactively. Most of the graphs are interactive. You can toggle off figures by clicking on the item in the legend, or select an area on the graph to zoom in.")

In [5]:
#hide
activedf = df[df["Outcome1"] == "Not Resolved"]

activedatetimes = activedf.apply(lambda row: dateutil.parser.parse(row["Accurate_Episode_Date"]), axis=1)

activedf = activedf.assign(Datetime=activedatetimes.values)

activedf

Unnamed: 0,Row_ID,Accurate_Episode_Date,Case_Reported_Date,Test_Reported_Date,Specimen_Date,Age_Group,Client_Gender,Case_AcquisitionInfo,Outcome1,Outbreak_Related,Reporting_PHU,Reporting_PHU_Address,Reporting_PHU_City,Reporting_PHU_Postal_Code,Reporting_PHU_Website,Reporting_PHU_Latitude,Reporting_PHU_Longitude,Datetime
1225,1226,2020-03-24,2020-03-26,2020-03-26,2020-03-25,60s,MALE,Travel,Not Resolved,,Toronto Public Health,"277 Victoria Street, 5th Floor",Toronto,M5B 1W2,www.toronto.ca/community-people/health-wellnes...,43.656591,-79.379358,2020-03-24
1321,1322,2020-03-26,2020-03-27,2020-03-26,2020-03-26,60s,MALE,No Info-Missing,Not Resolved,,Toronto Public Health,"277 Victoria Street, 5th Floor",Toronto,M5B 1W2,www.toronto.ca/community-people/health-wellnes...,43.656591,-79.379358,2020-03-26
1415,1416,2020-03-24,2020-03-27,2020-03-25,2020-03-24,60s,FEMALE,No Info-Missing,Not Resolved,,Toronto Public Health,"277 Victoria Street, 5th Floor",Toronto,M5B 1W2,www.toronto.ca/community-people/health-wellnes...,43.656591,-79.379358,2020-03-24
1468,1469,2020-03-11,2020-03-27,2020-04-01,2020-03-20,40s,MALE,No Epi-link,Not Resolved,,Halton Region Health Department,1151 Bronte Road,Oakville,L6M 3Ll,www.halton.ca/For-Residents/Public-Health/,43.413997,-79.744796,2020-03-11
1558,1559,2020-03-29,2020-03-30,2020-03-30,2020-03-29,90s,FEMALE,OB,Not Resolved,Yes,Peel Public Health,7120 Hurontario Street,Mississauga,L5W 1N4,www.peelregion.ca/health/,43.647471,-79.708893,2020-03-29
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34537,34538,2020-06-16,2020-06-19,2020-06-19,2020-06-16,60s,MALE,No Info-Missing,Not Resolved,,Windsor-Essex County Health Unit,1005 Ouellette Avenue,Windsor,N9A 4J8,www.wechu.org,42.308796,-83.033670,2020-06-16
34541,34542,2020-06-20,2020-06-21,2020-06-21,2020-06-20,<20,FEMALE,OB,Not Resolved,Yes,Toronto Public Health,"277 Victoria Street, 5th Floor",Toronto,M5B 1W2,www.toronto.ca/community-people/health-wellnes...,43.656591,-79.379358,2020-06-20
34597,34598,2020-06-18,2020-06-19,2020-06-19,2020-06-18,20s,MALE,CC,Not Resolved,,Peel Public Health,7120 Hurontario Street,Mississauga,L5W 1N4,www.peelregion.ca/health/,43.647471,-79.708893,2020-06-18
34634,34635,2020-06-24,2020-06-26,2020-06-26,2020-06-24,20s,FEMALE,No Info-Missing,Not Resolved,,Peel Public Health,7120 Hurontario Street,Mississauga,L5W 1N4,www.peelregion.ca/health/,43.647471,-79.708893,2020-06-24


In [6]:
#hide
agactivedf = activedf \
.groupby(["Reporting_PHU", "Reporting_PHU_Latitude", "Reporting_PHU_Longitude"]) \
.count()["Row_ID"] \
.reset_index() \
.rename(columns={"Row_ID" : "Cases"})

agactivedf["OnsetWithin"] = "Any date"

agactivedf

Unnamed: 0,Reporting_PHU,Reporting_PHU_Latitude,Reporting_PHU_Longitude,Cases,OnsetWithin
0,Brant County Health Unit,43.151811,-80.274374,1,Any date
1,Chatham-Kent Health Unit,42.403861,-82.208561,4,Any date
2,Durham Region Health Department,43.898605,-78.940341,36,Any date
3,Eastern Ontario Health Unit,45.029152,-74.736298,7,Any date
4,Grey Bruce Health Unit,44.576196,-80.94098,9,Any date
5,Haldimand-Norfolk Health Unit,42.847825,-80.303815,13,Any date
6,"Haliburton, Kawartha, Pine Ridge District Heal...",43.968173,-78.285792,10,Any date
7,Halton Region Health Department,43.413997,-79.744796,39,Any date
8,Hamilton Public Health Services,43.257631,-79.871341,51,Any date
9,Huron Perth District Health Unit,43.368662,-81.001913,1,Any date


In [7]:
#hide
now = datetime.now()
agactivelast14df = activedf[activedf["Datetime"] >= now - timedelta(days=14)] \
.groupby(["Reporting_PHU", "Reporting_PHU_Latitude", "Reporting_PHU_Longitude"]) \
.count()["Row_ID"] \
.reset_index() \
.rename(columns={"Row_ID" : "Cases"})

agactivelast14df["OnsetWithin"] = "14 days"

agactivelast14df

Unnamed: 0,Reporting_PHU,Reporting_PHU_Latitude,Reporting_PHU_Longitude,Cases,OnsetWithin
0,Chatham-Kent Health Unit,42.403861,-82.208561,4,14 days
1,Durham Region Health Department,43.898605,-78.940341,29,14 days
2,Eastern Ontario Health Unit,45.029152,-74.736298,4,14 days
3,Grey Bruce Health Unit,44.576196,-80.94098,9,14 days
4,Haldimand-Norfolk Health Unit,42.847825,-80.303815,7,14 days
5,"Haliburton, Kawartha, Pine Ridge District Heal...",43.968173,-78.285792,10,14 days
6,Halton Region Health Department,43.413997,-79.744796,29,14 days
7,Hamilton Public Health Services,43.257631,-79.871341,40,14 days
8,"Kingston, Frontenac and Lennox & Addington Pub...",44.227874,-76.525211,18,14 days
9,Lambton Public Health,42.986416,-82.404808,3,14 days


In [8]:
#hide
agactivelast7df = activedf[activedf["Datetime"] >= now - timedelta(days=7)] \
.groupby(["Reporting_PHU", "Reporting_PHU_Latitude", "Reporting_PHU_Longitude"]) \
.count()["Row_ID"] \
.reset_index() \
.rename(columns={"Row_ID" : "Cases"})

agactivelast7df["OnsetWithin"] = "7 days"

agactivelast7df

Unnamed: 0,Reporting_PHU,Reporting_PHU_Latitude,Reporting_PHU_Longitude,Cases,OnsetWithin
0,Chatham-Kent Health Unit,42.403861,-82.208561,1,7 days
1,Durham Region Health Department,43.898605,-78.940341,7,7 days
2,Grey Bruce Health Unit,44.576196,-80.94098,4,7 days
3,"Haliburton, Kawartha, Pine Ridge District Heal...",43.968173,-78.285792,2,7 days
4,Halton Region Health Department,43.413997,-79.744796,13,7 days
5,Hamilton Public Health Services,43.257631,-79.871341,14,7 days
6,"Kingston, Frontenac and Lennox & Addington Pub...",44.227874,-76.525211,14,7 days
7,Middlesex-London Health Unit,42.981468,-81.254016,9,7 days
8,Niagara Region Public Health Department,43.116537,-79.24122,5,7 days
9,North Bay Parry Sound District Health Unit,46.313207,-79.467841,1,7 days


In [9]:
#hide
agactivelast3df = activedf[activedf["Datetime"] >= now - timedelta(days=3)] \
.groupby(["Reporting_PHU", "Reporting_PHU_Latitude", "Reporting_PHU_Longitude"]) \
.count()["Row_ID"] \
.reset_index() \
.rename(columns={"Row_ID" : "Cases"})

agactivelast3df["OnsetWithin"] = "3 days"

agactivelast3df

Unnamed: 0,Reporting_PHU,Reporting_PHU_Latitude,Reporting_PHU_Longitude,Cases,OnsetWithin
0,Peel Public Health,43.647471,-79.708893,7,3 days
1,Toronto Public Health,43.656591,-79.379358,6,3 days
2,Wellington-Dufferin-Guelph Public Health,43.524881,-80.233743,2,3 days
3,Windsor-Essex County Health Unit,42.308796,-83.03367,7,3 days
4,York Region Public Health Services,44.048023,-79.480239,1,3 days


In [10]:
#hide
appendagactivedf = agactivedf.append(agactivelast14df).append(agactivelast7df).append(agactivelast3df)
appendagactivedf

Unnamed: 0,Reporting_PHU,Reporting_PHU_Latitude,Reporting_PHU_Longitude,Cases,OnsetWithin
0,Brant County Health Unit,43.151811,-80.274374,1,Any date
1,Chatham-Kent Health Unit,42.403861,-82.208561,4,Any date
2,Durham Region Health Department,43.898605,-78.940341,36,Any date
3,Eastern Ontario Health Unit,45.029152,-74.736298,7,Any date
4,Grey Bruce Health Unit,44.576196,-80.940980,9,Any date
...,...,...,...,...,...
0,Peel Public Health,43.647471,-79.708893,7,3 days
1,Toronto Public Health,43.656591,-79.379358,6,3 days
2,Wellington-Dufferin-Guelph Public Health,43.524881,-80.233743,2,3 days
3,Windsor-Essex County Health Unit,42.308796,-83.033670,7,3 days


## Hotspots

In [76]:
#hide_input
activemap = px.scatter_mapbox(appendagactivedf, lat="Reporting_PHU_Latitude", lon="Reporting_PHU_Longitude",  
            color="OnsetWithin", 
            color_discrete_sequence=["maroon", "darkorange", "yellow", "white"], 
            size="Cases", hover_name="Reporting_PHU", 
            size_max=28, zoom=5.4, 
            center=dict(lat=45,lon=-79.4), 
            height=800, 
            labels={"OnsetWithin" : "Onset w/in last"}, 
            title="Currently Active (Not Resolved) Confirmed Cases per Public Health Unit" )
activemap.update_layout(mapbox_style="open-street-map")
if not cached:
    activemap.show()

In [12]:
#hide
import re

traveldf = df[df["Case_AcquisitionInfo"] == "Travel"] \
.groupby(["Accurate_Episode_Date", "Reporting_PHU", "Reporting_PHU_Latitude", "Reporting_PHU_Longitude"]) \
.count()["Row_ID"] \
.reset_index() \
.rename(columns={"Row_ID" : "Travelled_Count"}) 

pendingdf = df[(df["Case_AcquisitionInfo"] == "No Info-Missing") | (df["Case_AcquisitionInfo"] == "No Info-Unk")] \
.groupby(["Accurate_Episode_Date", "Reporting_PHU", "Reporting_PHU_Latitude", "Reporting_PHU_Longitude"]) \
.count()["Row_ID"] \
.reset_index() \
.rename(columns={"Row_ID" : "Pending_Count"}) 

countdf = df \
.groupby(["Accurate_Episode_Date", "Reporting_PHU", "Reporting_PHU_Latitude", "Reporting_PHU_Longitude"]) \
.count()["Row_ID"] \
.reset_index() \
.rename(columns={"Row_ID" : "Count"}) 
countdf

joindf = pd.merge(countdf, traveldf, how="left", \
left_on=["Accurate_Episode_Date", "Reporting_PHU", "Reporting_PHU_Latitude", "Reporting_PHU_Longitude"], \
right_on=["Accurate_Episode_Date", "Reporting_PHU", "Reporting_PHU_Latitude", "Reporting_PHU_Longitude"]) \
.fillna(0)

joindf = pd.merge(joindf, pendingdf, how="left", \
left_on=["Accurate_Episode_Date", "Reporting_PHU", "Reporting_PHU_Latitude", "Reporting_PHU_Longitude"], \
right_on=["Accurate_Episode_Date", "Reporting_PHU", "Reporting_PHU_Latitude", "Reporting_PHU_Longitude"]) \
.fillna(0)
                  
joindf["Datetime"] = joindf.apply(lambda row: dateutil.parser.parse(row["Accurate_Episode_Date"]), axis=1)

phudf = countdf.groupby(by=["Reporting_PHU","Reporting_PHU_Latitude","Reporting_PHU_Longitude"], as_index=False) \
    .first()[["Reporting_PHU","Reporting_PHU_Latitude","Reporting_PHU_Longitude"]]
phudf["key"] = 0

dates = pd.date_range(mindate, maxdate).tolist()
datetimedf = pd.DataFrame(dates, columns=["Datetime"])
datetimedf["key"] = 0

cartesianproddf = pd.merge(datetimedf, phudf, on="key")
cartesianproddf = cartesianproddf.drop(columns=['key'])

joindf = pd.merge(cartesianproddf, joindf, how="left",
left_on=["Datetime", "Reporting_PHU", "Reporting_PHU_Latitude", "Reporting_PHU_Longitude"], \
right_on=["Datetime", "Reporting_PHU", "Reporting_PHU_Latitude", "Reporting_PHU_Longitude"]) \
.fillna(0)

joindf["Date"] = joindf.apply(lambda row: str(row["Datetime"].date()), axis=1)

aggdf = joindf[["Date", "Reporting_PHU", "Reporting_PHU_Latitude", "Reporting_PHU_Longitude", \
                "Count", "Travelled_Count", "Pending_Count"]].sort_values(by=["Date","Reporting_PHU"]).reset_index(drop=True)

cumsumdf = joindf \
.groupby(["Date", "Reporting_PHU", "Reporting_PHU_Latitude", "Reporting_PHU_Longitude"]).sum() \
.groupby(level=1).cumsum() \
.reset_index() \
.rename(columns={"Count" : "Total", "Travelled_Count" : "Total Travelled", \
                 "Pending_Count" : "Total Information Pending"}) 

aggdf = pd.merge(aggdf, cumsumdf, how="left", \
left_on=["Date", "Reporting_PHU", "Reporting_PHU_Latitude", "Reporting_PHU_Longitude"], \
right_on=["Date", "Reporting_PHU", "Reporting_PHU_Latitude", "Reporting_PHU_Longitude"]) 

incdf = aggdf \
.groupby(["Date", "Reporting_PHU", "Reporting_PHU_Latitude", "Reporting_PHU_Longitude"]).sum() \
.groupby(level=1).pct_change().mul(100) \
.reset_index() \
.rename(columns={"Total" : "Percent Increase"}) \
[["Date", "Reporting_PHU", "Reporting_PHU_Latitude", "Reporting_PHU_Longitude", "Percent Increase"]]

aggdf = pd.merge(aggdf, incdf, how="left", \
left_on=["Date", "Reporting_PHU", "Reporting_PHU_Latitude", "Reporting_PHU_Longitude"], \
right_on=["Date", "Reporting_PHU", "Reporting_PHU_Latitude", "Reporting_PHU_Longitude"]) 

def cumulativeNotTravelled(total, totalTravelled, totalPending):
    return total - totalTravelled - totalPending

def cumulativePercentTravelled(total, totalTravelled, totalPending):
    denominator = total - totalPending
    if denominator != 0:
        return 100 * totalTravelled / denominator
    else:
        return 0

def getRegion(phu_name):
    region = re.sub("(, )?(((&|and) )?District|Health|Public).+$", "", phu_name).replace(" and ", " & ")
    if region.startswith("Hastings & Prince Edward Counties"):
        return "Hastings & P.E. Counties"
    elif region.startswith("North Bay Parry Sound District"):
        return "North Bay Parry Sound"
    elif region.startswith("Kingston, Frontenac"):
        return "Kingston, Front./Len., Add."
    elif region.startswith("Haliburton, Kawartha"):
        return "Haliburton, Kawar., Pine R."
    else:    
        return region
    
def getArea(phu_name):
    if phu_name == "Toronto Public Health" or phu_name == "York Region Public Health Services" \
        or phu_name == "Peel Public Health" or phu_name == "Durham Region Health Department" \
        or phu_name == "Halton Region Health Department" :
            return "GTA"
    elif phu_name == "Ottawa Public Health":
        return "Ottawa"
    elif phu_name == "Windsor-Essex County Health Unit":
        return "Windsor-Essex"
    else:
        return "Other"

aggdf["Total Not Travelled"] = aggdf.apply(lambda row: cumulativeNotTravelled(row["Total"], row["Total Travelled"], row["Total Information Pending"]), axis=1) 
aggdf["Confirmed Percent Travelled"] = aggdf.apply(lambda row: cumulativePercentTravelled(row["Total"], row["Total Travelled"], row["Total Information Pending"]), axis=1)
aggdf["Region"] = aggdf.apply(lambda row: getRegion(row["Reporting_PHU"]), axis=1)
aggdf["Area"] = aggdf.apply(lambda row: getArea(row["Reporting_PHU"]), axis=1)
aggdf

Unnamed: 0,Date,Reporting_PHU,Reporting_PHU_Latitude,Reporting_PHU_Longitude,Count,Travelled_Count,Pending_Count,Total,Total Travelled,Total Information Pending,Percent Increase,Total Not Travelled,Confirmed Percent Travelled,Region,Area
0,2020-01-01,Algoma Public Health Unit,46.532373,-84.314836,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.000000,Algoma,Other
1,2020-01-01,Brant County Health Unit,43.151811,-80.274374,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.000000,Brant County,Other
2,2020-01-01,Chatham-Kent Health Unit,42.403861,-82.208561,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.000000,Chatham-Kent,Other
3,2020-01-01,Durham Region Health Department,43.898605,-78.940341,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.000000,Durham Region,GTA
4,2020-01-01,Eastern Ontario Health Unit,45.029152,-74.736298,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.000000,Eastern Ontario,Other
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6081,2020-06-27,Timiskaming Health Unit,47.509284,-79.681632,0.0,0.0,0.0,18.0,2.0,0.0,0.000000,16.0,11.111111,Timiskaming,Other
6082,2020-06-27,Toronto Public Health,43.656591,-79.379358,1.0,0.0,0.0,13024.0,374.0,1707.0,0.007679,10943.0,3.304763,Toronto,GTA
6083,2020-06-27,Wellington-Dufferin-Guelph Public Health,43.524881,-80.233743,0.0,0.0,0.0,480.0,19.0,12.0,0.000000,449.0,4.059829,Wellington-Dufferin-Guelph,Other
6084,2020-06-27,Windsor-Essex County Health Unit,42.308796,-83.033670,5.0,0.0,5.0,1393.0,119.0,143.0,0.360231,1131.0,9.520000,Windsor-Essex County,Windsor-Essex


In [13]:
#hide
def getAgeNumber(ageGroup):
    if ageGroup == "UNKNOWN":
        return -1
    elif ageGroup == "<20":
        return 0
    elif re.compile("^[0-9]+").match(ageGroup):
        return int(ageGroup[:-1])
    else:
        return -2

In [14]:
#hide
latestdf = aggdf[aggdf["Date"] == str(maxdate)].sort_values(by=["Count"])
latestdf

Unnamed: 0,Date,Reporting_PHU,Reporting_PHU_Latitude,Reporting_PHU_Longitude,Count,Travelled_Count,Pending_Count,Total,Total Travelled,Total Information Pending,Percent Increase,Total Not Travelled,Confirmed Percent Travelled,Region,Area
6052,2020-06-27,Algoma Public Health Unit,46.532373,-84.314836,0.0,0.0,0.0,24.0,6.0,1.0,0.0,17.0,26.086957,Algoma,Other
6083,2020-06-27,Wellington-Dufferin-Guelph Public Health,43.524881,-80.233743,0.0,0.0,0.0,480.0,19.0,12.0,0.0,449.0,4.059829,Wellington-Dufferin-Guelph,Other
6081,2020-06-27,Timiskaming Health Unit,47.509284,-79.681632,0.0,0.0,0.0,18.0,2.0,0.0,0.0,16.0,11.111111,Timiskaming,Other
6080,2020-06-27,Thunder Bay District Health Unit,48.400572,-89.258851,0.0,0.0,0.0,91.0,9.0,0.0,0.0,82.0,9.89011,Thunder Bay,Other
6079,2020-06-27,Sudbury & District Health Unit,46.466092,-80.998059,0.0,0.0,0.0,67.0,20.0,3.0,0.0,44.0,31.25,Sudbury,Other
6078,2020-06-27,Southwestern Public Health,42.777804,-81.151156,0.0,0.0,0.0,82.0,13.0,0.0,0.0,69.0,15.853659,Southwestern,Other
6077,2020-06-27,Simcoe Muskoka District Health Unit,44.410713,-79.686306,0.0,0.0,0.0,599.0,51.0,9.0,0.0,539.0,8.644068,Simcoe Muskoka,Other
6076,2020-06-27,Renfrew County and District Health Unit,45.799406,-77.118727,0.0,0.0,0.0,27.0,5.0,3.0,0.0,19.0,20.833333,Renfrew County,Other
6075,2020-06-27,"Region of Waterloo, Public Health",43.462876,-80.520913,0.0,0.0,0.0,1266.0,47.0,24.0,0.0,1195.0,3.784219,Region of Waterloo,Other
6074,2020-06-27,Porcupine Health Unit,48.47251,-81.32875,0.0,0.0,0.0,67.0,7.0,0.0,0.0,60.0,10.447761,Porcupine,Other


In [15]:
#hide
import datetime as dt
last7days = maxdate - dt.timedelta(days=7)
last7df = aggdf[aggdf["Date"] == str(last7days)].rename(columns={"Total" : "TotalLast7"}) 

def getTotalChangePercent(totalChange, totalLast7):
    if totalLast7 == 0:
        return 100
    else:
        return totalChange / totalLast7 * 100

last7deltadf = pd.merge(latestdf.rename(columns={"Total" : "TotalLatest"}) , last7df, how="left", \
left_on=["Region", "Reporting_PHU", "Reporting_PHU_Latitude", "Reporting_PHU_Longitude"], \
right_on=["Region", "Reporting_PHU", "Reporting_PHU_Latitude", "Reporting_PHU_Longitude"]) \
[["Region", "Reporting_PHU", "Reporting_PHU_Latitude", "Reporting_PHU_Longitude", "TotalLatest", "TotalLast7"]]
last7deltadf["TotalChange"] = last7deltadf.apply(lambda row: row["TotalLatest"] - row["TotalLast7"], axis=1)
last7deltadf["TotalChangePercent"] = last7deltadf.apply(lambda row: getTotalChangePercent(row["TotalChange"], row["TotalLast7"]), axis=1)
last7deltadf = last7deltadf.sort_values(by=["TotalChange", "TotalChangePercent"], ascending=[True,True]).reset_index()
last7deltadf

Unnamed: 0,index,Region,Reporting_PHU,Reporting_PHU_Latitude,Reporting_PHU_Longitude,TotalLatest,TotalLast7,TotalChange,TotalChangePercent
0,0,Algoma,Algoma Public Health Unit,46.532373,-84.314836,24.0,24.0,0.0,0.0
1,2,Timiskaming,Timiskaming Health Unit,47.509284,-79.681632,18.0,18.0,0.0,0.0
2,3,Thunder Bay,Thunder Bay District Health Unit,48.400572,-89.258851,91.0,91.0,0.0,0.0
3,4,Sudbury,Sudbury & District Health Unit,46.466092,-80.998059,67.0,67.0,0.0,0.0
4,5,Southwestern,Southwestern Public Health,42.777804,-81.151156,82.0,82.0,0.0,0.0
5,7,Renfrew County,Renfrew County and District Health Unit,45.799406,-77.118727,27.0,27.0,0.0,0.0
6,10,Peterborough,Peterborough Public Health,44.301632,-78.321347,95.0,95.0,0.0,0.0
7,12,Northwestern,Northwestern Health Unit,49.769615,-94.488254,36.0,36.0,0.0,0.0
8,17,Brant County,Brant County Health Unit,43.151811,-80.274374,130.0,130.0,0.0,0.0
9,20,Eastern Ontario,Eastern Ontario Health Unit,45.029152,-74.736298,164.0,164.0,0.0,0.0


In [16]:
#hide_input
px.bar(last7deltadf, y='Region', x='TotalChange',
             hover_name="Reporting_PHU", color='TotalChangePercent',
             labels={"Reporting_PHU":"Public Health Unit", "TotalChangePercent":"% increase", "TotalChange":"Increase in cases", "Region" : "Public Health Unit"},
             orientation="h", height=800,
             title="Increase in cases for week of " + str(last7days) + " to " + str(maxdate)
    )

## Ontario-wide

### Cases

In [17]:
#hide
outcomestartdate = "2020-02-15"

outcomedf = df[df["Accurate_Episode_Date"] >= outcomestartdate + "T00:00:00"] \
.groupby(["Accurate_Episode_Date", "Outcome1"]).count() \
["Row_ID"].reset_index().rename(columns={"Row_ID" : "Cases"})

outcomedf["Outcome"] = pd.Categorical(outcomedf["Outcome1"], ["Fatal", "Resolved", "Not Resolved"])

outcomedf = outcomedf.sort_values(by="Outcome")

outcomedf

Unnamed: 0,Accurate_Episode_Date,Outcome1,Cases,Outcome
165,2020-04-27,Fatal,36,Fatal
198,2020-05-08,Fatal,24,Fatal
85,2020-03-31,Fatal,27,Fatal
241,2020-05-24,Fatal,4,Fatal
83,2020-03-30,Fatal,49,Fatal
...,...,...,...,...
220,2020-05-16,Not Resolved,3,Not Resolved
109,2020-04-08,Not Resolved,1,Not Resolved
112,2020-04-09,Not Resolved,2,Not Resolved
89,2020-04-01,Not Resolved,2,Not Resolved


In [18]:
#hide
teststatusurl = "https://data.ontario.ca/dataset/f4f86e54-872d-43f8-8a86-3892fd3cb5e6/resource/ed270bb8-340b-41f9-a7c6-e8ef587e6d11/download/covidtesting.csv"
teststatuscache = "data_files/ontario-teststatus-data.cache.csv"
try:
    teststatusdf = pd.read_csv(teststatusurl)
    teststatusdf.to_csv(teststatuscache, index=False)
except:
    teststatusdf = pd.read_csv(teststatuscache)
teststatusdf

Unnamed: 0,Reported Date,Confirmed Negative,Presumptive Negative,Presumptive Positive,Confirmed Positive,Resolved,Deaths,Total Cases,Total patients approved for testing as of Reporting Date,Total tests completed in the last day,Under Investigation,Number of patients hospitalized with COVID-19,Number of patients in ICU with COVID-19,Number of patients in ICU on a ventilator with COVID-19,Total Positive LTC Resident Cases,Total Positive LTC HCW Cases,Total LTC Resident Deaths,Total LTC HCW Deaths
0,2020-01-26,,,1.0,,,,,,,9,,,,,,,
1,2020-01-27,,,2.0,,,,,,,19,,,,,,,
2,2020-01-28,,,1.0,1.0,,,1.0,,,11,,,,,,,
3,2020-01-30,,,0.0,2.0,,,2.0,,,27,,,,,,,
4,2020-01-31,,,0.0,2.0,,,2.0,,,17,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
139,2020-06-24,,,,2049.0,29336.0,2631.0,34016.0,1236023.0,23207.0,21398,278.0,73.0,48.0,5427.0,2244.0,1683.0,5.0
140,2020-06-25,,,,2036.0,29528.0,2641.0,34205.0,1263534.0,27511.0,24607,270.0,69.0,47.0,5436.0,2258.0,1689.0,5.0
141,2020-06-26,,,,1918.0,29754.0,2644.0,34316.0,1294314.0,30780.0,27344,256.0,61.0,41.0,5445.0,2268.0,1692.0,5.0
142,2020-06-27,,,,1892.0,29932.0,2652.0,34476.0,1327806.0,33492.0,33759,252.0,54.0,35.0,5457.0,2282.0,1698.0,5.0


In [19]:
#hide
testconfirmeddf = teststatusdf.copy() \
[["Reported Date", "Total Cases", "Deaths", "Total tests completed in the last day", "Total Positive LTC Resident Cases", "Total Positive LTC HCW Cases"]]

testconfirmeddf["ReportedDate"] = testconfirmeddf.apply(lambda r: dateutil.parser.parse(r["Reported Date"]), axis=1)
testconfirmeddf["New Cases"] = testconfirmeddf["Total Cases"].fillna(0).diff()
testconfirmeddf["New Deaths"] = testconfirmeddf["Deaths"].fillna(0).diff()
testconfirmeddf["New LTC Resident Cases"] = testconfirmeddf["Total Positive LTC Resident Cases"].diff()
testconfirmeddf["New LTC HCW Cases"] = testconfirmeddf["Total Positive LTC HCW Cases"].diff()
testconfirmeddf["Total Tests"] = testconfirmeddf["Total tests completed in the last day"].cumsum()

diffDays = 5

testconfirmeddf["Cases Inc 5d"] = testconfirmeddf["Total Cases"].fillna(0).diff(periods=diffDays)
testconfirmeddf["Tests Inc 5d"] = testconfirmeddf["Total Tests"].fillna(0).diff(periods=diffDays)
testconfirmeddf["New Cases Mean 5d"] = testconfirmeddf["New Cases"].rolling(diffDays).mean()
testconfirmeddf["New Tests Mean 5d"] = testconfirmeddf["Total tests completed in the last day"].rolling(diffDays).mean()
testconfirmeddf["New Deaths Mean 5d"] = testconfirmeddf["New Deaths"].rolling(diffDays).mean()

def getPercentPositive(numPos, numTests):
    if numTests == 0:
        return 0
    else:
        return numPos/numTests * 100

testconfirmeddf["Daily Percent Tested Positive"] = testconfirmeddf.apply(lambda r: getPercentPositive(r["New Cases"], r["Total tests completed in the last day"]), axis=1)
testconfirmeddf["Total Percent Tested Positive"] = testconfirmeddf.apply(lambda r: getPercentPositive(r["Total Cases"], r["Total Tests"]), axis=1)
testconfirmeddf["5d Avg Percent Tested Positive"] = testconfirmeddf.apply(lambda r: getPercentPositive(r["Cases Inc 5d"], r["Tests Inc 5d"]), axis=1)
testconfirmeddf["Diff 5d avg and avg of avgs"] = testconfirmeddf.apply(lambda r: r["5d Avg Percent Tested Positive"] - getPercentPositive(r["New Cases Mean 5d"], r["New Tests Mean 5d"]), axis=1)

testconfirmeddf

Unnamed: 0,Reported Date,Total Cases,Deaths,Total tests completed in the last day,Total Positive LTC Resident Cases,Total Positive LTC HCW Cases,ReportedDate,New Cases,New Deaths,New LTC Resident Cases,...,Total Tests,Cases Inc 5d,Tests Inc 5d,New Cases Mean 5d,New Tests Mean 5d,New Deaths Mean 5d,Daily Percent Tested Positive,Total Percent Tested Positive,5d Avg Percent Tested Positive,Diff 5d avg and avg of avgs
0,2020-01-26,,,,,,2020-01-26,,,,...,,,,,,,,,,
1,2020-01-27,,,,,,2020-01-27,0.0,0.0,,...,,,,,,,,,,
2,2020-01-28,1.0,,,,,2020-01-28,1.0,0.0,,...,,,,,,,,,,
3,2020-01-30,2.0,,,,,2020-01-30,1.0,0.0,,...,,,,,,,,,,
4,2020-01-31,2.0,,,,,2020-01-31,0.0,0.0,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
139,2020-06-24,34016.0,2631.0,23207.0,5427.0,2244.0,2020-06-24,163.0,12.0,35.0,...,1122941.0,921.0,112091.0,184.2,22418.2,13.4,0.702374,3.029189,0.821654,1.110223e-16
140,2020-06-25,34205.0,2641.0,27511.0,5436.0,2258.0,2020-06-25,189.0,10.0,9.0,...,1150452.0,904.0,112215.0,180.8,22443.0,9.2,0.686998,2.973179,0.805596,0.000000e+00
141,2020-06-26,34316.0,2644.0,30780.0,5445.0,2268.0,2020-06-26,111.0,3.0,9.0,...,1181232.0,840.0,119587.0,168.0,23917.4,7.6,0.360624,2.905102,0.702417,1.110223e-16
142,2020-06-27,34476.0,2652.0,33492.0,5457.0,2282.0,2020-06-27,160.0,8.0,12.0,...,1214724.0,839.0,131179.0,167.8,26235.8,8.6,0.477726,2.838176,0.639584,-1.110223e-16


In [20]:
#hide
testconfirmeddf[testconfirmeddf["Diff 5d avg and avg of avgs"] != 0] \
[["Reported Date", "Cases Inc 5d", "Tests Inc 5d", "New Cases Mean 5d", "New Tests Mean 5d", "5d Avg Percent Tested Positive", "Diff 5d avg and avg of avgs"]]

Unnamed: 0,Reported Date,Cases Inc 5d,Tests Inc 5d,New Cases Mean 5d,New Tests Mean 5d,5d Avg Percent Tested Positive,Diff 5d avg and avg of avgs
0,2020-01-26,,,,,,
1,2020-01-27,,,,,,
2,2020-01-28,,,,,,
3,2020-01-30,,,,,,
4,2020-01-31,,,,,,
...,...,...,...,...,...,...,...
135,2020-06-20,931.0,125819.0,186.2,25163.8,0.739952,1.110223e-16
136,2020-06-21,922.0,127503.0,184.4,25500.6,0.723120,-1.110223e-16
139,2020-06-24,921.0,112091.0,184.2,22418.2,0.821654,1.110223e-16
141,2020-06-26,840.0,119587.0,168.0,23917.4,0.702417,1.110223e-16


In [21]:
#hide
casesdf = df[df["Accurate_Episode_Date"] >= outcomestartdate] \
.groupby(["Accurate_Episode_Date"]).count() \
["Row_ID"].reset_index().rename(columns={"Row_ID" : "Count"})

casesdf["Count 5d Mean right"] = casesdf["Count"].rolling(diffDays).mean()
casesdf["Count 5d Mean"] = casesdf["Count"].rolling(diffDays, center=True).mean()
casesdf["Count 5d Mean - 1d"] = casesdf.shift(periods=-1)["Count 5d Mean"]
casesdf["Count 5d Mean - 5d"] = casesdf.shift(periods=-5)["Count 5d Mean"]
casesdf["Count 5d Mean - 14d"] = casesdf.shift(periods=-14)["Count 5d Mean"]

casesdf

Unnamed: 0,Accurate_Episode_Date,Count,Count 5d Mean right,Count 5d Mean,Count 5d Mean - 1d,Count 5d Mean - 5d,Count 5d Mean - 14d
0,2020-02-15,1,,,,2.8,12.0
1,2020-02-16,1,,,1.6,3.4,13.2
2,2020-02-17,1,,1.6,1.6,3.2,17.2
3,2020-02-19,1,,1.6,2.2,3.4,20.0
4,2020-02-20,4,1.6,2.2,2.8,2.8,23.6
...,...,...,...,...,...,...,...
128,2020-06-23,119,131.2,108.6,91.8,,
129,2020-06-24,123,124.4,91.8,66.8,,
130,2020-06-25,69,108.6,66.8,,,
131,2020-06-26,16,91.8,,,,


In [22]:
#hide_input
totalbar = px.bar(outcomedf, y="Cases", x="Accurate_Episode_Date",
             hover_data=["Outcome1"], 
             color="Outcome1", color_discrete_sequence=["#ff6666", "#b0e8b0", "#ffd11a"],
             labels={"Accurate_Episode_Date" : "Symptom Onset Date", "Outcome1" : "By onset date"},
             title="Ontario Confirmed Cases by Symptom Onset Date<br>by Outcome from " + outcomestartdate + " to " + str(maxdate)
    )
totalbar.add_trace(go.Scatter(x=casesdf["Accurate_Episode_Date"], y=casesdf["Count 5d Mean right"], mode="lines",
                             name="5d avg", 
                              line=dict(color="black", dash="solid")))
totalbar.add_trace(go.Scatter(x=testconfirmeddf["Reported Date"], y=testconfirmeddf["New Cases Mean 5d"], mode="lines",
                             name="By rep dt avg", 
                              line=dict(color="silver", dash="dot")))

In [23]:
#hide

def getAcquisition(info):
    if info == "CC":
        return "Confirmed case"
    elif info == "No Epi-link":
        return "Comm. spread"
    elif info == "OB":
        return "Inst. outbreak"
    elif info == "No Info-Missing":
        return "Missing info"
    elif info == "No Info-Unk" :
        return "Unknown"
    else:
        return info

acqOrder = ["Travel", "Confirmed case", "Comm. spread", "Inst. outbreak", "Unknown", "Missing info"]
acqColours = ["mediumblue", "#ff6666", "red", "#990000", "grey", "silver"]

onacqdf = origdf[origdf["Accurate_Episode_Date"] >= outcomestartdate] \
.groupby(["Accurate_Episode_Date", "Case_AcquisitionInfo"]).count()["Row_ID"] \
.reset_index() \
.rename(columns={"Row_ID" : "Count"}) 


onacqdf["Acquisition"] = onacqdf.apply(lambda r: getAcquisition(r["Case_AcquisitionInfo"]), axis=1)
onacqdf["Acquisition"] = pd.Categorical(onacqdf["Acquisition"], acqOrder)
onacqdf = onacqdf.sort_values(by="Acquisition")
onacqdf

Unnamed: 0,Accurate_Episode_Date,Case_AcquisitionInfo,Count,Acquisition
0,2020-02-15,Travel,1,Travel
217,2020-04-05,Travel,9,Travel
223,2020-04-06,Travel,7,Travel
229,2020-04-07,Travel,9,Travel
235,2020-04-08,Travel,3,Travel
...,...,...,...,...
429,2020-05-12,No Info-Missing,7,Missing info
435,2020-05-13,No Info-Missing,2,Missing info
440,2020-05-14,No Info-Missing,4,Missing info
391,2020-05-05,No Info-Missing,6,Missing info


In [24]:
#hide_input

onacqbar = px.bar(onacqdf, y="Count", x="Accurate_Episode_Date",
             #hover_data=["Outcome1"], 
             color="Acquisition", color_discrete_sequence=acqColours,
             labels={"Accurate_Episode_Date" : "Symptom Onset Date", "Outcome1" : "By onset date"},
             title="Ontario Confirmed Cases by Symptom Onset Date<br>by Acquisition from " + outcomestartdate + " to " + str(maxdate)
    )
onacqbar.add_trace(go.Scatter(x=casesdf["Accurate_Episode_Date"], y=casesdf["Count 5d Mean right"], mode="lines",
                             name="5d avg", 
                              line=dict(color="black", dash="solid")))
onacqbar.add_trace(go.Scatter(x=testconfirmeddf["Reported Date"], y=testconfirmeddf["New Cases Mean 5d"], mode="lines",
                             name="By rep dt avg", 
                              line=dict(color="darkgrey", dash="dot")))

In [25]:
#hide
def getInfectionSource(info):
    if info.startswith("No Info"):
        return "No info"
    elif info == "CC" or info == "No Epi-link":
        return "Other"
    elif info == "OB":
        return "Inst. outbreak"
    else:
        return info

onacqdf["Infection Source"] = onacqdf.apply(lambda r: getInfectionSource(r["Case_AcquisitionInfo"]), axis=1)

onacqlinedf = onacqdf.groupby(["Accurate_Episode_Date", "Infection Source"]).sum() \
.reset_index()
onacqlinedf

Unnamed: 0,Accurate_Episode_Date,Infection Source,Count
0,2020-02-15,Travel,1
1,2020-02-16,Travel,1
2,2020-02-17,Travel,1
3,2020-02-19,Travel,1
4,2020-02-20,Inst. outbreak,1
...,...,...,...
465,2020-06-25,Travel,1
466,2020-06-26,No info,14
467,2020-06-26,Other,2
468,2020-06-27,No info,6


In [26]:
#hide_input

rollingbardf = testconfirmeddf[testconfirmeddf["ReportedDate"] >= "2020-03-01"]

rollingbar = px.bar(rollingbardf, 
       x="ReportedDate", y="New Cases", 
       color="5d Avg Percent Tested Positive", range_color=[0,8],
       color_continuous_scale=px.colors.sequential.Inferno,
       hover_data=["New Cases", "Total tests completed in the last day"],
       labels={"5d Avg Percent Tested Positive" : "% pos. 5d avg", "New Cases" : "Cases", "ReportedDate" : "Reported Date"},
       title="Ontario Confirmed Cases by Reported Date<br>by Percent Tests per Day with Positive Result (5-Day Rolling Average)",
      )
rollingbar.update_layout(bargap=0)
rollingbar.add_trace(go.Scatter(x=rollingbardf["ReportedDate"], y=rollingbardf["New Cases Mean 5d"], mode="lines",
                             name="Cases 5d Avg", showlegend=False,
                              line=dict(color="darkgrey", dash="solid")))

In [27]:
#hide_input
HTML('For the graph above, data for "Total tests completed in the last day" is not available before 15 Apr 2020.')

In [28]:
#hide_input

testeddf = testconfirmeddf[(testconfirmeddf["Total tests completed in the last day"].notnull()) & (testconfirmeddf["Total tests completed in the last day"] != 0)]

testsbar = px.bar(testeddf, 
       x="ReportedDate", y="Total tests completed in the last day", 
       color="5d Avg Percent Tested Positive", range_color=[0,8],
       color_continuous_scale=px.colors.sequential.Inferno,
       hover_data=["New Cases", "Total tests completed in the last day"],
       labels={"5d Avg Percent Tested Positive" : "% pos. 5d avg", "New Cases" : "Cases", "ReportedDate" : "Reported Date"},
       title="Ontario Tests Completed in the Last Day by Reported Date<br>by Percent Tests per Day with Positive Result (5-Day Rolling Average)",
      )

testsbar.update_layout(bargap=0, annotations=[
        dict(
            x="2020-04-24",
            y=12295,
            xref="x",
            yref="y",
            text="Apr 24 peak<br>in reported<br>confirmed cases",
            showarrow=True,
            arrowhead=1,
            ax=0,
            ay=-80
        ),
        dict(
            x="2020-05-24",
            y=11383,
            xref="x",
            yref="y",
            text="May 24<br>peak in<br>reported<br>confirmed<br>cases",
            showarrow=True,
            arrowhead=1,
            ax=0,
            ay=-100
        ),
        dict(
            x="2020-06-06",
            y=11383,
            xref="x",
            yref="y",
            text="Jun 6<br>peak in<br>reported<br>confirmed<br>cases",
            showarrow=True,
            arrowhead=1,
            ax=0,
            ay=-180
        )
    ]
                      )
testsbar.add_trace(go.Scatter(x=testeddf["ReportedDate"], y=testeddf["New Tests Mean 5d"], mode="lines",
                             name="Tests 5d Avg", showlegend=False,
                              line=dict(color="deepskyblue", dash="solid")))

In [29]:
#hide_input
import math

posline = px.line(testeddf, 
       x="ReportedDate", y="5d Avg Percent Tested Positive", log_y=True,
       hover_data=["New Cases", "Total tests completed in the last day"],
       labels={"5d Avg Percent Tested Positive" : "Percent Tests Positive (5d avg)", "ReportedDate" : "Reported Date"},
       title="Percentage of Ontario Tests per Day with SARS-CoV-2 Positive Result<br>(5-Day Rolling Average) by Reported Date",
        color_discrete_sequence=["red"]
      )
posline.update_layout(annotations=[
        dict(
            x="2020-04-24",
            y=math.log(5.773, 10),
            xref="x",
            yref="y",
            text="Apr 24 peak <br>in reported<br>confirmed cases",
            showarrow=True,
            arrowhead=7,
            ax=0,
            ay=-60
        ),
        dict(
            x="2020-05-24",
            y=math.log(4.103, 10),
            xref="x",
            yref="y",
            text="May 24 peak<br>in reported<br>confirmed cases",
            showarrow=True,
            arrowhead=7,
            ax=0,
            ay=-60
        ),
        dict(
            x="2020-06-06",
            y=math.log(testeddf[testeddf["ReportedDate"] == "2020-06-06"]["5d Avg Percent Tested Positive"], 10),
            xref="x",
            yref="y",
            text="Jun 6<br>peak in<br>reported<br>confirmed<br>cases",
            showarrow=True,
            arrowhead=1,
            ax=0,
            ay=-100
        )
    ]
)

In [30]:
#hide
ageactivedf = activedf[activedf["Age_Group"] != "Unknown"]

aggageactivedf = ageactivedf \
.groupby(["Accurate_Episode_Date", "Age_Group"]) \
.count()["Row_ID"] \
.reset_index() \
.rename(columns={"Row_ID" : "Cases"})

aggageactivedf["OnsetWithin"] = "Any date"

aggageactivelast14df = ageactivedf[ageactivedf["Datetime"] >= now - timedelta(days=14)] \
.groupby(["Accurate_Episode_Date", "Age_Group"]) \
.count()["Row_ID"] \
.reset_index() \
.rename(columns={"Row_ID" : "Cases"})

aggageactivelast14df["OnsetWithin"] = "14 days"

aggageactivelast7df = ageactivedf[ageactivedf["Datetime"] >= now - timedelta(days=7)] \
.groupby(["Accurate_Episode_Date", "Age_Group"]) \
.count()["Row_ID"] \
.reset_index() \
.rename(columns={"Row_ID" : "Cases"})

aggageactivelast7df["OnsetWithin"] = "7 days"

aggageactivelast3df = ageactivedf[ageactivedf["Datetime"] >= now - timedelta(days=3)] \
.groupby(["Accurate_Episode_Date", "Age_Group"]) \
.count()["Row_ID"] \
.reset_index() \
.rename(columns={"Row_ID" : "Cases"})

aggageactivelast3df["OnsetWithin"] = "3 days"

appendaggageactivedf = aggageactivedf.append(aggageactivelast14df).append(aggageactivelast7df).append(aggageactivelast3df)
appendaggageactivedf

Unnamed: 0,Accurate_Episode_Date,Age_Group,Cases,OnsetWithin
0,2020-03-02,60s,1,Any date
1,2020-03-10,50s,1,Any date
2,2020-03-11,40s,1,Any date
3,2020-03-13,60s,1,Any date
4,2020-03-16,80s,1,Any date
...,...,...,...,...
4,2020-06-26,90s,1,3 days
5,2020-06-26,<20,2,3 days
6,2020-06-27,20s,4,3 days
7,2020-06-27,30s,2,3 days


In [31]:
#hide_input

sumageactivedf = appendaggageactivedf \
.groupby(["OnsetWithin", "Age_Group"]) \
.sum() \
.reset_index()

sumageactivedf["Age Group"] = sumageactivedf.apply(lambda row: getAgeNumber(row["Age_Group"]), axis=1)
sumageactivedf["Onset"] = pd.Categorical(sumageactivedf["OnsetWithin"], ["Any date", "14 days", "7 days", "3 days"])

sumageactivedf = sumageactivedf.sort_values(by=["Onset", "Age Group"])


activeagebar = px.bar(sumageactivedf, y="Cases", x='Age_Group',
    color="Onset", color_discrete_sequence=["maroon", "darkorange", "yellow", "white"],
    labels={"Onset" : "Onset w/in last", "Age_Group" : "Age Group"},
    title="Currently Active Confirmed Cases by Age Group",
  hover_name="Age_Group"

    )
activeagebar.update_layout(barmode='overlay', hovermode="x")

In [32]:
#hide_input

HTML("The incubation period between SARS-CoV-2 infection to COVID-19 symptom onset is 1 to 14 days, with an average of 5 days. The graph below explores whether there is a relationship between confirmed cases by offset Symptom Onset Date and notable social events.")

In [33]:
#hide

events = [
    { "name" : "Good Friday", "date" : "2020-04-10" },
    { "name" : "Easter Monday", "date" : "2020-04-13", "ax" : 40 },
    { "name" : "Mother's Day", "date" : "2020-05-10"},
    { "name" : "Victoria Day", "date" : "2020-05-18"},
    { "name" : "Trinity Bellwoods<br>Park weekend", "date" : "2020-05-23", "ay" : -200},
    { "name" : "Father's Day", "date" : "2020-06-21"},
    { "name" : "Canada Day", "date" : "2020-07-01"},
    { "name" : "Civic Holiday", "date" : "2020-08-03"},
]

maxinfectiondate = casesdf[casesdf["Count 5d Mean - 5d"].notnull()]["Accurate_Episode_Date"].max()

annotations = []
for event in events:
    if event["date"] <= maxinfectiondate:
        ax = event.get("ax")
        if ax is None:
            ax = 0
        ay = event.get("ay")
        if ay is None:
            ay = -60
        annotations.append(dict(
            x=event["date"],
            y=casesdf[casesdf["Accurate_Episode_Date"] == event["date"]]["Count 5d Mean - 5d"].values[0],
            xref="x",
            yref="y",
            text=event["name"],
            showarrow=True,
            arrowhead=7,
            ax=ax,
            ay=ay
        ))
    
annotations

[{'x': '2020-04-10',
  'y': 646.6,
  'xref': 'x',
  'yref': 'y',
  'text': 'Good Friday',
  'showarrow': True,
  'arrowhead': 7,
  'ax': 0,
  'ay': -60},
 {'x': '2020-04-13',
  'y': 557.4,
  'xref': 'x',
  'yref': 'y',
  'text': 'Easter Monday',
  'showarrow': True,
  'arrowhead': 7,
  'ax': 40,
  'ay': -60},
 {'x': '2020-05-10',
  'y': 357.0,
  'xref': 'x',
  'yref': 'y',
  'text': "Mother's Day",
  'showarrow': True,
  'arrowhead': 7,
  'ax': 0,
  'ay': -60},
 {'x': '2020-05-18',
  'y': 308.0,
  'xref': 'x',
  'yref': 'y',
  'text': 'Victoria Day',
  'showarrow': True,
  'arrowhead': 7,
  'ax': 0,
  'ay': -60},
 {'x': '2020-05-23',
  'y': 297.6,
  'xref': 'x',
  'yref': 'y',
  'text': 'Trinity Bellwoods<br>Park weekend',
  'showarrow': True,
  'arrowhead': 7,
  'ax': 0,
  'ay': -200}]

In [34]:
#hide_input
infectionline = go.Figure()
infectionline.add_trace(go.Scatter(x=casesdf["Accurate_Episode_Date"], y=casesdf["Count 5d Mean - 1d"], mode="lines",
                              showlegend=True, name="-1d (min)",
                              line=dict(color="pink", dash="dot")))
infectionline.add_trace(go.Scatter(x=casesdf["Accurate_Episode_Date"], y=casesdf["Count 5d Mean - 14d"], mode="lines",
                              showlegend=True, name="-14d (max)",
                              line=dict(color="thistle", dash="dot")))
infectionline.add_trace(go.Scatter(x=casesdf["Accurate_Episode_Date"], y=casesdf["Count 5d Mean - 5d"], mode="lines",
                              showlegend=True, name="-5d (avg)",
                              line=dict(color="hotpink", dash="dot")))
infectionline.update_layout(annotations=annotations, 
                            title="Confirmed Cases (5-Day Rolling Average) by Projected Infection Date<br>Based on Symptom Onset Date",
                           xaxis_title="Projected Infection Date based on Symptom Onset Date", yaxis_title="Confirmed Cases (5d Average)",
                           legend_title_text="Onset date offset")

### Deaths

In [35]:
#hide
ageacqdeathdf = origdf[origdf["Outcome1"] == "Fatal"] \
.groupby(["Age_Group", "Case_AcquisitionInfo"]).count() \
["Row_ID"].reset_index().rename(columns={"Row_ID" : "Deaths"})

ageacqdeathdf["AgeGroup"] = ageacqdeathdf.apply(lambda r: getAgeNumber(r["Age_Group"]), axis=1)
ageacqdeathdf["Acquisition"] = ageacqdeathdf.apply(lambda r: getAcquisition(r["Case_AcquisitionInfo"]), axis=1)
ageacqdeathdf["Acquisition"] = pd.Categorical(ageacqdeathdf["Acquisition"], acqOrder)
ageacqdeathdf = ageacqdeathdf.sort_values(by=["AgeGroup", "Acquisition"])
ageacqdeathdf

ageacqdeathdf

Unnamed: 0,Age_Group,Case_AcquisitionInfo,Deaths,AgeGroup,Acquisition
43,<20,No Epi-link,1,0,Comm. spread
0,20s,CC,1,20,Confirmed case
1,20s,No Epi-link,2,20,Comm. spread
2,20s,No Info-Unk,1,20,Unknown
3,30s,CC,1,30,Confirmed case
4,30s,No Epi-link,3,30,Comm. spread
6,30s,OB,2,30,Inst. outbreak
5,30s,No Info-Unk,1,30,Unknown
12,40s,Travel,1,40,Travel
7,40s,CC,9,40,Confirmed case


In [36]:
#hide

deathsdf = df[df["Outcome1"] == "Fatal"] \
.groupby(["Accurate_Episode_Date", "Age_Group"]).count() \
["Row_ID"].reset_index().rename(columns={"Row_ID" : "Cases"})

deathsdf["Age Group"] = deathsdf.apply(lambda r: getAgeNumber(r["Age_Group"]), axis=1)

deathsdf = deathsdf.sort_values(by=["Age Group", "Accurate_Episode_Date"])

deathsdf

Unnamed: 0,Accurate_Episode_Date,Age_Group,Cases,Age Group
431,2020-06-18,<20,1,0
71,2020-03-23,20s,1,20
146,2020-04-06,20s,1,20
215,2020-04-19,20s,1,20
380,2020-05-26,20s,1,20
...,...,...,...,...
418,2020-06-09,90s,1,90
422,2020-06-11,90s,1,90
424,2020-06-13,90s,1,90
430,2020-06-18,90s,1,90


In [37]:
#hide
testsfirstdeath = testconfirmeddf[testconfirmeddf["New Deaths"] > 0]["Reported Date"].min()

testsdeathsdf = testconfirmeddf[testconfirmeddf["Reported Date"] >= testsfirstdeath]

In [38]:
#hide
deathacqdf = df[df["Outcome1"] == "Fatal"] \
.groupby(["Accurate_Episode_Date", "Case_AcquisitionInfo"]).count() \
["Row_ID"].reset_index().rename(columns={"Row_ID" : "Count"})

deathacqdf["Acquisition"] = deathacqdf.apply(lambda r: getAcquisition(r["Case_AcquisitionInfo"]), axis=1)
deathacqdf["Acquisition"] = pd.Categorical(deathacqdf["Acquisition"], acqOrder)
deathacqdf = deathacqdf.sort_values(by="Acquisition")
deathacqdf

Unnamed: 0,Accurate_Episode_Date,Case_AcquisitionInfo,Count,Acquisition
73,2020-03-26,Travel,4,Travel
26,2020-03-15,Travel,4,Travel
55,2020-03-22,Travel,2,Travel
105,2020-04-02,Travel,1,Travel
96,2020-03-31,Travel,3,Travel
...,...,...,...,...
174,2020-04-19,No Info-Missing,1,Missing info
182,2020-04-21,No Info-Missing,1,Missing info
191,2020-04-23,No Info-Missing,1,Missing info
58,2020-03-23,No Info-Missing,1,Missing info


In [39]:
#hide_input

deathacqbar = px.bar(deathacqdf, y="Count", x="Accurate_Episode_Date",
             color="Acquisition", 
             color_discrete_sequence=acqColours,
             labels={"Accurate_Episode_Date" : "Symptom Onset Date", "Count" : "Deaths"},
             title="Ontario COVID-19 Deaths by Symptom Onset Date<br>by Acquisition"
    )

deathacqbar.add_trace(go.Scatter(x=testsdeathsdf["Reported Date"], y=testsdeathsdf["New Deaths Mean 5d"], mode="lines",
                             name="By rep dt avg", 
                              line=dict(color="black", dash="dot")))

In [40]:
#hide_input

deathbar = px.bar(deathsdf, y="Cases", x="Accurate_Episode_Date",
             color="Age_Group", 
             color_discrete_sequence=["maroon", "red", "darkorange", "yellow", "#b3ffb3", "turquoise", "#3385ff", "#9999ff", "#dab3ff", "white"],
             labels={"Accurate_Episode_Date" : "Symptom Onset Date", "Age_Group" : "Age Group", "Cases" : "Deaths"},
             title="Ontario COVID-19 Deaths by Symptom Onset Date<br>by Age Group"
    )

deathbar.add_trace(go.Scatter(x=testsdeathsdf["Reported Date"], y=testsdeathsdf["New Deaths Mean 5d"], mode="lines",
                             name="By rep dt avg", 
                              line=dict(color="black", dash="dot")))

In [41]:
#hide
deathdemdf = teststatusdf[teststatusdf["Reported Date"].notnull()] \
[["Reported Date", "Deaths", "Total LTC Resident Deaths", "Total LTC HCW Deaths"]].copy()

deathdemdf["ReportedDate"] = deathdemdf.apply(lambda r: dateutil.parser.parse(r["Reported Date"]), axis=1)
deathdemdf["Total Deaths"] = deathdemdf["Deaths"].fillna(0)
deathdemdf["New Deaths"] = deathdemdf["Total Deaths"].diff()
deathdemdf["New LTC Resident Deaths"] = deathdemdf["Total LTC Resident Deaths"].diff()
deathdemdf["New LTC HCW Deaths"] = deathdemdf["Total LTC HCW Deaths"].diff()

deathdemdf["New Other Deaths"] = deathdemdf.apply(lambda r: r["New Deaths"] - r["New LTC Resident Deaths"] - r["New LTC HCW Deaths"], axis=1)

deathdemdf

Unnamed: 0,Reported Date,Deaths,Total LTC Resident Deaths,Total LTC HCW Deaths,ReportedDate,Total Deaths,New Deaths,New LTC Resident Deaths,New LTC HCW Deaths,New Other Deaths
0,2020-01-26,,,,2020-01-26,0.0,,,,
1,2020-01-27,,,,2020-01-27,0.0,0.0,,,
2,2020-01-28,,,,2020-01-28,0.0,0.0,,,
3,2020-01-30,,,,2020-01-30,0.0,0.0,,,
4,2020-01-31,,,,2020-01-31,0.0,0.0,,,
...,...,...,...,...,...,...,...,...,...,...
139,2020-06-24,2631.0,1683.0,5.0,2020-06-24,2631.0,12.0,19.0,-1.0,-6.0
140,2020-06-25,2641.0,1689.0,5.0,2020-06-25,2641.0,10.0,6.0,0.0,4.0
141,2020-06-26,2644.0,1692.0,5.0,2020-06-26,2644.0,3.0,3.0,0.0,0.0
142,2020-06-27,2652.0,1698.0,5.0,2020-06-27,2652.0,8.0,6.0,0.0,2.0


In [42]:
#hide
unknowndeathsdf = deathdemdf[(deathdemdf["New Other Deaths"].isnull()) | (deathdemdf["New Other Deaths"] < 0) | (deathdemdf["New LTC Resident Deaths"] < 0)].copy()

unknowndeathsdf["Situation"] = "Unknown"
unknowndeathsdf["DeathCount"] = unknowndeathsdf["New Deaths"]

unknowndeathsdf = unknowndeathsdf[(unknowndeathsdf["DeathCount"].notnull()) & (unknowndeathsdf["DeathCount"] != 0)] \
[["ReportedDate", "Situation", "DeathCount"]]
unknowndeathsdf

Unnamed: 0,ReportedDate,Situation,DeathCount
40,2020-03-17,Unknown,1.0
42,2020-03-19,Unknown,1.0
45,2020-03-22,Unknown,3.0
46,2020-03-23,Unknown,1.0
47,2020-03-24,Unknown,2.0
...,...,...,...
104,2020-05-20,Unknown,43.0
117,2020-06-02,Unknown,17.0
129,2020-06-14,Unknown,12.0
137,2020-06-22,Unknown,3.0


In [43]:
#hide
sitdeathsdf =  deathdemdf[(deathdemdf["New Other Deaths"].notnull()) & (deathdemdf["New Other Deaths"] >= 0) & (deathdemdf["New LTC Resident Deaths"] >= 0)]

otherdeathsdf = sitdeathsdf.copy()

otherdeathsdf["Situation"] = "Other"

otherdeathsdf["DeathCount"] = otherdeathsdf.apply(lambda r: r["New Deaths"] - r["New LTC Resident Deaths"] - r["New LTC HCW Deaths"], axis=1)

otherdeathsdf = otherdeathsdf[["ReportedDate", "Situation", "DeathCount"]]

otherdeathsdf

Unnamed: 0,ReportedDate,Situation,DeathCount
105,2020-05-21,Other,13.0
106,2020-05-22,Other,8.0
107,2020-05-23,Other,7.0
108,2020-05-24,Other,7.0
109,2020-05-25,Other,6.0
110,2020-05-26,Other,9.0
111,2020-05-27,Other,15.0
112,2020-05-28,Other,8.0
113,2020-05-29,Other,6.0
114,2020-05-30,Other,6.0


In [44]:
#hide
residentdeathsdf = sitdeathsdf.copy()
residentdeathsdf["Situation"] = "LTC Resident"
residentdeathsdf["DeathCount"] = residentdeathsdf["New LTC Resident Deaths"]
residentdeathsdf = residentdeathsdf[["ReportedDate", "Situation", "DeathCount"]]
residentdeathsdf

Unnamed: 0,ReportedDate,Situation,DeathCount
105,2020-05-21,LTC Resident,18.0
106,2020-05-22,LTC Resident,20.0
107,2020-05-23,LTC Resident,20.0
108,2020-05-24,LTC Resident,18.0
109,2020-05-25,LTC Resident,23.0
110,2020-05-26,LTC Resident,12.0
111,2020-05-27,LTC Resident,17.0
112,2020-05-28,LTC Resident,25.0
113,2020-05-29,LTC Resident,35.0
114,2020-05-30,LTC Resident,11.0


In [45]:
#hide
lhcwdeathsdf = sitdeathsdf.copy()
lhcwdeathsdf["Situation"] = "LTC HCW"
lhcwdeathsdf["DeathCount"] = lhcwdeathsdf["New LTC HCW Deaths"]
lhcwdeathsdf = lhcwdeathsdf[["ReportedDate", "Situation", "DeathCount"]]
lhcwdeathsdf

Unnamed: 0,ReportedDate,Situation,DeathCount
105,2020-05-21,LTC HCW,0.0
106,2020-05-22,LTC HCW,0.0
107,2020-05-23,LTC HCW,0.0
108,2020-05-24,LTC HCW,0.0
109,2020-05-25,LTC HCW,0.0
110,2020-05-26,LTC HCW,0.0
111,2020-05-27,LTC HCW,0.0
112,2020-05-28,LTC HCW,1.0
113,2020-05-29,LTC HCW,0.0
114,2020-05-30,LTC HCW,0.0


In [46]:
#hide
situationdeathsdf = unknowndeathsdf.append(residentdeathsdf).append(lhcwdeathsdf).append(otherdeathsdf)
situationdeathsdf

Unnamed: 0,ReportedDate,Situation,DeathCount
40,2020-03-17,Unknown,1.0
42,2020-03-19,Unknown,1.0
45,2020-03-22,Unknown,3.0
46,2020-03-23,Unknown,1.0
47,2020-03-24,Unknown,2.0
...,...,...,...
138,2020-06-23,Other,0.0
140,2020-06-25,Other,4.0
141,2020-06-26,Other,0.0
142,2020-06-27,Other,2.0


In [47]:
#hide_input

testsdeathsavg = testconfirmeddf[(testconfirmeddf["New Deaths Mean 5d"].notnull()) & (testconfirmeddf["New Deaths Mean 5d"] != 0)]

sitdeathsbar = px.bar(situationdeathsdf, x="ReportedDate", y="DeathCount",
      color="Situation",
      color_discrete_sequence=["silver", "pink", "lightskyblue", "#ff9999"],
    title="Ontario COVID-19 Deaths by Reported Date",
                      labels={"DeathCount" : "Deaths"}
      )
sitdeathsbar.add_trace(go.Scatter(x=testsdeathsavg["ReportedDate"], y=testsdeathsavg["New Deaths Mean 5d"], mode="lines",
                             name="5d avg", 
                              line=dict(color="black", dash="solid")))

## Regional by Public Health Unit

In [48]:
#hide
top4growth = last7deltadf.sort_values(by=["TotalChangePercent"], ascending=[False]) \
.head(4)[["Reporting_PHU", "TotalChangePercent"]].reset_index()

activetop4df = pd.merge(top4growth, ageactivedf, how="left", \
left_on=["Reporting_PHU"], \
right_on=["Reporting_PHU"])

activetop4df

Unnamed: 0,index,Reporting_PHU,TotalChangePercent,Row_ID,Accurate_Episode_Date,Case_Reported_Date,Test_Reported_Date,Specimen_Date,Age_Group,Client_Gender,Case_AcquisitionInfo,Outcome1,Outbreak_Related,Reporting_PHU_Address,Reporting_PHU_City,Reporting_PHU_Postal_Code,Reporting_PHU_Website,Reporting_PHU_Latitude,Reporting_PHU_Longitude,Datetime
0,27,"Kingston, Frontenac and Lennox & Addington Pub...",25.000000,22517,2020-06-18,2020-06-20,2020-06-20,2020-06-19,20s,FEMALE,No Epi-link,Not Resolved,,221 Portsmouth Avenue,Kingston,K7M 1V5,www.kflaph.ca,44.227874,-76.525211,2020-06-18
1,27,"Kingston, Frontenac and Lennox & Addington Pub...",25.000000,23301,2020-06-25,2020-06-27,2020-06-26,2020-06-25,50s,MALE,OB,Not Resolved,Yes,221 Portsmouth Avenue,Kingston,K7M 1V5,www.kflaph.ca,44.227874,-76.525211,2020-06-25
2,27,"Kingston, Frontenac and Lennox & Addington Pub...",25.000000,23340,2020-06-23,2020-06-24,2020-06-24,2020-06-23,30s,MALE,CC,Not Resolved,,221 Portsmouth Avenue,Kingston,K7M 1V5,www.kflaph.ca,44.227874,-76.525211,2020-06-23
3,27,"Kingston, Frontenac and Lennox & Addington Pub...",25.000000,23341,2020-06-22,2020-06-23,2020-06-23,2020-06-23,30s,FEMALE,CC,Not Resolved,,221 Portsmouth Avenue,Kingston,K7M 1V5,www.kflaph.ca,44.227874,-76.525211,2020-06-22
4,27,"Kingston, Frontenac and Lennox & Addington Pub...",25.000000,23342,2020-06-25,2020-06-27,2020-06-26,2020-06-25,20s,FEMALE,No Info-Missing,Not Resolved,,221 Portsmouth Avenue,Kingston,K7M 1V5,www.kflaph.ca,44.227874,-76.525211,2020-06-25
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75,29,Wellington-Dufferin-Guelph Public Health,3.671706,32800,2020-06-18,2020-06-20,,,<20,FEMALE,OB,Not Resolved,Yes,160 Chancellors Way,Guelph,N1G 0E1,www.wdgpublichealth.ca,43.524881,-80.233743,2020-06-18
76,29,Wellington-Dufferin-Guelph Public Health,3.671706,33096,2020-06-14,2020-06-16,2020-06-18,2020-06-14,40s,MALE,OB,Not Resolved,Yes,160 Chancellors Way,Guelph,N1G 0E1,www.wdgpublichealth.ca,43.524881,-80.233743,2020-06-14
77,29,Wellington-Dufferin-Guelph Public Health,3.671706,33358,2020-06-22,2020-06-23,2020-06-23,2020-06-22,30s,MALE,No Info-Missing,Not Resolved,,160 Chancellors Way,Guelph,N1G 0E1,www.wdgpublichealth.ca,43.524881,-80.233743,2020-06-22
78,29,Wellington-Dufferin-Guelph Public Health,3.671706,33996,2020-05-24,2020-05-27,2020-05-27,2020-05-24,60s,MALE,No Epi-link,Not Resolved,,160 Chancellors Way,Guelph,N1G 0E1,www.wdgpublichealth.ca,43.524881,-80.233743,2020-05-24


In [49]:
#hide
agactivetop4df = activetop4df \
.groupby(["Reporting_PHU", "TotalChangePercent", "Accurate_Episode_Date", "Age_Group"]) \
.count()["Row_ID"] \
.reset_index() \
.rename(columns={"Row_ID" : "Cases"})

agactivetop4df["OnsetWithin"] = "Any date"

agactivetop4last14df = activetop4df[activetop4df["Datetime"] >= now - timedelta(days=14)] \
.groupby(["Reporting_PHU", "TotalChangePercent", "Accurate_Episode_Date", "Age_Group"]) \
.count()["Row_ID"] \
.reset_index() \
.rename(columns={"Row_ID" : "Cases"})

agactivetop4last14df["OnsetWithin"] = "14 days"

agactivetop4last7df = activetop4df[activetop4df["Datetime"] >= now - timedelta(days=7)] \
.groupby(["Reporting_PHU", "TotalChangePercent", "Accurate_Episode_Date", "Age_Group"]) \
.count()["Row_ID"] \
.reset_index() \
.rename(columns={"Row_ID" : "Cases"})

agactivetop4last7df["OnsetWithin"] = "7 days"

agactivetop4last3df = activetop4df[activetop4df["Datetime"] >= now - timedelta(days=3)] \
.groupby(["Reporting_PHU", "TotalChangePercent", "Accurate_Episode_Date", "Age_Group"]) \
.count()["Row_ID"] \
.reset_index() \
.rename(columns={"Row_ID" : "Cases"})

agactivetop4last3df["OnsetWithin"] = "3 days"

appendagactivetop4df = agactivetop4df.append(agactivetop4last14df).append(agactivetop4last7df).append(agactivetop4last3df)

sumactivetop4df = appendagactivetop4df \
.groupby(["Reporting_PHU", "TotalChangePercent", "OnsetWithin", "Age_Group"]) \
.sum() \
.reset_index()

sumactivetop4df["Age Group"] = sumactivetop4df.apply(lambda row: getAgeNumber(row["Age_Group"]), axis=1)
sumactivetop4df["Onset"] = pd.Categorical(sumactivetop4df["OnsetWithin"], ["Any date", "14 days", "7 days", "3 days"])

sumactivetop4df = sumactivetop4df.sort_values(by=["Age Group", "Cases", "Onset"], ascending=[True, False, True])

sumactivetop4df

Unnamed: 0,Reporting_PHU,TotalChangePercent,OnsetWithin,Age_Group,Cases,Age Group,Onset
47,Wellington-Dufferin-Guelph Public Health,3.671706,Any date,<20,8,0,Any date
33,Wellington-Dufferin-Guelph Public Health,3.671706,14 days,<20,8,0,14 days
39,Wellington-Dufferin-Guelph Public Health,3.671706,7 days,<20,4,0,7 days
23,"Kingston, Frontenac and Lennox & Addington Pub...",25.0,Any date,<20,2,0,Any date
13,"Kingston, Frontenac and Lennox & Addington Pub...",25.0,14 days,<20,2,0,14 days
18,"Kingston, Frontenac and Lennox & Addington Pub...",25.0,7 days,<20,2,0,7 days
34,Wellington-Dufferin-Guelph Public Health,3.671706,3 days,<20,2,0,3 days
40,Wellington-Dufferin-Guelph Public Health,3.671706,Any date,20s,9,20,Any date
27,Wellington-Dufferin-Guelph Public Health,3.671706,14 days,20s,7,20,14 days
19,"Kingston, Frontenac and Lennox & Addington Pub...",25.0,Any date,20s,5,20,Any date


In [50]:
#hide_input
activetop4bar = px.bar(sumactivetop4df, y="Cases", x='Age_Group', facet_col="Reporting_PHU", facet_col_wrap=2,
    color="Onset", color_discrete_sequence=["maroon", "darkorange", "yellow", "white"],
    labels={"Onset" : "Onset w/in last", "Age_Group" : "Age Group", "Reporting_PHU" : "PHU"},
    title="Active Cases by Age Group in Top Four Growth Regions",
    hover_name="Age_Group"
    )
activetop4bar.update_layout(barmode='overlay', hovermode="x")

In [51]:
#hide
startdate = "2020-03-01"
fromto = "from " + startdate + " to " + str(maxdate)
agdf = aggdf[aggdf["Date"] >= startdate]
prefix = "Cumulative Confirmed Cases per Ontario Region"
titlelatest = prefix + " up to " + str(maxdate)

In [52]:
#hide_input
dailyline = px.line(agdf, x="Date", y="Count", color="Area", 
                    color_discrete_sequence=["limegreen", "slategray", "magenta", "peru"],
                    hover_name="Reporting_PHU", line_group="Reporting_PHU",
                    title="Daily Confirmed Cases per Ontario Region " + fromto,
       labels={"Reporting_PHU" : "Public Health Unit", "Date" : "Symptom Onset Date"}
       )
dailyline.show()

In [53]:
#hide_input
cumline = px.line(agdf, x="Date", y="Total", color="Area", 
                  color_discrete_sequence=["limegreen", "slategray", "magenta", "peru"],
                  hover_name="Reporting_PHU", line_group="Reporting_PHU",
                  title=prefix + " " + fromto,
        labels={"Reporting_PHU" : "Public Health Unit", "Date" : "Symptom Onset Date"}
)
cumline.show()

In [54]:
#hide
agephuobdf = origdf[origdf["Case_AcquisitionInfo"] == "OB"] \
.groupby(["Age_Group", "Reporting_PHU"]).count()["Row_ID"] \
.reset_index() \
.rename(columns={"Row_ID" : "OB Count"})

agephudf = origdf \
.groupby(["Age_Group", "Reporting_PHU"]).count()["Row_ID"] \
.reset_index() \
.rename(columns={"Row_ID" : "Total"})

totalagephudf = origdf \
.groupby(["Reporting_PHU"]).count()["Row_ID"] \
.reset_index() \
.rename(columns={"Row_ID" : "Cases"})

agephuobdf = pd.merge(agephuobdf, agephudf, how="right",
left_on=["Age_Group", "Reporting_PHU"], \
right_on=["Age_Group", "Reporting_PHU"]) \
.fillna(0)

agephuobdf = pd.merge(agephuobdf, totalagephudf, how="right",
left_on=["Reporting_PHU"], \
right_on=["Reporting_PHU"]) \
.fillna(0)

agephuobdf["AgeGroup"] = agephuobdf.apply(lambda r: getAgeNumber(r["Age_Group"]), axis=1)
agephuobdf["PercentTotal"] = agephuobdf.apply(lambda r: r["OB Count"]/r["Total"] * 100, axis=1)
agephuobdf["Area"] = agephuobdf.apply(lambda r: getArea(r["Reporting_PHU"]), axis=1)

agephuobdf = agephuobdf.sort_values(by=["Cases", "AgeGroup"], ascending=[False, True])

agephuobdf.head(20)

Unnamed: 0,Age_Group,Reporting_PHU,OB Count,Total,Cases,AgeGroup,PercentTotal,Area
236,UNKNOWN,Toronto Public Health,5.0,8,13026,-1,62.5,GTA
235,<20,Toronto Public Health,83.0,537,13026,0,15.456238,GTA
227,20s,Toronto Public Health,345.0,1747,13026,20,19.74814,GTA
228,30s,Toronto Public Health,513.0,1808,13026,30,28.373894,GTA
229,40s,Toronto Public Health,602.0,1868,13026,40,32.226981,GTA
230,50s,Toronto Public Health,711.0,2115,13026,50,33.617021,GTA
231,60s,Toronto Public Health,547.0,1459,13026,60,37.491432,GTA
232,70s,Toronto Public Health,558.0,986,13026,70,56.592292,GTA
233,80s,Toronto Public Health,1212.0,1444,13026,80,83.933518,GTA
234,90s,Toronto Public Health,985.0,1054,13026,90,93.45351,GTA


In [55]:
#hide_input
px.scatter(agephuobdf, x="Age_Group", y="PercentTotal", 
           color="Area", color_discrete_sequence=["slategray", "magenta", "peru", "limegreen"],
           size="OB Count", size_max=30,
           hover_name="Reporting_PHU", hover_data=["Total"],
           labels={"PercentTotal" : "% Cases Acquired from Inst. Outbreak", "Age_Group" : "Age Group", "OB Count" : "Cases"},
           title="Percentage of Cases Acquired from Institutional Outbreak<br>by Age Group by Public Health Unit"
          )

In [56]:
#hide
fataldf = origdf[(origdf["Age_Group"] != "Unknown") & (origdf["Outcome1"] == "Fatal")] \
.groupby(["Reporting_PHU", "Age_Group"]) \
.count()["Row_ID"] \
.reset_index() \
.rename(columns={"Row_ID" : "Total Fatal"})

fataldf

Unnamed: 0,Reporting_PHU,Age_Group,Total Fatal
0,Brant County Health Unit,50s,1
1,Brant County Health Unit,60s,1
2,Brant County Health Unit,80s,3
3,Chatham-Kent Health Unit,80s,1
4,Durham Region Health Department,30s,2
...,...,...,...
122,York Region Public Health Services,50s,14
123,York Region Public Health Services,60s,16
124,York Region Public Health Services,70s,38
125,York Region Public Health Services,80s,81


In [57]:
#hide
concludeddf = origdf[(origdf["Age_Group"] != "Unknown") & (origdf["Outcome1"] != "Not Resolved")] \
.groupby(["Reporting_PHU", "Age_Group"]) \
.count()["Row_ID"] \
.reset_index() \
.rename(columns={"Row_ID" : "Total Concluded"})

concludeddf

Unnamed: 0,Reporting_PHU,Age_Group,Total Concluded
0,Algoma Public Health Unit,20s,3
1,Algoma Public Health Unit,30s,1
2,Algoma Public Health Unit,40s,4
3,Algoma Public Health Unit,50s,5
4,Algoma Public Health Unit,60s,6
...,...,...,...
291,York Region Public Health Services,60s,356
292,York Region Public Health Services,70s,230
293,York Region Public Health Services,80s,294
294,York Region Public Health Services,90s,193


In [58]:
#hide
concludedfataldf = pd.merge(concludeddf, fataldf, how="left", \
left_on=["Reporting_PHU", "Age_Group"], \
right_on=["Reporting_PHU", "Age_Group"]) \
.fillna(0) \
.sort_values(by=["Total Fatal", "Age_Group"], ascending=[False,True]).reset_index()

pattern = re.compile('^[0-9]+s$')
    
concludedfataldf["Age Group"] = concludedfataldf.apply(lambda row: getAgeNumber(row["Age_Group"]), axis=1)
concludedfataldf["% fatal"] = concludedfataldf.apply(lambda row: row["Total Fatal"]/row["Total Concluded"]*100, axis=1)
concludedfataldf["Area"] = concludedfataldf.apply(lambda row: getArea(row["Reporting_PHU"]), axis=1)

concludedfataldf

Unnamed: 0,index,Reporting_PHU,Age_Group,Total Concluded,Total Fatal,Age Group,% fatal,Area
0,265,Toronto Public Health,80s,1380,372.0,80,26.956522,GTA
1,266,Toronto Public Health,90s,1015,347.0,90,34.187192,GTA
2,264,Toronto Public Health,70s,948,196.0,70,20.675105,GTA
3,179,Peel Public Health,80s,354,112.0,80,31.638418,GTA
4,170,Ottawa Public Health,80s,270,106.0,80,39.259259,Ottawa
...,...,...,...,...,...,...,...,...
291,277,Wellington-Dufferin-Guelph Public Health,<20,20,0.0,0,0.000000,Other
292,286,Windsor-Essex County Health Unit,<20,31,0.0,0,0.000000,Windsor-Essex
293,295,York Region Public Health Services,<20,110,0.0,0,0.000000,GTA
294,182,Peel Public Health,UNKNOWN,2,0.0,-1,0.000000,GTA


In [59]:
#hide
totalconcludedfataldf = concludedfataldf.groupby(["Age Group", "Age_Group"]).sum().reset_index()
totalconcludedfataldf["% fatal"] = totalconcludedfataldf.apply(lambda row: row["Total Fatal"]/row["Total Concluded"]*100, axis=1)
totalconcludedfataldf

Unnamed: 0,Age Group,Age_Group,index,Total Concluded,Total Fatal,% fatal
0,-1,UNKNOWN,450,10,0.0,0.0
1,0,<20,4928,1452,1.0,0.068871
2,20,20s,4885,4751,4.0,0.084193
3,30,30s,4919,4496,7.0,0.155694
4,40,40s,4699,4680,20.0,0.42735
5,50,50s,4986,5372,84.0,1.563663
6,60,60s,5020,3772,231.0,6.124072
7,70,70s,4892,2532,476.0,18.799368
8,80,80s,4550,3340,970.0,29.041916
9,90,90s,4331,2360,865.0,36.652542


In [60]:
#hide_input
agedeathsbubble = px.scatter(concludedfataldf, x="Age Group", y="% fatal", 
           hover_data=["Total Concluded"], hover_name="Reporting_PHU",
           color="Area", color_discrete_sequence=["slategray", "magenta", "limegreen", "peru"],
            size='Total Fatal', size_max=45, 
           labels={"% fatal" : "Percent Fatal", "Total Fatal" : "Total Fatalities", "Reporting_PHU" : "Public Health Unit"},
           title="Percent Fatality for Concluded Cases per Age Group per Public Health Unit"
          )
agedeathsbubble.add_trace(go.Scatter(x=totalconcludedfataldf["Age Group"], y=totalconcludedfataldf["% fatal"], mode="lines",
                             name="ON avg", 
                              line=dict(color="black", dash="solid")))

In [61]:
#hide
origdf.groupby(["Case_AcquisitionInfo"]).count()

Unnamed: 0_level_0,Row_ID,Accurate_Episode_Date,Case_Reported_Date,Test_Reported_Date,Specimen_Date,Age_Group,Client_Gender,Outcome1,Outbreak_Related,Reporting_PHU,Reporting_PHU_Address,Reporting_PHU_City,Reporting_PHU_Postal_Code,Reporting_PHU_Website,Reporting_PHU_Latitude,Reporting_PHU_Longitude
Case_AcquisitionInfo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
CC,10237,10237,10237,10173,10190,10237,10237,10237,0,10237,10237,10237,10237,10237,10237,10237
No Epi-link,6301,6301,6301,6241,6259,6301,6301,6301,0,6301,6301,6301,6301,6301,6301,6301
No Info-Missing,1154,1154,1154,1091,1097,1154,1154,1154,0,1154,1154,1154,1154,1154,1154,1154
No Info-Unk,1336,1335,1336,1334,1335,1336,1336,1336,0,1336,1336,1336,1336,1336,1336,1336
OB,13906,13905,13906,13688,13741,13906,13906,13906,13906,13906,13906,13906,13906,13906,13906,13906
Travel,1720,1720,1720,1677,1696,1720,1720,1720,8,1720,1720,1720,1720,1720,1720,1720


In [62]:
#hide

acqdf = origdf \
.groupby(["Case_AcquisitionInfo", "Reporting_PHU"]).count()["Row_ID"].reset_index().rename(columns={"Row_ID" : "Count"})
acqdf["Acquisition"] = acqdf.apply(lambda r: getAcquisition(r["Case_AcquisitionInfo"]), axis=1)

acqdf = pd.merge(origdf.groupby(["Reporting_PHU"]).count()["Row_ID"].reset_index().rename(columns={"Row_ID" : "Total"}), \
                 acqdf, how="left", \
left_on=["Reporting_PHU"], \
right_on=["Reporting_PHU"]) \
.fillna(0)

acqdf["Region"] = acqdf.apply(lambda r: getRegion(r["Reporting_PHU"]), axis=1)
acqdf["Acquisition"] = pd.Categorical(acqdf["Acquisition"], acqOrder)
acqdf = acqdf.sort_values(by=["Total", "Acquisition"], ascending=[False, True])

acqdf

Unnamed: 0,Reporting_PHU,Total,Case_AcquisitionInfo,Count,Acquisition,Region
154,Toronto Public Health,13026,Travel,374,Travel,Toronto
149,Toronto Public Health,13026,CC,3393,Confirmed case,Toronto
150,Toronto Public Health,13026,No Epi-link,1990,Comm. spread,Toronto
153,Toronto Public Health,13026,OB,5561,Inst. outbreak,Toronto
152,Toronto Public Health,13026,No Info-Unk,1245,Unknown,Toronto
...,...,...,...,...,...,...
2,Algoma Public Health Unit,24,No Info-Unk,1,Unknown,Algoma
148,Timiskaming Health Unit,18,Travel,2,Travel,Timiskaming
145,Timiskaming Health Unit,18,CC,6,Confirmed case,Timiskaming
146,Timiskaming Health Unit,18,No Epi-link,3,Comm. spread,Timiskaming


In [63]:
#hide_input
phuacqbar = px.bar(acqdf, y="Region", x="Count", color="Acquisition", orientation="h", height=800, 
      color_discrete_sequence=acqColours,
      labels={"Region" : "Public Health Unit", "Count" : "Total Confirmed Cases"},
      title="Total Confirmed Cases per Public Health Unit up to " + str(maxdate))

phuacqbar.update_layout(yaxis=dict(autorange="reversed"))

In [64]:
#hide_input
fig = px.scatter_mapbox(agdf, lat="Reporting_PHU_Latitude", lon="Reporting_PHU_Longitude",     
                        color="Confirmed Percent Travelled", 
                        animation_frame="Date",
                        size="Total", hover_name="Reporting_PHU",
                        color_continuous_scale=["red", "blue"],
                        range_color=[0,100],
                        size_max=40, zoom=5.9, 
                        title="Cumulative Cases per Region over Time " + fromto,
                        center=dict(lat=44.5,lon=-78.4),
                        height=800,
                        labels={'Confirmed Percent Travelled':'% travelled', "Date" : "Symptom Onset Date"})
fig.update_layout(mapbox_style="open-street-map")
fig.show()

In [65]:
#hide_input
HTML(f'Data was last retrieved from <a href="https://data.ontario.ca/en/dataset/confirmed-positive-cases-of-covid-19-in-ontario/resource/455fd63b-603d-4608-8216-7d8647f43350" title="Ontario cases">Confirmed positive cases of COVID19 in Ontario</a> and <a href="https://data.ontario.ca/dataset/status-of-covid-19-cases-in-ontario/resource/ed270bb8-340b-41f9-a7c6-e8ef587e6d11" title="Ontario testing status">Status of COVID-19 cases in Ontario</a> on {refreshtime.strftime("%Y-%m-%d %H:%M")}.')