# Ontario COVID-19 data visualizations of confirmed cases
> "Maps and graphs of confirmed COVID-19 cases in Ontario, Canada"

- author: <a href=https://github.com/anomal/>Sophiah Ho</a> 
- categories: [ontario, canada, covid-19, province, hotspot, toronto, waterloo, ottawa, peel, york region, halton, testing]
- image: images/ontario_confirmed_map.png
- permalink: /ontario-confirmed-cases-per-region/
- toc: true

In [1]:
#hide
import urllib, json, os, time
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import dateutil.parser
import re
from IPython.display import HTML
from datetime import datetime, timedelta, timezone

# Confirmed-positive-cases CSV on data.ontario.ca and the local copy that is
# used when the download fails.
url = 'https://data.ontario.ca/dataset/f4112442-bdc8-45d2-be3c-12efae72fb27/resource/455fd63b-603d-4608-8216-7d8647f43350/download/conposcovidloc.csv'  
cachefilename = "data_files/ontario-cases-data.cache.csv"
try:
    origdf = pd.read_csv(url)
except Exception:
    # Download or parsing failed: fall back to the last cached copy. `cached`
    # tells later cells to show the "cached data" notice.
    cached = True
    origdf = pd.read_csv(cachefilename)
else:
    cached = False
    refreshtime = datetime.now()
    try:
        # Create the cache directory first; otherwise a missing directory made
        # the write fail and the freshly downloaded data was thrown away.
        os.makedirs(os.path.dirname(cachefilename) or ".", exist_ok=True)
        origdf.to_csv(cachefilename, index=False)
    except OSError as cache_error:
        # The cache is best-effort: keep the fresh data even if it cannot be saved.
        print(f"Could not write {cachefilename}: {cache_error}")

# Keep only rows whose episode date string sorts between "2020" and "2100".
df = origdf[(origdf["Accurate_Episode_Date"] < "2100") & (origdf["Accurate_Episode_Date"] > "2020")]
df

FileNotFoundError: [Errno 2] File data_files/ontario-cases-data.cache.csv does not exist: 'data_files/ontario-cases-data.cache.csv'

In [None]:
#hide
# Non-null value counts of every column, per episode date.
df.groupby("Accurate_Episode_Date").count()

In [None]:
#hide

# First and last episode dates in the filtered data, as datetime.date objects.
mindate, maxdate = (
    dateutil.parser.parse(bound).date()
    for bound in (df["Accurate_Episode_Date"].min(), df["Accurate_Episode_Date"].max())
)

In [None]:
#hide_input

# Prefix the introduction with a notice when the download failed and the
# cached copy of the data is being shown.
errormsg = (
    'An error occurred retrieving data from <a href="https://data.ontario.ca/en/dataset/confirmed-positive-cases-of-covid-19-in-ontario/resource/455fd63b-603d-4608-8216-7d8647f43350" title="Ontario data">Confirmed positive cases of COVID19 in Ontario</a>. Cached data is used. '
    if cached
    else ''
)

HTML(errormsg + "The graphs are from either Ontario status of confirmed cases data, where the date used is the client's symptom onset date, or Ontario testing status data, where the date used is the reported date (rep. date). Data based on Symptom Onset Date is lagging and can be updated retroactively. Most of the graphs are interactive. You can toggle off figures by clicking on the item in the legend, or select an area on the graph to zoom in.")

In [None]:
#hide
# Cases whose outcome is still "Not Resolved".
activedf = df[df["Outcome1"] == "Not Resolved"]

# Parse the episode date column-wise. A row-wise apply(axis=1) returns a
# DataFrame instead of a Series when no rows are active, which then breaks the
# assign below; mapping over the column always yields a Series.
activedatetimes = pd.to_datetime(activedf["Accurate_Episode_Date"].map(dateutil.parser.parse))

# .values drops the index so the parsed dates are attached positionally.
activedf = activedf.assign(Datetime=activedatetimes.values)

activedf

In [None]:
#hide
# Active cases per public health unit, regardless of onset date.
agactivedf = (
    activedf
    .groupby(["Reporting_PHU", "Reporting_PHU_Latitude", "Reporting_PHU_Longitude"])["Row_ID"]
    .count()
    .reset_index(name="Cases")
    .assign(OnsetWithin="Any date")
)

agactivedf

In [None]:
#hide
# Reference time for the "onset within the last N days" windows in this and
# the following cells.
now = datetime.now()

# Active cases per public health unit with onset in the last 14 days.
agactivelast14df = (
    activedf
    .loc[lambda cases: cases["Datetime"] >= now - timedelta(days=14)]
    .groupby(["Reporting_PHU", "Reporting_PHU_Latitude", "Reporting_PHU_Longitude"])["Row_ID"]
    .count()
    .reset_index(name="Cases")
    .assign(OnsetWithin="14 days")
)

agactivelast14df

In [None]:
#hide
# Active cases per public health unit with onset in the last 7 days.
agactivelast7df = (
    activedf
    .loc[lambda cases: cases["Datetime"] >= now - timedelta(days=7)]
    .groupby(["Reporting_PHU", "Reporting_PHU_Latitude", "Reporting_PHU_Longitude"])["Row_ID"]
    .count()
    .reset_index(name="Cases")
    .assign(OnsetWithin="7 days")
)

agactivelast7df

In [None]:
#hide
# Active cases per public health unit with onset in the last 3 days.
agactivelast3df = (
    activedf
    .loc[lambda cases: cases["Datetime"] >= now - timedelta(days=3)]
    .groupby(["Reporting_PHU", "Reporting_PHU_Latitude", "Reporting_PHU_Longitude"])["Row_ID"]
    .count()
    .reset_index(name="Cases")
    .assign(OnsetWithin="3 days")
)

agactivelast3df

In [None]:
#hide
# Stack the four onset windows into one frame for the map. pd.concat replaces
# the chained DataFrame.append calls (append was removed in pandas 2.0); like
# append without ignore_index, it keeps each frame's original row index.
appendagactivedf = pd.concat([agactivedf, agactivelast14df, agactivelast7df, agactivelast3df])
appendagactivedf

## Hotspots

In [None]:
#hide_input
# Bubble map of active cases: one bubble per public health unit and onset
# window, sized by case count. update_layout returns the same figure.
activemap = px.scatter_mapbox(
    appendagactivedf,
    lat="Reporting_PHU_Latitude",
    lon="Reporting_PHU_Longitude",
    size="Cases",
    size_max=28,
    color="OnsetWithin",
    color_discrete_sequence=["maroon", "darkorange", "yellow", "white"],
    hover_name="Reporting_PHU",
    labels={"OnsetWithin" : "Onset w/in last"},
    center={"lat": 45, "lon": -79.4},
    zoom=5.4,
    height=800,
    title="Currently Active (Not Resolved) Confirmed Cases per Public Health Unit",
).update_layout(mapbox_style="open-street-map")

# The map is only displayed when the data was freshly downloaded.
if not cached:
    activemap.show()

In [None]:
#hide
import re

# Key columns shared by every groupby/merge in this cell.
phukeys = ["Reporting_PHU", "Reporting_PHU_Latitude", "Reporting_PHU_Longitude"]
episodephukeys = ["Accurate_Episode_Date"] + phukeys
datephukeys = ["Date"] + phukeys
countcols = ["Count", "Travelled_Count", "Pending_Count"]

# Daily cases per public health unit (PHU) acquired through travel.
traveldf = (
    df[df["Case_AcquisitionInfo"] == "Travel"]
    .groupby(episodephukeys)["Row_ID"].count()
    .reset_index(name="Travelled_Count")
)

# Daily cases per PHU whose acquisition information is missing or unknown.
pendingdf = (
    df[(df["Case_AcquisitionInfo"] == "No Info-Missing") | (df["Case_AcquisitionInfo"] == "No Info-Unk")]
    .groupby(episodephukeys)["Row_ID"].count()
    .reset_index(name="Pending_Count")
)

# All daily cases per PHU.
countdf = (
    df
    .groupby(episodephukeys)["Row_ID"].count()
    .reset_index(name="Count")
)

# Attach the travel and pending counts; PHU/date pairs without any become 0.
joindf = pd.merge(countdf, traveldf, how="left", on=episodephukeys).fillna(0)
joindf = pd.merge(joindf, pendingdf, how="left", on=episodephukeys).fillna(0)

# Parse the date strings column-wise; pd.to_datetime guarantees a datetime64
# column so the merge against the date_range below matches on dtype.
joindf["Datetime"] = pd.to_datetime(joindf["Accurate_Episode_Date"].map(dateutil.parser.parse))

# One row per PHU, and one row per calendar day between mindate and maxdate;
# merging both on a constant key yields every (day, PHU) combination.
phudf = countdf.groupby(by=phukeys, as_index=False).first()[phukeys]
phudf["key"] = 0

dates = pd.date_range(mindate, maxdate).tolist()
datetimedf = pd.DataFrame(dates, columns=["Datetime"])
datetimedf["key"] = 0

cartesianproddf = pd.merge(datetimedf, phudf, on="key")
cartesianproddf = cartesianproddf.drop(columns=['key'])

# Days on which a PHU reported nothing get zero counts.
joindf = pd.merge(cartesianproddf, joindf, how="left", on=["Datetime"] + phukeys).fillna(0)

# Same text as str(timestamp.date()): YYYY-MM-DD.
joindf["Date"] = joindf["Datetime"].dt.strftime("%Y-%m-%d")

aggdf = joindf[datephukeys + countcols].sort_values(by=["Date","Reporting_PHU"]).reset_index(drop=True)

# Running totals per PHU (index level 1). Only the numeric count columns are
# summed: a bare .sum() would also try to add up the datetime and mixed
# string/0 columns, which newer pandas rejects with a TypeError.
cumsumdf = (
    joindf
    .groupby(datephukeys)[countcols].sum()
    .groupby(level=1).cumsum()
    .reset_index()
    .rename(columns={"Count" : "Total", "Travelled_Count" : "Total Travelled",
                     "Pending_Count" : "Total Information Pending"})
)

aggdf = pd.merge(aggdf, cumsumdf, how="left", on=datephukeys)

# Day-over-day percent change of the running total, per PHU.
incdf = (
    aggdf
    .groupby(datephukeys)[["Total"]].sum()
    .groupby(level=1).pct_change().mul(100)
    .reset_index()
    .rename(columns={"Total" : "Percent Increase"})
    [datephukeys + ["Percent Increase"]]
)

aggdf = pd.merge(aggdf, incdf, how="left", on=datephukeys)

def cumulativeNotTravelled(total, totalTravelled, totalPending):
    """Return the cases left after removing travel and information-pending cases."""
    afterTravel = total - totalTravelled
    return afterTravel - totalPending

def cumulativePercentTravelled(total, totalTravelled, totalPending):
    """Return travel cases as a percentage of cases with known acquisition.

    Cases with pending information are left out of the denominator; when
    nothing remains the result is 0.
    """
    denominator = total - totalPending
    if denominator == 0:
        return 0
    return 100 * totalTravelled / denominator

def getRegion(phu_name):
    """Return a short display label for a public health unit name.

    Everything from the first "District", "Health" or "Public" that is
    followed by more text is cut off (together with a preceding ", " or a
    joining "& "/"and "), " and " becomes " & ", and a few long names are
    replaced by fixed abbreviations.
    """
    region = re.sub("(, )?(((&|and) )?District|Health|Public).+$", "", phu_name).replace(" and ", " & ")
    # Cutting the suffix leaves the space that preceded it ("Toronto "); strip
    # it so labels are clean and consistent with the abbreviations below.
    region = region.strip()
    abbreviations = (
        ("Hastings & Prince Edward Counties", "Hastings & P.E. Counties"),
        # Only reachable when nothing follows "District"; otherwise the regex
        # above has already removed that word.
        ("North Bay Parry Sound District", "North Bay Parry Sound"),
        ("Kingston, Frontenac", "Kingston, Front./Len., Add."),
        ("Haliburton, Kawartha", "Haliburton, Kawar., Pine R."),
    )
    for prefix, label in abbreviations:
        if region.startswith(prefix):
            return label
    return region
    
def getArea(phu_name):
    """Map a public health unit name to "GTA", "Ottawa", "Windsor-Essex" or "Other"."""
    gtaUnits = (
        "Toronto Public Health",
        "York Region Public Health Services",
        "Peel Public Health",
        "Durham Region Health Department",
        "Halton Region Health Department",
    )
    if phu_name in gtaUnits:
        return "GTA"
    if phu_name == "Ottawa Public Health":
        return "Ottawa"
    if phu_name == "Windsor-Essex County Health Unit":
        return "Windsor-Essex"
    return "Other"

# Derived per-row columns: non-travel totals, travel share, short label and
# coarse area of the reporting public health unit.
aggdf["Total Not Travelled"] = aggdf.apply(
    lambda rec: cumulativeNotTravelled(
        total=rec["Total"],
        totalTravelled=rec["Total Travelled"],
        totalPending=rec["Total Information Pending"],
    ),
    axis=1,
)
aggdf["Confirmed Percent Travelled"] = aggdf.apply(
    lambda rec: cumulativePercentTravelled(
        total=rec["Total"],
        totalTravelled=rec["Total Travelled"],
        totalPending=rec["Total Information Pending"],
    ),
    axis=1,
)
aggdf["Region"] = aggdf.apply(lambda rec: getRegion(rec["Reporting_PHU"]), axis=1)
aggdf["Area"] = aggdf.apply(lambda rec: getArea(rec["Reporting_PHU"]), axis=1)
aggdf

In [None]:
#hide
def getAgeNumber(ageGroup):
    """Return a sortable number for an age-group label.

    "UNKNOWN" in any letter case gives -1, "<20" gives 0, a label that starts
    with digits ("20s", "90+") gives those digits as an int, and anything
    else, including non-string values such as NaN, gives -2.
    """
    if not isinstance(ageGroup, str):
        return -2
    if ageGroup.upper() == "UNKNOWN":
        return -1
    if ageGroup == "<20":
        return 0
    # Read the leading digits instead of chopping the last character, so "20"
    # is not read as 2 and "20-29" does not raise ValueError.
    leadingDigits = re.match("[0-9]+", ageGroup)
    if leadingDigits:
        return int(leadingDigits.group())
    return -2

In [None]:
#hide
# Rows for the most recent episode date, smallest daily count first.
latestdf = aggdf.loc[aggdf["Date"] == str(maxdate)].sort_values(by="Count")
latestdf

In [None]:
#hide
import datetime as dt
# Snapshot of the cumulative totals seven days before the latest date.
last7days = maxdate - timedelta(days=7)
last7df = aggdf.loc[aggdf["Date"] == str(last7days)].rename(columns={"Total" : "TotalLast7"})

def getTotalChangePercent(totalChange, totalLast7):
    """Return the weekly change as a percentage of the total seven days earlier.

    With a zero baseline the ratio is undefined: growth from zero is reported
    as 100, and no change from zero as 0 (it used to be reported as 100 too).
    """
    if totalLast7 == 0:
        return 100 if totalChange != 0 else 0
    return totalChange / totalLast7 * 100

# Pair each unit's latest cumulative total with its total a week earlier,
# then derive the absolute and relative weekly change.
last7deltadf = pd.merge(
    latestdf.rename(columns={"Total" : "TotalLatest"}),
    last7df,
    how="left",
    on=["Region", "Reporting_PHU", "Reporting_PHU_Latitude", "Reporting_PHU_Longitude"],
)[["Region", "Reporting_PHU", "Reporting_PHU_Latitude", "Reporting_PHU_Longitude", "TotalLatest", "TotalLast7"]]
last7deltadf["TotalChange"] = last7deltadf.apply(
    lambda rec: rec["TotalLatest"] - rec["TotalLast7"], axis=1
)
last7deltadf["TotalChangePercent"] = last7deltadf.apply(
    lambda rec: getTotalChangePercent(rec["TotalChange"], rec["TotalLast7"]), axis=1
)
# Ascending on both keys; reset_index keeps the previous index as a column.
last7deltadf = last7deltadf.sort_values(by=["TotalChange", "TotalChangePercent"], ascending=True).reset_index()
last7deltadf

In [None]:
#hide_input
# Horizontal bars: weekly increase per public health unit, coloured by the
# percentage increase.
px.bar(
    last7deltadf,
    x='TotalChange',
    y='Region',
    orientation="h",
    color='TotalChangePercent',
    hover_name="Reporting_PHU",
    height=800,
    labels={"Reporting_PHU":"Public Health Unit", "TotalChangePercent":"% increase", "TotalChange":"Increase in cases", "Region" : "Public Health Unit"},
    title=f"Increase in cases for week of {last7days} to {maxdate}",
)

## Ontario-wide

### Cases

In [None]:
#hide
# First symptom onset date shown in the Ontario-wide charts.
outcomestartdate = "2020-02-15"

# Cases per episode date and outcome. The dates are compared as strings
# against the bare start date, as the other by-date frames do: appending
# "T00:00:00" dropped the start date itself whenever the data holds date-only
# strings ("2020-02-15" sorts before "2020-02-15T00:00:00").
outcomedf = (
    df[df["Accurate_Episode_Date"] >= outcomestartdate]
    .groupby(["Accurate_Episode_Date", "Outcome1"])["Row_ID"].count()
    .reset_index(name="Cases")
)

# Ordered category so the sort below puts Fatal, Resolved, Not Resolved in
# that order.
outcomedf["Outcome"] = pd.Categorical(outcomedf["Outcome1"], ["Fatal", "Resolved", "Not Resolved"])

outcomedf = outcomedf.sort_values(by="Outcome")

outcomedf

In [None]:
#hide
# Testing status CSV on data.ontario.ca and the local copy that is used when
# the download fails.
teststatusurl = "https://data.ontario.ca/dataset/f4f86e54-872d-43f8-8a86-3892fd3cb5e6/resource/ed270bb8-340b-41f9-a7c6-e8ef587e6d11/download/covidtesting.csv"
teststatuscache = "data_files/ontario-teststatus-data.cache.csv"
try:
    teststatusdf = pd.read_csv(teststatusurl)
except Exception:
    # Download or parsing failed: fall back to the last cached copy.
    teststatusdf = pd.read_csv(teststatuscache)
else:
    try:
        # Create the cache directory first so a missing directory cannot make
        # the write fail and discard the freshly downloaded data.
        os.makedirs(os.path.dirname(teststatuscache) or ".", exist_ok=True)
        teststatusdf.to_csv(teststatuscache, index=False)
    except OSError as cache_error:
        # The cache is best-effort: keep the fresh data even if it cannot be saved.
        print(f"Could not write {teststatuscache}: {cache_error}")
teststatusdf

In [None]:
#hide
# Work on a copy of the testing data, narrowed to the columns used below.
testconfirmeddf = teststatusdf.copy()
testconfirmeddf = testconfirmeddf[
    ["Reported Date", "Total Cases", "Deaths", "Total tests completed in the last day", "Total Positive LTC Resident Cases", "Total Positive LTC HCW Cases"]
]

testconfirmeddf["ReportedDate"] = testconfirmeddf.apply(
    lambda rec: dateutil.parser.parse(rec["Reported Date"]), axis=1
)

# Day-over-day differences of the cumulative columns.
testconfirmeddf["New Cases"] = testconfirmeddf["Total Cases"].fillna(0).diff()
testconfirmeddf["New Deaths"] = testconfirmeddf["Deaths"].fillna(0).diff()
testconfirmeddf["New LTC Resident Cases"] = testconfirmeddf["Total Positive LTC Resident Cases"].diff()
testconfirmeddf["New LTC HCW Cases"] = testconfirmeddf["Total Positive LTC HCW Cases"].diff()

# Running total of the daily completed tests.
testconfirmeddf["Total Tests"] = testconfirmeddf["Total tests completed in the last day"].cumsum()

# Window length, in rows, for the "5d" differences and rolling means.
diffDays = 5

testconfirmeddf["Cases Inc 5d"] = testconfirmeddf["Total Cases"].fillna(0).diff(periods=diffDays)
testconfirmeddf["Tests Inc 5d"] = testconfirmeddf["Total Tests"].fillna(0).diff(periods=diffDays)
testconfirmeddf["New Cases Mean 5d"] = testconfirmeddf["New Cases"].rolling(window=diffDays).mean()
testconfirmeddf["New Tests Mean 5d"] = testconfirmeddf["Total tests completed in the last day"].rolling(window=diffDays).mean()
testconfirmeddf["New Deaths Mean 5d"] = testconfirmeddf["New Deaths"].rolling(window=diffDays).mean()

def getPercentPositive(numPos, numTests):
    """Return positives as a percentage of tests, or 0 when there were no tests."""
    return 0 if numTests == 0 else numPos/numTests * 100

# Positivity rates: daily, cumulative, and over the 5-row window.
testconfirmeddf["Daily Percent Tested Positive"] = testconfirmeddf.apply(
    lambda rec: getPercentPositive(rec["New Cases"], rec["Total tests completed in the last day"]),
    axis=1,
)
testconfirmeddf["Total Percent Tested Positive"] = testconfirmeddf.apply(
    lambda rec: getPercentPositive(rec["Total Cases"], rec["Total Tests"]),
    axis=1,
)
testconfirmeddf["5d Avg Percent Tested Positive"] = testconfirmeddf.apply(
    lambda rec: getPercentPositive(rec["Cases Inc 5d"], rec["Tests Inc 5d"]),
    axis=1,
)
# Gap between the windowed rate and the ratio of the two rolling means.
testconfirmeddf["Diff 5d avg and avg of avgs"] = testconfirmeddf.apply(
    lambda rec: rec["5d Avg Percent Tested Positive"]
    - getPercentPositive(rec["New Cases Mean 5d"], rec["New Tests Mean 5d"]),
    axis=1,
)

testconfirmeddf

In [None]:
#hide
# Rows where the two ways of computing the 5-day positivity disagree.
testconfirmeddf.loc[
    testconfirmeddf["Diff 5d avg and avg of avgs"] != 0,
    ["Reported Date", "Cases Inc 5d", "Tests Inc 5d", "New Cases Mean 5d", "New Tests Mean 5d", "5d Avg Percent Tested Positive", "Diff 5d avg and avg of avgs"],
]

In [None]:
#hide
# Cases per episode date from the chart start date onwards.
casesdf = (
    df[df["Accurate_Episode_Date"] >= outcomestartdate]
    .groupby("Accurate_Episode_Date")["Row_ID"].count()
    .reset_index(name="Count")
)

# Trailing and centred rolling means over diffDays rows.
casesdf["Count 5d Mean right"] = casesdf["Count"].rolling(window=diffDays).mean()
casesdf["Count 5d Mean"] = casesdf["Count"].rolling(window=diffDays, center=True).mean()
# Centred mean pulled 1, 5 and 14 rows earlier. The shift is by rows, so it
# equals a shift by days only when no episode date is missing from casesdf.
casesdf["Count 5d Mean - 1d"] = casesdf["Count 5d Mean"].shift(-1)
casesdf["Count 5d Mean - 5d"] = casesdf["Count 5d Mean"].shift(-5)
casesdf["Count 5d Mean - 14d"] = casesdf["Count 5d Mean"].shift(-14)

casesdf

In [None]:
#hide_input
# Stacked bars of cases per onset date, split by outcome.
totalbar = px.bar(
    outcomedf,
    x="Accurate_Episode_Date",
    y="Cases",
    color="Outcome1",
    color_discrete_sequence=["#ff6666", "#b0e8b0", "#ffd11a"],
    hover_data=["Outcome1"],
    labels={"Accurate_Episode_Date" : "Symptom Onset Date", "Outcome1" : "By onset date"},
    title=f"Ontario Confirmed Cases by Symptom Onset Date<br>by Outcome from {outcomestartdate} to {maxdate}",
)
# Overlay: trailing 5-row mean by onset date.
totalbar.add_trace(go.Scatter(
    x=casesdf["Accurate_Episode_Date"],
    y=casesdf["Count 5d Mean right"],
    mode="lines",
    name="5d avg",
    line={"color": "black", "dash": "solid"},
))
# Overlay: 5-row mean of new cases by reported date. As the last expression
# of the cell, the returned figure is displayed.
totalbar.add_trace(go.Scatter(
    x=testconfirmeddf["Reported Date"],
    y=testconfirmeddf["New Cases Mean 5d"],
    mode="lines",
    name="By rep dt avg",
    line={"color": "silver", "dash": "dot"},
))

In [None]:
#hide

def getAcquisition(info):
    """Translate a Case_AcquisitionInfo code into its chart label.

    Codes without a translation (for example "Travel") are returned unchanged.
    """
    labels = {
        "CC": "Confirmed case",
        "No Epi-link": "Comm. spread",
        "OB": "Inst. outbreak",
        "No Info-Missing": "Missing info",
        "No Info-Unk": "Unknown",
    }
    return labels.get(info, info)

# Legend order of the acquisition labels and the colour of each.
acqOrder = ["Travel", "Confirmed case", "Comm. spread", "Inst. outbreak", "Unknown", "Missing info"]
acqColours = ["mediumblue", "#ff6666", "red", "#990000", "grey", "silver"]

# Cases per episode date and acquisition code. This reads the date-filtered
# df, like the other by-date frames and the chart title (which ends at
# maxdate); the unfiltered origdf would let rows with out-of-range episode
# dates stretch the chart's x-axis.
onacqdf = (
    df[df["Accurate_Episode_Date"] >= outcomestartdate]
    .groupby(["Accurate_Episode_Date", "Case_AcquisitionInfo"])["Row_ID"].count()
    .reset_index(name="Count")
)


onacqdf["Acquisition"] = onacqdf.apply(lambda r: getAcquisition(r["Case_AcquisitionInfo"]), axis=1)
# Ordered category so the sort below follows acqOrder.
onacqdf["Acquisition"] = pd.Categorical(onacqdf["Acquisition"], acqOrder)
onacqdf = onacqdf.sort_values(by="Acquisition")
onacqdf

In [None]:
#hide_input

# Stacked bars of cases per onset date, split by acquisition.
onacqbar = px.bar(
    onacqdf,
    x="Accurate_Episode_Date",
    y="Count",
    color="Acquisition",
    color_discrete_sequence=acqColours,
    labels={"Accurate_Episode_Date" : "Symptom Onset Date", "Outcome1" : "By onset date"},
    title=f"Ontario Confirmed Cases by Symptom Onset Date<br>by Acquisition from {outcomestartdate} to {maxdate}",
)
# Overlay: trailing 5-row mean by onset date.
onacqbar.add_trace(go.Scatter(
    x=casesdf["Accurate_Episode_Date"],
    y=casesdf["Count 5d Mean right"],
    mode="lines",
    name="5d avg",
    line={"color": "black", "dash": "solid"},
))
# Overlay: 5-row mean of new cases by reported date. As the last expression
# of the cell, the returned figure is displayed.
onacqbar.add_trace(go.Scatter(
    x=testconfirmeddf["Reported Date"],
    y=testconfirmeddf["New Cases Mean 5d"],
    mode="lines",
    name="By rep dt avg",
    line={"color": "darkgrey", "dash": "dot"},
))

In [None]:
#hide
def getInfectionSource(info):
    """Collapse a Case_AcquisitionInfo code into a coarser infection source.

    Codes starting with "No Info" become "No info", "CC" and "No Epi-link"
    become "Other", "OB" becomes "Inst. outbreak"; any other code is returned
    unchanged.
    """
    if info.startswith("No Info"):
        return "No info"
    if info in ("CC", "No Epi-link"):
        return "Other"
    return "Inst. outbreak" if info == "OB" else info

onacqdf["Infection Source"] = onacqdf.apply(lambda r: getInfectionSource(r["Case_AcquisitionInfo"]), axis=1)

# Total cases per episode date and infection source. Only "Count" is summed:
# a bare .sum() would also try to add up the string code column and the
# categorical "Acquisition" column, which newer pandas rejects with a TypeError.
onacqlinedf = (
    onacqdf
    .groupby(["Accurate_Episode_Date", "Infection Source"])[["Count"]].sum()
    .reset_index()
)
onacqlinedf

In [None]:
#hide_input

# Reported-date rows from 1 March 2020 onwards.
rollingbardf = testconfirmeddf.loc[testconfirmeddf["ReportedDate"] >= "2020-03-01"]

# Daily new cases, coloured by the 5-day positivity rate.
rollingbar = px.bar(
    rollingbardf,
    x="ReportedDate",
    y="New Cases",
    color="5d Avg Percent Tested Positive",
    range_color=[0,8],
    color_continuous_scale=px.colors.sequential.Inferno,
    hover_data=["New Cases", "Total tests completed in the last day"],
    labels={"5d Avg Percent Tested Positive" : "% pos. 5d avg", "New Cases" : "Cases", "ReportedDate" : "Reported Date"},
    title="Ontario Confirmed Cases by Reported Date<br>by Percent Tests per Day with Positive Result (5-Day Rolling Average)",
)
rollingbar.update_layout(bargap=0)
# Overlay the 5-row mean of new cases. As the last expression of the cell,
# the returned figure is displayed.
rollingbar.add_trace(go.Scatter(
    x=rollingbardf["ReportedDate"],
    y=rollingbardf["New Cases Mean 5d"],
    mode="lines",
    name="Cases 5d Avg",
    showlegend=False,
    line={"color": "darkgrey", "dash": "solid"},
))

In [None]:
#hide_input
# Rendered note about missing daily-test data in the preceding chart.
HTML('For the graph above, data for "Total tests completed in the last day" is not available before 15 Apr 2020.')

In [None]:
#hide_input

# Only days that report a non-zero number of completed tests.
testeddf = testconfirmeddf.loc[
    lambda tests: tests["Total tests completed in the last day"].notnull()
    & (tests["Total tests completed in the last day"] != 0)
]

# Daily completed tests, coloured by the 5-day positivity rate.
testsbar = px.bar(
    testeddf,
    x="ReportedDate",
    y="Total tests completed in the last day",
    color="5d Avg Percent Tested Positive",
    range_color=[0,8],
    color_continuous_scale=px.colors.sequential.Inferno,
    hover_data=["New Cases", "Total tests completed in the last day"],
    labels={"5d Avg Percent Tested Positive" : "% pos. 5d avg", "New Cases" : "Cases", "ReportedDate" : "Reported Date"},
    title="Ontario Tests Completed in the Last Day by Reported Date<br>by Percent Tests per Day with Positive Result (5-Day Rolling Average)",
)

# One arrow annotation per hard-coded peak: (date, y position, text, arrow
# length). NOTE(review): the Jun 6 entry uses the same y as May 24 - confirm.
testsbar.update_layout(bargap=0, annotations=[
    dict(
        x=peakdate,
        y=peakheight,
        xref="x",
        yref="y",
        text=peaktext,
        showarrow=True,
        arrowhead=1,
        ax=0,
        ay=arrowlength,
    )
    for peakdate, peakheight, peaktext, arrowlength in (
        ("2020-04-24", 12295, "Apr 24 peak<br>in reported<br>confirmed cases", -80),
        ("2020-05-24", 11383, "May 24<br>peak in<br>reported<br>confirmed<br>cases", -100),
        ("2020-06-06", 11383, "Jun 6<br>peak in<br>reported<br>confirmed<br>cases", -180),
    )
])
# Overlay the 5-row mean of daily tests. As the last expression of the cell,
# the returned figure is displayed.
testsbar.add_trace(go.Scatter(
    x=testeddf["ReportedDate"],
    y=testeddf["New Tests Mean 5d"],
    mode="lines",
    name="Tests 5d Avg",
    showlegend=False,
    line={"color": "deepskyblue", "dash": "solid"},
))

In [None]:
#hide_input
import math

# 5-day positivity rate over time on a log-scaled y axis.
posline = px.line(testeddf, 
       x="ReportedDate", y="5d Avg Percent Tested Positive", log_y=True,
       hover_data=["New Cases", "Total tests completed in the last day"],
       labels={"5d Avg Percent Tested Positive" : "Percent Tests Positive (5d avg)", "ReportedDate" : "Reported Date"},
       title="Percentage of Ontario Tests per Day with SARS-CoV-2 Positive Result<br>(5-Day Rolling Average) by Reported Date",
        color_discrete_sequence=["red"]
      )

# The annotation y values are passed as log10 of the plotted value.
peakannotations = [
        dict(
            x="2020-04-24",
            y=math.log(5.773, 10),
            xref="x",
            yref="y",
            text="Apr 24 peak <br>in reported<br>confirmed cases",
            showarrow=True,
            arrowhead=7,
            ax=0,
            ay=-60
        ),
        dict(
            x="2020-05-24",
            y=math.log(4.103, 10),
            xref="x",
            yref="y",
            text="May 24 peak<br>in reported<br>confirmed cases",
            showarrow=True,
            arrowhead=7,
            ax=0,
            ay=-60
        ),
]

# The Jun 6 arrow points at the plotted value, so look it up as a scalar.
# Passing the filtered Series straight to math.log only works when it holds
# exactly one row; when that date is absent, or the rate is missing or not
# positive, the annotation is left out instead of raising.
jun6positive = testeddf.loc[testeddf["ReportedDate"] == "2020-06-06", "5d Avg Percent Tested Positive"]
if not jun6positive.empty and jun6positive.iloc[0] > 0:
    peakannotations.append(
        dict(
            x="2020-06-06",
            y=math.log(jun6positive.iloc[0], 10),
            xref="x",
            yref="y",
            text="Jun 6<br>peak in<br>reported<br>confirmed<br>cases",
            showarrow=True,
            arrowhead=1,
            ax=0,
            ay=-100
        )
    )

# As the last expression of the cell, the returned figure is displayed.
posline.update_layout(annotations=peakannotations)

In [None]:
#hide
def _countActiveByAge(cases, onsetWithin):
    """Count cases per episode date and age group, labelled with an onset window."""
    counts = (
        cases
        .groupby(["Accurate_Episode_Date", "Age_Group"])["Row_ID"].count()
        .reset_index(name="Cases")
    )
    counts["OnsetWithin"] = onsetWithin
    return counts


# Active cases with a known age group. The comparison ignores letter case so
# that both the "Unknown" spelling used here and the "UNKNOWN" spelling that
# getAgeNumber checks for are excluded.
ageactivedf = activedf[activedf["Age_Group"].astype(str).str.upper() != "UNKNOWN"]

aggageactivedf = _countActiveByAge(ageactivedf, "Any date")

aggageactivelast14df = _countActiveByAge(
    ageactivedf[ageactivedf["Datetime"] >= now - timedelta(days=14)], "14 days")

aggageactivelast7df = _countActiveByAge(
    ageactivedf[ageactivedf["Datetime"] >= now - timedelta(days=7)], "7 days")

aggageactivelast3df = _countActiveByAge(
    ageactivedf[ageactivedf["Datetime"] >= now - timedelta(days=3)], "3 days")

# pd.concat replaces the chained DataFrame.append calls (append was removed
# in pandas 2.0) and keeps each frame's original row index, as append did.
appendaggageactivedf = pd.concat(
    [aggageactivedf, aggageactivelast14df, aggageactivelast7df, aggageactivelast3df])
appendaggageactivedf

In [None]:
#hide_input

# Active cases per onset window and age group. Only "Cases" is summed: a bare
# .sum() would also "add up" the episode date strings of each group.
sumageactivedf = (
    appendaggageactivedf
    .groupby(["OnsetWithin", "Age_Group"])[["Cases"]].sum()
    .reset_index()
)

# Numeric sort key for the age labels, and an ordered category so the windows
# are ordered from widest to narrowest.
sumageactivedf["Age Group"] = sumageactivedf["Age_Group"].map(getAgeNumber)
sumageactivedf["Onset"] = pd.Categorical(sumageactivedf["OnsetWithin"], ["Any date", "14 days", "7 days", "3 days"])

sumageactivedf = sumageactivedf.sort_values(by=["Onset", "Age Group"])


activeagebar = px.bar(sumageactivedf, y="Cases", x='Age_Group',
    color="Onset", color_discrete_sequence=["maroon", "darkorange", "yellow", "white"],
    labels={"Onset" : "Onset w/in last", "Age_Group" : "Age Group"},
    title="Currently Active Confirmed Cases by Age Group",
    hover_name="Age_Group"
    )
# Overlay instead of stacking: each narrower window is a subset of the wider
# ones. As the last expression of the cell, the returned figure is displayed.
activeagebar.update_layout(barmode='overlay', hovermode="x")

In [None]:
#hide_input

# Rendered introduction to the projected-infection-date chart built in the
# following cells.
HTML("The incubation period between SARS-CoV-2 infection to COVID-19 symptom onset is 1 to 14 days, with an average of 5 days. The graph below explores whether there is a relationship between confirmed cases by offset Symptom Onset Date and notable social events.")

In [None]:
#hide

# Social events to mark on the chart. "ax"/"ay" optionally override the
# default arrow offsets (0 and -60).
events = [
    { "name" : "Good Friday", "date" : "2020-04-10" },
    { "name" : "Easter Monday", "date" : "2020-04-13", "ax" : 40 },
    { "name" : "Mother's Day", "date" : "2020-05-10"},
    { "name" : "Victoria Day", "date" : "2020-05-18"},
    { "name" : "Trinity Bellwoods<br>Park weekend", "date" : "2020-05-23", "ay" : -200},
    { "name" : "Father's Day", "date" : "2020-06-21"},
    { "name" : "Canada Day", "date" : "2020-07-01"},
    { "name" : "Civic Holiday", "date" : "2020-08-03"},
]

# Last episode date that has a value in the 5-row-shifted rolling mean.
maxinfectiondate = casesdf[casesdf["Count 5d Mean - 5d"].notnull()]["Accurate_Episode_Date"].max()

annotations = []
for event in events:
    # Dates are compared as strings; events past the plotted range are skipped.
    if event["date"] > maxinfectiondate:
        continue
    eventmeans = casesdf.loc[casesdf["Accurate_Episode_Date"] == event["date"], "Count 5d Mean - 5d"]
    if eventmeans.empty:
        # No row for this date, so there is no y position for the arrow.
        # Skip the event instead of failing with an IndexError on .values[0].
        continue
    annotations.append(dict(
        x=event["date"],
        y=eventmeans.values[0],
        xref="x",
        yref="y",
        text=event["name"],
        showarrow=True,
        arrowhead=7,
        ax=event.get("ax", 0),
        ay=event.get("ay", -60)
    ))
    
annotations

In [None]:
#hide_input
# One dotted line per onset-date offset, drawn in this order: -1d, -14d, -5d.
infectionline = go.Figure(data=[
    go.Scatter(
        x=casesdf["Accurate_Episode_Date"],
        y=casesdf[offsetcolumn],
        mode="lines",
        showlegend=True,
        name=tracename,
        line={"color": linecolour, "dash": "dot"},
    )
    for offsetcolumn, tracename, linecolour in (
        ("Count 5d Mean - 1d", "-1d (min)", "pink"),
        ("Count 5d Mean - 14d", "-14d (max)", "thistle"),
        ("Count 5d Mean - 5d", "-5d (avg)", "hotpink"),
    )
])
# As the last expression of the cell, the returned figure is displayed.
infectionline.update_layout(
    annotations=annotations,
    title="Confirmed Cases (5-Day Rolling Average) by Projected Infection Date<br>Based on Symptom Onset Date",
    xaxis_title="Projected Infection Date based on Symptom Onset Date",
    yaxis_title="Confirmed Cases (5d Average)",
    legend_title_text="Onset date offset",
)

### Deaths

In [None]:
#hide
# Fatal cases per age group and acquisition code, taken from the unfiltered data.
ageacqdeathdf = (
    origdf.loc[origdf["Outcome1"] == "Fatal"]
    .groupby(["Age_Group", "Case_AcquisitionInfo"])["Row_ID"].count()
    .reset_index(name="Deaths")
)

# Numeric age key and ordered acquisition label, used for sorting.
ageacqdeathdf["AgeGroup"] = ageacqdeathdf.apply(lambda rec: getAgeNumber(rec["Age_Group"]), axis=1)
ageacqdeathdf["Acquisition"] = ageacqdeathdf.apply(lambda rec: getAcquisition(rec["Case_AcquisitionInfo"]), axis=1)
ageacqdeathdf["Acquisition"] = pd.Categorical(ageacqdeathdf["Acquisition"], acqOrder)
ageacqdeathdf = ageacqdeathdf.sort_values(by=["AgeGroup", "Acquisition"])

ageacqdeathdf

In [None]:
#hide

# Fatal cases per episode date and age group.
deathsdf = (
    df.loc[df["Outcome1"] == "Fatal"]
    .groupby(["Accurate_Episode_Date", "Age_Group"])["Row_ID"].count()
    .reset_index(name="Cases")
)

# Numeric age key, so the groups sort by age rather than alphabetically.
deathsdf["Age Group"] = deathsdf.apply(lambda rec: getAgeNumber(rec["Age_Group"]), axis=1)

deathsdf = deathsdf.sort_values(by=["Age Group", "Accurate_Episode_Date"])

deathsdf

In [None]:
#hide
# Earliest reported date with a positive number of new deaths ...
testsfirstdeath = testconfirmeddf.loc[testconfirmeddf["New Deaths"] > 0, "Reported Date"].min()

# ... and every testing row from that date onwards (string comparison).
testsdeathsdf = testconfirmeddf.loc[testconfirmeddf["Reported Date"] >= testsfirstdeath]

In [None]:
#hide
# Fatal cases per episode date and acquisition code.
deathacqdf = (
    df.loc[df["Outcome1"] == "Fatal"]
    .groupby(["Accurate_Episode_Date", "Case_AcquisitionInfo"])["Row_ID"].count()
    .reset_index(name="Count")
)

# Chart label as an ordered category, so the sort follows acqOrder.
deathacqdf["Acquisition"] = deathacqdf.apply(lambda rec: getAcquisition(rec["Case_AcquisitionInfo"]), axis=1)
deathacqdf["Acquisition"] = pd.Categorical(deathacqdf["Acquisition"], acqOrder)
deathacqdf = deathacqdf.sort_values(by="Acquisition")
deathacqdf

In [None]:
#hide_input

# Stacked bars of deaths per onset date, split by acquisition.
deathacqbar = px.bar(
    deathacqdf,
    x="Accurate_Episode_Date",
    y="Count",
    color="Acquisition",
    color_discrete_sequence=acqColours,
    labels={"Accurate_Episode_Date" : "Symptom Onset Date", "Count" : "Deaths"},
    title="Ontario COVID-19 Deaths by Symptom Onset Date<br>by Acquisition",
)

# Overlay the 5-row mean of new deaths by reported date. As the last
# expression of the cell, the returned figure is displayed.
deathacqbar.add_trace(go.Scatter(
    x=testsdeathsdf["Reported Date"],
    y=testsdeathsdf["New Deaths Mean 5d"],
    mode="lines",
    name="By rep dt avg",
    line={"color": "black", "dash": "dot"},
))

In [None]:
#hide_input

# Stacked bars of deaths per onset date, split by age group.
deathbar = px.bar(
    deathsdf,
    x="Accurate_Episode_Date",
    y="Cases",
    color="Age_Group",
    color_discrete_sequence=["maroon", "red", "darkorange", "yellow", "#b3ffb3", "turquoise", "#3385ff", "#9999ff", "#dab3ff", "white"],
    labels={"Accurate_Episode_Date" : "Symptom Onset Date", "Age_Group" : "Age Group", "Cases" : "Deaths"},
    title="Ontario COVID-19 Deaths by Symptom Onset Date<br>by Age Group",
)

# Overlay the 5-row mean of new deaths by reported date. As the last
# expression of the cell, the returned figure is displayed.
deathbar.add_trace(go.Scatter(
    x=testsdeathsdf["Reported Date"],
    y=testsdeathsdf["New Deaths Mean 5d"],
    mode="lines",
    name="By rep dt avg",
    line={"color": "black", "dash": "dot"},
))

In [None]:
#hide
# Death columns of the testing data, for rows that have a reported date.
deathdemdf = teststatusdf.loc[
    teststatusdf["Reported Date"].notnull(),
    ["Reported Date", "Deaths", "Total LTC Resident Deaths", "Total LTC HCW Deaths"],
].copy()

deathdemdf["ReportedDate"] = deathdemdf.apply(
    lambda rec: dateutil.parser.parse(rec["Reported Date"]), axis=1
)
# Day-over-day differences of the cumulative death columns.
deathdemdf["Total Deaths"] = deathdemdf["Deaths"].fillna(0)
deathdemdf["New Deaths"] = deathdemdf["Total Deaths"].diff()
deathdemdf["New LTC Resident Deaths"] = deathdemdf["Total LTC Resident Deaths"].diff()
deathdemdf["New LTC HCW Deaths"] = deathdemdf["Total LTC HCW Deaths"].diff()

# New deaths that are neither LTC resident nor LTC HCW deaths.
deathdemdf["New Other Deaths"] = deathdemdf.apply(
    lambda rec: rec["New Deaths"] - rec["New LTC Resident Deaths"] - rec["New LTC HCW Deaths"],
    axis=1,
)

deathdemdf

In [None]:
#hide
# Rows where the split cannot be used: "New Other Deaths" is missing or
# negative, or "New LTC Resident Deaths" is negative.
nobreakdown = (
    deathdemdf["New Other Deaths"].isnull()
    | (deathdemdf["New Other Deaths"] < 0)
    | (deathdemdf["New LTC Resident Deaths"] < 0)
)

# All new deaths on those rows are booked under the "Unknown" situation.
unknowndeathsdf = deathdemdf[nobreakdown].assign(
    Situation="Unknown",
    DeathCount=lambda frame: frame["New Deaths"],
)

# Drop rows with a missing or zero count and keep only the plotting columns.
hasdeaths = unknowndeathsdf["DeathCount"].notnull() & (unknowndeathsdf["DeathCount"] != 0)
unknowndeathsdf = unknowndeathsdf.loc[hasdeaths, ["ReportedDate", "Situation", "DeathCount"]]
unknowndeathsdf

In [None]:
#hide
# Rows where the split is usable: "New Other Deaths" is present and
# non-negative, and "New LTC Resident Deaths" is non-negative.
sitdeathsdf = deathdemdf[
    deathdemdf["New Other Deaths"].notnull()
    & (deathdemdf["New Other Deaths"] >= 0)
    & (deathdemdf["New LTC Resident Deaths"] >= 0)
]

otherdeathsdf = sitdeathsdf.copy()
otherdeathsdf["Situation"] = "Other"

# Reuse the "New Other Deaths" column the filter above is based on, instead
# of re-deriving the same subtraction row by row.
otherdeathsdf["DeathCount"] = otherdeathsdf["New Other Deaths"]

otherdeathsdf = otherdeathsdf[["ReportedDate", "Situation", "DeathCount"]]

otherdeathsdf

In [None]:
#hide
# "LTC Resident" share of the usable rows, reduced to the plotting columns.
residentdeathsdf = sitdeathsdf.assign(
    Situation="LTC Resident",
    DeathCount=sitdeathsdf["New LTC Resident Deaths"],
)[["ReportedDate", "Situation", "DeathCount"]]
residentdeathsdf

In [None]:
#hide
# "LTC HCW" share of the usable rows, reduced to the plotting columns.
lhcwdeathsdf = sitdeathsdf.assign(
    Situation="LTC HCW",
    DeathCount=sitdeathsdf["New LTC HCW Deaths"],
)[["ReportedDate", "Situation", "DeathCount"]]
lhcwdeathsdf

In [None]:
#hide
# Stack the four per-situation frames in the same order as before.
# pd.concat replaces the chained DataFrame.append calls, which newer pandas
# releases no longer provide; the original row index is kept in both forms.
situationdeathsdf = pd.concat(
    [unknowndeathsdf, residentdeathsdf, lhcwdeathsdf, otherdeathsdf]
)
situationdeathsdf

In [None]:
#hide_input

# Keep only rows whose "New Deaths Mean 5d" value is present and non-zero.
testsdeathsavg = testconfirmeddf.loc[
    testconfirmeddf["New Deaths Mean 5d"].notnull()
    & (testconfirmeddf["New Deaths Mean 5d"] != 0)
]

# Deaths per reported date, coloured by situation.
sitdeathsbar = px.bar(
    situationdeathsdf,
    x="ReportedDate",
    y="DeathCount",
    color="Situation",
    color_discrete_sequence=["silver", "pink", "lightskyblue", "#ff9999"],
    title="Ontario COVID-19 Deaths by Reported Date",
    labels={"DeathCount": "Deaths"},
)

# Solid line overlay of the "New Deaths Mean 5d" column.
sitdeathsavgline = go.Scatter(
    x=testsdeathsavg["ReportedDate"],
    y=testsdeathsavg["New Deaths Mean 5d"],
    mode="lines",
    name="5d avg",
    line=dict(color="black", dash="solid"),
)
sitdeathsbar.add_trace(sitdeathsavgline)

## Regional by Public Health Unit

In [None]:
#hide
# The four public health units with the largest "TotalChangePercent".
top4growth = (
    last7deltadf
    .sort_values("TotalChangePercent", ascending=False)
    .head(4)[["Reporting_PHU", "TotalChangePercent"]]
    .reset_index()
)

# Attach the rows of ageactivedf that belong to those four units; the left
# join keeps every unit from top4growth.
activetop4df = top4growth.merge(ageactivedf, how="left", on=["Reporting_PHU"])

activetop4df

In [None]:
#hide
def _countactivecases(casesdf, onsetlabel, since=None):
    """Count case rows per PHU, growth percentage, onset date and age group.

    casesdf    -- frame holding the grouping columns plus "Row_ID" (and
                  "Datetime" when `since` is given)
    onsetlabel -- value stored in the "OnsetWithin" column of the result
    since      -- optional lower bound; only rows whose "Datetime" is on or
                  after it are counted

    Returns a frame with the four grouping columns, "Cases" and "OnsetWithin".
    """
    if since is not None:
        casesdf = casesdf[casesdf["Datetime"] >= since]
    countsdf = (
        casesdf
        .groupby(["Reporting_PHU", "TotalChangePercent", "Accurate_Episode_Date", "Age_Group"])["Row_ID"]
        .count()
        .reset_index()
        .rename(columns={"Row_ID": "Cases"})
    )
    countsdf["OnsetWithin"] = onsetlabel
    return countsdf


# One frame per onset window; the variable names are unchanged so that any
# later cell reading them keeps working.
agactivetop4df = _countactivecases(activetop4df, "Any date")
agactivetop4last14df = _countactivecases(activetop4df, "14 days", now - timedelta(days=14))
agactivetop4last7df = _countactivecases(activetop4df, "7 days", now - timedelta(days=7))
agactivetop4last3df = _countactivecases(activetop4df, "3 days", now - timedelta(days=3))

# pd.concat replaces the chained DataFrame.append calls, which newer pandas
# releases no longer provide.
appendagactivetop4df = pd.concat(
    [agactivetop4df, agactivetop4last14df, agactivetop4last7df, agactivetop4last3df]
)

# Collapse the onset dates: total cases per PHU, window and age group.
# Only "Cases" is summed so the result does not depend on how the installed
# pandas treats the leftover "Accurate_Episode_Date" column (presumably
# strings) under a bare .sum().
sumactivetop4df = (
    appendagactivetop4df
    .groupby(["Reporting_PHU", "TotalChangePercent", "OnsetWithin", "Age_Group"])["Cases"]
    .sum()
    .reset_index()
)

# Sort key returned by getAgeNumber for each age-group label.
sumactivetop4df["Age Group"] = sumactivetop4df["Age_Group"].map(getAgeNumber)
# Ordered categorical so the windows sort widest-first instead of alphabetically.
sumactivetop4df["Onset"] = pd.Categorical(
    sumactivetop4df["OnsetWithin"], ["Any date", "14 days", "7 days", "3 days"]
)

sumactivetop4df = sumactivetop4df.sort_values(
    by=["Age Group", "Cases", "Onset"], ascending=[True, False, True]
)

sumactivetop4df

In [None]:
#hide_input
# One facet per public health unit (two per row), bars coloured by onset window.
activetop4bar = px.bar(
    sumactivetop4df,
    x='Age_Group',
    y="Cases",
    color="Onset",
    color_discrete_sequence=["maroon", "darkorange", "yellow", "white"],
    facet_col="Reporting_PHU",
    facet_col_wrap=2,
    hover_name="Age_Group",
    labels={
        "Onset": "Onset w/in last",
        "Age_Group": "Age Group",
        "Reporting_PHU": "PHU",
    },
    title="Active Cases by Age Group in Top Four Growth Regions",
)
# Draw the windows over one another instead of stacking them.
activetop4bar.update_layout(barmode='overlay', hovermode="x")

In [None]:
#hide
# Shared date window and title fragments for the regional charts.
startdate = "2020-03-01"
prefix = "Cumulative Confirmed Cases per Ontario Region"
fromto = f"from {startdate} to {maxdate}"
titlelatest = f"{prefix} up to {maxdate}"

# Rows of aggdf whose "Date" is on or after the start date.
agdf = aggdf[aggdf["Date"] >= startdate]

In [None]:
#hide_input
# One line per public health unit, coloured by area, showing the "Count" column.
dailyline = px.line(
    agdf,
    x="Date",
    y="Count",
    color="Area",
    line_group="Reporting_PHU",
    hover_name="Reporting_PHU",
    color_discrete_sequence=["limegreen", "slategray", "magenta", "peru"],
    labels={"Reporting_PHU": "Public Health Unit", "Date": "Symptom Onset Date"},
    title=f"Daily Confirmed Cases per Ontario Region {fromto}",
)
dailyline.show()

In [None]:
#hide_input
# One line per public health unit, coloured by area, showing the "Total" column.
cumline = px.line(
    agdf,
    x="Date",
    y="Total",
    color="Area",
    line_group="Reporting_PHU",
    hover_name="Reporting_PHU",
    color_discrete_sequence=["limegreen", "slategray", "magenta", "peru"],
    labels={"Reporting_PHU": "Public Health Unit", "Date": "Symptom Onset Date"},
    title=f"{prefix} {fromto}",
)
cumline.show()

In [None]:
#hide
# Cases with acquisition code "OB", counted per age group and public health unit.
agephuobdf = (
    origdf[origdf["Case_AcquisitionInfo"] == "OB"]
    .groupby(["Age_Group", "Reporting_PHU"])["Row_ID"]
    .count()
    .reset_index()
    .rename(columns={"Row_ID": "OB Count"})
)

# All cases per age group and public health unit.
agephudf = (
    origdf
    .groupby(["Age_Group", "Reporting_PHU"])["Row_ID"]
    .count()
    .reset_index()
    .rename(columns={"Row_ID": "Total"})
)

# All cases per public health unit.
totalagephudf = (
    origdf
    .groupby(["Reporting_PHU"])["Row_ID"]
    .count()
    .reset_index()
    .rename(columns={"Row_ID": "Cases"})
)

# Right joins keep every (age group, PHU) pair and every PHU; pairs without
# "OB" cases get an "OB Count" of 0 through fillna.
agephuobdf = pd.merge(
    agephuobdf, agephudf, how="right", on=["Age_Group", "Reporting_PHU"]
).fillna(0)

agephuobdf = pd.merge(
    agephuobdf, totalagephudf, how="right", on=["Reporting_PHU"]
).fillna(0)

# Column-wise lookups and arithmetic instead of building a row Series per record.
agephuobdf["AgeGroup"] = agephuobdf["Age_Group"].map(getAgeNumber)
agephuobdf["PercentTotal"] = agephuobdf["OB Count"] / agephuobdf["Total"] * 100
agephuobdf["Area"] = agephuobdf["Reporting_PHU"].map(getArea)

# Units with the most cases first, then by the age sort key.
agephuobdf = agephuobdf.sort_values(by=["Cases", "AgeGroup"], ascending=[False, True])

agephuobdf.head(20)

In [None]:
#hide_input
# Bubble chart: share of "OB" cases per age group, one bubble per public
# health unit, sized by the "OB Count" column.
px.scatter(
    agephuobdf,
    x="Age_Group",
    y="PercentTotal",
    size="OB Count",
    size_max=30,
    color="Area",
    color_discrete_sequence=["slategray", "magenta", "peru", "limegreen"],
    hover_name="Reporting_PHU",
    hover_data=["Total"],
    labels={
        "PercentTotal": "% Cases Acquired from Inst. Outbreak",
        "Age_Group": "Age Group",
        "OB Count": "Cases",
    },
    title="Percentage of Cases Acquired from Institutional Outbreak<br>by Age Group by Public Health Unit",
)

In [None]:
#hide
# Cases with outcome "Fatal" and a known age group, counted per public
# health unit and age group.
fataldf = (
    origdf.loc[(origdf["Age_Group"] != "Unknown") & (origdf["Outcome1"] == "Fatal")]
    .groupby(["Reporting_PHU", "Age_Group"])["Row_ID"]
    .count()
    .reset_index()
    .rename(columns={"Row_ID": "Total Fatal"})
)

fataldf

In [None]:
#hide
# Cases with a known age group whose outcome is anything other than
# "Not Resolved", counted per public health unit and age group.
concludeddf = (
    origdf.loc[(origdf["Age_Group"] != "Unknown") & (origdf["Outcome1"] != "Not Resolved")]
    .groupby(["Reporting_PHU", "Age_Group"])["Row_ID"]
    .count()
    .reset_index()
    .rename(columns={"Row_ID": "Total Concluded"})
)

concludeddf

In [None]:
#hide
# Attach the fatal counts to the concluded counts. The left join keeps every
# concluded (PHU, age group) pair; pairs without fatal cases get 0.
concludedfataldf = (
    pd.merge(concludeddf, fataldf, how="left", on=["Reporting_PHU", "Age_Group"])
    .fillna(0)
    .sort_values(by=["Total Fatal", "Age_Group"], ascending=[False, True])
    .reset_index()
)

# Not referenced in this cell; kept because it is a notebook-level name that
# another cell may read -- TODO confirm and remove if unused.
pattern = re.compile('^[0-9]+s$')

# Column-wise lookups and arithmetic instead of building a row Series per record.
concludedfataldf["Age Group"] = concludedfataldf["Age_Group"].map(getAgeNumber)
concludedfataldf["% fatal"] = (
    concludedfataldf["Total Fatal"] / concludedfataldf["Total Concluded"] * 100
)
concludedfataldf["Area"] = concludedfataldf["Reporting_PHU"].map(getArea)

concludedfataldf

In [None]:
#hide
# Totals per age group across all public health units. Only the two count
# columns are summed: a bare .sum() would also add up the per-unit "% fatal"
# and "index" columns and, depending on the pandas version, the string columns.
totalconcludedfataldf = (
    concludedfataldf
    .groupby(["Age Group", "Age_Group"])[["Total Concluded", "Total Fatal"]]
    .sum()
    .reset_index()
)
# Percentage recomputed from the summed counts.
totalconcludedfataldf["% fatal"] = (
    totalconcludedfataldf["Total Fatal"] / totalconcludedfataldf["Total Concluded"] * 100
)
totalconcludedfataldf

In [None]:
#hide_input
# Bubble chart: "% fatal" per age sort key, one bubble per public health
# unit, sized by the "Total Fatal" column.
agedeathsbubble = px.scatter(
    concludedfataldf,
    x="Age Group",
    y="% fatal",
    size='Total Fatal',
    size_max=45,
    color="Area",
    color_discrete_sequence=["slategray", "magenta", "limegreen", "peru"],
    hover_name="Reporting_PHU",
    hover_data=["Total Concluded"],
    labels={
        "% fatal": "Percent Fatal",
        "Total Fatal": "Total Fatalities",
        "Reporting_PHU": "Public Health Unit",
    },
    title="Percent Fatality for Concluded Cases per Age Group per Public Health Unit",
)

# Solid line of the "% fatal" values from totalconcludedfataldf.
agedeathsavgline = go.Scatter(
    x=totalconcludedfataldf["Age Group"],
    y=totalconcludedfataldf["% fatal"],
    mode="lines",
    name="ON avg",
    line=dict(color="black", dash="solid"),
)
agedeathsbubble.add_trace(agedeathsavgline)

In [None]:
#hide
# Non-null value counts of every column, per acquisition code.
origdf.groupby("Case_AcquisitionInfo").count()

In [None]:
#hide

# Case counts per acquisition code and public health unit.
acqdf = (
    origdf
    .groupby(["Case_AcquisitionInfo", "Reporting_PHU"])["Row_ID"]
    .count()
    .reset_index()
    .rename(columns={"Row_ID": "Count"})
)
acqdf["Acquisition"] = acqdf.apply(
    lambda row: getAcquisition(row["Case_AcquisitionInfo"]), axis=1
)

# Case counts per public health unit, used as the left side of the join so
# every unit is kept.
phutotalsdf = (
    origdf
    .groupby(["Reporting_PHU"])["Row_ID"]
    .count()
    .reset_index()
    .rename(columns={"Row_ID": "Total"})
)
acqdf = pd.merge(phutotalsdf, acqdf, how="left", on=["Reporting_PHU"]).fillna(0)

acqdf["Region"] = acqdf.apply(lambda row: getRegion(row["Reporting_PHU"]), axis=1)
# Categorical with the order given by acqOrder, so sorting follows that order.
acqdf["Acquisition"] = pd.Categorical(acqdf["Acquisition"], acqOrder)
acqdf = acqdf.sort_values(by=["Total", "Acquisition"], ascending=[False, True])

acqdf

In [None]:
#hide_input
# Horizontal bars: cases per public health unit, coloured by acquisition type.
phuacqbar = px.bar(
    acqdf,
    x="Count",
    y="Region",
    orientation="h",
    color="Acquisition",
    color_discrete_sequence=acqColours,
    height=800,
    labels={"Region": "Public Health Unit", "Count": "Total Confirmed Cases"},
    title=f"Total Confirmed Cases per Public Health Unit up to {maxdate}",
)

# Reverse the y axis so the first rows of acqdf appear at the top.
phuacqbar.update_layout(yaxis={"autorange": "reversed"})

In [None]:
#hide_input
# Animated map: one bubble per public health unit and date, sized by the
# "Total" column and coloured by the "Confirmed Percent Travelled" column.
fig = px.scatter_mapbox(
    agdf,
    lat="Reporting_PHU_Latitude",
    lon="Reporting_PHU_Longitude",
    size="Total",
    size_max=40,
    color="Confirmed Percent Travelled",
    color_continuous_scale=["red", "blue"],
    range_color=[0, 100],
    animation_frame="Date",
    hover_name="Reporting_PHU",
    center={"lat": 44.5, "lon": -78.4},
    zoom=5.9,
    height=800,
    labels={
        'Confirmed Percent Travelled': '% travelled',
        "Date": "Symptom Onset Date",
    },
    title=f"Cumulative Cases per Region over Time {fromto}",
)
fig.update_layout(mapbox_style="open-street-map")
fig.show()

In [None]:
#hide_input
# The cell that loads the cases data assigns `refreshtime` only when the
# download succeeds. When the cached CSV was used instead, the name does not
# exist and this cell would raise NameError. Fall back to the modification
# time of the cache file, which that cell rewrites on every successful download.
try:
    lastretrieved = refreshtime
except NameError:
    lastretrieved = datetime.fromtimestamp(os.path.getmtime(cachefilename))

HTML(f'Data was last retrieved from <a href="https://data.ontario.ca/en/dataset/confirmed-positive-cases-of-covid-19-in-ontario/resource/455fd63b-603d-4608-8216-7d8647f43350" title="Ontario cases">Confirmed positive cases of COVID19 in Ontario</a> and <a href="https://data.ontario.ca/dataset/status-of-covid-19-cases-in-ontario/resource/ed270bb8-340b-41f9-a7c6-e8ef587e6d11" title="Ontario testing status">Status of COVID-19 cases in Ontario</a> on {lastretrieved.strftime("%Y-%m-%d %H:%M")}.')