# Ontario confirmed COVID-19 cases per Region
> "Map and graphs of confirmed COVID-19 cases in Ontario, Canada"

- author: Sophiah Ho https://github.com/anomal
- categories: [plotly, ontario, canada, covid-19, province, toronto, waterloo, ottawa, peel, york region, halton, sudbury]
- image: images/ontario_confirmed_map.png
- permalink: /ontario-confirmed-cases-per-region/

In [1]:
#hide
import json
import urllib
import urllib.request

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from IPython.display import HTML

# CKAN datastore endpoint for Ontario's confirmed COVID-19 case records.
# NOTE(review): datastore_search returns a single page of records (CKAN's
# documented default `limit` is 100) unless `limit`/`offset` are supplied --
# confirm that working from the first page only is intended.
url = 'https://data.ontario.ca/api/3/action/datastore_search?resource_id=455fd63b-603d-4608-8216-7d8647f43350'
# Use the response as a context manager so the HTTP connection is closed as
# soon as the JSON payload has been parsed.
with urllib.request.urlopen(url) as fileobj:
    vals = json.load(fileobj)
# One row per record returned under result.records.
df = pd.DataFrame(data=vals["result"]["records"])
df

Unnamed: 0,_id,Row_ID,Accurate_Episode_Date,Age_Group,Client_Gender,Case_AcquisitionInfo,Outcome1,Reporting_PHU,Reporting_PHU_Address,Reporting_PHU_City,Reporting_PHU_Postal_Code,Reporting_PHU_Website,Reporting_PHU_Latitude,Reporting_PHU_Longitude
0,1,1,2020-01-22T00:00:00,50s,FEMALE,Travel-Related,Resolved,Toronto Public Health,"277 Victoria Street, 5th Floor",Toronto,M5B 1W2,www.toronto.ca/community-people/health-wellnes...,43.656591,-79.379358
1,2,2,2020-01-21T00:00:00,50s,MALE,Travel-Related,Resolved,Toronto Public Health,"277 Victoria Street, 5th Floor",Toronto,M5B 1W2,www.toronto.ca/community-people/health-wellnes...,43.656591,-79.379358
2,3,3,2020-01-24T00:00:00,20s,FEMALE,Travel-Related,Resolved,Middlesex-London Health Unit,50 King Street,London,N6A 5L7,www.healthunit.com,42.981468,-81.254016
3,4,4,2020-02-05T00:00:00,20s,FEMALE,Travel-Related,Resolved,Toronto Public Health,"277 Victoria Street, 5th Floor",Toronto,M5B 1W2,www.toronto.ca/community-people/health-wellnes...,43.656591,-79.379358
4,5,5,2020-02-16T00:00:00,60s,FEMALE,Travel-Related,Resolved,Toronto Public Health,"277 Victoria Street, 5th Floor",Toronto,M5B 1W2,www.toronto.ca/community-people/health-wellnes...,43.656591,-79.379358
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,95,95,2020-03-04T00:00:00,60s,MALE,Travel-Related,Resolved,Toronto Public Health,"277 Victoria Street, 5th Floor",Toronto,M5B 1W2,www.toronto.ca/community-people/health-wellnes...,43.656591,-79.379358
96,144,144,2020-03-12T00:00:00,40s,MALE,Travel-Related,Resolved,Ottawa Public Health,100 Constellation Drive,Ottawa,K2G 6J8,www.ottawapublichealth.ca,45.345665,-75.763912
97,96,96,2020-03-05T00:00:00,60s,FEMALE,Travel-Related,Resolved,Toronto Public Health,"277 Victoria Street, 5th Floor",Toronto,M5B 1W2,www.toronto.ca/community-people/health-wellnes...,43.656591,-79.379358
98,97,97,2020-03-08T00:00:00,50s,FEMALE,Travel-Related,Resolved,Halton Region Health Department,1151 Bronte Road,Oakville,L6M 3Ll,www.halton.ca/For-Residents/Public-Health/,43.413997,-79.744796


In [2]:
#hide
import dateutil.parser

# First and last episode dates in the data. The column holds ISO-8601 strings
# (e.g. 2020-01-22T00:00:00), so the string min/max are also the earliest and
# latest dates; each is parsed and reduced to a plain date.
mindate, maxdate = (
    dateutil.parser.parse(df["Accurate_Episode_Date"].agg(bound)).date()
    for bound in ("min", "max")
)

# Ontario confirmed COVID-19 cases per Region

In [3]:
#hide_input
#HTML(f'Data is available for the period from {str(mindate)} to {str(maxdate)}.')

In [4]:
#hide
import re

# Travel-related cases per episode date and public health unit (PHU). The
# PHU coordinates are part of the group key so they are carried through.
traveldf = (
    df[df["Case_AcquisitionInfo"] == "Travel-Related"]
    .groupby(["Accurate_Episode_Date", "Reporting_PHU", "Reporting_PHU_Latitude", "Reporting_PHU_Longitude"])["Row_ID"]
    .count()
    .reset_index(name="Travelled_Count")
)

# All cases per episode date and PHU, using the same group key.
countdf = (
    df
    .groupby(["Accurate_Episode_Date", "Reporting_PHU", "Reporting_PHU_Latitude", "Reporting_PHU_Longitude"])["Row_ID"]
    .count()
    .reset_index(name="Count")
)

# Attach the travel-related count to every (date, PHU) row of the total
# counts. Rows without a travel-related match come back as NaN from the left
# join and are set to 0.
phu_day_keys = ["Accurate_Episode_Date", "Reporting_PHU", "Reporting_PHU_Latitude", "Reporting_PHU_Longitude"]
joindf = pd.merge(countdf, traveldf, how="left", on=phu_day_keys).fillna(0)
joindf["Datetime"] = joindf["Accurate_Episode_Date"].map(dateutil.parser.parse)

# Distinct PHUs with their coordinates, keyed 0..n-1.
phus = countdf.groupby(by=["Reporting_PHU","Reporting_PHU_Latitude","Reporting_PHU_Longitude"], as_index=False) \
    .first()[["Reporting_PHU","Reporting_PHU_Latitude","Reporting_PHU_Longitude"]].to_dict("index")
# Every calendar day between the first and last episode date, inclusive.
dates = pd.date_range(mindate, maxdate).tolist()

# Add an explicit zero row for every (day, PHU) pair that has no cases, so
# each PHU has one entry per day. The rows are collected first and
# concatenated once: DataFrame.append no longer exists in pandas >= 2.0, and
# growing/rescanning the frame inside the loop was quadratic.
seen = set(zip(joindf["Datetime"], joindf["Reporting_PHU"]))
missing_rows = []
for day in dates:
    for phu in phus.values():
        key = (day, phu["Reporting_PHU"])
        if key in seen:
            continue
        # Remember the pair so a PHU is filled at most once per day.
        seen.add(key)
        missing_rows.append({"Datetime" : day, "Reporting_PHU" : phu["Reporting_PHU"],
                             "Reporting_PHU_Latitude" : phu["Reporting_PHU_Latitude"],
                             "Reporting_PHU_Longitude" : phu["Reporting_PHU_Longitude"],
                             "Count" : 0, "Travelled_Count" : 0})
if missing_rows:
    joindf = pd.concat([joindf, pd.DataFrame(missing_rows)], ignore_index=True)

# Plain YYYY-MM-DD label, used for sorting and as the animation frame key.
joindf["Date"] = joindf["Datetime"].dt.strftime("%Y-%m-%d")

aggdf = joindf[["Date", "Reporting_PHU", "Reporting_PHU_Latitude", "Reporting_PHU_Longitude", \
                "Count", "Travelled_Count"]].sort_values(by=["Date","Reporting_PHU"]).reset_index(drop=True)

def cumulativeSum(df, phu, date):
    """Return the sum of "Count" for `phu` over all rows with "Date" <= `date`.

    `df` needs the columns "Reporting_PHU", "Date" and "Count"; `date` is
    compared against the "Date" column with `<=`.
    """
    same_phu = df["Reporting_PHU"] == phu
    on_or_before = df["Date"] <= date
    return df.loc[same_phu & on_or_before, "Count"].sum()

def cumulativeTravelled(df, phu, date):
    """Return the sum of "Travelled_Count" for `phu` over rows with "Date" <= `date`.

    `df` needs the columns "Reporting_PHU", "Date" and "Travelled_Count".
    """
    same_phu = df["Reporting_PHU"] == phu
    on_or_before = df["Date"] <= date
    return df.loc[same_phu & on_or_before, "Travelled_Count"].sum()

def cumulativeNotTravelled(df, phu, date):
    """Return the cumulative count for `phu` up to `date` that is not travel-related.

    Computed as cumulativeSum minus cumulativeTravelled for the same arguments.
    """
    total_cases = cumulativeSum(df, phu, date)
    travel_cases = cumulativeTravelled(df, phu, date)
    return total_cases - travel_cases

def cumulativePercentTravelled(df, phu, date):
    """Return the percentage of `phu`'s cumulative cases up to `date` that are travel-related.

    Returns 0 when the cumulative total is 0, which avoids dividing by zero.
    """
    total = cumulativeSum(df, phu, date)
    # Guard clause: nothing to take a percentage of yet.
    if total == 0:
        return 0
    return 100 * cumulativeTravelled(df, phu, date) / total

def getRegion(phu_name):
    """Derive a short region label from a public health unit name.

    Everything from the first "Health", "Public" or "," onwards is removed,
    together with the whitespace in front of it, e.g.
    "Toronto Public Health" -> "Toronto" and
    "Region of Waterloo, Public Health" -> "Region of Waterloo".

    If stripping would leave nothing (the name starts with one of the
    markers), the full name is returned so the label is never empty.
    """
    # `\s*` removes the separator before the marker so the label carries no
    # trailing whitespace; `.*` also handles names that end at the marker.
    region = re.sub(r"\s*(?:Health|Public|,).*$", "", phu_name)
    return region or phu_name
    
# Running totals per public health unit, computed with one grouped cumulative
# sum instead of re-filtering the whole frame once per row and per column.
# Counts are first summed per (PHU, Date) so that every row of a given day
# receives that day's complete running total, which is what cumulativeSum /
# cumulativeTravelled return (they include all rows with Date <= the row's).
phu_day = ["Reporting_PHU", "Date"]
running = (
    aggdf.groupby(phu_day)[["Count", "Travelled_Count"]].sum()  # group keys come out sorted
    .groupby(level="Reporting_PHU").cumsum()
)
# Look the running totals up again for each original row, preserving row order.
row_keys = pd.MultiIndex.from_frame(aggdf[phu_day])
aggdf["Total"] = running["Count"].reindex(row_keys).to_numpy()
aggdf["Total Travelled"] = running["Travelled_Count"].reindex(row_keys).to_numpy()
aggdf["Total Not Travelled"] = aggdf["Total"] - aggdf["Total Travelled"]
# Share of cumulative cases that are travel-related; 0 while the total is 0.
aggdf["Total Percent Travelled"] = (100 * aggdf["Total Travelled"] / aggdf["Total"]).where(aggdf["Total"] != 0, 0)
aggdf["Region"] = aggdf["Reporting_PHU"].map(getRegion)
aggdf

Unnamed: 0,Date,Reporting_PHU,Reporting_PHU_Latitude,Reporting_PHU_Longitude,Count,Travelled_Count,Total,Total Travelled,Total Not Travelled,Total Percent Travelled,Region
0,2020-01-21,Durham Region Health Department,43.898605,-78.940341,0,0.0,0,0.0,0.0,0.000000,Durham Region
1,2020-01-21,Eastern Ontario Health Unit,45.029152,-74.736298,0,0.0,0,0.0,0.0,0.000000,Eastern Ontario
2,2020-01-21,Halton Region Health Department,43.413997,-79.744796,0,0.0,0,0.0,0.0,0.000000,Halton Region
3,2020-01-21,Hamilton Public Health Services,43.257631,-79.871341,0,0.0,0,0.0,0.0,0.000000,Hamilton
4,2020-01-21,Middlesex-London Health Unit,42.981468,-81.254016,0,0.0,0,0.0,0.0,0.000000,Middlesex-London
...,...,...,...,...,...,...,...,...,...,...,...
671,2020-03-12,"Region of Waterloo, Public Health",43.462876,-80.520913,0,0.0,3,3.0,0.0,100.000000,Region of Waterloo
672,2020-03-12,Simcoe Muskoka District Health Unit,44.410713,-79.686306,0,0.0,1,1.0,0.0,100.000000,Simcoe Muskoka District
673,2020-03-12,Sudbury & District Health Unit,46.466092,-80.998059,0,0.0,2,0.0,2.0,0.000000,Sudbury & District
674,2020-03-12,Toronto Public Health,43.656591,-79.379358,0,0.0,55,41.0,14.0,74.545455,Toronto


In [5]:
#hide
# Snapshot of every public health unit on the last available date, ordered
# alphabetically by region label.
latestdf = aggdf.loc[aggdf["Date"].eq(str(maxdate))].sort_values(by="Region")
latestdf

Unnamed: 0,Date,Reporting_PHU,Reporting_PHU_Latitude,Reporting_PHU_Longitude,Count,Travelled_Count,Total,Total Travelled,Total Not Travelled,Total Percent Travelled,Region
663,2020-03-12,Durham Region Health Department,43.898605,-78.940341,0,0.0,2,1.0,1.0,50.0,Durham Region
664,2020-03-12,Eastern Ontario Health Unit,45.029152,-74.736298,0,0.0,1,1.0,0.0,100.0,Eastern Ontario
665,2020-03-12,Halton Region Health Department,43.413997,-79.744796,0,0.0,3,3.0,0.0,100.0,Halton Region
666,2020-03-12,Hamilton Public Health Services,43.257631,-79.871341,0,0.0,1,1.0,0.0,100.0,Hamilton
667,2020-03-12,Middlesex-London Health Unit,42.981468,-81.254016,0,0.0,1,1.0,0.0,100.0,Middlesex-London
668,2020-03-12,Niagara Region Public Health Department,43.116537,-79.24122,0,0.0,1,0.0,1.0,0.0,Niagara Region
669,2020-03-12,Ottawa Public Health,45.345665,-75.763912,1,1.0,9,8.0,1.0,88.888889,Ottawa
670,2020-03-12,Peel Public Health,43.647471,-79.708893,0,0.0,10,7.0,3.0,70.0,Peel
671,2020-03-12,"Region of Waterloo, Public Health",43.462876,-80.520913,0,0.0,3,3.0,0.0,100.0,Region of Waterloo
672,2020-03-12,Simcoe Muskoka District Health Unit,44.410713,-79.686306,0,0.0,1,1.0,0.0,100.0,Simcoe Muskoka District


In [7]:
#hide_input
# Stacked bar chart of cumulative cases per region on the latest date, split
# into travel-related and non-travel-related cases.
prefix = "Cumulative Confirmed Cases per Ontario Region"
titlelatest = prefix + " up to " + str(maxdate)

# Largest totals first; ties are broken by the non-travel-related count.
latestdfdesc = latestdf.sort_values(by=["Total", "Total Not Travelled"], ascending=[False,False]).reset_index()

barfig = go.Figure(go.Bar(x=latestdfdesc["Region"], y=latestdfdesc["Total Travelled"], name='Travel-Related'))
barfig.add_trace(go.Bar(x=latestdfdesc["Region"], y=latestdfdesc["Total Not Travelled"], name='Not Travel-Related'))
# categoryorder='array' only takes effect together with categoryarray; without
# it plotly falls back to trace order. Passing the sorted regions explicitly
# pins the bars to descending-total order.
barfig.update_layout(barmode='stack',
                     xaxis={'categoryorder': 'array', 'categoryarray': latestdfdesc["Region"].tolist()},
                     title=titlelatest)
barfig.show()

In [7]:
#hide_input
# Date-range suffix for chart titles.
fromto = "from " + str(mindate) + " to " + str(maxdate)

# Animated map with one frame per date: each public health unit is a bubble
# sized by its cumulative total and coloured by the cumulative percentage of
# travel-related cases.
fig = px.scatter_mapbox(aggdf, lat="Reporting_PHU_Latitude", lon="Reporting_PHU_Longitude",
                        color="Total Percent Travelled",
                        animation_frame="Date",
                        size="Total", hover_name="Reporting_PHU",
                        color_continuous_scale=[ "red","blue"],
                        size_max=40, zoom=6,
                        title="Cumulative Cases per Region over Time " + fromto)
fig.update_layout(mapbox_style="open-street-map")
# Retry the render once if the first attempt fails. Only ordinary errors are
# caught, so KeyboardInterrupt / SystemExit are no longer swallowed.
# NOTE(review): the retry presumably works around a first-call rendering
# failure -- confirm it is still needed.
try:
    fig.show()
except Exception:
    fig.show()

In [8]:
#hide_input
# Line chart of the cumulative total over time, one line per public health unit.
cumline = px.line(aggdf, x="Date", y="Total", color="Reporting_PHU", title=prefix + " " + fromto,
        labels={'Reporting_PHU':'Public Health Unit'}
)
# Retry the render once if the first attempt fails. Only ordinary errors are
# caught, so KeyboardInterrupt / SystemExit are no longer swallowed.
# NOTE(review): the retry presumably works around a first-call rendering
# failure -- confirm it is still needed.
try:
    cumline.show()
except Exception:
    cumline.show()

In [9]:
#hide_input
# Line chart of the daily case count over time, one line per public health unit.
dailyline = px.line(aggdf, x="Date", y="Count", color="Reporting_PHU", title="Daily Confirmed Cases per Ontario Region " + fromto,
       labels={'Reporting_PHU':'Public Health Unit'}
       )
# Retry the render once if the first attempt fails. Only ordinary errors are
# caught, so KeyboardInterrupt / SystemExit are no longer swallowed.
# NOTE(review): the retry presumably works around a first-call rendering
# failure -- confirm it is still needed.
try:
    dailyline.show()
except Exception:
    dailyline.show()

In [10]:
#hide_input
from datetime import date

# Provenance footer: links to the source dataset and shows the date on which
# this cell was executed.
HTML(
    'Data was last retrieved from '
    '<a href="https://data.ontario.ca/en/dataset/confirmed-positive-cases-of-covid-19-in-ontario/resource/455fd63b-603d-4608-8216-7d8647f43350" title="Ontario Data">'
    'Confirmed positive cases of COVID19 in Ontario</a>'
    f' on {date.today()}.'
)