# Ontario confirmed COVID-19 cases per Region
> "Map and graphs of confirmed COVID-19 cases in Ontario, Canada"

- author: Sophiah Ho https://github.com/anomal
- categories: [ontario, canada, covid-19, province, toronto, waterloo, ottawa, peel, york region, halton, sudbury]
- image: images/ontario_confirmed_map.png
- permalink: /ontario-confirmed-cases-per-region/

This workbook demonstrates the use of plotting and data exploration in python using plotly library. We will visualize the number of confirmed COVID-19 cases in Ontario as based in different regions

In [1]:
#hide
import urllib, json
import pandas as pd
#import plotly.graph_objects as go
#import plotly.express as px
from IPython.display import HTML

# We are obtaining data from this URL and storing it in pandas dataframe

url = 'https://data.ontario.ca/en/api/3/action/datastore_search?resource_id=455fd63b-603d-4608-8216-7d8647f43350&limit=9999999'  
fileobj = urllib.request.urlopen(url)
vals = json.load(fileobj)
df = pd.DataFrame(data=vals["result"]["records"])
df



Unnamed: 0,_id,Row_ID,Accurate_Episode_Date,Case_Reported_Date,Test_Reported_Date,Specimen_Date,Age_Group,Client_Gender,Case_AcquisitionInfo,Outcome1,Outbreak_Related,Reporting_PHU,Reporting_PHU_Address,Reporting_PHU_City,Reporting_PHU_Postal_Code,Reporting_PHU_Website,Reporting_PHU_Latitude,Reporting_PHU_Longitude
0,1,1,2020-03-18T00:00:00,2020-03-31T00:00:00,2020-03-31T00:00:00,2020-03-20T00:00:00,50s,MALE,Travel,Resolved,,Ottawa Public Health,100 Constellation Drive,Ottawa,K2G 6J8,www.ottawapublichealth.ca,45.345665,-75.763912
1,2,2,2020-03-23T00:00:00,2020-03-30T00:00:00,2020-03-31T00:00:00,2020-03-30T00:00:00,70s,FEMALE,No Epi-link,Resolved,,Windsor-Essex County Health Unit,1005 Ouellette Avenue,Windsor,N9A 4J8,www.wechu.org,42.308796,-83.033670
2,3,3,2020-03-27T00:00:00,2020-03-31T00:00:00,2020-03-30T00:00:00,2020-03-30T00:00:00,20s,FEMALE,No Epi-link,Resolved,,Windsor-Essex County Health Unit,1005 Ouellette Avenue,Windsor,N9A 4J8,www.wechu.org,42.308796,-83.033670
3,4,4,2020-03-27T00:00:00,2020-03-31T00:00:00,2020-03-31T00:00:00,2020-03-30T00:00:00,40s,FEMALE,No Epi-link,Resolved,,"Region of Waterloo, Public Health",99 Regina Street South,Waterloo,N2J 4V3,www.regionofwaterloo.ca,43.462876,-80.520913
4,5,5,2020-03-22T00:00:00,2020-03-31T00:00:00,2020-03-31T00:00:00,2020-03-28T00:00:00,70s,FEMALE,Travel,Resolved,,Ottawa Public Health,100 Constellation Drive,Ottawa,K2G 6J8,www.ottawapublichealth.ca,45.345665,-75.763912
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33848,33848,33848,2020-04-11T00:00:00,2020-04-17T00:00:00,2020-04-17T00:00:00,2020-04-16T00:00:00,60s,MALE,OB,Resolved,Yes,"Region of Waterloo, Public Health",99 Regina Street South,Waterloo,N2J 4V3,www.regionofwaterloo.ca,43.462876,-80.520913
33849,33849,33849,2020-04-02T00:00:00,2020-04-18T00:00:00,2020-04-18T00:00:00,2020-04-15T00:00:00,60s,MALE,OB,Fatal,Yes,Peel Public Health,7120 Hurontario Street,Mississauga,L5W 1N4,www.peelregion.ca/health/,43.647471,-79.708893
33850,33851,33851,2020-04-16T00:00:00,2020-04-17T00:00:00,2020-04-17T00:00:00,2020-04-16T00:00:00,30s,MALE,OB,Resolved,Yes,Toronto Public Health,"277 Victoria Street, 5th Floor",Toronto,M5B 1W2,www.toronto.ca/community-people/health-wellnes...,43.656591,-79.379358
33851,33852,33852,2020-04-09T00:00:00,2020-04-17T00:00:00,2020-04-17T00:00:00,2020-04-10T00:00:00,70s,FEMALE,OB,Resolved,Yes,Peel Public Health,7120 Hurontario Street,Mississauga,L5W 1N4,www.peelregion.ca/health/,43.647471,-79.708893


The dataframe has number of attrbutes including age_group, reporting public health unit and address of PHU. Our visualization will use these attributes along with date of reporting.

In [2]:
#hide
import dateutil.parser

# Following is the range of dates we want to visualize the data
mindate = "2020-03-01" #dateutil.parser.parse(df["Accurate_Episode_Date"].min()).date()
maxdate = "2020-05-01" #dateutil.parser.parse(df["Accurate_Episode_Date"].max()).date()

In [3]:
#hide_input
HTML(f"Data is available for the period from {str(mindate)} to {str(maxdate)}.")

In [4]:
#hide

# We will group the input data frame according to travel relted history and region
import re

traveldf = df[df["Case_AcquisitionInfo"] == "Travel-Related"] \
.groupby(["Accurate_Episode_Date", "Reporting_PHU", "Reporting_PHU_Latitude", "Reporting_PHU_Longitude"]) \
.count()["Row_ID"] \
.reset_index() \
.rename(columns={"Row_ID" : "Travelled_Count"}) 

countdf = df \
.groupby(["Accurate_Episode_Date", "Reporting_PHU", "Reporting_PHU_Latitude", "Reporting_PHU_Longitude"]) \
.count()["Row_ID"] \
.reset_index() \
.rename(columns={"Row_ID" : "Count"}) 
countdf

joindf = pd.merge(countdf, traveldf, how="left", \
left_on=["Accurate_Episode_Date", "Reporting_PHU", "Reporting_PHU_Latitude", "Reporting_PHU_Longitude"], \
right_on=["Accurate_Episode_Date", "Reporting_PHU", "Reporting_PHU_Latitude", "Reporting_PHU_Longitude"]) \
.fillna(0)
joindf["Datetime"] = joindf.apply(lambda row: dateutil.parser.parse(row["Accurate_Episode_Date"]), axis=1)

phus = countdf.groupby(by=["Reporting_PHU","Reporting_PHU_Latitude","Reporting_PHU_Longitude"], as_index=False) \
    .first()[["Reporting_PHU","Reporting_PHU_Latitude","Reporting_PHU_Longitude"]].to_dict("index")
dates = pd.date_range(mindate, maxdate).tolist()

for date in dates:
    for i in range(len(phus)):
        phu = phus[i]
        phu_name = phu["Reporting_PHU"]
        if joindf[(joindf["Datetime"] == date) & (joindf["Reporting_PHU"] == phu_name)].shape[0] == 0:
            joindf = joindf.append({"Datetime" : date, "Reporting_PHU" : phu_name, \
                          "Reporting_PHU_Latitude" : phu["Reporting_PHU_Latitude"], \
                          "Reporting_PHU_Longitude" : phu["Reporting_PHU_Longitude"], \
                          "Count" : 0, "Travelled_Count" : 0},ignore_index=True)
joindf["Date"] = joindf.apply(lambda row: str(row["Datetime"].date()), axis=1)

aggdf = joindf[["Date", "Reporting_PHU", "Reporting_PHU_Latitude", "Reporting_PHU_Longitude", \
                "Count", "Travelled_Count"]].sort_values(by=["Date","Reporting_PHU"]).reset_index(drop=True)

def cumulativeSum(df, phu, date):
    return df[(df["Reporting_PHU"] == phu) & (df["Date"] <= date)]["Count"].sum()

def cumulativeTravelled(df, phu, date):
    return df[(df["Reporting_PHU"] == phu) & (df["Date"] <= date)]["Travelled_Count"].sum()

def cumulativeNotTravelled(df, phu, date):
    return cumulativeSum(df, phu, date) - cumulativeTravelled(df, phu, date)

def cumulativePercentTravelled(df, phu, date):
    total = cumulativeSum(df, phu, date)
    if total != 0:
        totalTravelled = cumulativeTravelled(df, phu, date)
        return 100 * totalTravelled / total
    else:
        return 0

def getRegion(phu_name):
    return re.sub("(Health|Public|,).+$", "", phu_name)
    
aggdf["Total"] = aggdf.apply(lambda row: cumulativeSum(aggdf, row["Reporting_PHU"], row["Date"]), axis=1) 
aggdf["Total Travelled"] = aggdf.apply(lambda row: cumulativeTravelled(aggdf, row["Reporting_PHU"], row["Date"]), axis=1) 
aggdf["Total Not Travelled"] = aggdf.apply(lambda row: cumulativeNotTravelled(aggdf, row["Reporting_PHU"], row["Date"]), axis=1) 
aggdf["Total Percent Travelled"] = aggdf.apply(lambda row: cumulativePercentTravelled(aggdf, row["Reporting_PHU"], row["Date"]), axis=1)
aggdf["Region"] = aggdf.apply(lambda row: getRegion(row["Reporting_PHU"]), axis=1)
aggdf

Unnamed: 0,Date,Reporting_PHU,Reporting_PHU_Latitude,Reporting_PHU_Longitude,Count,Travelled_Count,Total,Total Travelled,Total Not Travelled,Total Percent Travelled,Region
0,2020-01-01,Haldimand-Norfolk Health Unit,42.847825,-80.303815,1,0.0,1,0.0,1.0,0.0,Haldimand-Norfolk
1,2020-01-01,"Region of Waterloo, Public Health",43.462876,-80.520913,1,0.0,1,0.0,1.0,0.0,Region of Waterloo
2,2020-01-21,Toronto Public Health,43.656591,-79.379358,1,0.0,1,0.0,1.0,0.0,Toronto
3,2020-01-22,Toronto Public Health,43.656591,-79.379358,1,0.0,2,0.0,2.0,0.0,Toronto
4,2020-01-24,Middlesex-London Health Unit,42.981468,-81.254016,1,0.0,1,0.0,1.0,0.0,Middlesex-London
...,...,...,...,...,...,...,...,...,...,...,...
3075,2020-06-21,Windsor-Essex County Health Unit,42.308796,-83.033670,2,0.0,1352,0.0,1352.0,0.0,Windsor-Essex County
3076,2020-06-21,York Region Public Health Services,44.048023,-79.480239,1,0.0,2847,0.0,2847.0,0.0,York Region
3077,2020-06-22,Peel Public Health,43.647471,-79.708893,3,0.0,5542,0.0,5542.0,0.0,Peel
3078,2020-06-22,Simcoe Muskoka District Health Unit,44.410713,-79.686306,2,0.0,572,0.0,572.0,0.0,Simcoe Muskoka District


In [5]:
#hide
# Sorting by the number of COVID-19 cases as of May 1st, 2020
latestdf = aggdf[aggdf["Date"] == str(maxdate)].sort_values(by=["Count"])
latestdf

Unnamed: 0,Date,Reporting_PHU,Reporting_PHU_Latitude,Reporting_PHU_Longitude,Count,Travelled_Count,Total,Total Travelled,Total Not Travelled,Total Percent Travelled,Region
2116,2020-05-01,Algoma Public Health Unit,46.532373,-84.314836,0,0.0,14,0.0,14.0,0.0,Algoma
2138,2020-05-01,Porcupine Health Unit,48.47251,-81.32875,0,0.0,61,0.0,61.0,0.0,Porcupine
2143,2020-05-01,Sudbury & District Health Unit,46.466092,-80.998059,0,0.0,59,0.0,59.0,0.0,Sudbury & District
2144,2020-05-01,Thunder Bay District Health Unit,48.400572,-89.258851,0,0.0,77,0.0,77.0,0.0,Thunder Bay District
2134,2020-05-01,Northwestern Health Unit,49.769615,-94.488254,0,0.0,17,0.0,17.0,0.0,Northwestern
2145,2020-05-01,Timiskaming Health Unit,47.509284,-79.681632,0,0.0,18,0.0,18.0,0.0,Timiskaming
2127,2020-05-01,Huron Perth District Health Unit,43.368662,-81.001913,0,0.0,50,0.0,50.0,0.0,Huron Perth District
2126,2020-05-01,Hastings and Prince Edward Counties Health Unit,44.186674,-77.391446,0,0.0,43,0.0,43.0,0.0,Hastings and Prince Edward Counties
2128,2020-05-01,"Kingston, Frontenac and Lennox & Addington Pub...",44.227874,-76.525211,0,0.0,60,0.0,60.0,0.0,Kingston
2140,2020-05-01,Renfrew County and District Health Unit,45.799406,-77.118727,0,0.0,17,0.0,17.0,0.0,Renfrew County and District


In [6]:
#hide_input

# Plotting the cumulative cases in ontario upto May 1st, 2020
import plotly.graph_objects as go
import plotly.express as px
prefix = "Cumulative Confirmed Cases per Ontario Region"
titlelatest = prefix + " up to " + str(maxdate)

latestdfdesc = latestdf.sort_values(by=["Total", "Total Not Travelled"], ascending=[False,False]).reset_index()

barfig = go.Figure(go.Bar(x=latestdfdesc["Region"], y=latestdfdesc["Total Travelled"], name='Travel-Related'))
barfig.add_trace(go.Bar(x=latestdfdesc["Region"], y=latestdfdesc["Total Not Travelled"], name='Other/Unknown'))
barfig.update_layout(barmode='stack', xaxis={'categoryorder':'array'}, title=titlelatest)

ModuleNotFoundError: No module named 'plotly'

In [None]:
#hide_input
HTML("For the graphs below, the date used is integrated Public Health Information System (iPHIS)'s best estimated date of COVID-19 disease onset, projected into the past based on factors such as symptom onset, test date, or reported date.")

In [None]:
#hide_input

# Plot of confirmed cases. Color coding is the 
startdate = "2020-03-01"
fromto = "from " + startdate + " to " + str(maxdate)
agdfOne = aggdf[aggdf["Date"] >= startdate]
agdf = agdfOne[agdfOne["Date"]<= maxdate]
fig = px.scatter_mapbox(agdf, lat="Reporting_PHU_Latitude", lon="Reporting_PHU_Longitude",     
                        color="Total Percent Travelled", 
                        animation_frame="Date",
                        size="Total", hover_name="Reporting_PHU",
                        color_continuous_scale=[ "red","blue"], 
                        size_max=40, zoom=6, 
                        title="Cumulative Cases per Region over Time " + fromto,
                       center=dict(lat=43.6,lon=-79.7))
fig.update_layout(mapbox_style="open-street-map")
fig.show()

In [None]:
import numpy as np
np.min(agdfOne["Total Percent Travelled"])

In [None]:
#hide_input
# Plot of confirmed cases as per each county
cumline = px.line(agdf, x="Date", y="Total", color="Region", title=prefix + " " + fromto,
        labels={'Region':'Public Health Unit'}
)
cumline.show()

In [None]:
#hide_input
# Plot of daily confirmed cases as per each county
dailyline = px.line(agdf, x="Date", y="Count", color="Region", title="Daily Confirmed Cases per Ontario Region " + fromto,
       labels={'Region':'Public Health Unit'}
       )
dailyline.show()

In [None]:
#hide_input
from datetime import date
HTML(f'Data was last retrieved from <a href="https://data.ontario.ca/en/dataset/confirmed-positive-cases-of-covid-19-in-ontario/resource/455fd63b-603d-4608-8216-7d8647f43350" title="Ontario data">Confirmed positive cases of COVID19 in Ontario</a> on {str(date.today())}.')