In [1]:
# comparisons
# https://docs.google.com/spreadsheets/u/1/d/e/2PACX-1vTX5x2nxCde90Zwo83cdixZsyd_hU1orGsGYKpDe344wHeFi9MqI71aZYC6GLjOV_P2lp6_lUoacPNa/pubhtml?gid=1467787327&single=true
# https://twitter.com/jkwan_md/status/1312035412274221058?s=21
# https://russell-pollari.github.io/ontario-covid19/

# prune these
import pandas as pd
import numpy as np
import urllib, json
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from mpl_toolkits.mplot3d import Axes3D
# from mpl_toolkits.basemap import Basemap
from matplotlib.collections import PolyCollection
import seaborn as sns
import datetime as dt
import geopandas as gpd
from geopandas import GeoDataFrame
import folium
from shapely.geometry import Point, Polygon
from keplergl import KeplerGl

import plotly.express as px

# set the standard plot size
plt.rcParams['figure.figsize']=(30,15)

In [2]:
# get the "Confirmed positive cases of COVID19 in Ontario" data
# https://data.ontario.ca/en/dataset/confirmed-positive-cases-of-covid-19-in-ontario
# import data using the ontario open data API
# make sure to check the limit against current cases
url = "https://data.ontario.ca/en/api/3/action/datastore_search?resource_id=455fd63b-603d-4608-8216-7d8647f43350&limit=600000"
response = urllib.request.urlopen(url)
data = json.loads(response.read().decode('utf-8'))

# use this if you want to see the json
# print (data['result']['records'])

# use this if you want to write it to a pandas dataframe
conpos_df = pd.DataFrame(data['result']['records'])

In [3]:
# get the "Status of COVID-19 Cases in Ontario" data
# https://data.ontario.ca/dataset/status-of-covid-19-cases-in-ontario
# make sure to check the limit against current cases
url = "https://data.ontario.ca/en/api/3/action/datastore_search?resource_id=ed270bb8-340b-41f9-a7c6-e8ef587e6d11&limit=600000"
response = urllib.request.urlopen(url)
data = json.loads(response.read().decode('utf-8'))

# use this if you want to see the json
# print (data['result']['records'])

status_df = pd.DataFrame(data['result']['records'])

In [4]:
# sort by date
conpos_df = conpos_df.sort_values(by=['Accurate_Episode_Date'], ascending=False);
status_df = status_df.sort_values(by=['Reported Date'], ascending=True);

In [5]:
# set the dates to datetime64 for later use
conpos_df["Accurate_Episode_Date"] = conpos_df["Accurate_Episode_Date"].astype("datetime64")
conpos_df["Case_Reported_Date"] = conpos_df["Case_Reported_Date"].astype("datetime64")
conpos_df["Test_Reported_Date"] = conpos_df["Test_Reported_Date"].astype("datetime64")
conpos_df["Specimen_Date"] = conpos_df["Specimen_Date"].astype("datetime64")

## Some Geodata Resources

- U of T map and data library resources https://mdl.library.utoronto.ca/collections/numeric-data/census-canada/2016/geo
- where to get geojson files https://stackoverflow.com/questions/17267248/how-where-do-i-get-geojson-data-for-states-provinces-and-administrative-region
- creating geojson from canada census divisions
https://blog.exploratory.io/making-maps-for-canadas-provisions-and-census-divisions-in-r-c189b88ccd8a
- creating geojson for toronto area codes https://medium.com/dataexplorations/generating-geojson-file-for-toronto-fsas-9b478a059f04
- ontario public health units https://geohub.lio.gov.on.ca/datasets/ministry-of-health-public-health-unit-boundary
- more ontario geodatasets https://geohub.lio.gov.on.ca/datasets/11be9127e6ae43c4850793a3a2ee943c_13
- potentially a fun shapefile to play with https://data.ontario.ca/dataset/provincially-licensed-meat-plants
- ttc shapefile https://open.toronto.ca/dataset/ttc-subway-shapefiles/

In [6]:
# fix these so geodata can be grabbed directly from ontario data store
# import data from the ontario open data API
# make sure to check the limit against current cases
# geourl = "https://data.ontario.ca/dataset/f4112442-bdc8-45d2-be3c-12efae72fb27/resource/4f39b02b-47fe-4e66-95b6-e6da879c6910/download/conposcovidloc.geojson"
# response = urllib.request.urlopen(geourl)
# data = json.loads(response.read().decode('utf-8'))

# geojson from ontario 
# https://data.ontario.ca/dataset/confirmed-positive-cases-of-covid-19-in-ontario
# https://data.ontario.ca/dataset/confirmed-positive-cases-of-covid-19-in-ontario/resource/4f39b02b-47fe-4e66-95b6-e6da879c6910

# with urllib.request.urlopen("https://data.ontario.ca/dataset/f4112442-bdc8-45d2-be3c-12efae72fb27/resource/4f39b02b-47fe-4e66-95b6-e6da879c6910/download/conposcovidloc.geojson") as url:
#    fileobj = url.read()
#    print(fileobj)

# use this if you want to see the json
# print (data)

# use this if you want to write it to a pandas dataframe
# conpos_df = pd.DataFrame(data['result']['records'])

In [7]:
# for accurate daily totals, better to use the "status of covid cases" dataset
# you'll need to create a new column for daily growth in cases
status_df['New Cases'] = status_df['Total Cases'].diff(periods=1).fillna(0)

In [8]:
# let's slice up our original DF to get counts from each public health unit
conpos_df1 = conpos_df[['_id','Reporting_PHU','Outcome1']]

In [9]:
# need to fix all these so they write column names properly
resolved = conpos_df1[conpos_df1.Outcome1 == 'Resolved'].groupby(['Reporting_PHU']).agg(['count']).drop(['Outcome1'], axis=1)
resolved.to_csv('data/ontario_covid_resolved_by_phu.csv')
#resolved.head()
not_resolved = conpos_df1[conpos_df1.Outcome1 == 'Not Resolved'].groupby(['Reporting_PHU']).agg(['count']).drop(['Outcome1'], axis=1)
not_resolved.to_csv('data/ontario_covid_active_by_phu.csv')
#not_resolved.head()
fatal = conpos_df1[conpos_df1.Outcome1 == 'Fatal'].groupby(['Reporting_PHU']).agg(['count']).drop(['Outcome1'], axis=1)
fatal.to_csv('data/ontario_covid_deaths_by_phu.csv')
#fatal.head()

In [10]:
# sudbury, kingston, york, wellington, and huron are all named wrong in the province's PHU shapefile 
# either fix the shapefile/geojson (done) or rename all of them in the df
# conpos_df1[conpos_df1['Reporting_PHU'].str.contains("Kingston")]
# e.g.
# fix kingston
# conpos_df1['Reporting_PHU'] = conpos_df1['Reporting_PHU'].replace('Kingston, Frontenac and Lennox and Addington Health Unit', 'Kingston, Frontenac and Lennox & Addington Public Health Unit')

In [11]:
# filter the whole dataset for deaths
deaths = conpos_df[conpos_df.Outcome1 == 'Fatal']
deaths.to_csv('data/ontario_conpos_deaths_by_phu.csv')

In [12]:
# clean up the top row in the csv first
active_cases = pd.read_csv("data/ontario_covid_active_by_phu.csv")
fatal_cases = pd.read_csv("data/ontario_covid_deaths_by_phu.csv")
resolved_cases = pd.read_csv("data/ontario_covid_resolved_by_phu.csv")
active_cases.head()

Unnamed: 0.1,Unnamed: 0,_id
0,,count
1,Reporting_PHU,


In [13]:
# fix the column names in the csv first
# explain how to generate the geojson files from shapefiles (and how to align file names with the df)
map_ontario = folium.Map(location=[43.653963, -79.387207], zoom_start=10)
ontario_geo = "./geodata/ontario_phu.geojson"
folium.Choropleth(
    geo_data=ontario_geo,
    data = active_cases,
    columns=['Reporting_PHU','count'],
    key_on='feature.properties.PHU_NAME_E',
    fill_color='YlOrRd',
    fill_opacity=0.7, 
    line_opacity=0.2,
    legend_name='Active Cases by Reporting PHU').add_to(map_ontario)   
    
map_ontario

KeyError: "None of ['Reporting_PHU'] are in the columns"

In [14]:
# some other stuff we can do if we want to merge the phu boundaries with the covid dataset
shapefile_data = gpd.read_file("geodata/Ministry_of_Health_Public_Health_Unit_Boundary.shp")
#print(shapefile_data)
#geojson_data = gpd.read_file("geodata/ontario_phu.geojson")
#print(geojson_data)
shapefile_raw = pd.DataFrame()
shapefile_raw = shapefile_raw.append(shapefile_data)
# shapefile_raw.head()

In [15]:
# set up this new df for use with kepler
kepler_df = conpos_df.drop(columns=['Row_ID', 
                             'Case_Reported_Date', 
                             'Test_Reported_Date', 
                             'Specimen_Date',
                             'Reporting_PHU_Address',
                             'Reporting_PHU_Postal_Code',
                             'Reporting_PHU_Website',
                            ])

In [16]:
# reconfigure the accurate episode date so we can use with a time slider
kepler_df['Accurate_Episode_Date'] = kepler_df['Accurate_Episode_Date'].astype(str) + ' 0:00'

In [17]:
kepler_df.head()

Unnamed: 0,_id,Accurate_Episode_Date,Age_Group,Client_Gender,Case_AcquisitionInfo,Outcome1,Outbreak_Related,Reporting_PHU_ID,Reporting_PHU,Reporting_PHU_City,Reporting_PHU_Latitude,Reporting_PHU_Longitude
78360,357584,2021-03-29 0:00,<20,FEMALE,CC,Resolved,,2251,Ottawa Public Health,Ottawa,45.345665,-75.763912
77862,357583,2021-03-29 0:00,<20,FEMALE,CC,Resolved,,2251,Ottawa Public Health,Ottawa,45.345665,-75.763912
77649,357582,2021-03-29 0:00,<20,FEMALE,CC,Resolved,,2251,Ottawa Public Health,Ottawa,45.345665,-75.763912
77457,357581,2021-03-29 0:00,<20,FEMALE,CC,Resolved,,2251,Ottawa Public Health,Ottawa,45.345665,-75.763912
77229,347809,2021-03-25 0:00,<20,FEMALE,CC,Resolved,,2261,Sudbury & District Health Unit,Sudbury,46.466092,-80.998059


In [18]:
#kepler_covid_df = pd.merge(shapefile_raw,df, on=['PHU'])

In [19]:
kepler_map = KeplerGl(height = 800, data={'df': kepler_df})
# uncomment this to use a specific kepler config file                      , config=current_config)
# current_config = kepler_map.config
kepler_map

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter


KeplerGl(data={'df':           _id Accurate_Episode_Date Age_Group Client_Gender  \
78360  357584       2021-0…

In [20]:
# use this to get the kepler config
current_config = kepler_map.config

In [21]:
# use to save to html file

kepler_map.save_to_html(data={'df': kepler_df}, config=current_config, file_name="html/kepler_map.html")

Map saved to html/kepler_map.html!
