# Coronavirus Map

#### Author: Chandana Karunaratne
#### Date: 28 April 2020
#### Description: This code generates an interactive map of coronavirus cases in Ontario, Canada, using daily-updated data from the Government of Ontario Data Catalogue.


In [1]:
# Print a fixed greeting; this cell defines no names, so no later cell depends on it.
print("Hello, friend")

Hello, friend


In [2]:
import json
import urllib.request
from datetime import datetime
from io import StringIO

import bs4
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.io as pio


In [3]:
# Fetch the raw case data from the Ontario Data Catalogue API (the 'limit' query parameter is set to 100,000):
# Source: https://data.ontario.ca/dataset/confirmed-positive-cases-of-covid-19-in-ontario/resource/455fd63b-603d-4608-8216-7d8647f43350

api_url = "https://data.ontario.ca/api/3/action/datastore_search?resource_id=455fd63b-603d-4608-8216-7d8647f43350&limit=100000"

# The response body is kept as raw bytes; decoding happens in a later step.
with urllib.request.urlopen(api_url) as response:
    raw_data_online = response.read()


In [4]:
# Decode the downloaded bytes into text (UTF-8):

raw_data_string = raw_data_online.decode('utf-8')


In [5]:
# Extract the case records from the API response by parsing it as JSON, rather than slicing the
# text on '[' and ']' characters (which breaks as soon as any field value contains a bracket, or
# the response gains another array ahead of the records).
# (CKAN's 'datastore_search' action returns the rows under result -> records. Each record is
# re-serialised on its own line so that "raw_data_json" can be written to a file and read back
# with 'lines=True'.)

api_records = json.loads(raw_data_string)['result']['records']

raw_data_json = '\n'.join(json.dumps(record) for record in api_records)


In [6]:
# Write the 'raw_data_json' string into a file:
# (The 'with' block closes the file even if the write fails, and the encoding is pinned to UTF-8
# so the file reads back the same way on every platform.)

with open(r"filepath/filename.json", "w", encoding="utf-8") as text_file:
    text_file.write(raw_data_json)


In [7]:
# Load the json file written in the previous step into a Pandas dataframe:
# 'lines=True' tells pandas to treat every line of the file as one json object (see documentation: https://pandas.pydata.org/pandas-docs/version/0.23.4/generated/pandas.read_json.html)

json_path = r'filepath/filename.json'

covid_df = pd.read_json(json_path, lines=True)


In [8]:
# Build a date tag such as '28_April' to embed in the csv file name:

current_date = datetime.today().strftime('%d_%B')

# Save a dated copy of the data as a csv file.
# The file name needs a '%s' placeholder for the date tag; without one the '%' operator raises
# "TypeError: not all arguments converted during string formatting".

covid_df.to_csv(r'filepath/filename_%s.csv' % current_date, encoding='utf-8')


In [9]:
# Number of rows in the dataframe (first entry of its shape):

covid_df.shape[0]


18310

In [10]:
# Preview the top of the dataframe (head() returns 5 rows by default):

covid_df.head()


Unnamed: 0,_id,Row_ID,Accurate_Episode_Date,Age_Group,Client_Gender,Case_AcquisitionInfo,Outcome1,Reporting_PHU,Reporting_PHU_Address,Reporting_PHU_City,Reporting_PHU_Postal_Code,Reporting_PHU_Website,Reporting_PHU_Latitude,Reporting_PHU_Longitude
0,1,1,2020-04-04T00:00:00,20s,MALE,Information pending,Resolved,Peel Public Health,7120 Hurontario Street,Mississauga,L5W 1N4,www.peelregion.ca/health/,43.647471,-79.708893
1,2,2,2020-04-04T00:00:00,60s,FEMALE,Information pending,Resolved,Peel Public Health,7120 Hurontario Street,Mississauga,L5W 1N4,www.peelregion.ca/health/,43.647471,-79.708893
2,3,3,2020-04-02T00:00:00,90s,FEMALE,Neither,Resolved,Windsor-Essex County Health Unit,1005 Ouellette Avenue,Windsor,N9A 4J8,www.wechu.org,42.308796,-83.03367
3,4,4,2020-03-31T00:00:00,30s,FEMALE,Neither,Resolved,Peterborough Public Health,185 King Street,Peterborough,K9J 2R8,www.peterboroughpublichealth.ca,44.301632,-78.321347
4,5,5,2020-04-03T00:00:00,50s,MALE,Information pending,Resolved,Peel Public Health,7120 Hurontario Street,Mississauga,L5W 1N4,www.peelregion.ca/health/,43.647471,-79.708893


In [11]:
# Count the cases for every ('Reporting_PHU', 'Accurate_Episode_Date') pair and convert the
# resulting series to a dataframe.
# The count column is named 'Count' explicitly here: the default name of the value_counts()
# result differs between pandas versions ('Accurate_Episode_Date' before 2.0, 'count' from 2.0),
# so relying on it makes a later rename silently do nothing on newer versions.

covid_count_df = covid_df.groupby('Reporting_PHU')['Accurate_Episode_Date'].value_counts().to_frame('Count')


In [12]:
# Give the counts column a descriptive name ('Accurate_Episode_Date' -> 'Count'):

covid_count_df = covid_count_df.rename(columns={'Accurate_Episode_Date': 'Count'})


In [13]:
# Move the hierarchical row index levels back into ordinary columns:

covid_count_df = covid_count_df.reset_index()


In [14]:
# Build one row per PHU, from which latitude and longitude are taken later
# (first() keeps the first non-null value of every column within each 'Reporting_PHU' group):

first_values_per_phu = covid_df.groupby('Reporting_PHU').first()

PHU_df = first_values_per_phu.reset_index()


In [15]:
# Keep only the PHU name and its coordinates:

phu_location_columns = ['Reporting_PHU', 'Reporting_PHU_Latitude', 'Reporting_PHU_Longitude']

PHU_df = PHU_df[phu_location_columns]


In [16]:
# Attach the PHU coordinates to every count row (left join keyed on the PHU name in both frames):

join_key = 'Reporting_PHU'

covid_final_df = pd.merge(covid_count_df, PHU_df, how='left', left_on=join_key, right_on=join_key)


In [17]:
# If 'Accurate_Episode_Date' does not feature year in YYYY format (i.e. if it does not contain '2020'),
# then append '20' to end of all dates so that date can be inferred.
# One value is sampled to detect the format. It is taken by position from the first row, after
# casting to string, so the check also works on frames with fewer than three rows and on
# non-string values; an empty frame is left untouched.
# NOTE(review): this assumes every row uses the same date format as the sampled one - confirm.

episode_dates = covid_final_df['Accurate_Episode_Date'].astype(str)

if not episode_dates.empty and '2020' not in episode_dates.iloc[0]:
    covid_final_df['Accurate_Episode_Date'] = episode_dates + '20'


In [18]:
# Preview the merged counts (first 5 rows):
covid_final_df.head(5)

Unnamed: 0,Reporting_PHU,Accurate_Episode_Date,Count,Reporting_PHU_Latitude,Reporting_PHU_Longitude
0,Algoma Public Health Unit,2020-03-20T00:00:00,2,46.532373,-84.314836
1,Algoma Public Health Unit,2020-03-22T00:00:00,2,46.532373,-84.314836
2,Algoma Public Health Unit,2020-03-23T00:00:00,2,46.532373,-84.314836
3,Algoma Public Health Unit,2020-03-08T00:00:00,1,46.532373,-84.314836
4,Algoma Public Health Unit,2020-03-15T00:00:00,1,46.532373,-84.314836


In [19]:
# Convert string values in 'Accurate_Episode_Date' to datetime values.
# ('infer_datetime_format' is no longer passed: it is deprecated since pandas 2.0, where format
# inference is the default behaviour, and passing it only triggers a warning.)

covid_final_df['Accurate_Episode_Date'] = pd.to_datetime(covid_final_df['Accurate_Episode_Date'])


In [20]:
# Order the rows chronologically, then alphabetically by PHU within each date, and replace the
# old index with a fresh 0..n-1 index:

sort_columns = ["Accurate_Episode_Date", "Reporting_PHU"]

covid_final_df = (
    covid_final_df
    .sort_values(by=sort_columns, ascending=[True, True])
    .reset_index(drop=True)
)


In [21]:
# Preview the sorted counts (first 5 rows):
covid_final_df.head(5)

Unnamed: 0,Reporting_PHU,Accurate_Episode_Date,Count,Reporting_PHU_Latitude,Reporting_PHU_Longitude
0,Peel Public Health,2020-01-19,1,43.647471,-79.708893
1,Toronto Public Health,2020-01-21,1,43.656591,-79.379358
2,Toronto Public Health,2020-01-22,1,43.656591,-79.379358
3,Middlesex-London Health Unit,2020-01-24,1,42.981468,-81.254016
4,Hastings and Prince Edward Counties Health Unit,2020-02-01,1,44.186674,-77.391446


In [22]:
# Format the date in the row labelled 0 as e.g. '19 January' (used later in the chart title):

earliest_episode = covid_final_df['Accurate_Episode_Date'].loc[0]
first_date = earliest_episode.strftime('%d %B')

# Drop a single leading 0 from the day, so that '01 January' becomes '1 January':

first_date = first_date[1:] if first_date.startswith('0') else first_date


In [23]:
# Replace the datetime values with short 'day month' text labels (e.g. '19 Jan') to enable plotly time-series plot:

episode_labels = covid_final_df['Accurate_Episode_Date'].dt.strftime('%d %b')

covid_final_df['Accurate_Episode_Date'] = episode_labels


In [24]:
# Give the columns reader-friendly names for the map. The names are assigned by position, so
# the list must have one entry per existing column, in the existing column order:

display_column_names = [
    'Public Health Unit',
    'Date',
    'Number of Cases',
    'PHU Latitude',
    'PHU Longitude',
]

covid_final_df.columns = display_column_names


In [25]:
# Format today's date as e.g. '28 April' (used as part of the chart title):

today_label = datetime.today().strftime('%d %B')

# Drop a single leading 0 from the day, so that '04 May' becomes '4 May':

current_date = today_label[1:] if today_label.startswith('0') else today_label


In [None]:
# Build the animated map: one animation frame per 'Date' value and one marker per Public Health
# Unit, placed at the PHU coordinates and sized by 'Number of Cases'.

map_title = "Interactive Map of Coronavirus Cases in Ontario (%s - %s 2020)" % (first_date, current_date)

fig = px.scatter_mapbox(
    covid_final_df,
    lat="PHU Latitude",
    lon="PHU Longitude",
    size="Number of Cases",
    hover_name="Public Health Unit",
    animation_frame="Date",
    color_discrete_sequence=["crimson"],
    title=map_title,
    zoom=5,
    height=600,
)

# Apply all layout tweaks in one call: Open Street Map as base map, margins on top, left and
# right, and a centered title with a larger font.

fig.update_layout(
    mapbox_style="open-street-map",
    margin={"r": 150, "t": 40, "l": 150, "b": 0},
    title={"x": 0.5, "xanchor": "center", "font": {"size": 20}},
)

# Settings attached to the first button of the first update menu:
# NOTE(review): presumably the 'play' button that plotly express adds for animations - confirm.

play_settings = fig.layout.updatemenus[0].buttons[0].args[1]

# Time spent on each frame and on the transition between frames (both in milliseconds):

play_settings['frame']['duration'] = 200
play_settings['transition']['duration'] = 200

fig.show()


In [27]:
# Export the chart as a standalone html file to use on a webpage:

# Source: https://towardsdatascience.com/how-to-create-a-plotly-visualization-and-embed-it-on-websites-517c1a78568b
# Also see: https://plotly.com/chart-studio-help/embed-graphs-in-websites/

# Ensure that filename is 'index.html'
# 'auto_open' controls whether the html file is opened in a new browser tab right after it is written
# 'auto_play' controls whether the animation starts playing as soon as the html file is opened

html_export_options = {"auto_open": False, "auto_play": False}

pio.write_html(fig, file='filepath/index.html', **html_export_options)


In [28]:
# To add a footer to the 'index.html' file, use the following steps:

# Read the 'index.html' file and load it in Beautiful Soup.
# The file is read as UTF-8 rather than with the platform default encoding, which can fail or
# garble characters on systems whose default is not UTF-8.
# NOTE(review): assumes plotly wrote the file as UTF-8 - confirm.

with open("filepath/index.html", encoding="utf-8") as inf:
    txt = inf.read()

# The parser is named explicitly: when it is omitted, Beautiful Soup picks whichever parser is
# installed (and warns), so the parsed document can differ between machines.

soup = bs4.BeautifulSoup(txt, "html.parser")
    

In [29]:
# Render the parsed Beautiful Soup document back into an html string:

html_text = soup.decode()


In [30]:
# Text to look for in the html string: the closing body and html tags, each preceded by a newline.

old_text = '\n</body>\n</html>'

# Text to put in its place: a footer (a description paragraph followed by 'Home', 'Data Source'
# and 'Source Code' links) and then the closing body and html tags.
# The adjacent string literals below are joined by Python into one single string.

new_text = (
    '<footer><p>'
    '<a style="text-decoration:none; font-family:verdana; font-size:15px; color:#403E3E" target="_blank"> '
    '<br/><br/><br/>This is an interactive map of coronavirus cases in Ontario, Canada, as reported by Public Health Units (see below for source of data). '
    'Press the play button to see a time-lapse animation of the location of cases, and use the scroll wheel on your mouse to zoom in and out of the map. '
    'Hover over specific points on the map to see more information. <br/><br/><br/> </a>'
    '<a style="text-decoration:none; font-family:verdana; font-size:18px; color:#62605F" href="https://chandana.ca/" target="_blank">Home&emsp;|</a>'
    '<a style="text-decoration:none; font-family:verdana; font-size:18px; color:#62605F" href="https://data.ontario.ca/dataset/confirmed-positive-cases-of-covid-19-in-ontario" target="_blank">&emsp;Data Source&emsp;|</a>'
    '<a style="text-decoration:none; font-family:verdana; font-size:18px; color:#62605F" href="https://github.com/chandana-karunaratne/coronamap/blob/master/COVID_Analysis_GitHub.ipynb" target="_blank">&emsp;Source Code</a>'
    '  </p></footer></body></html>'
)

# Swap every occurrence of the old text for the new text (featuring the footer); if the old text
# is not present, the html string is left unchanged:

html_text = html_text.replace(old_text, new_text)


In [31]:
# Save html_text as 'index.html' file.
# The file is written as UTF-8 rather than with the platform default encoding, which can raise
# an encoding error or produce a garbled page on systems whose default is not UTF-8.
# NOTE(review): assumes the page declares UTF-8 as its charset - confirm.

with open("filepath/index.html", "w", encoding="utf-8") as out_file:
    out_file.write(html_text)
    

In [32]:
# Sources:
# 1) https://data.ontario.ca/dataset/confirmed-positive-cases-of-covid-19-in-ontario
# 2) https://towardsdatascience.com/how-to-create-a-plotly-visualization-and-embed-it-on-websites-517c1a78568b
# 3) https://plotly.com/python/scatter-plots-on-maps/
# 4) https://plotly.com/python/bubble-maps/
# 5) https://plotly.com/python-api-reference/generated/plotly.express.scatter_geo.html
# 6) https://plotly.com/python/mapbox-layers/
# 7) https://www.youtube.com/watch?v=RCUrpCpGZ5o&feature=youtu.be
# 8) https://github.com/Coding-with-Adam/Dash-by-Plotly/blob/master/Plotly_Graphs/Animated_Scatter/gender_ineq.py
# 9) https://github.com/plotly/plotly.py/pull/1447/commits/b8a731ea6e0d56203833c10e90926473ed783032
# 10) https://docs.python.org/3/library/datetime.html
# 11) https://stackoverflow.com/questions/35355225/edit-and-create-html-file-using-python
# 12) https://stackoverflow.com/questions/16523939/how-to-write-and-save-html-file-in-python
# 13) https://medium.com/@Alexander_H/removing-characters-before-after-and-in-the-middle-of-strings-fb4930cce76a
# 14) https://datatofish.com/load-json-pandas-dataframe/
# 15) https://stackoverflow.com/questions/30088006/loading-a-file-with-more-than-one-line-of-json-into-pandas
# 16) https://docs.ckan.org/en/latest/maintaining/datastore.html
# 17) https://stackoverflow.com/questions/3969726/attributeerror-module-object-has-no-attribute-urlopen
