In [2]:
import pandas as pd
import requests
import urllib.parse

In [6]:
# This is the GoogleAPI Key for the geocode aspect - to convert the address into lat long for mapping
apikey = 'Insert yours here'

<h3>Data Extract</h3>

Get the csv file from the NSW Health website <br>
https://www.health.nsw.gov.au/Infectious/covid-19/Pages/case-locations-and-alerts.aspx

I used the CSV file for the 'case locations' - you can get that from
<img src="csvwebsite.PNG">

In [20]:
##I renamed the NSW Heath file to "cases{date}"
cases = pd.read_csv("cases210713.csv")

In [21]:
cases.columns = ['update','type','suburb','venue','address','exposure','advice']
cases.shape

(322, 7)

In [22]:
cases.head()

Unnamed: 0,update,type,suburb,venue,address,exposure,advice
0,12/07/2021,Get tested immediately. Self-isolate until you...,Bankstown,Big W Bankstown Bankstown Central,1 A North Terrrace,11am to 11:30am on Friday 9 July 2021,Anyone who attended this venue is a ...
1,12/07/2021,Get tested immediately. Self-isolate until you...,Bankstown,Kmart Bankstown Bankstown Central,1 North Terrrace,11am to 11:30am on Friday 9 July 2021,Anyone who attended this venue is a ...
2,12/07/2021,Get tested immediately. Self-isolate until you...,Bankstown,Pharmacy 4 Less Bankstown Central,North Terrace Shop SP247,9am to 9:15am on Friday 9 July 2021,Anyone who attended this venue is a ...
3,12/07/2021,Get tested immediately and self-isolate for 14...,Belmore,Belmore Medical Centre,481 Burwood Ave,10am to 11am on Monday 5 July 20216:30pm to 7:...,Anyone who attended this venue is a ...
4,12/07/2021,Get tested immediately and self-isolate for 14...,Bondi Junction,99 Bikes Bondi Junction,228 Oxford Street,12:45pm to 2:45pm on Saturday 10 July 2021,Anyone who attended this venue is a ...


<h3>Data Extract</h3>

Geocode the address in the CSV into lat long using the Google Mapping GeoCode API

Note you need to activate the API in the developer console

Read more here 

https://developers.google.com/maps/documentation/geocoding/overview

In [23]:
#Geocode the address in the CSV into lat long using the Google Mapping GeoCode API

def getlatlong(address,suburb):
    address = urllib.parse.quote('{},{}'.format(address,suburb))
    try:
        response = requests.get('https://maps.googleapis.com/maps/api/geocode/json?address={},NSW,Australia&key={}'.format(address,apikey))
        resp_json_payload = response.json()
        latlong = resp_json_payload['results'][0]['geometry']['location']
    except:
        latlong = {'lat': 0, 'lng': 0}
    return latlong['lat'],latlong['lng']

# Test on one location
# getlatlong('Pennant Hills Road and Carlingford Road','Carlingford')

#Run on the entire Dataset
cases[['lat','lng']] = cases.apply(lambda x: getlatlong(x['address'], x['suburb']), axis=1, result_type="expand")

In [27]:
#Check if any entries need a manual fix
cases[cases['lat']==0]

Unnamed: 0,update,type,suburb,venue,address,exposure,advice,lat,lng


In [25]:
#Save to ensure that geocoding is not required again
cases.to_pickle("cases210713.pkl")

<h3>Data Prep</h3>

This creates the code needed for Folium to produce the maps

The raw csv is split so that each row is for each hour of each day

So an exposure site that was a Case Location between 1pm and 4.30pm will be represented as 4 rows of data

In [30]:
#split multiday entries - create a row per day
cases['temp'] = cases['exposure'].str.replace('2021','2021~').str[:-1]
print(cases.shape)
cases['tempexposure'] = cases['temp'].str.split('~')
print(cases.shape)
cases.drop('temp', axis='columns', inplace=True)
print(cases.shape)
cases = cases.explode('tempexposure')
print(cases.shape)

(322, 10)
(322, 11)
(322, 10)
(469, 10)


In [31]:
#Create time entires
cases['tempexposure'] = cases['tempexposure'].str.replace('All day', '12am to 11:59pm', regex=False)
print(cases.shape)
cases['tempexposure'] = cases['tempexposure'].str.replace(r' (midnight) ', ' ', regex=False)
print(cases.shape)
cases['tempexposure'] = cases['tempexposure'].str.replace(r' (noon) ', ' ', regex=False)
print(cases.shape)
cases[['times','date']] = cases['tempexposure'].str.split(' on ', 1, expand=True)
print(cases.shape)
cases[['times_start','times_end']] = cases['times'].str.split(' to ', 1, expand=True)
print(cases.shape)
cases['times_start'] = pd.to_datetime(cases['times_start'], errors="ignore")
cases['times_end'] = pd.to_datetime(cases['times_end'], errors="ignore")

(469, 10)
(469, 10)
(469, 10)
(469, 12)
(469, 14)


In [32]:
# Create a row per hour per site 
def getlist(x, y):
    offset = 0
    start = int(x.split(':')[0].replace('pm','').replace('am',''))
    end   = int(y.split(':')[0].replace('pm','').replace('am',''))
    if 'pm' in x:
        start = start + 12
    if 'pm' in y:
        end = end + 12
    if '12' in x:
        if 'am' in x:
            start = 0
    if '12' in y:
        if 'pm' in y:
            end = 12
    if ':' in y:
        if int(y.split(':')[1].replace('pm','').replace('am','')) > 10:
            offset = 1
    return list(range(start,end+offset))

# getlist('10:50am','12:10pm')
cases['hours'] = cases.apply(lambda x: getlist(x['times_start'], x['times_end']), axis=1)
cases = cases.explode('hours')
print(cases.shape)

(1530, 15)


In [33]:
#let's look at the data
cases.iloc[30:40]

Unnamed: 0,update,type,suburb,venue,address,exposure,advice,lat,lng,tempexposure,times,date,times_start,times_end,hours
16,12/07/2021,Get tested immediately and self-isolate for 14...,Fairfield,iMedic iCare Medical Centre,107 Ware Street,8:30am to 6:30pm on Saturday 10 July 20218:30a...,Anyone who attended this venue is a ...,-33.869817,150.954146,8:30am to 6:30pm on Saturday 10 July 2021,8:30am to 6:30pm,Saturday 10 July 2021,8:30am,6:30pm,8
16,12/07/2021,Get tested immediately and self-isolate for 14...,Fairfield,iMedic iCare Medical Centre,107 Ware Street,8:30am to 6:30pm on Saturday 10 July 20218:30a...,Anyone who attended this venue is a ...,-33.869817,150.954146,8:30am to 6:30pm on Saturday 10 July 2021,8:30am to 6:30pm,Saturday 10 July 2021,8:30am,6:30pm,9
16,12/07/2021,Get tested immediately and self-isolate for 14...,Fairfield,iMedic iCare Medical Centre,107 Ware Street,8:30am to 6:30pm on Saturday 10 July 20218:30a...,Anyone who attended this venue is a ...,-33.869817,150.954146,8:30am to 6:30pm on Saturday 10 July 2021,8:30am to 6:30pm,Saturday 10 July 2021,8:30am,6:30pm,10
16,12/07/2021,Get tested immediately and self-isolate for 14...,Fairfield,iMedic iCare Medical Centre,107 Ware Street,8:30am to 6:30pm on Saturday 10 July 20218:30a...,Anyone who attended this venue is a ...,-33.869817,150.954146,8:30am to 6:30pm on Saturday 10 July 2021,8:30am to 6:30pm,Saturday 10 July 2021,8:30am,6:30pm,11
16,12/07/2021,Get tested immediately and self-isolate for 14...,Fairfield,iMedic iCare Medical Centre,107 Ware Street,8:30am to 6:30pm on Saturday 10 July 20218:30a...,Anyone who attended this venue is a ...,-33.869817,150.954146,8:30am to 6:30pm on Saturday 10 July 2021,8:30am to 6:30pm,Saturday 10 July 2021,8:30am,6:30pm,12
16,12/07/2021,Get tested immediately and self-isolate for 14...,Fairfield,iMedic iCare Medical Centre,107 Ware Street,8:30am to 6:30pm on Saturday 10 July 20218:30a...,Anyone who attended this venue is a ...,-33.869817,150.954146,8:30am to 6:30pm on Saturday 10 July 2021,8:30am to 6:30pm,Saturday 10 July 2021,8:30am,6:30pm,13
16,12/07/2021,Get tested immediately and self-isolate for 14...,Fairfield,iMedic iCare Medical Centre,107 Ware Street,8:30am to 6:30pm on Saturday 10 July 20218:30a...,Anyone who attended this venue is a ...,-33.869817,150.954146,8:30am to 6:30pm on Saturday 10 July 2021,8:30am to 6:30pm,Saturday 10 July 2021,8:30am,6:30pm,14
16,12/07/2021,Get tested immediately and self-isolate for 14...,Fairfield,iMedic iCare Medical Centre,107 Ware Street,8:30am to 6:30pm on Saturday 10 July 20218:30a...,Anyone who attended this venue is a ...,-33.869817,150.954146,8:30am to 6:30pm on Saturday 10 July 2021,8:30am to 6:30pm,Saturday 10 July 2021,8:30am,6:30pm,15
16,12/07/2021,Get tested immediately and self-isolate for 14...,Fairfield,iMedic iCare Medical Centre,107 Ware Street,8:30am to 6:30pm on Saturday 10 July 20218:30a...,Anyone who attended this venue is a ...,-33.869817,150.954146,8:30am to 6:30pm on Saturday 10 July 2021,8:30am to 6:30pm,Saturday 10 July 2021,8:30am,6:30pm,16
16,12/07/2021,Get tested immediately and self-isolate for 14...,Fairfield,iMedic iCare Medical Centre,107 Ware Street,8:30am to 6:30pm on Saturday 10 July 20218:30a...,Anyone who attended this venue is a ...,-33.869817,150.954146,8:30am to 6:30pm on Saturday 10 July 2021,8:30am to 6:30pm,Saturday 10 July 2021,8:30am,6:30pm,17


In [37]:
#This is used as the index for the Timelapse maps
days = ['Sunday 20 June 2021',
'Monday 21 June 2021',
'Tuesday 22 June 2021',
'Wednesday 23 June 2021',
'Thursday 24 June 2021',
'Friday 25 June 2021',
'Saturday 26 June 2021',
'Sunday 27 June 2021',
'Monday 28 June 2021',
'Tuesday 29 June 2021',
'Wednesday 30 June 2021',
'Thursday 1 July 2021',
'Friday 2 July 2021',
'Saturday 3 July 2021',
'Sunday 4 July 2021',
'Monday 5 July 2021',
'Tuesday 6 July 2021',
'Wednesday 7 July 2021',
'Thursday 8 July 2021',
'Friday 9 July 2021',
'Saturday 10 July 2021',
'Sunday 11 July 2021',
'Monday 12 July 2021']
len(days)

23

In [42]:
# Create Timelapse map

import folium
from folium.plugins import HeatMapWithTime

geo_data = []
m=folium.Map(location=[-33.8777204, 151.0518224],zoom_start=11)
for day in days:
    df_geo = cases.loc[(cases['date'] == day)][['lat','lng']].copy()
    df_geo['weight'] = 0.4
    geo_data.append(df_geo.values.tolist())
hm = HeatMapWithTime(geo_data, index=days, auto_play=True, radius=20)
hm.add_to(m)
m

<h3>Create a gif/movie of the hourly movements</h3>

This creates a static map for each hour, and then stitches it up using PIL into a gif

As Folium only creates HTML graphs, Selenium (an automated browser) is then used to open each one and create a png out of it through a screenshot

PIL is then used to stitch the individual 

In [43]:
# Create HTML Maps

from folium.plugins import HeatMap

def createmap_day_time(iday,itime):
    day = days[iday]
    m2=folium.Map(location=[-33.8777204, 151.0518224],zoom_start=11,height='90%')
    df_cases = cases.loc[(cases['date'] == day)&(cases['hours'] == itime)].copy()
    df_geo = df_cases[['lat','lng']].copy()
    df_geo['weight'] = 0.4
    HeatMap(data=df_geo, radius=20).add_to(m2)
    title_html = f'''<h3 align="center" style="font-size:20px">
                            <b>Covid-19 Case locations at {time}:00 on {day}</b></h3>
                         '''
    m2.get_root().html.add_child(folium.Element(title_html))
    #the :02 format is used so that the order is correct otherwise GLOB orders it as 0, 1, 10, 11, ..., 2, 20, 21,... 3,4,5
    m2.save(f'maps/{iday:02}-{itime:02}.html')
    return m2
    

for day in range(0,len(days)):
    for time in range(0,24):
        createmap_day_time(day,time)


In [45]:
# Convert HTML into PNG

from selenium import webdriver
import time
browser = webdriver.Chrome()
browser.set_window_size(1080,800)

for day in range(0,len(days)):
    for hour in range(0,24):
#         update filepath as chrome would display the URL
        file = f'file:///Z://Python//Covid%20Map//maps//{day:02}-{hour:02}.html'
        browser.get(file)
        time.sleep(2)
        browser.save_screenshot(f'maps_png//{day:02}-{hour:02}.png')

browser.close()

In [None]:
#Stitch the PNG into a GIF using PIL

from PIL import Image 
import glob

frames = []
    
# Use Glob to get a list of all the png files
images = glob.glob('maps_png/*.png')

# Loop through image files to open, resize them and append them to frames
for i in sorted(images): 
    im = Image.open(i)
    w, h = im.size
    im = im.crop((0, 0, w-50, h))
    im = im.resize((550,357),Image.ANTIALIAS)
    frames.append(im.copy())

# Save frames/ stitched images as .gif
frames[0].save('covidheatmap.gif', format='GIF', append_images=frames[1:], save_all=True,
               duration=100, loop=0)

In [None]:
!ffmpeg -i covidheatmap.gif -movflags faststart -pix_fmt yuv420p -vf "scale=trunc(iw/2)*2:trunc(ih/2)*2" video.mp4