In [None]:
!pip install pandas
!pip install geopandas
!pip install matplotlib
!pip install pydeck
!pip install beautifulsoup4
!pip install requests

from datetime import datetime, timedelta
from pathlib import Path

import geopandas as gpd
import pandas as pd
import pydeck
import requests
from bs4 import BeautifulSoup
from matplotlib import cm
from shapely.ops import cascaded_union, unary_union

In [3]:
# Source dataset page:
# https://data.nsw.gov.au/search/dataset/ds-nsw-ckan-aefcde60-3b0c-4bc0-9af1-6fe652944ec2/details?q=
endpoint = "https://data.nsw.gov.au/data/api/3/action/datastore_search_sql"

# Only records notified within the last 14 days are requested.
two_weeks_ago = datetime.now() - timedelta(days=14)
start = two_weeks_ago.strftime("%Y-%m-%d")
params = {
  "sql": (
      # Each fragment ends with a space so the concatenated statement keeps
      # its keywords separated from the quoted table identifier.
      'SELECT * '
      'FROM "21304414-1ff1-4243-a5d2-f52778048b29" '
      f"WHERE notification_date >= '{start}'"
  ),
}

# A timeout keeps the cell from hanging indefinitely if the API is unreachable.
response = requests.get(endpoint, params=params, timeout=60)
# Raises requests.HTTPError (carrying status code and URL) on a 4xx/5xx reply.
response.raise_for_status()

# One row per record returned by the query.
covid = pd.DataFrame(response.json()['result']['records'])
covid.tail()

Unnamed: 0,_id,_full_text,notification_date,postcode,lhd_2010_code,lhd_2010_name,lga_code19,lga_name19
2192,8960,'-07':2 '-31':3 '16260':8 '2021':1 '2150':4 'c...,2021-07-31,2150,X740,Western Sydney,16260,Parramatta (C)
2193,8961,'-07':2 '-31':3 '17420':8 '2021':1 '2153':4 'h...,2021-07-31,2153,X740,Western Sydney,17420,The Hills Shire (A)
2194,8962,'-07':2 '-31':3 '14900':9 '2021':1 '2174':4 'c...,2021-07-31,2174,X710,South Western Sydney,14900,Liverpool (C)
2195,8963,'-07':2 '-31':3 '14900':9 '2021':1 '2556':4 'c...,2021-07-31,2556,X710,South Western Sydney,14900,Liverpool (C)
2196,8964,'-07':2 '-31':3 '14900':9 '2021':1 '2170':4 'c...,2021-07-31,2170,X710,South Western Sydney,14900,Liverpool (C)


In [4]:
# Count the fetched records per postcode and label the resulting series
# "cases" so it can later be joined in as a column of that name.
cases = covid["postcode"].value_counts().rename("cases")
cases.head()

2165    181
2145    141
2176    129
2170     93
2195     87
Name: cases, dtype: int64

In [5]:
!git clone https://github.com/tonywr71/GeoJson-Data.git

fatal: destination path 'GeoJson-Data' already exists and is not an empty directory.


In [6]:
"""
nsw_loca_2: name of region (there exist duplicates)
"""

column_names=["nsw_loca_2", "state", "postcode", "lat", "long", "bound_n", "bound_e", "bound_s", "bound_w"]
suburb = pd.read_csv("GeoJson-Data/PostcodeData-final.txt", names=column_names, index_col="nsw_loca_2")
suburb.index = suburb.index.map(lambda s: s.upper())
suburb = suburb[suburb.state == "NSW"]
suburb.head()

Unnamed: 0_level_0,state,postcode,lat,long,bound_n,bound_e,bound_s,bound_w
nsw_loca_2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ABBOTSBURY,NSW,2176,-33.8709405,150.8783824,-33.8464578,150.9188397,-33.8910441,150.8424476
ABBOTSFORD,NSW,2046,-33.8566687,151.1315813,-33.8436757,151.1498009,-33.8741901,151.1116136
ABERDEEN,NSW,2336,-32.1089351,151.1869325,-31.8380762,151.3346493,-32.2372644,150.5885727
ABERMAIN,NSW,2326,-32.787791,151.4552213,-32.7302144,151.5131447,-32.8543184,151.3957588
ABERNETHY,NSW,2325,-32.9380528,151.1869325,-32.7447487,151.4713226,-33.1299019,150.8310154


In [7]:
"""
nsw_loca_2: name of region (there exist duplicates)
nsw_loca_4: if there exists a duplicate, this specifies the region postcode
"""

geo = gpd.read_file("GeoJson-Data/suburb-10-nsw.geojson")
geo.head()

geo = geo.join(suburb, on="nsw_loca_2")
uniquely_named_suburbs = (geo.nsw_loca_4.astype(str) == "None")
duplicate_suburb_match = (geo.nsw_loca_4 == geo.postcode)
geo = geo[uniquely_named_suburbs | duplicate_suburb_match]

postcode_must_exist = geo.postcode.notna()
geo = geo[postcode_must_exist]

geo = geo.join(cases, on="postcode")
geo = geo.fillna({"cases": 0})

geo.head()

Unnamed: 0,id,lc_ply_pid,dt_create,dt_retire,loc_pid,nsw_locali,nsw_loca_1,nsw_loca_2,nsw_loca_3,nsw_loca_4,nsw_loca_5,nsw_loca_6,nsw_loca_7,geometry,state,postcode,lat,long,bound_n,bound_e,bound_s,bound_w,cases
0,ckan_91e70237_d9d1_4719_a82f_e71b811154c6.1,25113,2015-09-03,,NSW1147,2015-11-10,,CROYDON PARK,,,G,,1,"POLYGON ((151.11700 -33.89152, 151.11635 -33.8...",NSW,2133,-33.8967345,151.1082813,-33.8913554,151.117,-33.9036517,151.0890101,11.0
1,ckan_91e70237_d9d1_4719_a82f_e71b811154c6.2,25065,2015-09-03,,NSW2557,2015-11-10,,MAYFIELD WEST,,,G,,1,"POLYGON ((151.73345 -32.87974, 151.73255 -32.8...",NSW,2304,-32.877603,151.7356433,-32.8231919,151.7943059,-32.9086939,151.6851619,0.0
2,ckan_91e70237_d9d1_4719_a82f_e71b811154c6.3,25115,2015-09-03,,NSW797,2015-11-10,,CAMPSIE,,,G,,1,"POLYGON ((151.11002 -33.90297, 151.11062 -33.9...",NSW,2194,-33.9105784,151.1024569,-33.9017586,151.1156778,-33.9269853,151.0911132,73.0
4,ckan_91e70237_d9d1_4719_a82f_e71b811154c6.5,24884,2015-09-03,,NSW812,2015-11-10,,CANTERBURY,,,G,,1,"POLYGON ((151.12351 -33.90672, 151.12596 -33.9...",NSW,2193,-33.909932,151.1199309,-33.8955206,151.1349917,-33.9233776,151.1082281,23.0
5,ckan_91e70237_d9d1_4719_a82f_e71b811154c6.6,24903,2015-09-03,,NSW1012,2015-11-10,,CONCORD,,,G,,1,"POLYGON ((151.10366 -33.84280, 151.10457 -33.8...",NSW,2137,-33.8543494,151.1024569,-33.8344387,151.1218355,-33.8696826,151.0826747,1.0


In [8]:
# A region can have many postcodes: for example, Strathfield has 2129, 2135
# A postcode may have many regions: for example, 2165 has Fairfield, Fairfield West, Fairfield Heights, Fairfield East
#
# Collapse to one row per postcode: collect the region names, merge the
# region geometries into one, and take the first value of the remaining
# columns within each postcode group.
pid = geo.groupby("postcode").aggregate({
  "nsw_loca_2": tuple,
  # unary_union is the supported replacement for the deprecated
  # shapely.ops.cascaded_union.
  "geometry": unary_union,
  "lat": "first",
  "long": "first",
  "cases": "first",
})

# Cast to numeric types: cases to int, coordinates to float.
pid = pid.astype({"cases": int, "lat": float, "long": float})

# Re-wrap as a GeoDataFrame, carrying over the source frame's CRS.
pid = gpd.GeoDataFrame(pid, crs=geo.crs)

In [17]:
def get_label(row: pd.Series) -> str:
  """Return the map label for a row: its case count as text, or "" when zero.

  Args:
    row: a row exposing a ``cases`` value.

  Returns:
    ``str(row.cases)``, or the empty string if ``row.cases == 0``.
  """
  count = row.cases
  return "" if count == 0 else str(count)


def get_suburbs(row: pd.Series) -> str:
  """Return the row's region names in title case, joined with ", ".

  Args:
    row: a row whose ``nsw_loca_2`` value is an iterable of name strings.

  Returns:
    A single comma-separated string of the title-cased names.
  """
  titled_names = [name.title() for name in row.nsw_loca_2]
  return ', '.join(titled_names)


pid["postcode"] = pid.index
pid["suburbs"] = pid.apply(get_suburbs, axis=1)
pid["label"] = pid.apply(get_label, axis=1)

# https://matplotlib.org/stable/tutorials/colors/colormaps.html#sphx-glr-tutorials-colors-colormaps-py
colour_map = cm.get_cmap('YlOrRd')
normalised_cases = pid.cases / pid.cases.max()
pid[["r", "g", "b", "a"]] = colour_map(normalised_cases, alpha=0.3, bytes=True)

pid.head()

Unnamed: 0_level_0,nsw_loca_2,geometry,lat,long,cases,label,r,g,b,a,postcode,suburbs
postcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2000,"(THE ROCKS, MILLERS POINT, SYDNEY, DAWES POINT...","POLYGON ((151.20912 -33.87970, 151.20873 -33.8...",-33.870846,151.20733,2,2.0,255,253,201,76,2000,"The Rocks, Millers Point, Sydney, Dawes Point,..."
2006,"(CAMPERDOWN,)","POLYGON ((151.18748 -33.88472, 151.19057 -33.8...",-33.887766,151.188389,0,,255,255,204,76,2006,Camperdown
2007,"(ULTIMO,)","POLYGON ((151.19973 -33.87751, 151.20079 -33.8...",-33.882319,151.197131,0,,255,255,204,76,2007,Ultimo
2008,"(CHIPPENDALE, DARLINGTON)","POLYGON ((151.19882 -33.88862, 151.19761 -33.8...",-33.887887,151.196403,0,,255,255,204,76,2008,"Chippendale, Darlington"
2009,"(PYRMONT,)","POLYGON ((151.19807 -33.86859, 151.19724 -33.8...",-33.868789,151.194217,4,4.0,255,252,197,76,2009,Pyrmont


In [None]:
# https://deck.gl/docs/api-reference/layers/geojson-layer
# Postcode polygons, filled from the r/g/b/a columns of `pid`.
suburb_layer = pydeck.Layer(
  "GeoJsonLayer",
  pid,
  pickable=True,
  stroked=True,
  get_line_width=20,
  get_line_color=[0, 0, 0, 100],
  get_fill_color="[r, g, b, a]",
)

# https://deck.gl/docs/api-reference/layers/text-layer
# Case-count text placed at each postcode's long/lat; the label is empty for
# postcodes with zero cases.
text_layer = pydeck.Layer(
  "TextLayer",
  pid,
  get_position="[long, lat]",
  get_text="label",
  get_color=[0, 0, 0, 180],
)

# Initial camera position as (latitude, longitude).
sydney = -33.86785, 151.20732
view_state = pydeck.ViewState(
  latitude=sydney[0],
  longitude=sydney[1],
  zoom=11,
  pitch=0,
  bearing=0,
)

r = pydeck.Deck(
  layers=[suburb_layer, text_layer],
  initial_view_state=view_state,
  # Placeholders in braces are filled from the columns of `pid`.
  tooltip={
      "html": (
          "<b>Reported cases over the last 14 days</b>: {cases}<br>"
          "<b>Postcode</b>: {postcode}<br>"
          "<b>Suburb</b>: {suburbs}<br>"
      )
  },
  map_style="light",
)

outpath = Path('./public/index.html')
outpath.parent.mkdir(parents=True, exist_ok=True)
# Keep `outpath` pointing at the file we asked for. The return value of
# to_html() is deliberately not assigned to it, so later cells can rely on
# `outpath` being the written file's path.
r.to_html(outpath);

In [57]:
# Rewrite the <title> of the exported page.
# The parser is named explicitly so the result does not depend on which
# optional parsers are installed, and the file is read and written as UTF-8
# instead of the platform's default encoding.
with open(outpath, encoding="utf-8") as f:
  soup = BeautifulSoup(f, "html.parser")

soup.title.string = "COVID Heatmap NSW"

with open(outpath, 'w', encoding="utf-8") as f:
  f.write(str(soup))