## Environment setup

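Make sure the cluster is running Databricks Runtime (DBR) 17.1 or above; the `st_*` geospatial SQL functions used in this notebook require it.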

In [0]:
%pip install geopandas folium gdown h3
%restart_python

In [0]:
# Notebook parameters: the Unity Catalog catalog, schema and volume that every
# later cell uses to build table names and /Volumes paths.
dbutils.widgets.text("catalog", "", "Catalog")
dbutils.widgets.text("schema", "", "Schema")
dbutils.widgets.text("volume", "", "Volume")

catalog = dbutils.widgets.get("catalog")
schema = dbutils.widgets.get("schema")
volume = dbutils.widgets.get("volume")

# The widgets default to an empty string. Fail fast here instead of letting an
# empty value produce paths like /Volumes///... or table names like "..bronze_road".
_missing_widgets = [
    widget_name
    for widget_name, widget_value in (("catalog", catalog), ("schema", schema), ("volume", volume))
    if not widget_value.strip()
]
if _missing_widgets:
    raise ValueError(
        "Set the following notebook widget(s) before running the notebook: "
        + ", ".join(_missing_widgets)
    )

In [0]:
import gdown

# Google Drive share link of the Vicmap sample archive. fuzzy=True (below) lets
# gdown pull the file id out of a ".../file/d/<id>/view" style link.
url = 'https://drive.google.com/file/d/1WKK3v_QYSdv6nXld5mqHWN6xwNV33TM8/view?usp=drive_link'

# The archive is written straight into the Unity Catalog volume chosen via the widgets.
uc_volume_path = f'/Volumes/{catalog}/{schema}/{volume}/Vicmap_Sample.zip'

downloaded_path = gdown.download(url, uc_volume_path, quiet=False, fuzzy=True)

# Some gdown versions report a failed download by returning None instead of
# raising; stop here rather than failing later while opening a missing zip file.
if downloaded_path is None:
    raise RuntimeError(f"Download of the Vicmap sample archive to {uc_volume_path} failed")

In [0]:
import zipfile

# Root of the Unity Catalog volume; the archive is unpacked directly into it and
# later cells read the extracted files from below this path.
extract_path = f'/Volumes/{catalog}/{schema}/{volume}'

# Unpack every member of the downloaded archive; the context manager closes the
# zip file afterwards.
with zipfile.ZipFile(uc_volume_path, 'r') as sample_archive:
    sample_archive.extractall(extract_path)

## Exploring Vicmap Transport data

In [0]:
import geopandas as gpd
from pyspark.sql import SparkSession
from pyspark.sql.functions import col
from shapely import wkt
import pandas as pd

# Read the road-infrastructure shapefile that was extracted into the volume.
gdf_transport = gpd.read_file(f"{extract_path}/VicMap_Sample/TR_ROAD_INFRASTRUCTURE.shp")

# Replace the geometry objects with their WKT text so the column holds plain strings.
gdf_transport['geometry'] = gdf_transport.geometry.to_wkt()

# Spark is given a plain pandas DataFrame rather than the GeoDataFrame itself.
pdf_transport = pd.DataFrame(gdf_transport)
spark_df_transport = spark.createDataFrame(pdf_transport)

# Preview the Spark DataFrame in the notebook.
display(spark_df_transport)

# CONS_MAT and URBAN are removed before the write: they are void columns
# (Spark infers an all-null column as void).
bronze_road_infra_writer = (
    spark_df_transport
    .drop("CONS_MAT", "URBAN")
    .write
    .mode("overwrite")
    .option("overwriteSchema", "true")
)
bronze_road_infra_writer.saveAsTable(f"{catalog}.{schema}.bronze_road_infra")

Databricks data profile. Run in Databricks to view.

In [0]:
import geopandas as gpd
from pyspark.sql import SparkSession
from pyspark.sql.functions import col
from shapely import wkt
import pandas as pd

# Read the road shapefile that was extracted into the volume.
gdf_transport = gpd.read_file(f"{extract_path}/VicMap_Sample/TR_ROAD.shp")

# Store the geometry as WKT text instead of geometry objects.
gdf_transport['geometry'] = gdf_transport.geometry.to_wkt()

# Downgrade to a plain pandas DataFrame, then hand it to Spark.
pdf_transport = pd.DataFrame(gdf_transport)
spark_df_transport = spark.createDataFrame(pdf_transport)

# Preview the Spark DataFrame in the notebook.
display(spark_df_transport)

# These four columns are dropped before the table is (over)written.
# NOTE(review): presumably they are all-null (void) columns, as in the
# road-infrastructure ingest — confirm against the data.
bronze_road_writer = (
    spark_df_transport
    .drop("RD_SUF6", "RD_SUF7", "RDNAMEUSE7", "CONS_MAT")
    .write
    .mode("overwrite")
    .option("overwriteSchema", "true")
)
bronze_road_writer.saveAsTable(f"{catalog}.{schema}.bronze_road")

Databricks data profile. Run in Databricks to view.

In [0]:
# The bronze table was written to {catalog}.{schema}.bronze_road, so read it back
# from the same widget-driven location instead of a hardcoded catalog/schema.
# This is a Python cell because the table name is interpolated into the view
# definition, where SQL parameter markers may not be accepted.
bronze_road_table = f"{catalog}.{schema}.bronze_road"

# Temporary view exposing each road geometry as WKT in SRID 4326: the stored WKT
# is tagged as SRID 7899 and then transformed to 4326.
spark.sql(f"""
CREATE OR REPLACE TEMPORARY VIEW bronze_road_4326 AS
SELECT
  UFI, PFI, FTYPE_CODE, EZI_RDNAME, LEFT_LOC, RIGHT_LOC, FROM_UFI, TO_UFI,
  st_astext(st_transform(st_setsrid(st_geomfromwkt(geometry), 7899), 4326)) AS geom_4326
FROM {bronze_road_table}
""")

# Preview the first rows of the view.
display(spark.sql("SELECT * FROM bronze_road_4326 LIMIT 100"))

In [0]:
import folium
from folium import GeoJson
import pandas as pd

# Pull up to 5000 road geometries (WKT, SRID 4326) and their names from the temp view.
df_geom = spark.sql("SELECT geom_4326, EZI_RDNAME FROM bronze_road_4326 LIMIT 5000").toPandas()

# Empty base map (tiles=None); the Vicmap WMS layer below supplies the imagery.
# For OpenStreetMap tiles instead: folium.Map(location=[-37.8136, 144.9631], zoom_start=10)
m = folium.Map(location=[-37.81, 144.96], zoom_start=12, tiles=None)

vicmap_wms_layer = folium.raster_layers.WmsTileLayer(
    url="https://base.maps.vic.gov.au/service?",
    layers="CARTO_WM_256",  # WMS layer name
    name="Vicmap WMS",
    fmt="image/png",
    transparent=True,
    version="1.1.1",  # WMS version requested
    attr="Vicmap - Department of Transport and Planning, Victoria",
    overlay=True,
    control=True,
)
vicmap_wms_layer.add_to(m)

# One GeoJson overlay per road, with the road name as its tooltip.
for road_wkt, road_name in zip(df_geom['geom_4326'], df_geom['EZI_RDNAME']):
    folium.GeoJson(data=wkt.loads(road_wkt), tooltip=road_name).add_to(m)

# Render the map in the notebook.
display(m)

In [0]:
import folium
from folium import GeoJson
from shapely import wkt

def create_vicmap_basemap(
    location=(-37.81, 144.96),
    zoom_start=12,
    show_layer_control=True,
    **wms_kwargs
):
    """
    Create a Folium map whose only base layer is the Vicmap WMS basemap.

    Parameters:
        location: Centre of the map as (lat, lon).
        zoom_start: Initial zoom level.
        show_layer_control: Accepted for interface compatibility but currently
            not used; no folium.LayerControl() is added by this function. Add
            one to the returned map yourself (after all overlays) if needed.
        wms_kwargs: Extra keyword arguments for folium's WmsTileLayer. A key
            that matches one of the defaults below (e.g. ``layers``) overrides it.

    Returns:
        folium.Map object with the WMS layer already attached.
    """
    # tiles=None: no default tile layer, the WMS layer below is the basemap.
    m = folium.Map(location=location, zoom_start=zoom_start, tiles=None)

    # Defaults first, then caller overrides, so a duplicate key replaces the
    # default instead of raising "got multiple values for keyword argument".
    wms_options = {
        "url": "https://base.maps.vic.gov.au/service?",
        "layers": "CARTO_WM_256",
        "name": "Vicmap WMS",
        "fmt": "image/png",
        "transparent": True,
        "version": "1.1.1",
        "attr": "Vicmap - Department of Transport and Planning, Victoria",
        "overlay": True,
        "control": True,
    }
    wms_options.update(wms_kwargs)

    folium.raster_layers.WmsTileLayer(**wms_options).add_to(m)
    return m

# Example: build the basemap with the helper, then overlay the rows of df_geom.
m = create_vicmap_basemap(zoom_start=12)

# df_geom holds WKT geometries (geom_4326) and road names (EZI_RDNAME);
# each row becomes its own GeoJson overlay with the name as tooltip.
for road_wkt, road_name in zip(df_geom['geom_4326'], df_geom['EZI_RDNAME']):
    folium.GeoJson(data=wkt.loads(road_wkt), tooltip=road_name).add_to(m)

# Render the map in the notebook.
display(m)


In [0]:
# The bronze table was written to {catalog}.{schema}.bronze_road_infra, so read it
# back from the same widget-driven location instead of a hardcoded catalog/schema.
# This is a Python cell because the table name is interpolated into the view
# definition, where SQL parameter markers may not be accepted.
bronze_road_infra_table = f"{catalog}.{schema}.bronze_road_infra"

# Temporary view exposing each infrastructure geometry as WKT in SRID 4326: the
# stored WKT is tagged as SRID 7899 and then transformed to 4326.
spark.sql(f"""
CREATE OR REPLACE TEMPORARY VIEW bronze_road_infra_4326 AS
SELECT
  UFI, PFI, FTYPE_CODE,
  st_astext(st_transform(st_setsrid(st_geomfromwkt(geometry), 7899), 4326)) AS geom_4326
FROM {bronze_road_infra_table}
""")

# Preview the first rows of the view.
display(spark.sql("SELECT * FROM bronze_road_infra_4326 LIMIT 100"))

In [0]:
import folium
from folium import GeoJson
import pandas as pd
from shapely import wkt

# Pull up to 5000 infrastructure geometries (WKT, SRID 4326) with their feature type code.
df_geom = spark.sql("SELECT FTYPE_CODE, geom_4326 FROM bronze_road_infra_4326 LIMIT 5000").toPandas()

# Vicmap basemap from the helper defined earlier in the notebook.
m = create_vicmap_basemap(zoom_start=12)

# One GeoJson overlay per feature, with FTYPE_CODE as its tooltip.
for infra_wkt, ftype_code in zip(df_geom['geom_4326'], df_geom['FTYPE_CODE']):
    folium.GeoJson(data=wkt.loads(infra_wkt), tooltip=ftype_code).add_to(m)

# Render the map in the notebook.
display(m)

## Exploring Vicmap locality polygon

In [0]:
import geopandas as gpd
from pyspark.sql import SparkSession
from pyspark.sql.functions import col
from shapely import wkt
import pandas as pd

# Read the locality polygon shapefile that was extracted into the volume.
# (The variable names are reused from the transport cells; the data here are localities.)
gdf_transport = gpd.read_file(f"{extract_path}/VicMap_Sample/LOCALITY_POLYGON.shp")

# Store the geometry as WKT text instead of geometry objects.
gdf_transport['geometry'] = gdf_transport.geometry.to_wkt()

# Downgrade to a plain pandas DataFrame, then hand it to Spark.
pdf_transport = pd.DataFrame(gdf_transport)
spark_df_transport = spark.createDataFrame(pdf_transport)

# Preview the Spark DataFrame in the notebook.
display(spark_df_transport)

# All columns are kept; the table is overwritten, schema included.
bronze_locality_writer = (
    spark_df_transport
    .write
    .mode("overwrite")
    .option("overwriteSchema", "true")
)
bronze_locality_writer.saveAsTable(f"{catalog}.{schema}.bronze_locality")

In [0]:
%sql
-- Localities whose name contains "box hill" (case-insensitive), with the polygon
-- re-projected from SRID 7899 to 4326 and returned as WKT.
-- The table name is built from the `catalog` / `schema` notebook widgets so it
-- matches the location the bronze table was written to.
SELECT
  UFI,
  PFI,
  LOCALITY,
  st_astext(st_transform(st_setsrid(st_geomfromwkt(geometry), 7899), 4326)) AS geom_4326
FROM IDENTIFIER(:catalog || '.' || :schema || '.bronze_locality')
WHERE LOCALITY ILIKE '%box hill%'

In [0]:
import folium
from folium import GeoJson
import pandas as pd
from shapely import wkt

# _sqldf is the result of the preceding %sql cell; take at most 10 locality polygons.
df_geom = spark.sql("SELECT LOCALITY, geom_4326 FROM _sqldf LIMIT 10").toPandas()

# Vicmap basemap from the helper defined earlier in the notebook.
m = create_vicmap_basemap(zoom_start=12)

# One GeoJson overlay per locality polygon, with the locality name as tooltip.
for locality_wkt, locality_name in zip(df_geom['geom_4326'], df_geom['LOCALITY']):
    folium.GeoJson(data=wkt.loads(locality_wkt), tooltip=locality_name).add_to(m)

# Render the map in the notebook.
display(m)

In [0]:
%sql
-- Silver road infrastructure: geometry re-projected from SRID 7899 to 4326 (as WKT)
-- plus the resolution-9 H3 cell id of each point.
-- Table names are built from the `catalog` / `schema` notebook widgets so they
-- match the location the bronze table was written to.
CREATE OR REPLACE TABLE IDENTIFIER(:catalog || '.' || :schema || '.silver_road_infra') AS
SELECT
  UFI,
  PFI,
  FTYPE_CODE,
  st_astext(st_transform(st_setsrid(st_geomfromwkt(geometry), 7899), 4326)) AS geom_4326,
  h3_pointash3(st_astext(st_transform(st_setsrid(st_geomfromwkt(geometry), 7899), 4326)), 9) AS h3
FROM IDENTIFIER(:catalog || '.' || :schema || '.bronze_road_infra')

In [0]:
%sql
-- Inspect the silver road-infrastructure table (location taken from the notebook widgets).
SELECT * FROM IDENTIFIER(:catalog || '.' || :schema || '.silver_road_infra')

In [0]:
%sql
-- Silver roads: geometry re-projected from SRID 7899 to 4326 (as WKT), tessellated
-- at H3 resolution 9. inline() expands the tessellation into one row per chip;
-- later cells read its cellid column.
-- Table names are built from the `catalog` / `schema` notebook widgets so they
-- match the location the bronze table was written to.
CREATE OR REPLACE TABLE IDENTIFIER(:catalog || '.' || :schema || '.silver_road') AS
SELECT
  UFI,
  PFI,
  FTYPE_CODE,
  EZI_RDNAME,
  LEFT_LOC,
  RIGHT_LOC,
  FROM_UFI,
  TO_UFI,
  st_astext(st_transform(st_setsrid(st_geomfromwkt(geometry), 7899), 4326)) AS geom_4326,
  inline(h3_tessellateaswkb(st_astext(st_transform(st_setsrid(st_geomfromwkt(geometry), 7899), 4326)), 9))
FROM IDENTIFIER(:catalog || '.' || :schema || '.bronze_road')

In [0]:
%sql
-- Keep only the tessellated segments of Elgar Road.
-- Table names are built from the `catalog` / `schema` notebook widgets.
CREATE OR REPLACE TABLE IDENTIFIER(:catalog || '.' || :schema || '.elgar_road_linestring') AS
SELECT *
FROM IDENTIFIER(:catalog || '.' || :schema || '.silver_road')
WHERE EZI_RDNAME = 'ELGAR ROAD'

In [0]:
%sql
-- Inspect the Elgar Road segments (location taken from the notebook widgets).
SELECT * FROM IDENTIFIER(:catalog || '.' || :schema || '.elgar_road_linestring')

In [0]:
%sql
-- Box Hill locality polygons re-projected from SRID 7899 to 4326 (as WKT) and
-- tessellated at H3 resolution 9. inline() yields one row per chip, so the full
-- polygon WKT in geom_4326 is repeated on every chip row of the same locality.
-- Table names are built from the `catalog` / `schema` notebook widgets.
CREATE OR REPLACE TABLE IDENTIFIER(:catalog || '.' || :schema || '.box_hill_polygons') AS
SELECT
  UFI,
  PFI,
  LOCALITY,
  st_astext(st_transform(st_setsrid(st_geomfromwkt(geometry), 7899), 4326)) AS geom_4326,
  inline(h3_tessellateaswkb(st_astext(st_transform(st_setsrid(st_geomfromwkt(geometry), 7899), 4326)), 9))
FROM IDENTIFIER(:catalog || '.' || :schema || '.bronze_locality')
WHERE LOCALITY ILIKE '%box hill%'

## Find all road infrastructure in the three Box Hill suburbs

In [0]:
%sql
-- Plain point-in-polygon join without the H3 index.
-- box_hill_polygons stores one row per tessellation chip with the full polygon WKT
-- repeated on each, so join against the DISTINCT polygons; otherwise every matching
-- point is returned once per chip of its locality.
-- Table names are built from the `catalog` / `schema` notebook widgets.
SELECT sri.*
FROM IDENTIFIER(:catalog || '.' || :schema || '.silver_road_infra') AS sri
JOIN (
  SELECT DISTINCT LOCALITY, geom_4326
  FROM IDENTIFIER(:catalog || '.' || :schema || '.box_hill_polygons')
) AS bhp
ON st_contains(st_geomfromtext(bhp.geom_4326), st_geomfromtext(sri.geom_4326))

In [0]:
%sql
-- H3-assisted point-in-polygon join: match on the H3 cell id first, then accept
-- the row if the chip is flagged core, or else if the chip geometry contains the point.
-- Table names are built from the `catalog` / `schema` notebook widgets.
SELECT
  sri.*,
  bhp.geom_4326 AS geom_polygon,
  bhp.LOCALITY
FROM IDENTIFIER(:catalog || '.' || :schema || '.silver_road_infra') AS sri
JOIN IDENTIFIER(:catalog || '.' || :schema || '.box_hill_polygons') AS bhp
ON bhp.cellid = sri.h3
WHERE bhp.core OR st_contains(st_geomfromwkb(bhp.chip), st_geomfromwkt(sri.geom_4326))

In [0]:
import folium
import pandas as pd
import h3
from shapely.geometry import Polygon, Point
from shapely import wkt

# _sqldf is the result of the preceding %sql cell (infrastructure points joined to
# the Box Hill polygons).

# H3 cell of each point (as a hex string) together with its FTYPE_CODE.
df_h3 = spark.sql("SELECT FTYPE_CODE, h3_h3tostring(h3) as h3 FROM _sqldf").toPandas()

# Point geometries in WKT.
df_points = spark.sql("SELECT FTYPE_CODE, geom_4326 FROM _sqldf").toPandas()

# Distinct locality polygons in WKT.
df_polygons = spark.sql("SELECT DISTINCT geom_polygon, LOCALITY FROM _sqldf").toPandas()

# Vicmap basemap from the helper defined earlier in the notebook.
m = create_vicmap_basemap(zoom_start=10)


def _hexagon_style(feature):
    """Orange outline and fill for the H3 hexagons."""
    return {
        'fillColor': 'orange',
        'color': 'orange',
        'weight': 1,
        'fillOpacity': 0.5,
    }


def _locality_style(feature):
    """Blue outline and fill for the locality polygons."""
    return {
        'fillColor': 'blue',
        'color': 'blue',
        'weight': 1,
        'fillOpacity': 0.3,
    }


# Add H3 hexagons to the map.
for _, row in df_h3.iterrows():
    # h3.cell_to_boundary yields (lat, lng) pairs, whereas shapely/GeoJSON expect
    # (x, y) = (lng, lat); swap them so the hexagons are drawn where the points are.
    hex_boundary = [(lng, lat) for lat, lng in h3.cell_to_boundary(row['h3'])]
    polygon = Polygon(hex_boundary)
    folium.GeoJson(
        data=polygon.__geo_interface__,
        tooltip=row['FTYPE_CODE'],
        style_function=_hexagon_style,
    ).add_to(m)

# Add the locality polygons to the map.
for _, row in df_polygons.iterrows():
    polygon = wkt.loads(row['geom_polygon'])
    folium.GeoJson(
        data=polygon.__geo_interface__,
        tooltip=row['LOCALITY'],
        style_function=_locality_style,
    ).add_to(m)

# Add the points as small red circle markers, with FTYPE_CODE as tooltip.
for _, row in df_points.iterrows():
    point = wkt.loads(row['geom_4326'])
    folium.CircleMarker(
        location=[point.y, point.x],  # folium markers take [lat, lon]
        radius=0.5,
        color='red',
        fill=True,
        fill_color='red',
        fill_opacity=0.6,
        tooltip=row['FTYPE_CODE']
    ).add_to(m)

# Render the map in the notebook.
display(m)

## Find all road infrastructure along the Elgar Road segments

In [0]:
%sql
-- Pair every Elgar Road chip with the infrastructure points in the same H3 cell.
-- The match is on cell id only (no exact geometry test).
-- Table names are built from the `catalog` / `schema` notebook widgets.
SELECT
    elgar.FTYPE_CODE AS elgar_ftype_code,
    elgar.geom_4326 AS elgar_road_line_geom,
    elgar.LEFT_LOC,
    silver.FTYPE_CODE AS silver_ftype_code,
    silver.geom_4326 AS infra_geom
FROM
    IDENTIFIER(:catalog || '.' || :schema || '.elgar_road_linestring') AS elgar
JOIN
    IDENTIFIER(:catalog || '.' || :schema || '.silver_road_infra') AS silver
ON
    elgar.cellid = silver.h3

In [0]:
import folium
import pandas as pd
from shapely import wkt

# _sqldf is the result of the preceding %sql cell. It is read twice: once for the
# road linestrings and once for the infrastructure points (same columns in both).
df_linestrings = spark.sql("SELECT * FROM _sqldf").toPandas()
df_points = spark.sql("SELECT * FROM _sqldf").toPandas()

# Vicmap basemap from the helper defined earlier in the notebook.
m = create_vicmap_basemap(zoom_start=10)


def _road_line_style(feature):
    """Green, 2px-wide line for the road segments."""
    return {
        'color': 'green',
        'weight': 2,
    }


# Road linestrings, with LEFT_LOC as tooltip.
for _, line_row in df_linestrings.iterrows():
    road_line = wkt.loads(line_row['elgar_road_line_geom'])
    folium.GeoJson(
        data=road_line.__geo_interface__,
        tooltip=line_row['LEFT_LOC'],
        style_function=_road_line_style,
    ).add_to(m)

# Infrastructure points as small red circle markers, with their FTYPE_CODE as tooltip.
for _, point_row in df_points.iterrows():
    infra_point = wkt.loads(point_row['infra_geom'])
    marker = folium.CircleMarker(
        location=[infra_point.y, infra_point.x],
        radius=0.5,
        color='red',
        fill=True,
        fill_color='red',
        fill_opacity=0.6,
        tooltip=point_row['silver_ftype_code'],
    )
    marker.add_to(m)

# Render the map in the notebook.
display(m)

## Exploring Vicmap Address dataset

In [0]:
import geopandas as gpd
import pandas as pd

# Read the Vicmap Address file geodatabase that was extracted into the volume.
gdf = gpd.read_file(f"{extract_path}/VicMap_Sample/VMADD.gdb")

# Turn the geometry column into plain strings before handing the frame to Spark.
gdf['geometry'] = gdf['geometry'].astype(str)
spark_df = spark.createDataFrame(gdf)

# Preview the Spark DataFrame in the notebook.
display(spark_df)

# All columns are kept; the table is overwritten, schema included.
bronze_address_writer = (
    spark_df
    .write
    .mode("overwrite")
    .option("overwriteSchema", "true")
)
bronze_address_writer.saveAsTable(f"{catalog}.{schema}.bronze_address")

In [0]:
%sql
-- Silver addresses: geometry re-projected from SRID 7899 to 4326 (as WKT) plus the
-- resolution-9 H3 cell id of each address point.
-- Table names are built from the `catalog` / `schema` notebook widgets so they
-- match the location the bronze table was written to.
CREATE OR REPLACE TABLE IDENTIFIER(:catalog || '.' || :schema || '.silver_address') AS
SELECT
  UFI,
  PFI,
  PROPERTY_PFI,
  EZI_ADDRESS,
  LOCALITY_NAME,
  POSTCODE,
  st_astext(st_transform(st_setsrid(st_geomfromwkt(geometry), 7899), 4326)) AS geom_4326,
  h3_pointash3(st_astext(st_transform(st_setsrid(st_geomfromwkt(geometry), 7899), 4326)), 9) AS h3
FROM IDENTIFIER(:catalog || '.' || :schema || '.bronze_address')

In [0]:
%sql
-- Peek at the silver address table (location taken from the notebook widgets).
SELECT * FROM IDENTIFIER(:catalog || '.' || :schema || '.silver_address') LIMIT 10

## Exploring Victorian Flood History October 2022 Event Public dataset

https://discover.data.vic.gov.au/dataset/victorian-flood-history-october-2022-event-public

In [0]:
import geopandas as gpd
from pyspark.sql import SparkSession
from pyspark.sql.functions import col
from shapely import wkt
import pandas as pd

# Read the flood-history shapefile that was extracted into the volume.
gdf_flood = gpd.read_file(f"{extract_path}/VicMap_Sample/VIC_FLOOD_HISTORY_PUBLIC.shp")

# Store the geometry as WKT text instead of geometry objects.
gdf_flood['geometry'] = gdf_flood.geometry.to_wkt()

# Downgrade to a plain pandas DataFrame, then hand it to Spark.
pdf_flood = pd.DataFrame(gdf_flood)
spark_df_flood = spark.createDataFrame(pdf_flood)

# Preview the Spark DataFrame in the notebook.
display(spark_df_flood)

# All columns are kept; the table is overwritten, schema included.
bronze_flooding_writer = (
    spark_df_flood
    .write
    .mode("overwrite")
    .option("overwriteSchema", "true")
)
bronze_flooding_writer.saveAsTable(f"{catalog}.{schema}.bronze_flooding")

In [0]:
%sql
-- Silver flood extents: geometry re-projected from SRID 7899 to 4326 (as WKT) and
-- tessellated at H3 resolution 7. inline() expands the tessellation into one row
-- per chip.
-- Table names are built from the `catalog` / `schema` notebook widgets so they
-- match the location the bronze table was written to.
CREATE OR REPLACE TABLE IDENTIFIER(:catalog || '.' || :schema || '.silver_flooding') AS
SELECT
  SUBTYPE,
  INCIDENT_N,
  FLOOD_OBS_,
  SOURCE,
  COMMENTS,
  st_astext(st_transform(st_setsrid(st_geomfromwkt(geometry), 7899), 4326)) AS geom_4326,
  inline(h3_tessellateaswkb(st_astext(st_transform(st_setsrid(st_geomfromwkt(geometry), 7899), 4326)), 7)) -- resolution 7
FROM IDENTIFIER(:catalog || '.' || :schema || '.bronze_flooding')

## Find all addresses in the flood zone

In [0]:
%sql
-- Addresses that fall inside a flood polygon. Address cells are resolution 9 and
-- flood chips resolution 7, so each address cell is mapped to its resolution-7
-- parent before joining. A row is kept if the chip is flagged core, or else if the
-- chip geometry contains the address point.
-- Table names are built from the `catalog` / `schema` notebook widgets.
CREATE OR REPLACE TABLE IDENTIFIER(:catalog || '.' || :schema || '.address_in_flood_zone') AS
SELECT
  addr.*,
  flo.geom_4326 AS geom_polygon,
  flo.FLOOD_OBS_,
  flo.INCIDENT_N,
  flo.COMMENTS
FROM IDENTIFIER(:catalog || '.' || :schema || '.silver_address') AS addr
JOIN IDENTIFIER(:catalog || '.' || :schema || '.silver_flooding') AS flo
ON flo.cellid = h3_toparent(addr.h3, 7) -- upscale from 9 to 7
WHERE flo.core OR st_contains(st_geomfromwkb(flo.chip), st_geomfromwkt(addr.geom_4326))

In [0]:
%sql
-- Addresses in the flood zone (location taken from the notebook widgets).
-- The next Python cell reads this result via _sqldf.
SELECT * FROM IDENTIFIER(:catalog || '.' || :schema || '.address_in_flood_zone')

In [0]:
import folium
import pandas as pd
import h3
from shapely.geometry import Polygon, Point
from shapely import wkt

# _sqldf is the result of the preceding %sql cell (addresses in the flood zone).

# H3 cell of each address (as a hex string) together with its FLOOD_OBS_ value.
df_h3 = spark.sql("SELECT FLOOD_OBS_, h3_h3tostring(h3) as h3 FROM _sqldf").toPandas()

# Address points in WKT.
df_points = spark.sql("SELECT EZI_ADDRESS, geom_4326 FROM _sqldf").toPandas()

# Distinct flood polygons in WKT.
df_polygons = spark.sql("SELECT DISTINCT geom_polygon, FLOOD_OBS_ FROM _sqldf").toPandas()

# Vicmap basemap from the helper defined earlier in the notebook.
m = create_vicmap_basemap(zoom_start=10)


def _hexagon_style(feature):
    """Orange outline and fill for the H3 hexagons."""
    return {
        'fillColor': 'orange',
        'color': 'orange',
        'weight': 1,
        'fillOpacity': 0.5,
    }


def _flood_polygon_style(feature):
    """Blue outline and fill for the flood polygons."""
    return {
        'fillColor': 'blue',
        'color': 'blue',
        'weight': 1,
        'fillOpacity': 0.3,
    }


# Add H3 hexagons to the map.
for _, row in df_h3.iterrows():
    # h3.cell_to_boundary yields (lat, lng) pairs, whereas shapely/GeoJSON expect
    # (x, y) = (lng, lat); swap them so the hexagons are drawn where the addresses are.
    hex_boundary = [(lng, lat) for lat, lng in h3.cell_to_boundary(row['h3'])]
    polygon = Polygon(hex_boundary)
    folium.GeoJson(
        data=polygon.__geo_interface__,
        tooltip=row['FLOOD_OBS_'],
        style_function=_hexagon_style,
    ).add_to(m)

# Add the flood polygons to the map.
for _, row in df_polygons.iterrows():
    polygon = wkt.loads(row['geom_polygon'])
    folium.GeoJson(
        data=polygon.__geo_interface__,
        tooltip=row['FLOOD_OBS_'],
        style_function=_flood_polygon_style,
    ).add_to(m)

# Add the addresses as small red circle markers, with the address text as tooltip.
for _, row in df_points.iterrows():
    point = wkt.loads(row['geom_4326'])
    folium.CircleMarker(
        location=[point.y, point.x],  # folium markers take [lat, lon]
        radius=0.5,
        color='red',
        fill=True,
        fill_color='red',
        fill_opacity=0.6,
        tooltip=row['EZI_ADDRESS']
    ).add_to(m)

# Render the map in the notebook.
display(m)

# Also save the map as a standalone HTML file in the UC volume.
uc_html_path = f"/Volumes/{catalog}/{schema}/{volume}/flood_map.html"
m.save(uc_html_path)

#### Download the HTML file from the UC volume and open it in a browser

![](sample_flood_map.png)