### 001-setup
1. Create bronze-layer tables for the equipment GPS data and the polygon boundaries
2. Create silver tables with GEOMETRY types for the equipment GPS data and the polygon boundaries
3. Register a Unity Catalog function that densifies a polygon boundary with evenly spaced points

In [0]:
import pandas as pd

In [0]:
# Unity Catalog namespace (catalog, schema) to create objects under;
# change both values to a catalog and schema you can write to
catalog, schema = "users", "david_hurley"

##### Step 1

In [0]:
# read the raw CSV extracts into pandas DataFrames; the relative paths are
# resolved against the current working directory
gps_csv_path = "data/equipment_gps_data.csv"
congestion_zone_csv_path = "data/congestion_zone_boundary_data.csv"

df_bronze_gps_data = pd.read_csv(gps_csv_path)
df_bronze_congestion_zone_data = pd.read_csv(congestion_zone_csv_path)


In [0]:
# convert the pandas frames to Spark DataFrames and persist them as Delta
# tables, replacing whatever the tables held before ("overwrite" mode)

# (old name, new name) pairs applied to the GPS frame, in this order
gps_column_renames = [
    ("lon", "longitude"),
    ("lat", "latitude"),
    ("trip_id", "route_id"),
    ("vehicle_id", "equipment_id"),
]

spark_df_bronze_gps_data = spark.createDataFrame(df_bronze_gps_data)
for old_name, new_name in gps_column_renames:
    spark_df_bronze_gps_data = spark_df_bronze_gps_data.withColumnRenamed(old_name, new_name)

gps_table_name = f"{catalog}.{schema}.equipment_gps_data"
(
    spark_df_bronze_gps_data.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable(gps_table_name)
)

# the congestion-zone frame is stored with its CSV column names unchanged
spark_df_bronze_congestion_zone_data = spark.createDataFrame(df_bronze_congestion_zone_data)

congestion_zone_table_name = f"{catalog}.{schema}.congestion_zone_polygon_data"
(
    spark_df_bronze_congestion_zone_data.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable(congestion_zone_table_name)
)

##### Step 2

In [0]:
# build the silver tables: each one copies every column of its bronze table
# and adds a geometry column constructed with SRID 4326 and then transformed
# to SRID 3763

# GPS fixes: one point geometry per row, built from longitude/latitude
create_gps_geom_table_sql = f"""
    CREATE OR REPLACE TABLE {catalog}.{schema}.equipment_gps_geom_data AS
    SELECT
        *,
        st_transform(st_point(longitude, latitude, 4326), 3763) as point_geom
    FROM {catalog}.{schema}.equipment_gps_data
  """

# congestion zones: geometry parsed from the WKT text in congestion_zone_polygon
create_zone_geom_table_sql = f"""
    CREATE OR REPLACE TABLE {catalog}.{schema}.congestion_zone_polygon_geom_data AS
    SELECT
      *,
      st_transform(st_geomfromwkt(congestion_zone_polygon, 4326), 3763) AS polygon_geom
    FROM {catalog}.{schema}.congestion_zone_polygon_data
  """

spark.sql(create_gps_geom_table_sql)

spark.sql(create_zone_geom_table_sql)

##### Step 3

In [0]:
# register a Unity Catalog Python UDF that places points at a fixed spacing
# along the edges of the axis-aligned rectangle [xmin, xmax] x [ymin, ymax]
#
# Arguments of the SQL function:
#   xmin, xmax, ymin, ymax - extents of the rectangle to trace
#   meter_spacing          - step between consecutive points, in the same units
#                            as the coordinates (presumably metres - confirm
#                            against the CRS of the geometries passed in)
# Returns an ARRAY of STRUCT<x, y> points, or NULL when any argument is NULL.
# Raises (fails the query) when meter_spacing is not greater than 0.
#
# NOTE: the text between the $$ markers lives inside a Python f-string, so it
# must not contain curly braces.
spark.sql(
  f"""
    CREATE OR REPLACE FUNCTION {catalog}.{schema}.densify_polygon_boundary(
      xmin DOUBLE, xmax DOUBLE, ymin DOUBLE, ymax DOUBLE, meter_spacing FLOAT
    )
    RETURNS ARRAY<STRUCT<x DOUBLE, y DOUBLE>>
    LANGUAGE PYTHON
    AS $$
    from pyspark.sql import Row

    def densify_boundary(xmin, xmax, ymin, ymax, meter_spacing):
      # NULL SQL arguments are expected to arrive as None (confirm for your
      # runtime); answer NULL instead of failing with an opaque TypeError
      if any(value is None for value in (xmin, xmax, ymin, ymax, meter_spacing)):
          return None

      # a zero or negative step never moves the cursors below past the far
      # edge, so the loops would spin forever; reject it up front
      if meter_spacing <= 0:
          raise ValueError("meter_spacing must be greater than 0")

      points = []

      # edge y = ymin, walking from xmin towards xmax
      x = xmin
      while x <= xmax:
          points.append(Row(x=x, y=ymin))
          x += meter_spacing

      # edge x = xmax, walking from ymin towards ymax; starts one step in
      # because the first edge already emitted a point at y = ymin
      y = ymin + meter_spacing
      while y <= ymax:
          points.append(Row(x=xmax, y=y))
          y += meter_spacing

      # edge y = ymax, walking from xmax back towards xmin
      x = xmax - meter_spacing
      while x >= xmin:
          points.append(Row(x=x, y=ymax))
          x -= meter_spacing

      # edge x = xmin, walking from ymax back towards ymin; the strict
      # comparison stops before ymin so the starting point is not repeated
      y = ymax - meter_spacing
      while y > ymin:
          points.append(Row(x=xmin, y=y))
          y -= meter_spacing

      return points

    return densify_boundary(xmin, xmax, ymin, ymax, meter_spacing)
    $$;
  """
)