### Incremental Data Pull (City of Calgary Traffic Data)

In [0]:
# Pull the current traffic-incident records from the City of Calgary endpoint
# and load them into a Pandas DataFrame.
url = "https://data.calgary.ca/resource/35ra-9556.json"
# A timeout keeps this cell from hanging forever if the endpoint stops responding.
# NOTE(review): no paging/limit query parameter is sent, so only the endpoint's
# default page of rows is returned — confirm that covers the rows you expect.
response = requests.get(url, timeout=30)
# Fail fast on an HTTP error (4xx/5xx) so an error payload is never parsed as
# incident rows and registered as the `incremental_traffic` view.
response.raise_for_status()
data = response.json()

df = pd.DataFrame(data)
display(df)

# Convert the Pandas DataFrame to a Spark DataFrame and expose it to SQL cells
# as the temporary view `incremental_traffic`.
df_spark = spark.createDataFrame(df)
df_spark.createOrReplaceTempView("incremental_traffic")

### Merge the incremental dataset into the existing table

In [0]:
%sql
-- Show every column of every row currently stored in the incidents table.
SELECT *
FROM gshen_catalog.city_of_calgary.yyc_traffic_incidents

In [0]:
%sql
-- Upsert the rows of the `incremental_traffic` temp view into the incidents
-- table, matching rows on `id`: matched rows are overwritten, new ids are inserted.
MERGE INTO gshen_catalog.city_of_calgary.yyc_traffic_incidents AS tgt
USING (
  -- Project the view's columns under the aliases consumed by UPDATE SET * / INSERT *.
  -- NOTE(review): the aliases presumably mirror the target table's column names —
  -- verify against the table schema.
  SELECT
    incident_info AS `Incident Info`,
    description,
    start_dt,
    modified_dt,
    quadrant,
    CAST(longitude AS DOUBLE) AS Longitude,
    CAST(latitude AS DOUBLE) AS Latitude,
    CAST(count AS BIGINT) AS Count,
    id,
    -- Text of the form POINT(<coordinates[0]> <coordinates[1]>).
    'POINT(' || CAST(point.coordinates[0] AS STRING) || ' ' || CAST(point.coordinates[1] AS STRING) || ')' AS Point,
    -- Pack the three computed-region columns into a single JSON string.
    TO_JSON(STRUCT(
      `:@computed_region_kxmf_bzkv` AS computed_region_kxmf_bzkv,
      `:@computed_region_4a3i_ccfj` AS computed_region_4a3i_ccfj,
      `:@computed_region_4b54_tmc4` AS computed_region_4b54_tmc4
    )) AS _rescued_data
  FROM incremental_traffic
) AS src
  ON tgt.id = src.id
WHEN MATCHED THEN UPDATE SET *
WHEN NOT MATCHED THEN INSERT *