In [0]:
%sql
-- Idempotent Unity Catalog setup for the telemetry ingestion pipeline.
CREATE CATALOG IF NOT EXISTS fleetops_project;
-- `raw` hosts the landing-zone volume that the ingestion cell reads from.
CREATE SCHEMA IF NOT EXISTS fleetops_project.raw;
-- `bronze` is the schema the ingestion cell writes `iot_telemetry` into;
-- without it the streaming write fails on a fresh workspace.
CREATE SCHEMA IF NOT EXISTS fleetops_project.bronze;
-- External volume exposing the S3 landing zone under /Volumes/fleetops_project/raw/landing_zone_iot.
CREATE EXTERNAL VOLUME IF NOT EXISTS fleetops_project.raw.landing_zone_iot
LOCATION 's3://fleetops-landing-zone/telemetry_data';

In [0]:
from pyspark.sql.functions import col, current_timestamp

# --- Pipeline configuration -------------------------------------------------
CATALOG = "fleetops_project"
SCHEMA = "bronze"  # schema of the target Delta table written by this notebook

# Volume created in the %sql setup cell; it is both the landing zone that is
# read and the parent of the stream's bookkeeping directories below.
VOLUME_PATH = "/Volumes/" + CATALOG + "/raw/landing_zone_iot"
DATA_SOURCE_PATH = VOLUME_PATH

# Bookkeeping lives under `_system` inside the landing zone itself.
# NOTE(review): this relies on the file source skipping `_`-prefixed paths when
# listing DATA_SOURCE_PATH — confirm, or move these outside the source directory.
_SYSTEM_DIR = VOLUME_PATH + "/_system"
SCHEMA_PATH = _SYSTEM_DIR + "/schema"
CHECKPOINT_PATH = _SYSTEM_DIR + "/checkpoints"

# Auto Loader ("cloudFiles") source settings for the JSON landing zone.
autoloader_options = {
    "cloudFiles.format": "json",
    # Where the inferred schema is persisted between runs.
    "cloudFiles.schemaLocation": SCHEMA_PATH,
    # File notifications are switched off for this stream.
    "cloudFiles.useNotifications": "false",
    # NOTE(review): this option may be deprecated in newer runtimes — check the
    # Auto Loader options reference before relying on it.
    "cloudFiles.useIncrementalListing": "true",
    "cloudFiles.inferColumnTypes": "true",
}

# Streaming DataFrame over every file discovered under DATA_SOURCE_PATH.
df_stream = (
    spark.readStream
    .format("cloudFiles")
    .options(**autoloader_options)
    .load(DATA_SOURCE_PATH)
)

# Lineage columns taken from the file-source `_metadata` struct.
source_file = col("_metadata.file_path").alias("_source_file")
file_time = col("_metadata.file_modification_time").alias("_file_time")

# Bronze rows = every ingested column, plus file lineage, plus an ingestion
# timestamp stamped at processing time.
df_bronze = (
    df_stream
    .select("*", source_file, file_time)
    .withColumn("_ingested_at", current_timestamp())
)

# Append the bronze stream to the managed Delta table {CATALOG}.{SCHEMA}.iot_telemetry.
# `toTable` is the documented PySpark API for starting a stream into a table;
# it replaces the non-standard `DataStreamWriter.table(...)` call.
query = (
    df_bronze.writeStream
    .format("delta")
    .outputMode("append")
    # Checkpoint tracks which files were already ingested across runs.
    .option("checkpointLocation", CHECKPOINT_PATH)
    # Let new source columns be added to the target table's schema.
    .option("mergeSchema", "true")
    # NOTE(review): assumes `year` and `month` columns exist in the ingested
    # data (or are inferred from the directory layout) — confirm.
    .partitionBy("year", "month")
    # Process everything currently available, then stop (batch-style run).
    .trigger(availableNow=True)
    .toTable(f"{CATALOG}.{SCHEMA}.iot_telemetry")
)

# Blocks until the run finishes; a failed query raises here, so the success
# message below is only printed when ingestion actually completed.
query.awaitTermination()

print("Bronze ingestion completed successfully.")