In [0]:
from pyspark.sql.functions import coalesce, col, to_date, to_timestamp
from pyspark.sql.types import DoubleType, IntegerType, LongType, TimestampType

# --- Storage layout -------------------------------------------------------
# All Delta locations used by this notebook live under one DBFS root.
_DELTA_ROOT = "dbfs:/mta_project/delta"
_BRONZE_ROOT = f"{_DELTA_ROOT}/bronze"
_SILVER_CHECKPOINTS_ROOT = f"{_DELTA_ROOT}/silver_checkpoints"

# Bronze Delta tables that the silver streams read from.
TRIP_UPDATES_BRONZE_PATH = f"{_BRONZE_ROOT}/trip_updates"
BUS_POSITIONS_BRONZE_PATH = f"{_BRONZE_ROOT}/bus_positions"
ALERTS_BRONZE_PATH = f"{_BRONZE_ROOT}/alerts"

# Checkpoint directories, one per silver streaming query.
TRIP_UPDATES_SILVER_CHECKPOINTS_PATH = f"{_SILVER_CHECKPOINTS_ROOT}/trip_updates"
BUS_POSITIONS_SILVER_CHECKPOINTS_PATH = f"{_SILVER_CHECKPOINTS_ROOT}/bus_positions"
ALERTS_SILVER_CHECKPOINTS_PATH = f"{_SILVER_CHECKPOINTS_ROOT}/alerts"

# --- Target tables --------------------------------------------------------
# Silver tables are addressed as <catalog>.<schema>.<table>.
CATALOG_NAME = "mta_databricks_ws"
SCHEMA_NAME = "default"
_SILVER_NAMESPACE = f"{CATALOG_NAME}.{SCHEMA_NAME}"

TRIP_UPDATES_SILVER_TABLE_NAME = f"{_SILVER_NAMESPACE}.trip_updates_silver"
BUS_POSITIONS_SILVER_TABLE_NAME = f"{_SILVER_NAMESPACE}.bus_positions_silver"
ALERTS_SILVER_TABLE_NAME = f"{_SILVER_NAMESPACE}.alerts_silver"

# Trip updates: stream the bronze Delta table, convert its columns to typed
# values, and append the result to the silver table.
trip_updates_bronze = spark.readStream.format("delta").load(TRIP_UPDATES_BRONZE_PATH)

# start_date is parsed with the "yyyyMMdd" pattern.
trip_updates_typed = trip_updates_bronze.withColumn(
    "start_date", to_date(col("start_date"), "yyyyMMdd")
)

# Columns that are cast to integers in place.
for int_column in ("direction_id", "trip_delay_seconds", "stop_sequence"):
    trip_updates_typed = trip_updates_typed.withColumn(
        int_column, col(int_column).cast(IntegerType())
    )

# Columns that are cast to long and then converted to timestamps in place
# (presumably epoch seconds in bronze -- confirm against the producer).
for ts_column in ("update_timestamp", "arrival_time", "departure_time"):
    trip_updates_typed = trip_updates_typed.withColumn(
        ts_column, to_timestamp(col(ts_column).cast(LongType()))
    )

# Ingestion bookkeeping columns are not carried into silver.
trip_updates_silver = trip_updates_typed.drop("kafka_timestamp", "message_type")

(
    trip_updates_silver.writeStream
    .format("delta")
    .outputMode("append")
    .option("checkpointLocation", TRIP_UPDATES_SILVER_CHECKPOINTS_PATH)
    .trigger(availableNow=True)
    .toTable(TRIP_UPDATES_SILVER_TABLE_NAME)
)

# Bus positions: stream the bronze Delta table, convert its columns to typed
# values, and append the result to the silver table.
#
# Ordered mapping of output column -> expression. Every entry except
# "event_timestamp" overwrites the bronze column of the same name;
# "event_timestamp" is a new column derived from the bronze "timestamp".
bus_position_columns = {
    "start_date": to_date(col("start_date"), "yyyyMMdd"),
    "direction_id": col("direction_id").cast(IntegerType()),
    "latitude": col("latitude").cast(DoubleType()),
    "longitude": col("longitude").cast(DoubleType()),
    "bearing": col("bearing").cast(DoubleType()),
    "event_timestamp": to_timestamp(col("timestamp").cast(LongType())),
}

bus_positions = spark.readStream.format("delta").load(BUS_POSITIONS_BRONZE_PATH)
for column_name, column_expr in bus_position_columns.items():
    bus_positions = bus_positions.withColumn(column_name, column_expr)

# Drop ingestion bookkeeping columns, and the original "timestamp" string now
# that "event_timestamp" replaces it.
bus_positions = bus_positions.drop("kafka_timestamp", "message_type", "timestamp")

(
    bus_positions.writeStream
    .format("delta")
    .outputMode("append")
    .option("checkpointLocation", BUS_POSITIONS_SILVER_CHECKPOINTS_PATH)
    .trigger(availableNow=True)
    .toTable(BUS_POSITIONS_SILVER_TABLE_NAME)
)

# Alerts: stream the bronze Delta table, convert its columns to typed values,
# consolidate the route identifier, and append the result to the silver table.
alerts_bronze = spark.readStream.format("delta").load(ALERTS_BRONZE_PATH)

alerts_silver = (
    alerts_bronze
    # start_time / end_time are cast to long and converted to timestamps
    # (presumably epoch seconds in bronze -- confirm against the producer).
    .withColumn("start_time", to_timestamp(col("start_time").cast(LongType())))
    .withColumn("end_time", to_timestamp(col("end_time").cast(LongType())))
    .withColumn("trip_direction_id", col("trip_direction_id").cast(IntegerType()))
    # Keep route_id when it is non-null, otherwise fall back to trip_route_id.
    # coalesce is pyspark.sql.functions.coalesce; it has to be imported
    # explicitly rather than relied on as an ambient session name.
    .withColumn("route_id", coalesce(col("route_id"), col("trip_route_id")))
    # trip_route_id is folded into route_id above, so it is dropped together
    # with the ingestion bookkeeping columns.
    .drop("kafka_timestamp", "message_type", "trip_route_id")
)

# Left as a bare expression so the notebook still displays the StreamingQuery.
(
    alerts_silver.writeStream
    .format("delta")
    .outputMode("append")
    .option("checkpointLocation", ALERTS_SILVER_CHECKPOINTS_PATH)
    .trigger(availableNow=True)
    .toTable(ALERTS_SILVER_TABLE_NAME)
)

<pyspark.sql.streaming.query.StreamingQuery at 0x7f6b15cb46e0>