In [0]:
from pyspark.sql import functions as F

def load_projectviews_to_delta(bronze_path, delta_table, checkpoint_path):
    """
    Stream plain text files from an S3 path into a Delta table.

    Every file matched by ``bronze_path`` is read with the ``text`` streaming
    source, a ``file_path`` column is added from the source's
    ``_metadata.file_path`` field, and the rows are appended to
    ``delta_table``, partitioned by ``file_path``.

    The function relies on a ``spark`` session being available in the
    enclosing scope (it is not a parameter); in a notebook this is presumably
    the session injected by the runtime.

    Parameters:
    - bronze_path: str, S3 path (wildcards allowed) of the files to read
    - delta_table: str, full name of the Delta table to write
    - checkpoint_path: str, path where the streaming job stores its
      checkpoint information; reusing the same path lets a later run skip
      files that were already ingested

    Returns:
    - The started StreamingQuery. The query runs asynchronously; call
      ``awaitTermination()`` on it to block until the load has finished.
    """
    raw_lines = (
        spark.readStream
        .format("text")
        .option("path", bronze_path)
        # NOTE(review): "includeFileName" does not appear to be a documented
        # option of the text source; the file path used below comes from
        # `_metadata.file_path`. Kept as-is -- confirm and drop if unused.
        .option("includeFileName", "true")
        .load()
    )

    # Expose the hidden per-file metadata field as a regular column so it can
    # be used as the partition column.
    lines_with_path = raw_lines.withColumn(
        "file_path", F.col("_metadata.file_path")
    )

    return (
        lines_with_path.writeStream
        .format("delta")
        .option("checkpointLocation", checkpoint_path)
        .partitionBy("file_path")
        .outputMode("append")
        # `once=True` is deprecated; `availableNow=True` likewise processes
        # everything available at start and then stops, but may split the
        # work into several micro-batches instead of one unbounded batch.
        .trigger(availableNow=True)
        .toTable(delta_table)
    )

# Source files: every object under the January 2025 projectviews prefix.
bronze_path = "s3://dest-wikimedia/projectviews/2025/2025-01/*"
# Target Delta table for the raw lines.
delta_table = "projectviews.bronze.raw_projectsview"
# Checkpoint directory used by the streaming write.
checkpoint_path = "/Volumes/projectviews/bronze/checkpoints"

# load_projectviews_to_delta takes exactly three arguments and picks up the
# `spark` session from the enclosing scope, so the session is not passed in
# (passing it as a fourth positional argument raises TypeError).
load_projectviews_to_delta(bronze_path, delta_table, checkpoint_path)