In [0]:
# -------------------------------
# Logging setup (self-contained)
# -------------------------------

from pyspark.sql.types import *
from pyspark.sql.functions import current_timestamp
import uuid

# Delta location of the pipeline audit log.
LOG_PATH = "/FileStore/project/logs/pipeline_logs"

# Layout of one log row, built column by column. StructType.add() leaves every
# column nullable, which is also the default of the StructField constructor.
log_schema = (
    StructType()
    .add("run_id", StringType())
    .add("pipeline_layer", StringType())
    .add("notebook_name", StringType())
    .add("event_type", StringType())        # START / END / ERROR / REJECTED
    .add("record_count", LongType())
    .add("status", StringType())            # RUNNING / SUCCESS / FAILED
    .add("error_message", StringType())
    .add("event_timestamp", TimestampType())
)

# Create the logging table on first use only.
#
# The log lives at a storage path, not in the metastore, so its existence is
# checked with DeltaTable.isDeltaTable() rather than a catalog lookup through the
# private _jsparkSession handle. Save mode "ignore" makes the bootstrap
# non-destructive: if a Delta table already exists at LOG_PATH (for example,
# created by a concurrent run between the check and the write) nothing is
# written, whereas "overwrite" would replace the logged history with an empty
# table.
from delta.tables import DeltaTable

if not DeltaTable.isDeltaTable(spark, LOG_PATH):
    (
        spark.createDataFrame([], log_schema)
        .write.format("delta")
        .mode("ignore")
        .save(LOG_PATH)
    )

# Run identifier shared by every event logged after this cell is executed, so
# the START / END / ERROR rows of one notebook run can be joined on run_id.
_SESSION_RUN_ID = str(uuid.uuid4())


def log_event(layer, notebook, event_type, record_count, status, error_msg=None, run_id=None):
    """Append one event row to the Delta pipeline log at LOG_PATH.

    Args:
        layer: Value stored in the ``pipeline_layer`` column.
        notebook: Value stored in the ``notebook_name`` column.
        event_type: Value stored in the ``event_type`` column
            (START / END / ERROR / REJECTED per the schema comment).
        record_count: Row count for the event. Converted with ``int()``;
            ``None`` is stored as NULL instead of raising.
        status: Value stored in the ``status`` column
            (RUNNING / SUCCESS / FAILED per the schema comment).
        error_msg: Optional error text. Non-string values such as exception
            objects are converted with ``str()``; ``None`` is stored as NULL.
        run_id: Optional explicit run identifier. When omitted, the identifier
            generated once for this notebook session is used, so related
            events share the same ``run_id``.

    Raises:
        Whatever Spark raises if the DataFrame cannot be built or the append
        to LOG_PATH fails; nothing is swallowed here.
    """
    row = [(
        run_id if run_id is not None else _SESSION_RUN_ID,
        layer,
        notebook,
        event_type,
        None if record_count is None else int(record_count),
        status,
        None if error_msg is None else str(error_msg),
        None,  # placeholder; event_timestamp is filled in by Spark below
    )]

    # The timestamp comes from Spark's current_timestamp() rather than from the
    # Python driver, overwriting the NULL placeholder above.
    df = spark.createDataFrame(row, schema=log_schema) \
              .withColumn("event_timestamp", current_timestamp())

    df.write.format("delta").mode("append").save(LOG_PATH)



In [0]:
# Task 5.1: Start MERGE logic

from delta.tables import DeltaTable

# Name under which this notebook's events appear in the pipeline log.
notebook_name = "silver_merge"
log_event("silver", notebook_name, "START", 0, "RUNNING")

# DeltaTable.forPath raises when the path does not hold a Delta table. START has
# already been logged at this point, so record the failure as well; otherwise
# the run would stay in RUNNING with no matching ERROR row.
try:
    silver_delta = DeltaTable.forPath(
        spark, "/FileStore/project/silver/sales"
    )
except Exception as e:
    log_event("silver", notebook_name, "ERROR", 0, "FAILED", str(e))
    raise


In [0]:
# Task 5.2: Perform MERGE (UPSERT)
#
# Upserts valid_sales into the silver table keyed on transaction_id: matching
# rows are updated with all source columns, unmatched rows are inserted. On
# success an END/SUCCESS event is logged with the source row count; on any
# failure an ERROR/FAILED event is logged and the exception is re-raised so the
# notebook run fails.
#
# NOTE(review): valid_sales is not defined in any cell visible here, and the
# recorded output of this cell is a NameError on it. Confirm the cell that
# builds valid_sales runs before this one on a fresh kernel.

try:
    # Count before merging. DataFrames are lazy, so counting afterwards would
    # re-evaluate valid_sales once the target has already been modified, which
    # can report a different number if the source query reads the silver table.
    source_row_count = valid_sales.count()

    silver_delta.alias("t").merge(
        source=valid_sales.alias("s"),
        condition="t.transaction_id = s.transaction_id"
    ).whenMatchedUpdateAll() \
     .whenNotMatchedInsertAll() \
     .execute()

    log_event("silver", notebook_name, "END",
              source_row_count, "SUCCESS")

except Exception as e:
    try:
        log_event("silver", notebook_name, "ERROR", 0, "FAILED", str(e))
    except Exception as log_error:
        # A failing log write must not replace the real failure as the raised error.
        print(f"WARNING: could not record ERROR event in pipeline log: {log_error}")
    raise


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
File <command-4725766364173296>, line 5
      1 # Task 5.2: Perform MERGE (UPSERT)
      3 try:
      4     silver_delta.alias("t").merge(
----> 5         source=valid_sales.alias("s"),
      6         condition="t.transaction_id = s.transaction_id"
      7     ).whenMatchedUpdateAll() \
      8      .whenNotMatchedInsertAll() \
      9      .execute()
[1;32m     11[0m     log_event([38;5;124m"[39m[38;5;1