In [None]:
# Databricks notebook source
from datetime import datetime

def log_pipeline_event(status, message):
    """Insert one event row into the ``pipeline_quality_checks`` table.

    The row is written positionally as: ``current_timestamp()``, the
    literal ``'pipeline_run'``, *status*, ``NULL`` and *message*.

    Args:
        status: Event status text (callers in this file pass "STARTED",
            "COMPLETED" and "FAILED").
        message: Free-form description of the event. May contain quotes
            or backslashes (e.g. an exception message); they are escaped
            before being embedded in the SQL statement.

    Raises:
        Whatever ``spark.sql`` raises if the INSERT fails.
    """

    def _sql_literal(value):
        # Escape the backslash first so the backslashes added for quotes
        # are not escaped a second time.
        # NOTE(review): assumes Spark's default string-literal parsing,
        # where backslash is the escape character — confirm that
        # spark.sql.parser.escapedStringLiterals is not enabled.
        text = str(value).replace("\\", "\\\\").replace("'", "\\'")
        return f"'{text}'"

    # Without escaping, a single quote in `message` (common in exception
    # text) would terminate the literal early and break the INSERT.
    spark.sql(f"""
    INSERT INTO pipeline_quality_checks VALUES (
        current_timestamp(),
        'pipeline_run',
        {_sql_literal(status)},
        NULL,
        {_sql_literal(message)}
    )
    """)

# Bronze -> silver -> gold pipeline run. Progress is recorded through
# log_pipeline_event; any failure is logged as FAILED and then re-raised.
try:
    # 1. Log start
    log_pipeline_event("STARTED", f"Pipeline started at {datetime.now()}")

    # 2. Run the ETL pipeline
    # 2.1. Read data from bronze layer
    bronze_df = spark.sql("SELECT * FROM bronze_stock_data")
    # 2.2. Stamp every row with the processing time
    # NOTE(review): `current_timestamp` is not imported in this cell;
    # presumably it comes from pyspark.sql.functions via an earlier
    # cell — confirm.
    processed_df = bronze_df.withColumn("processed_date", current_timestamp())
    # 2.3. Write data to silver layer (replaces the previous contents)
    processed_df.write.mode("overwrite").saveAsTable("silver_stock_data")
    # 2.4. Quality check: number of silver rows with a non-null price
    quality_check_df = spark.sql("""
        SELECT COUNT(*) AS record_count FROM silver_stock_data
        WHERE stock_price IS NOT NULL
    """)
    # 2.5. Write the quality-check result to the gold layer
    quality_check_df.write.mode("overwrite").saveAsTable("gold_stock_analysis")

    # 3. Read back the record count.
    # The gold table holds the single aggregate row written in 2.5, so
    # COUNT(*) over it is always 1; the real figure is the stored
    # `record_count` value.
    record_count = spark.sql(
        "SELECT record_count FROM gold_stock_analysis"
    ).collect()[0][0]

    # 4. Log completion only after every step (including the read-back)
    # has succeeded, so a COMPLETED event is never followed by FAILED.
    log_pipeline_event("COMPLETED", f"Pipeline completed at {datetime.now()}")
    log_pipeline_event("COMPLETED", f"Processed {record_count} records at {datetime.now()}")

except Exception as e:
    # Best-effort failure logging: if writing the FAILED event itself
    # breaks, report that and still surface the original pipeline error.
    try:
        log_pipeline_event("FAILED", f"Error: {str(e)}")
    except Exception as log_error:
        print(f"Could not record pipeline failure: {log_error}")
    raise

StatementMeta(, 9f7e89ac-07c2-4283-ad48-2c252bc7b330, 7, Finished, Available, Finished)