In [0]:
def get_bronze_table_names():
    """Return the table names registered for the Bronze layer.

    Runs a query against ``tracking_table`` for rows whose ``layer`` equals
    ``'Bronze'`` and collects the ``table_name`` column to the driver.

    Returns:
        list: The ``table_name`` value of every matching row, in the order
        the query returned them.
    """
    query = """
        SELECT table_name 
        FROM tracking_table 
        WHERE layer = 'Bronze'
    """
    names = []
    for record in spark.sql(query).collect():
        names.append(record.table_name)
    return names

In [0]:
def _dq_sql_literal(value):
    """Render *value* as a Spark SQL literal: ``NULL`` for None, else a quoted string."""
    if value is None:
        return "NULL"
    # Backslash is the escape character in Spark SQL string literals under the
    # default parser settings, so escape it first and then the single quote.
    # NOTE(review): assumes spark.sql.parser.escapedStringLiterals is left at
    # its default (false) - confirm for this workspace.
    escaped = str(value).replace("\\", "\\\\").replace("'", "\\'")
    return f"'{escaped}'"


def _log_dq_failure(record_id, reason):
    """Insert one Bronze-layer row into the dq_failure_log table."""
    spark.sql(f"""
        INSERT INTO dq_failure_log (layer, record_id, failure_reason)
        VALUES ('Bronze', {_dq_sql_literal(record_id)}, {_dq_sql_literal(reason)})
    """)


def dq_checks_bronze(input_df):
    """Run the Bronze data-quality checks on a DataFrame and log any failures.

    Two checks are performed against the module-level ``critical_columns``
    (defined outside this block):

    * every critical column is scanned for NULL values;
    * rows are grouped by the critical columns and any group occurring more
      than once is reported as a duplicate.

    Each failure is written to ``dq_failure_log``. If anything raises while
    checking or logging, the exception is not propagated; instead a single
    row describing it is written to ``dq_failure_log`` with a NULL record id.

    Args:
        input_df: The Spark DataFrame to validate.

    Returns:
        list: ``(record_id, reason)`` tuples for every failure found before
        any exception occurred. For null checks ``record_id`` is the column
        name; for duplicates it is the duplicated key (the raw value for a
        single critical column, a comma-separated string for several).
    """
    failure_log = []

    try:
        # Check for null values in critical columns
        for col_name in critical_columns:
            null_count = input_df.filter(col(col_name).isNull()).count()
            if null_count > 0:
                failure_log.append((col_name, "Null values found"))

        # Check for duplicates. With no critical columns groupBy() would
        # aggregate the whole frame into one row and report its row count as a
        # "duplicate", so the check is skipped in that case.
        if critical_columns:
            duplicate_groups = (
                input_df.groupBy(critical_columns)
                .count()
                .filter("count > 1")
                .collect()
            )
            for row in duplicate_groups:
                # The trailing field is the aggregated count; the rest is the key.
                key_values = row[:-1]
                if len(key_values) == 1:
                    record_id = key_values[0]
                else:
                    # Identify the group by its full key, not just the first column.
                    record_id = ", ".join(str(value) for value in key_values)
                failure_log.append((record_id, "Duplicate records found"))

        # Log failures to the dq_failure_log table
        for record_id, reason in failure_log:
            _log_dq_failure(record_id, reason)

    except Exception as e:
        # Log exceptions to the dq_failure_log table. The message is escaped
        # because Spark error text routinely contains single quotes.
        _log_dq_failure(None, f"Exception: {str(e)}")

    return failure_log

In [0]:
def log_to_table(table_name, layer, log_message, error_flag=False):
    """Append one entry to ``logging_table``.

    The timestamp is taken from Spark's ``current_timestamp()`` inside the
    INSERT itself, so no separate query or driver-side string conversion is
    needed.

    Args:
        table_name: Name of the table (or process) the entry refers to.
        layer: Layer label stored with the entry, e.g. ``"Bronze"``.
        log_message: Free-text message; may contain quotes or backslashes.
        error_flag: Truthy when the entry records an error. Defaults to False.
    """
    def quote(value):
        # Backslash is the escape character in Spark SQL string literals under
        # the default parser settings; escape it first, then the single quote,
        # so that messages such as exception text cannot break the statement.
        escaped = str(value).replace("\\", "\\\\").replace("'", "\\'")
        return f"'{escaped}'"

    flag_literal = "true" if error_flag else "false"
    # INSERT ... SELECT is used so current_timestamp() is evaluated by Spark
    # in the session time zone rather than round-tripped through Python.
    spark.sql(f"""
        INSERT INTO logging_table (table_name, layer, log_message, log_timestamp, error_flag)
        SELECT {quote(table_name)}, {quote(layer)}, {quote(log_message)}, current_timestamp(), {flag_literal}
    """)

In [0]:
# Driver cell: run the Bronze data-quality checks on every registered Bronze
# table. Failures are reported per table so that one unreadable table does not
# prevent the remaining tables from being checked.

def _report_bronze_failure(log_table_name, exc):
    """Print a Bronze-run error and record it in the logging table (best effort)."""
    error_message = f"Error during bronze table operations: {str(exc)}"
    # Print first so the error stays visible even if writing the log row fails.
    print(error_message)
    try:
        log_to_table(log_table_name, "Bronze", error_message, error_flag=True)
    except Exception as log_exc:
        print(f"Failed to write to logging table: {str(log_exc)}")


try:
    # Get the list of bronze table names
    bronze_table_names = get_bronze_table_names()
except Exception as e:
    _report_bronze_failure("Bronze DQ check Execution", e)
    bronze_table_names = []

for table_name in bronze_table_names:
    try:
        # Load the DataFrame for the current bronze table
        input_df = spark.table(table_name)

        # Execute the bronze data quality checks
        dq_checks_bronze(input_df)
    except Exception as e:
        # Record the failure against this table and carry on with the next one.
        _report_bronze_failure(table_name, e)