# OpenMeteo Weather Data - Silver Layer - AUDIT step

In [0]:
from pyspark.sql.functions import col
from datetime import datetime, timedelta, UTC

In [0]:
# Register the date widget. Its default value is yesterday's date in UTC,
# formatted as YYYY-MM-DD.
default_date = (datetime.now(UTC) - timedelta(days=1)).strftime('%Y-%m-%d')
dbutils.widgets.text("processing_date", default_date, "Date (YYYY-MM-DD)")

# Read the parameter. A blank or whitespace-only value is treated as
# "not provided" and falls back to the default instead of crashing in strptime.
string_date = dbutils.widgets.get("processing_date").strip() or default_date

# Parse into a naive datetime at midnight; fail with a message that names the
# offending value when the format is wrong.
try:
    processing_date = datetime.strptime(string_date, '%Y-%m-%d')
except ValueError as err:
    raise ValueError(
        f"Invalid processing_date {string_date!r}: expected format YYYY-MM-DD"
    ) from err

print(f"Starting processing for date: {processing_date}")

In [0]:
# Three-part name (catalog.schema.table) of the table this notebook audits.
SOURCE_CATALOG = "workspace"
SOURCE_SCHEMA = "silver_staging"
SOURCE_TABLE = "openmeteo_hourly_historical"

In [0]:
# Load the source table and keep only the rows whose observation_date equals
# the date being processed.
# NOTE(review): an equality filter also drops rows whose observation_date is
# NULL, so a later NULL-key count over this DataFrame cannot see them --
# confirm whether that is intended.
source_table_name = f'{SOURCE_CATALOG}.{SOURCE_SCHEMA}.{SOURCE_TABLE}'
table_to_audit = (
    spark.read
    .table(source_table_name)
    .where(col('observation_date') == processing_date)
)

In [0]:
# Expose the filtered DataFrame to Spark SQL under the name "table_to_audit",
# replacing any temp view already registered with that name.
table_to_audit.createOrReplaceTempView("table_to_audit")

In [0]:
# One aggregate query over the temp view: the total row count plus one counter
# per check (temperature outside [-90, 60], cloudcover outside [0, 100],
# NULL observation_date).
audit_sql = """
    SELECT 
        count(*) as total_rows,
        sum(CASE WHEN temperature_2m < -90 OR temperature_2m > 60 THEN 1 ELSE 0 END) as bad_temps,
        sum(CASE WHEN cloudcover > 100 or cloudcover < 0 THEN 1 ELSE 0 END) as bad_cloudcover,
        sum(CASE WHEN observation_date IS NULL THEN 1 ELSE 0 END) as null_keys
    FROM table_to_audit
    """

# The query has no GROUP BY, so the result is a single row; keep that row.
audit_rows = spark.sql(audit_sql).collect()
audit_results = audit_rows[0]

In [0]:
# The audit passes only when every violation counter is exactly zero.
violation_columns = ('bad_temps', 'bad_cloudcover', 'null_keys')
audit_passed = all(audit_results[name] == 0 for name in violation_columns)

In [0]:
# Stop the run with a descriptive error when the audit failed; otherwise
# announce that the publish step may proceed.
if not audit_passed:
    if audit_results['total_rows'] == 0:
        # With zero input rows the SUM(...) counters come back as None, so
        # report the empty input explicitly instead of comparing None to 0.
        failure_msg = "AUDIT FAILED: No rows found to audit."
    else:
        # Collect every failing check so one error reports all problems,
        # rather than letting a later check overwrite an earlier message.
        check_labels = (
            ('bad_temps', 'bad temps'),
            ('bad_cloudcover', 'bad cloudcover values'),
            ('null_keys', 'null keys'),
        )
        findings = []
        for column, label in check_labels:
            count = audit_results[column] or 0  # defensive: treat None as 0
            if count > 0:
                findings.append(f"{count} {label}")
        if findings:
            failure_msg = f"AUDIT FAILED: Found {', '.join(findings)}."
        else:
            # Guarantees failure_msg is always bound before it is used.
            failure_msg = "AUDIT FAILED: Audit did not pass but no check reported a violation."
    print(failure_msg)
    raise Exception(failure_msg)
else:
    print("AUDIT PASSED: Proceeding to Publish.")