In [0]:
import dlt
from pyspark.sql.functions import *
from pyspark.sql.types import *


In [0]:
# Source directory (under /Volumes) holding the parquet files that the bronze
# table's streaming reader loads via `.load(raw_data)`.
raw_data = "/Volumes/soni/default/streaming_writes/synthetic_data_1million_events_per_second/"

In [0]:
# Bronze table: stream parquet files in and drop rows that fail the expectations below
@dlt.expect_or_drop(
    "valid_timestamp",
    "event_timestamp IS NOT NULL",
)
@dlt.expect_or_drop(
    "valid_site",
    "site_id IS NOT NULL AND site_id != ''",
)
@dlt.expect_or_drop(
    "valid_angles",
    "angle_actual IS NOT NULL AND angle_target IS NOT NULL",
)
@dlt.expect_or_drop(
    "reasonable_angle_range",
    "angle_actual BETWEEN -180 AND 180 AND angle_target BETWEEN -180 AND 180",
)
@dlt.expect_or_drop(
    "valid_motor_temp",
    "motor_temp IS NOT NULL AND motor_temp BETWEEN -50 AND 150",
)
@dlt.table(
    name="bronze_raw_stream",
    comment="Raw tracker data ingested from parquet files",
    partition_cols=["event_date"],
    table_properties={
        "quality": "bronze",
        "pipelines.autoOptimize.managed": "true",
        "delta.enableDeletionVectors": "true",
    },
    spark_conf={
        # Trigger this table every 60 seconds instead of the pipeline default.
        # NOTE(review): confirm 60 seconds is the intended interval.
        "pipelines.trigger.interval": "60 seconds",
    },
)
def bronze_tracker_data():
    """
    Bronze layer: streaming ingest of the parquet files under `raw_data`.

    Reads the files with the "cloudFiles" streaming source and returns a
    DataFrame containing every source column plus:

    * ``event_date``  - ``event_timestamp`` cast to a date (the partition column)
    * ``_metadata``   - selected explicitly alongside ``*``
    * ``angle_dev``   - ``angle_target - angle_actual``
    * ``kwh_proxy``   - ``irradiance / 1000.0`` (labelled a Wh -> kWh proxy by
      the original author; units of ``irradiance`` are not visible here)

    Rows violating any of the ``expect_or_drop`` constraints declared on this
    table are dropped rather than written.
    """
    parquet_stream = (
        spark.readStream.format("cloudFiles")
        .option("cloudFiles.format", "parquet")
        .load(raw_data)
    )

    # Derive the partition key first so that "*" below already includes it,
    # keeping the column order: source columns, event_date, _metadata.
    dated_stream = parquet_stream.withColumn(
        "event_date", col("event_timestamp").cast("date")
    )
    with_file_metadata = dated_stream.select("*", "_metadata")

    return (
        with_file_metadata
        .withColumn("angle_dev", col("angle_target") - col("angle_actual"))
        .withColumn("kwh_proxy", col("irradiance") / 1000.0)  # Wh → kWh proxy
    )

In [0]:
@dlt.table(
    name="silver_alerts",
    comment="Real-time alerts for operational issues requiring immediate attention",
    table_properties={
        # NOTE(review): the table is named silver_* but tagged quality=gold;
        # confirm which layer is intended before changing either.
        "quality": "gold",
        "pipelines.autoOptimize.managed": "true",
    },
    spark_conf={
        # Trigger this table every 15 minutes instead of the pipeline default.
        "pipelines.trigger.interval": "15 minutes",
    },
)
def silver_alerts():
    """
    Alert table: the subset of ``silver_core_metrics`` rows that need attention.

    A row is kept when at least one of these holds:

    * ``alert_level`` is ``CRITICAL`` or ``FAILURE``
    * ``maintenance_priority`` is ``HIGH``
    * ``availability_pct`` is below 90
    * ``avg_angle_deviation_deg`` is above 5
    * ``fault_events`` is greater than 0

    The result carries the site, window start, the metrics above,
    ``system_health_score`` and ``avg_motor_temp``, plus two derived columns:
    ``alert_generated_at`` (``current_timestamp()``) and ``alert_description``,
    whose text comes from the first matching rule in this order: FAILURE,
    CRITICAL, fault events, low availability, high angle deviation; rows that
    match none of those get a generic maintenance message.
    """
    # skipChangeCommits is set on the upstream read; presumably so that
    # update/delete commits on silver_core_metrics are skipped - TODO confirm.
    metrics_stream = (
        spark.readStream.format("delta")
        .option("skipChangeCommits", "true")
        .table("silver_core_metrics")
    )

    # Any single condition is enough to raise an alert.
    needs_attention = (
        col("alert_level").isin(["CRITICAL", "FAILURE"])
        | (col("maintenance_priority") == "HIGH")
        | (col("availability_pct") < 90)
        | (col("avg_angle_deviation_deg") > 5)
        | (col("fault_events") > 0)
    )

    # Rules are evaluated top to bottom; the first match supplies the text.
    description = (
        when(
            col("alert_level") == "FAILURE",
            "System failure detected - immediate attention required",
        )
        .when(
            col("alert_level") == "CRITICAL",
            "Critical system issues detected",
        )
        .when(
            col("fault_events") > 0,
            concat(lit("Fault events detected: "), col("fault_events")),
        )
        .when(
            col("availability_pct") < 90,
            concat(lit("Low availability: "), col("availability_pct"), lit("%")),
        )
        .when(
            col("avg_angle_deviation_deg") > 5,
            concat(lit("High angle deviation: "), col("avg_angle_deviation_deg"), lit("°")),
        )
        .otherwise("General maintenance attention required")
    )

    return (
        metrics_stream
        .filter(needs_attention)
        .select(
            "site_id",
            "window_start",
            "alert_level",
            "maintenance_priority",
            "availability_pct",
            "avg_angle_deviation_deg",
            "fault_events",
            "system_health_score",
            "avg_motor_temp",
            current_timestamp().alias("alert_generated_at"),
        )
        .withColumn("alert_description", description)
    )