In [None]:
from pyspark.sql import functions as F
import pyspark.sql.types as T
import dlt
from local_time.local_utils import add_local_time
from day_time.day_utils import add_daytime_flag

In [None]:
@dlt.table(comment="Silver layer: cleaned and enriched IoT weather data")
def silver_table():
    """Build the silver weather table from the streaming bronze table.

    Steps, in order:
      1. Stream ``bronze_table`` and project it into flat, explicitly
         typed columns plus a nested ``metadata`` struct.
      2. Call ``add_local_time`` with the ``datetime`` and ``timezone``
         columns, writing to ``local_time``.
      3. Call ``add_daytime_flag`` with ``datetime``, ``sunrise`` and
         ``sunset``, writing to ``is_daytime``.
      4. Evaluate the data-quality rules (geo range, non-null timestamp,
         non-null temperature) and keep only rows that pass all of them.
      5. Return the remaining columns with ``metadata`` moved to the end.

    Returns:
        The streaming DataFrame that DLT materialises as the silver table.
    """
    # Target types shared by many of the projected columns.
    string = T.StringType()
    integer = T.IntegerType()
    timestamp = T.TimestampType()
    decimal_2dp = T.DecimalType(10, 2)

    def typed(source, dtype, alias):
        """Return column *source* cast to *dtype* and renamed to *alias*."""
        return F.col(source).cast(dtype).alias(alias)

    # Only the first element of the `weather` array is used.
    first_weather = F.col("weather")[0]

    # ------------------------------------------------------------
    # 1. Select & cast columns
    # ------------------------------------------------------------
    identity_and_location = [
        # Hash of "<id>_<dt>" used as the event identifier.
        F.sha2(F.concat_ws("_", F.col("id"), F.col("dt")), 256).alias("event_uuid"),
        typed("id", string, "city_id"),
        typed("name", string, "name"),
        typed("sys.country", string, "country"),
        typed("coord.lat", decimal_2dp, "latitude"),
        typed("coord.lon", decimal_2dp, "longitude"),
        typed("timezone", integer, "timezone"),
    ]

    time_dimensions = [
        typed("dt", timestamp, "datetime"),
        typed("sys.sunrise", timestamp, "sunrise"),
        typed("sys.sunset", timestamp, "sunset"),
    ]

    # Weather Metrics - Main
    main_metrics = [
        typed("main.temp", decimal_2dp, "temperature"),
        typed("main.feels_like", decimal_2dp, "feels_like"),
        typed("main.temp_min", decimal_2dp, "temperature_min"),
        typed("main.temp_max", decimal_2dp, "temperature_max"),
        typed("main.humidity", integer, "humidity"),
        typed("main.pressure", integer, "pressure"),
        typed("main.sea_level", integer, "main_sea_level"),
        typed("main.grnd_level", integer, "main_grnd_level"),
    ]

    atmosphere = [
        first_weather["main"].cast(string).alias("weather_main"),
        first_weather["description"].cast(string).alias("weather_description"),
        first_weather["icon"].cast(string).alias("weather_icon"),
        first_weather["id"].cast(integer).alias("weather_id"),
        typed("visibility", integer, "visibility"),
        typed("clouds.all", integer, "clouds_all"),
        typed("rain.1h", decimal_2dp, "rain_1h"),
    ]

    wind = [
        typed("wind.speed", decimal_2dp, "windspeed"),
        typed("wind.gust", decimal_2dp, "wind_gust"),
        typed("wind.deg", integer, "wind_deg"),
    ]

    technical_metadata = [
        typed("base", string, "base"),
        typed("cod", integer, "cod"),
        typed("sys.id", integer, "sys_id"),
        typed("sys.type", integer, "sys_type"),
        typed("ingestion_date", T.DateType(), "ingestion_date"),
    ]

    # Ingestion bookkeeping is bundled into a single struct column.
    ingestion_metadata = F.struct(
        typed("_ingest_timestamp", timestamp, "timestamp"),
        F.col("_ingest_file_name").alias("file_name"),
        F.col("_ingest_file_path").alias("file_path"),
        F.col("_rescued_data").alias("rescued_data"),
    ).alias("metadata")

    # Read bronze table using DLT semantics (incremental)
    projected = dlt.readStream("bronze_table").select(
        *identity_and_location,
        *time_dimensions,
        *main_metrics,
        *atmosphere,
        *wind,
        *technical_metadata,
        ingestion_metadata,
    )

    # ------------------------------------------------------------
    # 2. Add local time, then 3. add daytime flag
    # ------------------------------------------------------------
    with_local_time = add_local_time(
        df=projected,
        datetime_col="datetime",
        timezone_col="timezone",
        output_col="local_time",
    )
    enriched = add_daytime_flag(
        df=with_local_time,
        datetime_col="datetime",
        sunrise_col="sunrise",
        sunset_col="sunset",
        output_col="is_daytime",
    )

    # ------------------------------------------------------------
    # 4. Data Quality Rules
    # ------------------------------------------------------------
    geo_ok = F.col("latitude").between(-90, 90) & F.col("longitude").between(-180, 180)
    time_ok = F.col("datetime").isNotNull()
    temp_ok = F.col("temperature").isNotNull()

    status = F.when(geo_ok & time_ok & temp_ok, "PASS").otherwise("FAIL")
    # concat_ws skips NULLs, so only the rules that failed are listed.
    reasons = F.concat_ws(
        "; ",
        F.when(~geo_ok, "INVALID_GEO"),
        F.when(~time_ok, "NULL_TIME"),
        F.when(~temp_ok, "NULL_TEMP"),
    )

    dq_columns = [
        ("is_valid_geo", geo_ok),
        ("is_valid_timestamp", time_ok),
        ("is_valid_sensor", temp_ok),
        ("dq_status", status),
        ("failure_reason", reasons),
    ]

    flagged = enriched
    for dq_name, dq_expr in dq_columns:
        flagged = flagged.withColumn(dq_name, dq_expr)

    # ------------------------------------------------------------
    # 5. Filter PASS rows only; the DQ helper columns are not published
    # ------------------------------------------------------------
    passing = flagged.filter("dq_status = 'PASS'").drop(
        *[dq_name for dq_name, _ in dq_columns]
    )

    # ------------------------------------------------------------
    # 6. Final column ordering (metadata at the end)
    # ------------------------------------------------------------
    ordered = [name for name in passing.columns if name != "metadata"]
    ordered.append("metadata")
    return passing.select(*ordered)
