In [None]:
import dlt
from pyspark.sql import functions as F
import pyspark.sql.types as T

**DS GOLD TABLE**

In [None]:
@dlt.table(comment="Gold layer: aggregated metrics for BI consumption")
def gold_ds():
    """Aggregate the ``silver_table`` stream into hourly per-city metrics.

    Rows are grouped by ``city_id``, ``name`` (exposed as ``city_name``),
    ``country`` (exposed as ``country_code``) and a 1-hour tumbling window
    over ``local_time``. For every group the following columns are produced:

    * ``temp_mean`` / ``temp_max`` / ``temp_min`` / ``temp_std``
    * ``humidity_mean`` / ``humidity_std`` / ``pressure_mean``
    * ``wind_speed_mean``
    * ``rain_total_mm`` (null ``rain_1h`` values count as 0)
    * ``clouds_avg_pct``

    The window struct is flattened into ``hour_local_start`` and
    ``hour_local_end`` and then dropped.

    Returns:
        The aggregated streaming DataFrame backing the ``gold_ds`` table.
    """
    readings = dlt.readStream("silver_table")

    # NOTE(review): no watermark is declared on `local_time` before this
    # streaming aggregation -- confirm whether late data / state growth
    # should be bounded with `withWatermark` for this pipeline.
    grouping_keys = [
        F.col("city_id"),
        F.col("name").alias("city_name"),
        F.col("country").alias("country_code"),
        F.window(F.col("local_time"), "1 hour").alias("time_window"),
    ]

    temperature_metrics = [
        F.round(F.avg("temperature"), 2).alias("temp_mean"),
        F.max("temperature").alias("temp_max"),
        F.min("temperature").alias("temp_min"),
        F.round(F.stddev("temperature"), 3).alias("temp_std"),
    ]
    humidity_pressure_metrics = [
        F.round(F.avg("humidity"), 1).alias("humidity_mean"),
        F.round(F.stddev("humidity"), 2).alias("humidity_std"),
        F.round(F.avg("pressure"), 1).alias("pressure_mean"),
    ]
    wind_metrics = [
        F.round(F.avg("windspeed"), 2).alias("wind_speed_mean"),
    ]
    total_metrics = [
        # Missing rain readings are treated as zero so they do not null the sum.
        F.sum(F.coalesce("rain_1h", F.lit(0))).alias("rain_total_mm"),
        F.round(F.avg("clouds_all"), 1).alias("clouds_avg_pct"),
    ]

    hourly_stats = readings.groupBy(*grouping_keys).agg(
        *temperature_metrics,
        *humidity_pressure_metrics,
        *wind_metrics,
        *total_metrics,
    )

    # Expose the window bounds as plain columns, then discard the struct.
    with_bounds = hourly_stats.withColumn(
        "hour_local_start", F.col("time_window.start")
    ).withColumn(
        "hour_local_end", F.col("time_window.end")
    )
    return with_bounds.drop("time_window")