In [0]:
# Gold (NO WIDGETS) - first write without lookback filter
from pyspark.sql.functions import avg, col, lit, window

# Fully qualified table names are assembled from the catalog and schema so
# the location can be changed in one place.
CATALOG = "demo_catalog"
SCHEMA = "demo_schema"
SILVER = ".".join((CATALOG, SCHEMA, "silver_charts"))
GOLD = ".".join((CATALOG, SCHEMA, "gold_signals"))

# Duration string handed to pyspark's window() when bucketing rows.
WINDOW_SPEC = "5 minutes"

# Create the gold Delta table on first run; IF NOT EXISTS makes re-runs a no-op.
gold_ddl = f"""
CREATE TABLE IF NOT EXISTS {GOLD} (
  bucket_start TIMESTAMP,
  bucket_end   TIMESTAMP,
  symbol       STRING,
  avg_price    DOUBLE,
  latest_signal STRING
) USING DELTA
"""
spark.sql(gold_ddl)

# Source rows: only records with a non-null open_time can be assigned to a
# time bucket, so everything else is dropped up front.
silver = spark.table(SILVER).where("open_time IS NOT NULL")

# Average close price per symbol for each WINDOW_SPEC-wide bucket of open_time,
# flattened into the column layout of the gold table.
agg_df = (
    silver
    .groupBy(window(col("open_time"), WINDOW_SPEC), col("symbol"))
    .agg(avg(col("close")).alias("avg_price"))
    .select(
        col("window.start").alias("bucket_start"),
        col("window.end").alias("bucket_end"),
        col("symbol"),
        col("avg_price"),
    )
    # No signal is derived at this stage, so emit a typed NULL that matches
    # the STRING latest_signal column of the gold table. (An arithmetic
    # expression such as x*0 - x*0 yields 0.0 for non-null x, not NULL.)
    .withColumn("latest_signal", lit(None).cast("string"))
)

# Upsert: register the aggregate as a temp view so it can be referenced from
# SQL, then MERGE it into the gold table keyed on (symbol, bucket_start).
# The symbol comparison uses the null-safe operator <=> so that a bucket whose
# symbol is NULL matches its existing row on re-runs; with plain "=" a NULL
# symbol never matches and every run would insert a duplicate row.
agg_df.createOrReplaceTempView("gold_upserts")
spark.sql(f"""
MERGE INTO {GOLD} AS t
USING gold_upserts AS s
ON t.symbol <=> s.symbol AND t.bucket_start = s.bucket_start
WHEN MATCHED THEN UPDATE SET
  t.bucket_end    = s.bucket_end,
  t.avg_price     = s.avg_price,
  t.latest_signal = s.latest_signal
WHEN NOT MATCHED THEN INSERT *
""")

print("Gold aggregate complete")


In [0]:
%sql
-- Sanity checks on the gold table: total row count, then the 50 most recent
-- buckets (newest first, ties ordered by symbol).
-- NOTE(review): some notebook front ends render only the final statement's
-- result in a multi-statement cell — confirm the count is actually displayed.
SELECT COUNT(*)
FROM demo_catalog.demo_schema.gold_signals;

SELECT *
FROM demo_catalog.demo_schema.gold_signals
ORDER BY
  bucket_start DESC,
  symbol ASC
LIMIT 50;
