In [0]:
# Databricks Notebook: Silver -> Gold (4h resample for cross signal)
from pyspark.sql import functions as F
from pyspark.sql.window import Window

# ========== Configuration ==========
CATALOG = "demo_catalog"
SCHEMA = "demo_schema"

# Three-part table names: <catalog>.<schema>.<table>.
SILVER = ".".join((CATALOG, SCHEMA, "silver_charts"))
GOLD_4H = ".".join((CATALOG, SCHEMA, "gold_prices_4h"))

# Days of silver history loaded per run; kept long enough for the
# cross signal to stabilize (e.g. 120 days).
DAYS_BACK = 120

# ========== Target table ==========
# DDL for the gold table; IF NOT EXISTS makes this a no-op once the table
# has been created. The table is stored as Delta and partitioned by dt.
_gold_4h_ddl = f"""
CREATE TABLE IF NOT EXISTS {GOLD_4H} (
  symbol       STRING,
  bucket_start TIMESTAMP,
  bucket_end   TIMESTAMP,
  close_4h     DOUBLE,
  ma50_4h      DOUBLE,
  ma200_4h     DOUBLE,
  cross_signal STRING,      -- Golden Cross / Dead Cross / Neutral
  dt           DATE
) USING DELTA
PARTITIONED BY (dt)
"""
spark.sql(_gold_4h_ddl)

# ========== Source load (15m -> 4h resample input) ==========
# Keep only 15-minute rows that have an open_time.
_silver_15m = (
    spark.table(SILVER)
    .where("open_time IS NOT NULL")
    .where("interval = '15m'")
)

# Restrict to the trailing DAYS_BACK days, based on the dt column.
_silver_recent = _silver_15m.where(
    f"dt >= date_sub(current_date(), {DAYS_BACK})"
)

# Only these three columns are needed downstream.
src = _silver_recent.select("symbol", "open_time", "close")

# Resample into 4-hour tumbling windows; the close of the latest 15m row
# in each window becomes the 4h close.
win = F.window("open_time", "4 hours")

# F.last("close") depends on the order in which rows reach the aggregation,
# which Spark does not guarantee after the groupBy shuffle, so it can return
# an arbitrary close from the bucket. Structs compare field by field, so the
# max of (open_time, close) is the row with the greatest open_time; its close
# is the true bucket close, and the result is deterministic.
_latest_candle = F.max(F.struct("open_time", "close"))

resampled = (
    src.groupBy("symbol", win)
    .agg(_latest_candle.getField("close").alias("close_4h"))
    .select(
        F.col("symbol"),
        F.col("window.start").alias("bucket_start"),
        F.col("window.end").alias("bucket_end"),
        F.col("close_4h"),
        # Partition date is derived from the bucket's start timestamp.
        F.col("window.start").cast("date").alias("dt"),
    )
)

# Moving averages over 4h buckets (row-based frames, per symbol).
_MA_SHORT_BUCKETS = 50
_MA_LONG_BUCKETS = 200

w_sym = Window.partitionBy("symbol").orderBy("bucket_start")
w50   = w_sym.rowsBetween(-(_MA_SHORT_BUCKETS - 1), 0)
w200  = w_sym.rowsBetween(-(_MA_LONG_BUCKETS - 1), 0)

# Rows near the start of the loaded range have fewer than 200 preceding
# buckets, so their averages would be computed over a truncated frame (the
# very first row would get ma50 == ma200 == close_4h). Upserting those rows
# would overwrite values computed with full history on earlier runs.
# Warm-up rows are therefore dropped: only buckets with at least
# _MA_LONG_BUCKETS rows of history (including themselves) are emitted.
# Consequence: the oldest 199 buckets per symbol in the load window, and
# symbols with fewer than 200 buckets in total, produce no output rows.
gold_4h_df = (
    resampled
    .withColumn("_bucket_seq", F.row_number().over(w_sym))
    .withColumn("ma50_4h",  F.avg("close_4h").over(w50))
    .withColumn("ma200_4h", F.avg("close_4h").over(w200))
    .withColumn(
        "cross_signal",
        # Label reflects the current ordering of the two averages on this
        # bucket (short above long / short below long / equal or null).
        F.when(F.col("ma50_4h") > F.col("ma200_4h"), F.lit("Golden Cross"))
         .when(F.col("ma50_4h") < F.col("ma200_4h"), F.lit("Dead Cross"))
         .otherwise(F.lit("Neutral"))
    )
    .where(F.col("_bucket_seq") >= _MA_LONG_BUCKETS)
    # Helper column must not reach the MERGE source (INSERT * maps by name).
    .drop("_bucket_seq")
)

# Register the computed frame as a temp view so the MERGE can reference it.
gold_4h_df.createOrReplaceTempView("gold4h_upserts")

# Upsert keyed on (symbol, bucket_start): matched rows have every other
# column overwritten from the source; unmatched source rows are inserted.
_merge_gold_4h_sql = f"""
MERGE INTO {GOLD_4H} AS t
USING gold4h_upserts AS s
ON  t.symbol = s.symbol AND t.bucket_start = s.bucket_start
WHEN MATCHED THEN UPDATE SET
  t.bucket_end   = s.bucket_end,
  t.close_4h     = s.close_4h,
  t.ma50_4h      = s.ma50_4h,
  t.ma200_4h     = s.ma200_4h,
  t.cross_signal = s.cross_signal,
  t.dt           = s.dt
WHEN NOT MATCHED THEN INSERT *
"""
spark.sql(_merge_gold_4h_sql)

_done_message = f"[GOLD_4H] upsert complete: {GOLD_4H}"
print(_done_message)
