In [0]:
# Databricks Notebook: Silver -> Gold (4h MA & Cross + 24h change, no close_time)
from pyspark.sql import functions as F
from pyspark.sql.window import Window

# ===== Configuration =====
# Unity Catalog location shared by the source and target tables.
CATALOG = "demo_catalog"
SCHEMA = "demo_schema"
_NAMESPACE = f"{CATALOG}.{SCHEMA}"

# Source (silver) candle table; this job only reads its 4h rows.
SILVER = f"{_NAMESPACE}.silver_charts"
# Target (gold) table holding the 4h indicators.
GOLD_4H = f"{_NAMESPACE}.gold_prices_4h"

# Day-based lookback, counted back from current_date(); chosen so that the
# 200-bar moving average has enough history to stabilize.
DAYS_BACK = 200

# Evaluate all timestamp/date expressions of this session in UTC.
spark.sql("SET spark.sql.session.timeZone=UTC")

# ===== Create the target table =====
# Delta table partitioned by `dt`; the statement is a no-op when the table
# already exists. Column notes (the in-SQL comments are kept verbatim):
#   bucket_start   - open time of the 4h candle
#   cross_signal   - one of "Golden Cross" / "Dead Cross" / "Neutral"
#   pct_change_24h - percentage change versus the close 24 hours earlier
create_gold_ddl = f"""
CREATE TABLE IF NOT EXISTS {GOLD_4H} (
  symbol         STRING,
  bucket_start   TIMESTAMP,   -- 4h open_time
  close_4h       DOUBLE,
  ma50_4h        DOUBLE,
  ma200_4h       DOUBLE,
  cross_signal   STRING,      -- Golden Cross / Dead Cross / Neutral
  pct_change_24h DOUBLE,      -- 24시간 전 대비 %
  dt             DATE
) USING DELTA
PARTITIONED BY (dt)
"""
spark.sql(create_gold_ddl)

# ===== Load source (4h candles only) =====
# Indicator sizes, expressed in 4h bars.
MA_SHORT_BARS = 50
MA_LONG_BARS = 200
BARS_PER_DAY = 6  # 24h / 4h

# Extra history that is read only to warm up the rolling windows and is never
# written out. Without it, the oldest rows of the loaded range are averaged
# over fewer than MA_LONG_BARS bars (down to a single bar) and have no
# 24h-ago bar, and the MERGE further down would overwrite values stored by
# earlier runs with those truncated results.
# ceil(200 / 6) = 34 days of bars, plus one day of slack.
WARMUP_DAYS = -(-MA_LONG_BARS // BARS_PER_DAY) + 1

# Rows with dt >= emit_from are upserted (same range as before);
# rows in [load_from, emit_from) only feed the windows.
emit_from = F.date_sub(F.current_date(), DAYS_BACK)
load_from = F.date_sub(F.current_date(), DAYS_BACK + WARMUP_DAYS)

src = (
    spark.table(SILVER)
    .where(F.col("interval") == "4h")
    .where(F.col("dt") >= load_from)
    .select(
        "symbol",
        F.col("open_time").alias("bucket_start"),
        F.col("close").alias("close_4h"),
        "dt",
    )
)

# ===== Moving averages / 24h change =====
# Per-symbol, time-ordered window; the row frames cover the current bar plus
# the preceding 49 / 199 bars.
w = Window.partitionBy("symbol").orderBy("bucket_start")
w50 = w.rowsBetween(-(MA_SHORT_BARS - 1), 0)
w200 = w.rowsBetween(-(MA_LONG_BARS - 1), 0)

gold_df = (
    src
    .withColumn("ma50_4h", F.avg("close_4h").over(w50))
    .withColumn("ma200_4h", F.avg("close_4h").over(w200))
    .withColumn(
        "cross_signal",
        # Label reflects which average is currently on top; ties and NULL
        # averages fall through to "Neutral".
        F.when(F.col("ma50_4h") > F.col("ma200_4h"), F.lit("Golden Cross"))
         .when(F.col("ma50_4h") < F.col("ma200_4h"), F.lit("Dead Cross"))
         .otherwise(F.lit("Neutral"))
    )
    # NOTE(review): a lag of 6 rows equals 24 hours only when no 4h bar is
    # missing for the symbol; with gaps the reference bar is older than 24h.
    .withColumn("close_24h_ago", F.lag("close_4h", BARS_PER_DAY).over(w))
    .withColumn(
        "pct_change_24h",
        # NULL when there is no reference bar or when it is zero; the zero
        # guard avoids a DIVIDE_BY_ZERO error under ANSI mode and matches the
        # NULL that non-ANSI mode already produced.
        F.when(
            F.col("close_24h_ago").isNotNull() & (F.col("close_24h_ago") != 0),
            (F.col("close_4h") - F.col("close_24h_ago"))
            / F.col("close_24h_ago") * 100.0,
        ).cast("double")
    )
    # Drop the warm-up rows only AFTER the window functions have seen them.
    .where(F.col("dt") >= emit_from)
    .select("symbol", "bucket_start", "close_4h",
            "ma50_4h", "ma200_4h", "cross_signal", "pct_change_24h", "dt")
)

# ===== Upsert into the gold table =====
# Expose the computed frame to SQL under a temp view name, then MERGE it into
# the target keyed on (symbol, bucket_start): matching rows get every
# non-key column refreshed, unmatched rows are inserted as-is.
gold_df.createOrReplaceTempView("gold4h_upserts")

merge_sql = f"""
MERGE INTO {GOLD_4H} AS t
USING gold4h_upserts AS s
ON  t.symbol = s.symbol AND t.bucket_start = s.bucket_start
WHEN MATCHED THEN UPDATE SET
  t.close_4h       = s.close_4h,
  t.ma50_4h        = s.ma50_4h,
  t.ma200_4h       = s.ma200_4h,
  t.cross_signal   = s.cross_signal,
  t.pct_change_24h = s.pct_change_24h,
  t.dt             = s.dt
WHEN NOT MATCHED THEN INSERT *
"""
spark.sql(merge_sql)

print(f"[GOLD_4H] upsert complete: {GOLD_4H}")
