In [0]:
# Databricks Notebook: Transform Bronze (Binance) -> Silver (improved, 4h-focused)
from delta.tables import DeltaTable
from pyspark.sql import Window
from pyspark.sql.functions import coalesce, col, from_json, row_number, split, to_date
from pyspark.sql.types import ArrayType, StringType

# Pin the session time zone to UTC so timestamp -> date boundaries line up.
spark.sql("SET spark.sql.session.timeZone=UTC")

# Look-back window (in days) applied to the bronze read; None disables it.
DAYS_BACK = 14

# Fully qualified table names: <catalog>.<schema>.<table>.
CATALOG, SCHEMA = "demo_catalog", "demo_schema"
_NAMESPACE = f"{CATALOG}.{SCHEMA}"
BRONZE = f"{_NAMESPACE}.bronze_charts"
SILVER = f"{_NAMESPACE}.silver_charts"

# DDL for the silver table: one row per candle, stored as Delta and
# partitioned by the date column `dt`. IF NOT EXISTS makes re-runs a no-op.
create_silver_ddl = f"""
CREATE TABLE IF NOT EXISTS {SILVER} (
  symbol       STRING,
  interval     STRING,
  open_time    TIMESTAMP,
  open         DOUBLE,
  high         DOUBLE,
  low          DOUBLE,
  close        DOUBLE,
  volume       DOUBLE,
  unique_key   STRING,
  event_time   TIMESTAMP,
  dt           DATE
) USING DELTA
PARTITIONED BY (dt)
"""
spark.sql(create_silver_ddl)

# Read the bronze source table.
bronze = spark.table(BRONZE)

# Optionally restrict the read to the last DAYS_BACK days of the bronze `dt` column.
if DAYS_BACK is not None:
    lookback_predicate = f"dt >= date_sub(current_date(), {DAYS_BACK})"
    bronze = bronze.filter(lookback_predicate)

# Only 4-hour candles are used downstream, so filter them out early.
# A row qualifies via its `interval` column or via a "|4h|" segment in unique_key.
four_hour_predicate = "interval = '4h' OR unique_key LIKE '%|4h|%'"
bronze = bronze.filter(four_hour_predicate)

# raw_json is parsed as a JSON array of strings. Positions used below:
#   0 = open time (treated as epoch milliseconds), 1 = open, 2 = high,
#   3 = low, 4 = close, 5 = volume.
arr = from_json(col("raw_json"), ArrayType(StringType()))

# unique_key split on "|": element 0 backs up `symbol`, element 1 backs up `interval`.
uk  = split(col("unique_key"), "\\|")

# Ranking used to keep exactly one row per unique_key: the one with the
# greatest event_time (NULL event_time sorts last). A plain dropDuplicates()
# keeps an arbitrary row, so the version written to silver could differ
# between runs when bronze holds several versions of the same candle.
# NOTE(review): assumes a larger event_time means a newer version — confirm.
latest_first = (
    Window
    .partitionBy("unique_key")
    .orderBy(col("event_time").desc_nulls_last())
)

silver_df = (
    bronze
    # Prefer the bronze column value; fall back to the piece of unique_key when it is NULL.
    .withColumn("symbol_eff", coalesce(col("symbol"), uk.getItem(0)))
    .withColumn("interval_eff", coalesce(col("interval"), uk.getItem(1)))
    # Milliseconds -> seconds before the cast, because a numeric cast to
    # timestamp is interpreted as seconds since the epoch.
    .withColumn("open_time", (arr.getItem(0).cast("long") / 1000).cast("timestamp"))
    .withColumn("open", arr.getItem(1).cast("double"))
    .withColumn("high", arr.getItem(2).cast("double"))
    .withColumn("low", arr.getItem(3).cast("double"))
    .withColumn("close", arr.getItem(4).cast("double"))
    .withColumn("volume", arr.getItem(5).cast("double"))
    # Partition date is always derived from open_time (overrides any bronze dt).
    .withColumn("dt", to_date(col("open_time")))
    .select(
        col("symbol_eff").alias("symbol"),
        col("interval_eff").alias("interval"),
        "open_time", "open", "high", "low", "close", "volume",
        "unique_key", "event_time", "dt",
    )
    # Rows with a NULL unique_key or a NULL open_time (and therefore NULL dt,
    # e.g. unparseable raw_json) can never satisfy an equality-based MERGE
    # condition on those columns, so they would be inserted again on every
    # run. Drop them before they reach the upsert.
    .where(col("unique_key").isNotNull() & col("open_time").isNotNull())
    # Deterministic de-duplication: keep the top-ranked row per unique_key.
    .withColumn("_rn", row_number().over(latest_first))
    .where(col("_rn") == 1)
    .drop("_rn")
    .repartition("dt")
)

# Upsert the transformed rows into the silver Delta table.
target = DeltaTable.forName(spark, SILVER)

# Source and target rows pair up when both unique_key and dt agree.
merge_condition = "t.unique_key = s.unique_key AND t.dt = s.dt"

# On a match, each of these target columns is overwritten with the source value.
updated_columns = (
    "symbol",
    "interval",
    "open_time",
    "open",
    "high",
    "low",
    "close",
    "volume",
    "event_time",
    "dt",
)
update_assignments = {name: f"s.{name}" for name in updated_columns}

merge_builder = target.alias("t").merge(silver_df.alias("s"), merge_condition)
merge_builder = merge_builder.whenMatchedUpdate(set=update_assignments)
# Source rows without a matching target row are inserted with all columns.
merge_builder = merge_builder.whenNotMatchedInsertAll()
merge_builder.execute()

print(f"[SILVER] upsert complete: {SILVER}")
