# In [0]:
# Databricks Notebook: Transform Bronze (Futures Leaderboard Positions) -> Silver (with meta cols)
from delta.tables import DeltaTable
from pyspark.sql import Window
from pyspark.sql.functions import col, row_number

# Fully qualified table names are assembled as <catalog>.<schema>.<table>.
CATALOG = "demo_catalog"
SCHEMA = "demo_schema"
BRONZE = ".".join((CATALOG, SCHEMA, "bronze_futures_leaderboard_positions"))
SILVER = ".".join((CATALOG, SCHEMA, "silver_futures_positions"))

# =================================
# (A) Create the Silver table (including meta columns)
# =================================
# The DDL is idempotent (IF NOT EXISTS), so re-running the notebook is safe.
# The table is a Delta table partitioned by dt.
create_silver_sql = f"""
CREATE TABLE IF NOT EXISTS {SILVER} (
  source        STRING,
  endpoint_name STRING,
  account_label STRING,
  uid        STRING,
  symbol     STRING,
  unique_key STRING,
  entryPrice DOUBLE,
  markPrice  DOUBLE,
  pnl        DOUBLE,
  roe        DOUBLE,
  amount     DOUBLE,
  leverage   DOUBLE,
  yellow     BOOLEAN,
  tradeBefore BOOLEAN,
  update_ts  BIGINT,
  event_time  TIMESTAMP,
  ingest_time TIMESTAMP,
  dt          DATE
) USING DELTA
PARTITIONED BY (dt)
"""
spark.sql(create_silver_sql)

# =================================
# (B) Load Bronze & select/clean columns
# =================================
bronze = spark.table(BRONZE)

# Project exactly the Silver columns, casting each value column to the type
# declared in the Silver DDL so that the MERGE's update-all / insert-all
# clauses line up by name and type.
typed_df = bronze.select(
    col("source"),
    col("endpoint_name"),
    col("account_label"),
    col("uid"),
    col("symbol"),
    col("unique_key"),
    col("entryPrice").cast("double"),
    col("markPrice").cast("double"),
    col("pnl").cast("double"),
    col("roe").cast("double"),
    col("amount").cast("double"),
    col("leverage").cast("double"),
    col("yellow").cast("boolean"),
    col("tradeBefore").cast("boolean"),
    col("update_ts").cast("long"),
    col("event_time").cast("timestamp"),
    col("ingest_time").cast("timestamp"),
    col("dt").cast("date"),
)

# Deterministic de-duplication.
# dropDuplicates(["unique_key"]) keeps an arbitrary row per key, so re-runs
# could upsert a different (possibly stale) version of the same record.
# Instead, rank the rows of each MERGE key (unique_key, dt) newest-first and
# keep only the top one. Using the same key as the MERGE condition also
# guarantees at most one source row per target row.
# NOTE(review): "newest" is assumed to mean highest update_ts, with
# ingest_time as tie-breaker -- confirm against the Bronze producer.
latest_first = Window.partitionBy("unique_key", "dt").orderBy(
    col("update_ts").desc_nulls_last(),
    col("ingest_time").desc_nulls_last(),
)

silver_df = (
    typed_df
    .withColumn("_row_rank", row_number().over(latest_first))
    .filter(col("_row_rank") == 1)
    .drop("_row_rank")                # helper column must not reach Silver
    .repartition("dt", "symbol")      # co-locate rows by partition column and symbol
)

# =================================
# (C) Delta MERGE (upsert)
#   - Rows are matched on unique_key within the same dt partition:
#     matched rows are overwritten with all source columns,
#     unmatched source rows are inserted.
# =================================
target = DeltaTable.forName(spark, SILVER)

# "t" is the existing Silver table, "s" is the freshly prepared source frame.
merge_condition = "t.unique_key = s.unique_key AND t.dt = s.dt"
upsert = (
    target.alias("t")
    .merge(silver_df.alias("s"), merge_condition)
    .whenMatchedUpdateAll()
    .whenNotMatchedInsertAll()
)
upsert.execute()

print(f"Silver transform complete: upserted into {SILVER}")
