In [0]:
# Silver (NO WIDGETS) - JSON array parsing version
from pyspark.sql.functions import col, from_json, split
from pyspark.sql.types import ArrayType, StringType

# Catalog and schema that hold both the source and the target table.
CATALOG = "demo_catalog"
SCHEMA = "demo_schema"

# Fully-qualified (catalog.schema.table) names: BRONZE is read, SILVER is written.
BRONZE = ".".join([CATALOG, SCHEMA, "bronze_charts"])
SILVER = ".".join([CATALOG, SCHEMA, "silver_charts"])

# Ensure the silver Delta table exists before anything is written to it.
# IF NOT EXISTS makes this a no-op when the table was already created;
# the table is partitioned by the dt (DATE) column.
create_silver_ddl = f"""
CREATE TABLE IF NOT EXISTS {SILVER} (
  symbol       STRING,
  interval     STRING,
  open_time    TIMESTAMP,
  open         DOUBLE,
  high         DOUBLE,
  low          DOUBLE,
  close        DOUBLE,
  volume       DOUBLE,
  unique_key   STRING,
  event_time   TIMESTAMP,
  dt           DATE
) USING DELTA
PARTITIONED BY (dt)
"""
spark.sql(create_silver_ddl)

# Source rows; the code below reads its raw_json, unique_key and event_time columns.
bronze = spark.table(BRONZE)

# 1) Parse raw_json as a JSON array whose elements are all read as strings.
parsed_values = from_json(col("raw_json"), ArrayType(StringType()))

# 2) unique_key has the form "symbol|interval|open_ms"; split it on the literal pipe.
key_parts = split(col("unique_key"), "\\|")

# Array element 0 is divided by 1000 before the timestamp cast
# (presumably epoch milliseconds -> seconds; confirm against the bronze producer).
open_time = (parsed_values.getItem(0).cast("long") / 1000).cast("timestamp").alias("open_time")

# Array elements 1..5 map, in order, to the OHLCV columns, each cast to double.
ohlcv_columns = [
    parsed_values.getItem(position).cast("double").alias(field_name)
    for position, field_name in enumerate(["open", "high", "low", "close", "volume"], start=1)
]

# Project straight into the silver column order, then keep one row per unique_key.
# NOTE(review): dropDuplicates keeps an arbitrary row when several share a key.
silver = bronze.select(
    key_parts.getItem(0).alias("symbol"),
    key_parts.getItem(1).alias("interval"),
    open_time,
    *ohlcv_columns,
    col("unique_key"),
    col("event_time"),
    col("event_time").cast("date").alias("dt"),
).dropDuplicates(["unique_key"])

# Idempotent load: insert only the rows whose unique_key is not yet present in
# the silver table. A plain append re-inserts every bronze row each time this
# cell is re-run, because the dropDuplicates() on `silver` only removes
# duplicates inside the current batch, not against rows already stored.
updates_view = "silver_charts_updates"
silver.createOrReplaceTempView(updates_view)

# The match is on unique_key only (not dt): dt is derived from event_time, which
# may differ between two loads of the same key. `<=>` is null-safe equality, so
# a row with a NULL key is also inserted at most once.
spark.sql(f"""
MERGE INTO {SILVER} AS target
USING {updates_view} AS source
ON target.unique_key <=> source.unique_key
WHEN NOT MATCHED THEN INSERT *
""")

print(f"Silver transform (array parser) complete: wrote to {SILVER}")


In [0]:
%sql
-- Show the contents of the silver table written by the previous cell.
SELECT *
FROM demo_catalog.demo_schema.silver_charts