# Day 10 of 14 Days Databricks Challenge

In [0]:

from datetime import datetime
import time

def log(msg: object) -> None:
    """Print ``msg`` to stdout, prefixed with the current local timestamp.

    Args:
        msg: The message to print; it is interpolated into an f-string, so
            any object with a string representation is accepted.
    """
    # The marker was previously the mojibake "ðŸ”¹" (the UTF-8 bytes of the
    # small-blue-diamond emoji decoded as cp1252). It is written as an escape
    # so the source stays ASCII and cannot be re-corrupted by a bad re-encode.
    print(f"[{datetime.now()}] \U0001F539 {msg}")


In [0]:
# Inspect how Spark plans the purchase-only filter over the Silver table.
log("Analyzing query execution plan for purchase events")

query = "SELECT * FROM default.silver_events WHERE event_type='purchase'"

log("Running EXPLAIN (extended)")
# extended=True prints the logical plans in addition to the physical plan.
purchases_df = spark.sql(query)
purchases_df.explain(extended=True)

log("Explain plan generated successfully")


In [0]:
# Preview at most ten rows of the Silver events table.
sample_rows = spark.sql("SELECT * FROM default.silver_events LIMIT 10")
display(sample_rows)

In [0]:
# Build a Delta copy of the Silver events partitioned by date and event type.
log("Creating partitioned Silver table with derived event_date")

# event_date is derived by casting event_time to DATE. Because of
# IF NOT EXISTS, the statement leaves an already-existing table untouched.
create_partitioned_table_sql = """
  CREATE TABLE IF NOT EXISTS default.silver_events_part
  USING DELTA
  PARTITIONED BY (event_date, event_type)
  AS
  SELECT
    *,
    CAST(event_time AS DATE) AS event_date
  FROM default.silver_events
"""
spark.sql(create_partitioned_table_sql)

log("Partitioned table default.silver_events_part created successfully")


In [0]:
log("Checking partition columns")

# DESCRIBE DETAIL returns table-level metadata for the Delta table; its
# partitionColumns field lists the partitioning columns. Output is left
# untruncated so the wide columns stay readable.
table_detail = spark.sql("""
  DESCRIBE DETAIL default.silver_events_part
""")
table_detail.show(truncate=False)


In [0]:
log("Listing partitions")

partitions_df = spark.sql("""
  SHOW PARTITIONS default.silver_events_part
""")
# Print only the first ten partition rows, without truncating values.
partitions_df.show(n=10, truncate=False)


In [0]:
log("Running OPTIMIZE with ZORDER on partitioned table")

# Z-order the partitioned table's data on user_id and product_id.
optimize_sql = """
  OPTIMIZE default.silver_events_part
  ZORDER BY (user_id, product_id)
"""
spark.sql(optimize_sql)

log("OPTIMIZE + ZORDER completed")


In [0]:
log("Benchmarking unpartitioned table")

import time

# Apply the same date + event_type predicate that the partitioned benchmark
# uses, so both timings measure the same result set. The partitioned table's
# event_date is CAST(event_time AS DATE), so the equivalent filter on the
# unpartitioned table is spelled out explicitly here.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() is a wall clock and can jump if the system clock is
# adjusted.
start = time.perf_counter()

spark.sql("""
  SELECT * FROM default.silver_events
  WHERE CAST(event_time AS DATE) = '2019-10-17'
    AND event_type = 'purchase'
""").count()

log(f"Unpartitioned query time: {time.perf_counter() - start:.2f} seconds")


In [0]:
log("Benchmarking partitioned table")

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() is a wall clock and can jump if the system clock is
# adjusted. `time` is expected to be imported by an earlier cell.
start = time.perf_counter()

# Both predicates are on the table's partition columns
# (event_date, event_type).
spark.sql("""
  SELECT * FROM default.silver_events_part
  WHERE event_date = '2019-10-17'
    AND event_type = 'purchase'
""").count()

log(f"Partitioned query time: {time.perf_counter() - start:.2f} seconds")


In [0]:
log("Explain plan for unpartitioned table")
# Use the same date + event_type predicate as the partitioned explain so the
# two plans describe the same logical query. The partitioned table's
# event_date is CAST(event_time AS DATE), so that expression is filtered
# directly here.
spark.sql("""
  SELECT * FROM default.silver_events
  WHERE CAST(event_time AS DATE) = '2019-10-17'
    AND event_type = 'purchase'
""").explain(True)


In [0]:
log("Explain plan for partitioned table")
# Both predicates are on the table's partition columns
# (event_date, event_type).
partitioned_purchases_df = spark.sql("""
  SELECT * FROM default.silver_events_part
  WHERE event_date = '2019-10-17'
    AND event_type = 'purchase'
""")
# extended=True prints the logical plans in addition to the physical plan.
partitioned_purchases_df.explain(extended=True)
