In [0]:
from delta.tables import DeltaTable

# Handle to the Delta table registered as layers.bronze.book_daily_raw.
delta_table = DeltaTable.forName(spark, "layers.bronze.book_daily_raw")

# Render the table's commit history (version log) for inspection.
history_df = delta_table.history()
history_df.display()



In [0]:
%python
df_v2 = spark.read.format("delta").option("versionAsOf", 1).table("layers.bronze.book_daily_raw")
display(df_v2)

In [0]:
# Show the current bronze snapshot sorted by book name, then by scrape timestamp.
(
    spark.read.format("delta")
    .table("layers.bronze.book_daily_raw")
    .orderBy("book_name", "scrape_ts")
    .display()
)

In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, when, first, coalesce, lit
from pyspark.sql.window import Window
from pyspark.sql.functions import date_format,to_date
# Aliased so the Spark aggregate does not shadow Python's built-in min().
from pyspark.sql.functions import min as spark_min

# Current snapshot of the bronze table.
df_raw = spark.read.format("delta").table("layers.bronze.book_daily_raw")

# Lookup table: one non-null ISBN and one non-null author per book_name, used
# below to back-fill rows where those fields are missing.
# min() skips nulls just like first(..., ignorenulls=True), but unlike first()
# inside an unordered groupBy it is deterministic: first() depends on row order,
# which is not stable after a shuffle, so the back-filled value could change
# from one run to the next.
lookup_by_name = df_raw.groupBy("book_name").agg(
    spark_min(col("isbn")).alias("lookup_isbn"),
    spark_min(col("author")).alias("lookup_author")
)

print("Lookup table by book_name:")
lookup_by_name.display()

# Left join keeps every raw row; rows whose book_name has no lookup entry
# simply get null lookup_* columns.
df_filled = df_raw.join(lookup_by_name, on="book_name", how="left")

# Prefer the row's own isbn/author and fall back to the per-book lookup value.
# scrape_date is derived from scrape_ts here, so no later step needs to
# recompute it.
df_cleaned = df_filled.select(
    coalesce(col("isbn"), col("lookup_isbn")).alias("isbn"),
    col("book_name"),
    coalesce(col("author"), col("lookup_author")).alias("author"),
    col("source"),
    col("price"),
    col("scrape_ts"),
    to_date(col("scrape_ts")).alias("scrape_date"),
    col("url")
).orderBy("scrape_ts")

# df_cleaned already carries scrape_date and is already ordered by scrape_ts;
# the name df_for_master is kept as an alias instead of re-deriving the same
# column and sorting a second time.
df_for_master = df_cleaned
df_for_master.display()

In [0]:
from delta.tables import DeltaTable

master_table = DeltaTable.forName(spark, "layers.silver.book_master")

# Columns that identify one row in the master table for this upsert.
merge_keys = ["book_name", "source", "scrape_ts"]

# Delta MERGE fails when more than one source row matches the same target row
# and a whenMatched clause is present. df_cleaned (built in the previous cell)
# is read from the whole bronze table without de-duplication, so collapse it to
# one row per merge key first.
# NOTE(review): dropDuplicates keeps an arbitrary row per key; if duplicate
# scrapes can carry different prices, pick the survivor explicitly instead.
daily_updates = df_cleaned.dropDuplicates(merge_keys)

# Builds: "master.book_name = daily.book_name AND master.source = daily.source
#          AND master.scrape_ts = daily.scrape_ts"
merge_condition = " AND ".join(f"master.{key} = daily.{key}" for key in merge_keys)

# Upsert: existing (book_name, source, scrape_ts) rows only get their price
# refreshed; unseen combinations are inserted with all columns.
master_table.alias("master").merge(
    daily_updates.alias("daily"),
    merge_condition
).whenMatchedUpdate(set={
    "price": "daily.price"
}).whenNotMatchedInsertAll().execute()

print("Merge completed!")

In [0]:
# Display the current contents of the silver master table.
(
    spark.read.format("delta")
    .table("layers.silver.book_master")
    .display()
)