In [0]:
import json
from pathlib import Path
from layer_02_silver import edsm_silver_transform, edsm_silver_upsert
from functions import create_table_if_not_exists
from pyspark.sql.functions import col

# Load the job settings for this silver table from the JSON file that sits
# next to the silver-layer code.
settings_path = Path("../layer_02_silver/powerPlay.json")
settings = json.loads(settings_path.read_text())

# Optional overrides: uncomment to rebuild into a separate "rescue" table
# side by side, leaving the original destination table untouched.
# settings["src_table_name"] = "edsm.bronze.table"
# settings["dst_table_name"] = "edsm.silver.table_rescue"

# Find the highest version recorded in the source table's Delta history;
# the replay that follows walks every version from 0 up to this one.
src_table = settings["src_table_name"]
history = spark.sql(f"DESCRIBE HISTORY {src_table}")
max_version = history.agg({"version": "max"}).first()[0]
print(f"Max version: {max_version}")

# Start from a clean slate. Both steps are destructive: the destination table
# is dropped and the stream checkpoint directory is removed recursively.
dst_table = settings["dst_table_name"]
spark.sql(f"DROP TABLE IF EXISTS {dst_table}")
checkpoint_dir = settings["writeStreamOptions"]["checkpointLocation"]
dbutils.fs.rm(checkpoint_dir, recurse=True)

# Callable used below to merge each version's rows into the destination.
upsert = edsm_silver_upsert(spark, settings)

def _read_source_version(version_number):
    """Return the source table as of the given Delta version (time travel)."""
    reader = spark.read.format("delta").option("versionAsOf", version_number)
    return reader.table(settings["src_table_name"])


# Replay the source table one Delta version at a time, oldest first.
for version in range(max_version + 1):
    if version == 0:
        # The first version has no predecessor: transform the whole snapshot
        # and use it to create the destination table if it is missing.
        initial_snapshot = _read_source_version(version)
        df = edsm_silver_transform(spark, settings, initial_snapshot)
        create_table_if_not_exists(spark, df, settings["dst_table_name"])
    else:
        previous_snapshot = _read_source_version(version - 1)
        current_snapshot = _read_source_version(version)

        # Rows present at this version but not at the previous one.
        # DataFrame.subtract has EXCEPT DISTINCT semantics, so the result is
        # de-duplicated and rows that were removed between the two versions
        # are not represented.
        changed_rows = current_snapshot.subtract(previous_snapshot)

        df = edsm_silver_transform(spark, settings, changed_rows)
        # NOTE(review): the second argument is presumably a batch id
        # (zero-based, hence version - 1) -- confirm against
        # edsm_silver_upsert.
        upsert(df, version - 1)

    print(f"Current version: {version}")