### Day 6: Delta Lake Time Travel, Versioning, and Vacuum


In [0]:
from pyspark.sql import SparkSession

# Reuse the active Spark session if one exists; otherwise start one
# under the application name "DeltaLakeDay6".
spark = (
    SparkSession.builder
    .appName("DeltaLakeDay6")
    .getOrCreate()
)

#### 1. Load Gold Table for Versioning


In [0]:
# Select the catalog, then the schema, so the unqualified table name
# used below is looked up in databricks_cat.gold.
for use_statement in ("USE CATALOG databricks_cat", "USE SCHEMA gold"):
    spark.sql(use_statement)

# Load the current version of the table and preview a few rows.
df_daily_vacc = spark.table("daily_country_vaccinations")
df_daily_vacc.show(5)

+-------------+----------+-----------------+
|     location|      date|people_vaccinated|
+-------------+----------+-----------------+
|United States|2021-11-04|      2.2325143E8|
|       France|2021-08-21|      4.7987077E7|
|United States|2021-02-11|      4.1179619E7|
|        Italy|2021-08-31|      4.2819383E7|
|United States|2021-03-21|      9.0645717E7|
+-------------+----------+-----------------+
only showing top 5 rows


#### 2. View Table History


In [0]:
from delta.tables import DeltaTable

# Handle to the Delta table, addressed by its fully qualified name so it does
# not depend on the session's current catalog/schema.
delta_table = DeltaTable.forName(spark, "databricks_cat.gold.daily_country_vaccinations")

# Show the 10 most recent commits. The row limit is an argument of history();
# the notebook's display() renderer is called without show()-style arguments
# (a positional row count / truncate=...), which it does not document.
delta_table.history(10).display()

version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
2,2025-05-18T07:14:25.000Z,6932417523389613,khanayub25@outlook.com,CREATE OR REPLACE TABLE AS SELECT,"Map(partitionBy -> [], clusterBy -> [], description -> null, isManaged -> true, properties -> {""delta.enableDeletionVectors"":""true""}, statsOnLoad -> true)",,,0518-065843-2lw6m337-v2n,1.0,WriteSerializable,False,"Map(numFiles -> 1, numRemovedFiles -> 1, numRemovedBytes -> 27123, numOutputRows -> 3011, numOutputBytes -> 27123)",,Databricks-Runtime/16.3.x-photon-scala2.12
1,2025-05-18T00:37:11.000Z,6932417523389613,khanayub25@outlook.com,CREATE OR REPLACE TABLE AS SELECT,"Map(partitionBy -> [], clusterBy -> [], description -> null, isManaged -> true, properties -> {""delta.enableDeletionVectors"":""true""}, statsOnLoad -> true)",,,0518-001711-4tx6pkuo-v2n,0.0,WriteSerializable,False,"Map(numFiles -> 1, numRemovedFiles -> 1, numRemovedBytes -> 27123, numOutputRows -> 3011, numOutputBytes -> 27123)",,Databricks-Runtime/16.3.x-photon-scala2.12
0,2025-05-18T00:25:29.000Z,6932417523389613,khanayub25@outlook.com,CREATE OR REPLACE TABLE AS SELECT,"Map(partitionBy -> [], clusterBy -> [], description -> null, isManaged -> true, properties -> {""delta.enableDeletionVectors"":""true""}, statsOnLoad -> true)",,,0518-001711-4tx6pkuo-v2n,,WriteSerializable,False,"Map(numFiles -> 1, numRemovedFiles -> 0, numRemovedBytes -> 0, numOutputRows -> 3011, numOutputBytes -> 27123)",,Databricks-Runtime/16.3.x-photon-scala2.12


#### 3. Time Travel: Query an Older Version


In [0]:
# Time travel by commit number: configure the reader for version 1,
# then load the table through it and preview a few rows.
version_reader = spark.read.option("versionAsOf", 1)
df_old_version = version_reader.table("daily_country_vaccinations")
df_old_version.show(5)

+-------------+----------+-----------------+
|     location|      date|people_vaccinated|
+-------------+----------+-----------------+
|United States|2021-11-04|      2.2325143E8|
|       France|2021-08-21|      4.7987077E7|
|United States|2021-02-11|      4.1179619E7|
|        Italy|2021-08-31|      4.2819383E7|
|United States|2021-03-21|      9.0645717E7|
+-------------+----------+-----------------+
only showing top 5 rows


#### 4. Time Travel: Query Using Timestamp

In [0]:
# Time travel by timestamp: Delta reads the latest version committed at or
# before the given instant. The instant must not be earlier than the first
# commit in the table history, otherwise the read is rejected.
# Per the history shown above, version 1 was committed at 2025-05-18T00:37:11Z
# and version 2 at 2025-05-18T07:14:25Z, so this instant selects version 1.
timestamp_str = "2025-05-18T01:00:00.000Z"  # Update as needed (must fall within the table history)

# The option key must be spelled "timestampAsOf"; a misspelled key is not
# recognized as a time-travel request, so the current version would be read.
df_time = spark.read.option("timestampAsOf", timestamp_str).table("daily_country_vaccinations")
df_time.show(5)

+-------------+----------+-----------------+
|     location|      date|people_vaccinated|
+-------------+----------+-----------------+
|United States|2021-11-04|      2.2325143E8|
|       France|2021-08-21|      4.7987077E7|
|United States|2021-02-11|      4.1179619E7|
|        Italy|2021-08-31|      4.2819383E7|
|United States|2021-03-21|      9.0645717E7|
+-------------+----------+-----------------+
only showing top 5 rows


#### 5. Perform a Rollback (Restore Older Version)


In [0]:
# Roll the table back to version 1 (the version inspected in the time-travel
# step) using Delta's restore API instead of overwriting the table from a
# DataFrame. The restore is recorded as its own commit in the table history,
# so the versions that existed before the rollback stay available.
restore_metrics = delta_table.restoreToVersion(1)

# restoreToVersion returns a DataFrame of metrics describing the restore.
restore_metrics.show(truncate=False)


#### 6. VACUUM: Clean Up Old Data Files


In [0]:
# Retention window handed to VACUUM, in hours (7 days * 24 h = 168 h).
retention_hours = 7 * 24
delta_table.vacuum(retention_hours)
