In [1]:
from delta import configure_spark_with_delta_pip
from pyspark.sql import SparkSession

# Build the session configuration as a parenthesised chain: unlike trailing
# backslashes, this does not depend on the blank line that follows it.
builder = (
    SparkSession.builder
    .master("local[*]")
    .appName("DeltaLakeExample")
    # Delta Lake's documented session settings: register its SQL extension and
    # make DeltaCatalog the session catalog. These are static configs, so they
    # only take effect when this cell actually creates the session (a reused
    # session keeps whatever it was started with).
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config(
        "spark.sql.catalog.spark_catalog",
        "org.apache.spark.sql.delta.catalog.DeltaCatalog",
    )
)

# configure_spark_with_delta_pip augments the builder so the session can load
# the Delta Lake JARs that correspond to the pip-installed delta-spark package.
spark = configure_spark_with_delta_pip(builder).getOrCreate()

print("Spark session created successfully!")
print(f"Spark version: {spark.version}")

Spark session created successfully!
Spark version: 4.0.0


26/02/07 07:05:28 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.


In [2]:
# Seed the demo table: two (id, plan) rows written in Delta format.
# "overwrite" mode replaces the table's current contents on each run.
df = spark.createDataFrame(
    [(1, "Starter"), (2, "Pro")],
    schema=["id", "plan"],
)
df.write.save("/tmp/delta-tables/table2", format="delta", mode="overwrite")
print("Delta table created successfully!")

                                                                                

Delta table created successfully!


In [3]:
# 1. Load the Delta table written above and display its rows
print("=== Reading Delta Table ===")
delta_df = spark.read.load("/tmp/delta-tables/table2", format="delta")
delta_df.show()

=== Reading Delta Table ===


26/02/07 07:05:56 WARN SparkStringUtils: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.
                                                                                

+---+-------+
| id|   plan|
+---+-------+
|  1|Starter|
|  2|    Pro|
+---+-------+



In [4]:
# 2. Inspect the loaded table: column schema first, then the row count
print("\n=== Table Schema ===")
delta_df.printSchema()

print("\n=== Table Count ===")
record_count = delta_df.count()  # triggers a Spark job to count the rows
print(f"Number of records: {record_count}")


=== Table Schema ===
root
 |-- id: long (nullable = true)
 |-- plan: string (nullable = true)


=== Table Count ===
Number of records: 2


In [5]:
# 4. Append two more rows to the existing table
# NOTE: "append" mode adds these rows every time the cell runs, so re-running
# it without first re-creating the table produces duplicate ids.
print("\n=== Appending New Data ===")
new_data = spark.createDataFrame(
    [(3, "Enterprise"), (4, "Basic")],
    schema=["id", "plan"],
)
new_data.write.save("/tmp/delta-tables/table2", format="delta", mode="append")

# Re-read from the table path so the displayed rows include the append
updated_df = spark.read.load("/tmp/delta-tables/table2", format="delta")
print("Updated table:")
updated_df.show()


=== Appending New Data ===
Updated table:
+---+----------+
| id|      plan|
+---+----------+
|  3|Enterprise|
|  1|   Starter|
|  4|     Basic|
|  2|       Pro|
+---+----------+



In [6]:
from delta.tables import DeltaTable

# Handle on the table at this path; the update below and the later delete,
# merge and history calls all go through it.
deltaTable = DeltaTable.forPath(spark, "/tmp/delta-tables/table2")

# 5. Rewrite the plan of the row whose id is 1
print("\n=== Updating Records ===")
# Values in the `set` mapping are SQL expressions, hence the inner single
# quotes that make 'Starter Pro' a string literal.
deltaTable.update("id = 1", {"plan": "'Starter Pro'"})

# Show the table contents as they stand after the update
print("After update:")
spark.read.load("/tmp/delta-tables/table2", format="delta").show()


=== Updating Records ===


26/02/07 07:07:17 WARN UpdateCommand: Could not validate number of records due to missing statistics.


After update:
+---+-----------+
| id|       plan|
+---+-----------+
|  1|Starter Pro|
|  3| Enterprise|
|  4|      Basic|
|  2|        Pro|
+---+-----------+



In [8]:
# 6. Remove the row whose id is 4
print("\n=== Deleting Records ===")
deltaTable.delete(condition="id = 4")

# Show the table contents as they stand after the delete
print("After delete:")
spark.read.load("/tmp/delta-tables/table2", format="delta").show()


=== Deleting Records ===


26/02/04 06:12:33 WARN DeleteCommand: Could not validate number of records due to missing statistics.


After delete:
+---+-----------+
| id|       plan|
+---+-----------+
|  1|Starter Pro|
|  3| Enterprise|
|  2|        Pro|
+---+-----------+



In [9]:
# 7. Upsert: update rows whose id already exists, insert the ones that do not
print("\n=== Upsert/Merge Operation ===")
merge_data = spark.createDataFrame(
    [(2, "Pro Max"), (5, "Premium")],
    schema=["id", "plan"],
)

(
    deltaTable.alias("target")
    .merge(merge_data.alias("source"), "target.id = source.id")
    # Matching id: overwrite the plan with the incoming value
    .whenMatchedUpdate(set={"plan": "source.plan"})
    # No matching id: insert the incoming row as-is
    .whenNotMatchedInsert(values={"id": "source.id", "plan": "source.plan"})
    .execute()
)

print("After merge:")
spark.read.load("/tmp/delta-tables/table2", format="delta").show()


=== Upsert/Merge Operation ===


26/02/04 06:12:34 WARN MapPartitionsRDD: RDD 206 was locally checkpointed, its lineage has been truncated and cannot be recomputed after unpersisting


After merge:
+---+-----------+
| id|       plan|
+---+-----------+
|  1|Starter Pro|
|  3| Enterprise|
|  2|    Pro Max|
|  5|    Premium|
+---+-----------+



In [10]:
# 8. Time travel: read the table as of an earlier version
print("\n=== Time Travel Queries ===")

# versionAsOf=0 selects the table's first recorded version
print("Version 0:")
(
    spark.read
    .format("delta")
    .option("versionAsOf", 0)
    .load("/tmp/delta-tables/table2")
    .show()
)


=== Time Travel Queries ===
Version 0:
+---+-------+
| id|   plan|
+---+-------+
|  1|Starter|
|  2|    Pro|
+---+-------+



In [11]:
# Current contents of the table, followed by its version history
print("\n=== Final Table State ===")
spark.read.load("/tmp/delta-tables/table2", format="delta").show()

# One history row per table version; keep only the columns of interest
(
    deltaTable.history()
    .select("version", "timestamp", "operation")
    .show()
)


=== Final Table State ===
+---+-----------+
| id|       plan|
+---+-----------+
|  1|Starter Pro|
|  3| Enterprise|
|  2|    Pro Max|
|  5|    Premium|
+---+-----------+

+-------+--------------------+---------+
|version|           timestamp|operation|
+-------+--------------------+---------+
|     13|2026-02-04 06:12:...|    MERGE|
|     12|2026-02-04 06:12:...|   DELETE|
|     11|2026-02-04 06:12:...|   UPDATE|
|     10|2026-02-04 06:12:...|    WRITE|
|      9|2026-02-04 06:12:...|    WRITE|
|      8|2026-02-04 06:10:...|    MERGE|
|      7|2026-02-04 06:10:...|   DELETE|
|      6|2026-02-04 06:10:...|   UPDATE|
|      5|2026-02-04 06:10:...|    WRITE|
|      4|2026-02-04 06:10:...|    WRITE|
|      3|2026-02-04 06:09:...|    WRITE|
|      2|2026-02-04 06:09:...|    WRITE|
|      1|2026-02-04 06:09:...|    WRITE|
|      0|2026-02-04 06:08:...|    WRITE|
+-------+--------------------+---------+

