In [0]:
from pyspark.sql import SparkSession
from delta.tables import DeltaTable
# Create a Spark session with Delta Lake support
spark = SparkSession.builder.appName("DeltaLakeDemo") \
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") \
    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") \
    .getOrCreate()
# Create a Delta Table
data = [(1, "Alice", 50000), (2, "Bob", 60000)]
columns = ["id", "name", "salary"]
df = spark.createDataFrame(data, columns)
df.write.format("delta").mode("overwrite").save("/mnt/delta/employee")
# Read and display the table
df_delta = spark.read.format("delta").load("/mnt/delta/employee")
df_delta.show()

+---+-----+------+
| id| name|salary|
+---+-----+------+
|  1|Alice| 50000|
|  2|  Bob| 60000|
+---+-----+------+



In [0]:
df = spark.read.format("delta").load("/mnt/delta/employee")
df = df.withColumn("salary", df.salary + 5000)
df.write.format("delta").mode("overwrite").save("/mnt/delta/employee")
df.show()

+---+-----+------+
| id| name|salary|
+---+-----+------+
|  1|Alice| 60000|
|  2|  Bob| 70000|
+---+-----+------+



In [0]:
from delta.tables import *
deltaTable = DeltaTable.forPath(spark, "/mnt/delta/employee")
deltaTable.alias("old").merge(
    spark.createDataFrame([(3, "Charlie", 70000)], ["id", "name", "salary"]).alias("new"),
    "old.id = new.id"
).whenNotMatchedInsert(values={"id": "new.id", "name": "new.name", "salary": "new.salary"}) \
.execute()
df.show()

+---+-------+------+
| id|   name|salary|
+---+-------+------+
|  3|Charlie| 70000|
|  1|  Alice| 55000|
|  2|    Bob| 65000|
+---+-------+------+



In [0]:
df = spark.read.format("delta").option("versionAsOf", 3).load("/mnt/delta/employee")
df.show()

+---+-------+------+
| id|   name|salary|
+---+-------+------+
|  3|Charlie| 70000|
|  1|  Alice| 55000|
|  2|    Bob| 65000|
+---+-------+------+



In [0]:
spark.sql("DESCRIBE HISTORY delta.`/mnt/delta/employee`").show(truncate=False)

+-------+-------------------+----------------+------------------------+---------+--------------------------------------------------------------------------------------------------------------------------------------------------------+----+------------------+--------------------+-----------+-----------------+-------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------+-----------------------------------+
|versio

In [0]:
from delta.tables import DeltaTable

# Define Delta table path
delta_table_path = "/mnt/delta/employee"

# Read the existing Delta table
df = spark.read.format("delta").load(delta_table_path)

In [0]:
# Repartition data to optimize file layout
df_repartitioned = df.repartition(4)  # Adjust partition count based on data size

# Sort the data manually by the Z-Order column (e.g., 'salary')
df_sorted = df_repartitioned.sort("salary")

# Overwrite the existing Delta table with optimized layout
df_sorted.write.format("delta").mode("overwrite").save(delta_table_path)


In [0]:
df_history = spark.sql("DESCRIBE HISTORY delta.`/mnt/delta/employee`")
df_history.show(truncate=False)


+-------+-------------------+----------------+------------------------+---------+--------------------------------------------------------------------------------------------------------------------------------------------------------+----+------------------+--------------------+-----------+-----------------+-------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------+-----------------------------------+
|versio