# Delta Lake Protocol Versioning

### How Delta Lake Protocol Versioning Works
Every Delta table stores its protocol version in the **_delta_log/** transaction log. This version determines which features the table supports and which Databricks Runtime/Spark versions can read/write it.

### Compatibility Rules
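Delta Lake is backward compatible: newer runtimes can always read and write tables created by older ones. However, enabling certain table features upgrades the table's protocol version, which breaks forward compatibility: older runtimes that do not support those features can no longer read or write the table.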

In [0]:
# SETUP:
# 1. Create TWO clusters with different Databricks Runtime versions:
#    - Cluster A: Runtime 11.3 LTS (older)
#    - Cluster B: Runtime 13.3 LTS or higher (newer)
# 2. Run steps sequentially as indicated

In [0]:
# ========================================
# STEP 1: Create table on OLD cluster (Runtime 11.3)
# Run this on Cluster A (Runtime 11.3 LTS)
# ========================================

# Fully qualified name of the test table. This cell only sets the name; the
# table itself is created by the next cell.
TABLE_NAME = "hive_metastore.default.protocol_test_table"

In [0]:
# Delta Lake protocol breaking-change demonstration -- STEP 1.
# Writes a three-row Delta table, prints the protocol versions reported by
# DESCRIBE DETAIL, then checks that a read and an INSERT both succeed.

from pyspark.sql import SparkSession
from pyspark.sql.types import *

print("=" * 60)
print("PROTOCOL BREAKING CHANGE TEST")
print("=" * 60)

print("\n[STEP 1] Creating table on OLD Runtime cluster...")
print(f"Table: {TABLE_NAME}")

# Seed rows, each laid out as (id, name, amount)
data = [(1, "Alice", 100), (2, "Bob", 200), (3, "Charlie", 300)]

# id is declared non-nullable; name and amount are nullable
schema = StructType(
    [
        StructField("id", IntegerType(), False),
        StructField("name", StringType(), True),
        StructField("amount", IntegerType(), True),
    ]
)

df = spark.createDataFrame(data, schema)

# Save as a Delta table, overwriting any existing table of the same name
(
    df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable(TABLE_NAME)
)

print("✅ Table created successfully")

# Report the protocol versions DESCRIBE DETAIL exposes for the new table
print("\n[PROTOCOL CHECK] Initial protocol version:")
detail_df = spark.sql(f"DESCRIBE DETAIL {TABLE_NAME}")
protocol_info = detail_df.select("minReaderVersion", "minWriterVersion").collect()[0]
for version_field in ("minReaderVersion", "minWriterVersion"):
    print(f"  {version_field}: {protocol_info[version_field]}")

# Sanity check: a plain read works
print("\n[TEST] Can read from table:")
spark.sql(f"SELECT * FROM {TABLE_NAME}").show()

# Sanity check: appending one more row works
print("\n[TEST] Can write to table:")
spark.sql(f"INSERT INTO {TABLE_NAME} VALUES (4, 'David', 400)")
print("✅ Write successful on Runtime 11.3")

print(f"\n{'=' * 60}")
print("STEP 1 COMPLETE - Now switch to Cluster B (Runtime 13.3+)")
print("=" * 60)

PROTOCOL BREAKING CHANGE TEST

[STEP 1] Creating table on OLD Runtime cluster...
Table: hive_metastore.default.protocol_test_table
✅ Table created successfully

[PROTOCOL CHECK] Initial protocol version:
  minReaderVersion: 1
  minWriterVersion: 2

[TEST] Can read from table:
+---+-------+------+
| id|   name|amount|
+---+-------+------+
|  3|Charlie|   300|
|  1|  Alice|   100|
|  2|    Bob|   200|
+---+-------+------+


[TEST] Can write to table:
✅ Write successful on Runtime 11.3

STEP 1 COMPLETE - Now switch to Cluster B (Runtime 13.3+)


In [0]:
# ========================================
# STEP 2: Enable feature (enableDeletionVectors) that breaks compatibility
# Run this on Cluster B (Runtime 13.3 LTS or higher)
# ========================================

# Name of the table created in STEP 1. It is set again here because this step
# runs on a different cluster; nothing is created by this cell.
TABLE_NAME = "hive_metastore.default.protocol_test_table"

In [0]:
# STEP 2 body: enable deletion vectors on the existing table and print the
# protocol versions reported by DESCRIBE DETAIL before and after the change.

def _protocol_versions(table_name):
    """Return the DESCRIBE DETAIL row restricted to the two protocol-version columns."""
    detail = spark.sql(f"DESCRIBE DETAIL {table_name}")
    return detail.select("minReaderVersion", "minWriterVersion").collect()[0]


def _print_protocol(versions):
    """Print minReaderVersion and minWriterVersion, one per line, indented by two spaces."""
    for version_field in ("minReaderVersion", "minWriterVersion"):
        print(f"  {version_field}: {versions[version_field]}")


print("\n[STEP 2] Running on NEWER Runtime cluster...")

# Confirm the table is still reachable before touching its properties
print("\n[TEST] Verify table access before upgrade:")
count_row = spark.sql(f"SELECT COUNT(*) as cnt FROM {TABLE_NAME}").collect()[0]
current_count = count_row['cnt']
print(f"  Current row count: {current_count}")

# Protocol as it stands before the property change
print("\n[PROTOCOL CHECK] Before enabling deletion vectors:")
protocol_before = _protocol_versions(TABLE_NAME)
_print_protocol(protocol_before)

# The breaking change: switch on the deletion-vectors table property
print("\n[BREAKING CHANGE] Enabling deletion vectors...")
enable_dv_sql = f"""
ALTER TABLE {TABLE_NAME} 
SET TBLPROPERTIES ('delta.enableDeletionVectors' = 'true')
"""
spark.sql(enable_dv_sql)

print("✅ Deletion vectors enabled")

# Protocol as reported after the property change
print("\n[PROTOCOL CHECK] After enabling deletion vectors:")
protocol_after = _protocol_versions(TABLE_NAME)
_print_protocol(protocol_after)

before_pair = f"({protocol_before['minReaderVersion']},{protocol_before['minWriterVersion']})"
after_pair = f"({protocol_after['minReaderVersion']},{protocol_after['minWriterVersion']})"
print(f"\n🚨 PROTOCOL UPGRADED FROM {before_pair} TO {after_pair}")

# Run a DELETE so the new feature is actually exercised
print("\n[TEST] Performing DELETE operation to create deletion vectors:")
spark.sql(f"DELETE FROM {TABLE_NAME} WHERE id = 1")
print("✅ DELETE successful on Runtime 13.3+")

# Display what is left in the table
print("\n[TEST] Table contents after delete:")
spark.sql(f"SELECT * FROM {TABLE_NAME}").show()

print(f"\n{'=' * 60}")
print("STEP 2 COMPLETE - Now switch BACK to Cluster A (Runtime 11.3)")
print("=" * 60)



[STEP 2] Running on NEWER Runtime cluster...

[TEST] Verify table access before upgrade:
  Current row count: 4

[PROTOCOL CHECK] Before enabling deletion vectors:
  minReaderVersion: 1
  minWriterVersion: 2

[BREAKING CHANGE] Enabling deletion vectors...
✅ Deletion vectors enabled

[PROTOCOL CHECK] After enabling deletion vectors:
  minReaderVersion: 3
  minWriterVersion: 7

🚨 PROTOCOL UPGRADED FROM (1,2) TO (3,7)

[TEST] Performing DELETE operation to create deletion vectors:
✅ DELETE successful on Runtime 13.3+

[TEST] Table contents after delete:
+---+-------+------+
| id|   name|amount|
+---+-------+------+
|  3|Charlie|   300|
|  4|  David|   400|
|  2|    Bob|   200|
+---+-------+------+


STEP 2 COMPLETE - Now switch BACK to Cluster A (Runtime 11.3)


In [0]:
# ========================================
# STEP 3: Run this on Cluster A (Runtime 11.3 LTS)
# ========================================

# Name of the table created in STEP 1 and altered in STEP 2. This cell only
# sets the name; nothing is created here.
TABLE_NAME = "hive_metastore.default.protocol_test_table"

In [0]:
# STEP 3 body -- run on Cluster A (the older runtime).
# Attempts one read and one write against the table altered in STEP 2 and
# reports whether each attempt is rejected. The summary is printed only when
# both attempts were rejected with an error that looks like a protocol /
# table-feature incompatibility, so an unrelated failure (for example a
# missing table) is not reported as the expected outcome.

def _is_protocol_error(message):
    """Return True if the error text mentions a protocol, version or table-feature problem."""
    lowered = message.lower()
    return any(marker in lowered for marker in ("protocol", "version", "table feature"))


def _expect_blocked(verb, preposition, action):
    """Run ``action`` and report whether it was rejected.

    Args:
        verb: Upper-case operation label used in the messages ("READ" / "WRITE").
        preposition: Word joining the verb to "table" in the heading ("from" / "to").
        action: Zero-argument callable that performs the Spark operation.

    Returns:
        True only when ``action`` raised and the error text looks like a
        protocol incompatibility; False when it succeeded or failed for
        some other reason.
    """
    print(f"\n[TEST] Trying to {verb} {preposition} table:")
    try:
        action()
    except Exception as e:
        error_msg = str(e)
        if _is_protocol_error(error_msg):
            print(f"\n🚨 {verb} FAILED (Expected):")
            print(f"   Error: {error_msg[:200]}...")
            print("   ✅ Confirmed: Protocol version incompatibility!")
            return True
        # The failure is real but is not the one this demo is about.
        print(f"\n⚠️ {verb} FAILED for a reason that does not look like a protocol incompatibility:")
        print(f"   Error: {error_msg[:200]}...")
        return False
    print(f"❌ UNEXPECTED: {verb.capitalize()} should have failed but didn't")
    return False


print("\n[STEP 3] Attempting to access table from OLD Runtime cluster...")

read_blocked = _expect_blocked(
    "READ", "from",
    lambda: spark.sql(f"SELECT * FROM {TABLE_NAME}").show(),
)
write_blocked = _expect_blocked(
    "WRITE", "to",
    lambda: spark.sql(f"INSERT INTO {TABLE_NAME} VALUES (5, 'Eve', 500)"),
)

print("\n" + "=" * 60)
print("TEST COMPLETE")
print("=" * 60)

if read_blocked and write_blocked:
    # NOTE(review): newer runtimes appear to support
    # `ALTER TABLE ... DROP FEATURE`, so "IRREVERSIBLE" below may overstate
    # the situation -- confirm against current Databricks documentation.
    print("""
SUMMARY:
1. Created table on Runtime 11.3 with protocol (1,2)
2. Upgraded protocol to (3,7) by enabling deletion vectors on Runtime 13.3+
3. Runtime 11.3 can NO LONGER access the table

This demonstrates the IRREVERSIBLE nature of protocol upgrades and 
the compatibility issues between different Runtime versions.

SNOWFLAKE COMPARISON:
- No version management needed
- All features available to all users immediately
- No risk of breaking existing queries/jobs
""")
else:
    print("\n⚠️ The expected protocol incompatibility was not observed for both "
          "operations; check that STEP 2 ran and that this cell is running on Cluster A.")



[STEP 3] Attempting to access table from OLD Runtime cluster...

[TEST] Trying to READ from table:

🚨 READ FAILED (Expected):
   Error: An error occurred while calling o368.sql.
: com.databricks.sql.transaction.tahoe.DeltaTableFeatureException: Unable to read this table because it requires reader table feature(s) that are unsupported ...
   ✅ Confirmed: Protocol version incompatibility!

[TEST] Trying to WRITE to table:

🚨 WRITE FAILED (Expected):
   Error: An error occurred while calling o368.sql.
: com.databricks.sql.transaction.tahoe.DeltaTableFeatureException: Unable to read this table because it requires reader table feature(s) that are unsupported ...
   ✅ Confirmed: Protocol version incompatibility!

TEST COMPLETE

SUMMARY:
1. Created table on Runtime 11.3 with protocol (1,2)
2. Upgraded protocol to (3,7) by enabling deletion vectors on Runtime 13.3+
3. Runtime 11.3 can NO LONGER access the table

This demonstrates the IRREVERSIBLE nature of protocol upgrades and 
the compatibi

In [0]:
# BONUS: inspect the Delta transaction log directly.
#
# Run this on Cluster B (the newer runtime). DESCRIBE DETAIL has to be able to
# read the table; a run of this cell that ends in Py4JJavaError on that call
# was presumably executed on the older cluster after STEP 2 (TODO confirm).
#
# The protocol is looked up by walking the commit files from newest to oldest.
# Reading only the newest file is not enough: after STEP 2 the newest commit is
# the DELETE, and a commit JSON that has no "protocol" entry yields a DataFrame
# without that column, so filtering on it would raise an AnalysisException.

print("\n" + "=" * 60)
print("BONUS: Inspect Delta Transaction Log")
print("=" * 60)

# Get table location
table_location = spark.sql(f"DESCRIBE DETAIL {TABLE_NAME}").select("location").collect()[0][0]
log_dir = f"{table_location}/_delta_log/"
print(f"\nTable location: {table_location}")
print(f"Transaction log: {log_dir}")

# List the JSON commit files in name order
print("\n[LOG FILES]")
log_files = dbutils.fs.ls(log_dir)
commit_names = sorted(f.name for f in log_files if f.name.endswith('.json'))
for commit_name in commit_names:
    print(f"  {commit_name}")

# Find the most recent commit that actually records a protocol entry
print("\n[LATEST PROTOCOL IN LOG]")
for commit_name in reversed(commit_names):
    log_content = spark.read.json(f"{log_dir}{commit_name}")
    if "protocol" not in log_content.columns:
        # This commit did not touch the protocol; look at the previous one.
        continue
    protocol_rows = log_content.filter("protocol is not null").select("protocol.*")
    print("Protocol information from transaction log:")
    print(f"  (commit file: {commit_name})")
    protocol_rows.show(truncate=False)
    break
else:
    # Also reached when the log directory holds no JSON commit files at all.
    print("No protocol entry found in the JSON commit files.")


BONUS: Inspect Delta Transaction Log


[0;31m---------------------------------------------------------------------------[0m
[0;31mPy4JJavaError[0m                             Traceback (most recent call last)
[0;32m<command-7522936539118669>[0m in [0;36m<cell line: 11>[0;34m()[0m
[1;32m      9[0m [0;34m[0m[0m
[1;32m     10[0m [0;31m# Get table location[0m[0;34m[0m[0;34m[0m[0m
[0;32m---> 11[0;31m [0mtable_location[0m [0;34m=[0m [0mspark[0m[0;34m.[0m[0msql[0m[0;34m([0m[0;34mf"DESCRIBE DETAIL {TABLE_NAME}"[0m[0;34m)[0m[0;34m.[0m[0mselect[0m[0;34m([0m[0;34m"location"[0m[0;34m)[0m[0;34m.[0m[0mcollect[0m[0;34m([0m[0;34m)[0m[0;34m[[0m[0;36m0[0m[0;34m][0m[0;34m[[0m[0;36m0[0m[0;34m][0m[0;34m[0m[0;34m[0m[0m
[0m[1;32m     12[0m [0mprint[0m[0;34m([0m[0;34mf"\nTable location: {table_location}"[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[1;32m     13[0m [0mprint[0m[0;34m([0m[0;34mf"Transaction log: {table_location}/_delta_log/"[0m[0;34m)[0m[0;34