# Data Engineering / Platform Review

This notebook is to show the following Delta Lake features:
* micropartitioning, 
* compaction, 
* clustering, 
* time travel,  
* ACID compliance, 
* history/retention + VACUUM, and 
* quick transaction-log peeking.

For more info, check out the Comprehensive Guide to Optimize Databricks, Spark and Delta Lake Workloads ([link](https://www.databricks.com/discover/pages/optimize-data-workloads-guide)).

In [0]:
%run ./Setup



DataFrame[]

## Basic profiling query

In [0]:
USE CATALOG main_jcg;
USE SCHEMA default;

root
 |-- loan_id: string (nullable = false)
 |-- origination_date: date (nullable = false)
 |-- first_payment_date: date (nullable = false)
 |-- maturity_date: date (nullable = false)
 |-- origination_unpaid_principal: double (nullable = false)
 |-- current_unpaid_principal: double (nullable = false)
 |-- interest_rate: double (nullable = false)
 |-- borrower_credit_score: integer (nullable = true)
 |-- debt_to_income_ratio: double (nullable = true)
 |-- loan_to_value_ratio: double (nullable = true)
 |-- occupancy_type: string (nullable = true)
 |-- channel: string (nullable = true)
 |-- property_state: string (nullable = true)
 |-- property_type: string (nullable = true)
 |-- number_of_units: integer (nullable = true)
 |-- msa: string (nullable = true)
 |-- loan_purpose: string (nullable = true)
 |-- product_type: string (nullable = true)
 |-- delinquency_status: string (nullable = true)
 |-- status_as_of: date (nullable = false)

+----------+----------------+------------------+-----

## Create the Delta table (clustering-ready)

In [0]:
SELECT property_state, delinquency_status, COUNT(*) AS loans, AVG(interest_rate) AS avg_rate
FROM main_jcg.default.mortgage_loans_delta
GROUP BY property_state, delinquency_status
ORDER BY loans DESC
LIMIT 10;


property_state,delinquency_status,loans,avg_rate
CA,Current,19655,4.487714881709486
IL,Current,19496,4.503735791957329
NY,Current,19431,4.496383407956373
NC,Current,19293,4.49711092105946
CO,Current,19258,4.509145757607249
TX,Current,19251,4.509667445846989
WA,Current,19224,4.486591396171445
FL,Current,19192,4.485579981242165
GA,Current,19143,4.4935556600324045
VA,Current,19095,4.501645561665367


DataFrame[]

## Generate multiple versions for time travel

DataFrame[num_affected_rows: bigint, num_updated_rows: bigint, num_deleted_rows: bigint, num_inserted_rows: bigint]

## Show history for time travel demo

In [0]:
DESCRIBE HISTORY main_jcg.default.mortgage_loans_delta;

version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
12,2026-01-05T18:27:06.000Z,3090522805942289,joy.garnett@databricks.com,VACUUM END,Map(status -> COMPLETED),,List(3593553633486293),0105-154401-tlgrgv8e-v2n,11.0,SnapshotIsolation,True,"Map(numDeletedFiles -> 1, numVacuumedDirectories -> 2)",,Databricks-Runtime/17.3.x-photon-scala2.13
11,2026-01-05T18:27:05.000Z,3090522805942289,joy.garnett@databricks.com,VACUUM START,"Map(retentionCheckEnabled -> true, defaultRetentionMillis -> 604800000)",,List(3593553633486293),0105-154401-tlgrgv8e-v2n,10.0,SnapshotIsolation,True,"Map(numFilesToDelete -> 1, sizeOfDataToDelete -> 0)",,Databricks-Runtime/17.3.x-photon-scala2.13
10,2026-01-05T18:26:59.000Z,3090522805942289,joy.garnett@databricks.com,SET TBLPROPERTIES,"Map(properties -> {""delta.deletedFileRetentionDuration"":""7 days"",""delta.logRetentionDuration"":""30 days""})",,List(3593553633486293),0105-154401-tlgrgv8e-v2n,9.0,WriteSerializable,True,Map(),,Databricks-Runtime/17.3.x-photon-scala2.13
9,2026-01-05T18:23:16.000Z,3090522805942289,joy.garnett@databricks.com,OPTIMIZE,"Map(predicate -> [], auto -> false, clusterBy -> [], zOrderBy -> [], batchId -> 0)",,List(3593553633486293),0105-154401-tlgrgv8e-v2n,8.0,SnapshotIsolation,False,"Map(numRemovedFiles -> 2, numRemovedBytes -> 4980652, p25FileSize -> 4943100, numDeletionVectorsRemoved -> 1, minFileSize -> 4943100, numAddedFiles -> 1, maxFileSize -> 4943100, p75FileSize -> 4943100, p50FileSize -> 4943100, numAddedBytes -> 4943100)",,Databricks-Runtime/17.3.x-photon-scala2.13
8,2026-01-05T18:23:13.000Z,3090522805942289,joy.garnett@databricks.com,OPTIMIZE,"Map(predicate -> [], auto -> false, clusterBy -> [""property_state"",""delinquency_status""], isFull -> false, zOrderBy -> [], batchId -> -1)",,List(3593553633486293),0105-154401-tlgrgv8e-v2n,7.0,SnapshotIsolation,True,Map(),,Databricks-Runtime/17.3.x-photon-scala2.13
7,2026-01-05T18:23:10.000Z,3090522805942289,joy.garnett@databricks.com,DELETE,"Map(predicate -> [""(delinquency_status#124062 = REO)""])",,List(3593553633486293),0105-154401-tlgrgv8e-v2n,6.0,WriteSerializable,False,"Map(numRemovedFiles -> 0, numRemovedBytes -> 0, numCopiedRows -> 0, numDeletionVectorsAdded -> 1, numDeletionVectorsRemoved -> 1, numAddedChangeFiles -> 0, executionTimeMs -> 742, numDeletionVectorsUpdated -> 1, numDeletedRows -> 1881, scanTimeMs -> 524, numAddedFiles -> 0, numAddedBytes -> 0, rewriteTimeMs -> 218)",,Databricks-Runtime/17.3.x-photon-scala2.13
6,2026-01-05T18:23:08.000Z,3090522805942289,joy.garnett@databricks.com,UPDATE,"Map(predicate -> [""((property_state#122668 = CA) AND (delinquency_status#122674 = 30))""])",,List(3593553633486293),0105-154401-tlgrgv8e-v2n,5.0,WriteSerializable,False,"Map(numRemovedFiles -> 0, numRemovedBytes -> 0, numCopiedRows -> 0, numDeletionVectorsAdded -> 1, numDeletionVectorsRemoved -> 0, numAddedChangeFiles -> 1, executionTimeMs -> 1750, numDeletionVectorsUpdated -> 0, scanTimeMs -> 599, numAddedFiles -> 1, numUpdatedRows -> 319, numAddedBytes -> 19203, rewriteTimeMs -> 1150)",,Databricks-Runtime/17.3.x-photon-scala2.13
5,2026-01-05T18:22:40.000Z,3090522805942289,joy.garnett@databricks.com,MERGE,"Map(predicate -> [""(loan_id#120966 = loan_id#119368)""], clusterBy -> [""property_state"",""delinquency_status""], matchedPredicates -> [{""actionType"":""update""}], statsOnLoad -> false, notMatchedBySourcePredicates -> [], notMatchedPredicates -> [], clusteringOnWriteStatus -> null)",,List(3593553633486293),0105-154401-tlgrgv8e-v2n,4.0,WriteSerializable,False,"Map(numTargetRowsCopied -> 0, numTargetRowsDeleted -> 0, numTargetFilesAdded -> 1, numTargetBytesAdded -> 4961449, numTargetBytesRemoved -> 4961486, numTargetDeletionVectorsAdded -> 0, numTargetRowsMatchedUpdated -> 200000, executionTimeMs -> 6271, materializeSourceTimeMs -> 2581, numTargetRowsInserted -> 0, numTargetRowsMatchedDeleted -> 0, numTargetDeletionVectorsUpdated -> 0, scanTimeMs -> 1079, numTargetRowsUpdated -> 200000, numOutputRows -> 200000, numTargetDeletionVectorsRemoved -> 0, numTargetRowsNotMatchedBySourceUpdated -> 0, numTargetChangeFilesAdded -> 1, numSourceRows -> 200000, numTargetFilesRemoved -> 1, numTargetRowsNotMatchedBySourceDeleted -> 0, rewriteTimeMs -> 2561)",,Databricks-Runtime/17.3.x-photon-scala2.13
4,2026-01-05T18:22:33.000Z,3090522805942289,joy.garnett@databricks.com,MERGE,"Map(predicate -> [""(loan_id#120107 = loan_id#119368)""], clusterBy -> [""property_state"",""delinquency_status""], matchedPredicates -> [{""actionType"":""update""}], statsOnLoad -> false, notMatchedBySourcePredicates -> [], notMatchedPredicates -> [], clusteringOnWriteStatus -> null)",,List(3593553633486293),0105-154401-tlgrgv8e-v2n,3.0,WriteSerializable,False,"Map(numTargetRowsCopied -> 0, numTargetRowsDeleted -> 0, numTargetFilesAdded -> 1, numTargetBytesAdded -> 4961486, numTargetBytesRemoved -> 4961004, numTargetDeletionVectorsAdded -> 0, numTargetRowsMatchedUpdated -> 200000, executionTimeMs -> 5697, materializeSourceTimeMs -> 2209, numTargetRowsInserted -> 0, numTargetRowsMatchedDeleted -> 0, numTargetDeletionVectorsUpdated -> 0, scanTimeMs -> 1118, numTargetRowsUpdated -> 200000, numOutputRows -> 200000, numTargetDeletionVectorsRemoved -> 0, numTargetRowsNotMatchedBySourceUpdated -> 0, numTargetChangeFilesAdded -> 1, numSourceRows -> 200000, numTargetFilesRemoved -> 8, numTargetRowsNotMatchedBySourceDeleted -> 0, rewriteTimeMs -> 2317)",,Databricks-Runtime/17.3.x-photon-scala2.13
3,2026-01-05T18:22:26.000Z,3090522805942289,joy.garnett@databricks.com,CLUSTER BY,"Map(oldClusteringColumns -> , newClusteringColumns -> property_state,delinquency_status)",,List(3593553633486293),0105-154401-tlgrgv8e-v2n,2.0,WriteSerializable,True,Map(),,Databricks-Runtime/17.3.x-photon-scala2.13


## Time travel: view table as of an earlier version (e.g., version 0 or 1)

In [0]:
SELECT COUNT(*) AS loans_v1
FROM main_jcg.default.mortgage_loans_delta VERSION AS OF 1;

loans_v1
200000


In [0]:
SELECT property_state, delinquency_status, COUNT(*) AS loans_v3
FROM main_jcg.default.mortgage_loans_delta VERSION AS OF 3
GROUP BY property_state, delinquency_status
LIMIT 15;

property_state,delinquency_status,loans_v3
IL,Foreclosed,135
NY,Current,17906
IL,REO,111
NC,90,109
IL,90,140
GA,REO,131
IL,60,468
CA,30,1384
NC,Current,17708
TX,Foreclosed,130


## Demonstrate ACID: run UPDATE / DELETE and then show consistent reads.

In [0]:
UPDATE main_jcg.default.mortgage_loans_delta
SET delinquency_status = 'Current'
WHERE property_state = 'CA' AND delinquency_status = '30';

DELETE FROM main_jcg.default.mortgage_loans_delta
WHERE delinquency_status = 'REO';

num_affected_rows
1881


## Compaction and clustering optimization (OPTIMIZE rewrites many small files into fewer large ones)

In [0]:
-- Compaction and clustering optimization (OPTIMIZE rewrites many small files into fewer large ones).
OPTIMIZE main_jcg.default.mortgage_loans_delta;

-- Or full reclustering if you change keys later:
-- OPTIMIZE ${catalog}.${schema}.${table_name} FULL;

path,metrics
s3://databricks-e2demofieldengwest/b169b504-4c54-49f2-bc3a-adf4b128f36d/tables/9f6f4e4a-1e1b-47ec-9cf0-30a75b8b3dc7,"List(0, 0, List(null, null, 0.0, 0, 0), List(null, null, 0.0, 0, 0), 0, null, null, 0, 0, 2, 0, false, 0, 0, 1767637390551, 1767637392974, 8, 0, null, List(0, 0), null, 20, 20, 0, 0, List(4980652, true, false, false, null, null, null, null, 0, 0, 0, 0, 2, 4980652, 4926076, null, log, 16777216, 67108864, 4, 0, 0, null, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, List(71, 42, 0, 36, 0, 1265), 2, 1, 5, sizeAware))"
s3://databricks-e2demofieldengwest/b169b504-4c54-49f2-bc3a-adf4b128f36d/tables/9f6f4e4a-1e1b-47ec-9cf0-30a75b8b3dc7,"List(1, 2, List(4943100, 4943100, 4943100.0, 1, 4943100), List(19203, 4961449, 2490326.0, 2, 4980652), 0, null, null, 0, 1, 2, 0, true, 0, 0, 1767637393009, 1767637395718, 8, 1, null, List(1, 2200), null, 20, 20, 565, 0, List(4980652, false, false, false, null, null, null, post-optimize-compaction, 0, 0, 0, 0, 2, 4980652, 4926076, null, null, 33554432, 67108864, 0, 0, 0, List(0, 0), 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 4980652, 0, 4926076, 0, 0, 0, 0, 0, 4980652, 4926076, List(0, 0, 549, 0, 0, 0), 15, 1, 1, null))"


## Query to highlight clustering benefits:

Filter and group on clustering keys to show efficient access.

In [0]:
SELECT property_state, delinquency_status, COUNT(*) AS loans
FROM main_jcg.default.mortgage_loans_delta
WHERE property_state IN ('CA', 'TX', 'NY')
GROUP BY property_state, delinquency_status;

property_state,delinquency_status,loans
NY,Current,19431
CA,Current,19655
TX,Current,19251
NY,30,374
TX,90,97
TX,Foreclosed,110
TX,30,344
CA,90,70
CA,Foreclosed,109
NY,Foreclosed,104


## History retention and VACUUM

In [0]:
-- Check current table history retention behavior
DESCRIBE HISTORY main_jcg.default.mortgage_loans_delta;


version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
9,2026-01-05T18:23:16.000Z,3090522805942289,joy.garnett@databricks.com,OPTIMIZE,"Map(predicate -> [], auto -> false, clusterBy -> [], zOrderBy -> [], batchId -> 0)",,List(3593553633486293),0105-154401-tlgrgv8e-v2n,8.0,SnapshotIsolation,False,"Map(numRemovedFiles -> 2, numRemovedBytes -> 4980652, p25FileSize -> 4943100, numDeletionVectorsRemoved -> 1, minFileSize -> 4943100, numAddedFiles -> 1, maxFileSize -> 4943100, p75FileSize -> 4943100, p50FileSize -> 4943100, numAddedBytes -> 4943100)",,Databricks-Runtime/17.3.x-photon-scala2.13
8,2026-01-05T18:23:13.000Z,3090522805942289,joy.garnett@databricks.com,OPTIMIZE,"Map(predicate -> [], auto -> false, clusterBy -> [""property_state"",""delinquency_status""], isFull -> false, zOrderBy -> [], batchId -> -1)",,List(3593553633486293),0105-154401-tlgrgv8e-v2n,7.0,SnapshotIsolation,True,Map(),,Databricks-Runtime/17.3.x-photon-scala2.13
7,2026-01-05T18:23:10.000Z,3090522805942289,joy.garnett@databricks.com,DELETE,"Map(predicate -> [""(delinquency_status#124062 = REO)""])",,List(3593553633486293),0105-154401-tlgrgv8e-v2n,6.0,WriteSerializable,False,"Map(numRemovedFiles -> 0, numRemovedBytes -> 0, numCopiedRows -> 0, numDeletionVectorsAdded -> 1, numDeletionVectorsRemoved -> 1, numAddedChangeFiles -> 0, executionTimeMs -> 742, numDeletionVectorsUpdated -> 1, numDeletedRows -> 1881, scanTimeMs -> 524, numAddedFiles -> 0, numAddedBytes -> 0, rewriteTimeMs -> 218)",,Databricks-Runtime/17.3.x-photon-scala2.13
6,2026-01-05T18:23:08.000Z,3090522805942289,joy.garnett@databricks.com,UPDATE,"Map(predicate -> [""((property_state#122668 = CA) AND (delinquency_status#122674 = 30))""])",,List(3593553633486293),0105-154401-tlgrgv8e-v2n,5.0,WriteSerializable,False,"Map(numRemovedFiles -> 0, numRemovedBytes -> 0, numCopiedRows -> 0, numDeletionVectorsAdded -> 1, numDeletionVectorsRemoved -> 0, numAddedChangeFiles -> 1, executionTimeMs -> 1750, numDeletionVectorsUpdated -> 0, scanTimeMs -> 599, numAddedFiles -> 1, numUpdatedRows -> 319, numAddedBytes -> 19203, rewriteTimeMs -> 1150)",,Databricks-Runtime/17.3.x-photon-scala2.13
5,2026-01-05T18:22:40.000Z,3090522805942289,joy.garnett@databricks.com,MERGE,"Map(predicate -> [""(loan_id#120966 = loan_id#119368)""], clusterBy -> [""property_state"",""delinquency_status""], matchedPredicates -> [{""actionType"":""update""}], statsOnLoad -> false, notMatchedBySourcePredicates -> [], notMatchedPredicates -> [], clusteringOnWriteStatus -> null)",,List(3593553633486293),0105-154401-tlgrgv8e-v2n,4.0,WriteSerializable,False,"Map(numTargetRowsCopied -> 0, numTargetRowsDeleted -> 0, numTargetFilesAdded -> 1, numTargetBytesAdded -> 4961449, numTargetBytesRemoved -> 4961486, numTargetDeletionVectorsAdded -> 0, numTargetRowsMatchedUpdated -> 200000, executionTimeMs -> 6271, materializeSourceTimeMs -> 2581, numTargetRowsInserted -> 0, numTargetRowsMatchedDeleted -> 0, numTargetDeletionVectorsUpdated -> 0, scanTimeMs -> 1079, numTargetRowsUpdated -> 200000, numOutputRows -> 200000, numTargetDeletionVectorsRemoved -> 0, numTargetRowsNotMatchedBySourceUpdated -> 0, numTargetChangeFilesAdded -> 1, numSourceRows -> 200000, numTargetFilesRemoved -> 1, numTargetRowsNotMatchedBySourceDeleted -> 0, rewriteTimeMs -> 2561)",,Databricks-Runtime/17.3.x-photon-scala2.13
4,2026-01-05T18:22:33.000Z,3090522805942289,joy.garnett@databricks.com,MERGE,"Map(predicate -> [""(loan_id#120107 = loan_id#119368)""], clusterBy -> [""property_state"",""delinquency_status""], matchedPredicates -> [{""actionType"":""update""}], statsOnLoad -> false, notMatchedBySourcePredicates -> [], notMatchedPredicates -> [], clusteringOnWriteStatus -> null)",,List(3593553633486293),0105-154401-tlgrgv8e-v2n,3.0,WriteSerializable,False,"Map(numTargetRowsCopied -> 0, numTargetRowsDeleted -> 0, numTargetFilesAdded -> 1, numTargetBytesAdded -> 4961486, numTargetBytesRemoved -> 4961004, numTargetDeletionVectorsAdded -> 0, numTargetRowsMatchedUpdated -> 200000, executionTimeMs -> 5697, materializeSourceTimeMs -> 2209, numTargetRowsInserted -> 0, numTargetRowsMatchedDeleted -> 0, numTargetDeletionVectorsUpdated -> 0, scanTimeMs -> 1118, numTargetRowsUpdated -> 200000, numOutputRows -> 200000, numTargetDeletionVectorsRemoved -> 0, numTargetRowsNotMatchedBySourceUpdated -> 0, numTargetChangeFilesAdded -> 1, numSourceRows -> 200000, numTargetFilesRemoved -> 8, numTargetRowsNotMatchedBySourceDeleted -> 0, rewriteTimeMs -> 2317)",,Databricks-Runtime/17.3.x-photon-scala2.13
3,2026-01-05T18:22:26.000Z,3090522805942289,joy.garnett@databricks.com,CLUSTER BY,"Map(oldClusteringColumns -> , newClusteringColumns -> property_state,delinquency_status)",,List(3593553633486293),0105-154401-tlgrgv8e-v2n,2.0,WriteSerializable,True,Map(),,Databricks-Runtime/17.3.x-photon-scala2.13
2,2026-01-05T18:22:25.000Z,3090522805942289,joy.garnett@databricks.com,ROW TRACKING BACKFILL,Map(batchId -> 0),,List(3593553633486293),0105-154401-tlgrgv8e-v2n,1.0,SnapshotIsolation,False,Map(),,Databricks-Runtime/17.3.x-photon-scala2.13
1,2026-01-05T18:22:24.000Z,3090522805942289,joy.garnett@databricks.com,UPGRADE PROTOCOL,"Map(newProtocol -> {""minReaderVersion"":3,""minWriterVersion"":7,""readerFeatures"":[""deletionVectors""],""writerFeatures"":[""deletionVectors"",""domainMetadata"",""rowTracking"",""invariants"",""changeDataFeed"",""appendOnly""]})",,List(3593553633486293),0105-154401-tlgrgv8e-v2n,0.0,WriteSerializable,True,Map(),,Databricks-Runtime/17.3.x-photon-scala2.13
0,2026-01-05T18:22:23.000Z,3090522805942289,joy.garnett@databricks.com,CREATE OR REPLACE TABLE AS SELECT,"Map(partitionBy -> [], clusterBy -> [], description -> null, isManaged -> true, properties -> {""delta.enableChangeDataFeed"":""true"",""delta.parquet.compression.codec"":""zstd"",""delta.enableDeletionVectors"":""true""}, statsOnLoad -> true)",,List(3593553633486293),0105-154401-tlgrgv8e-v2n,,WriteSerializable,False,"Map(numFiles -> 8, numRemovedFiles -> 0, numRemovedBytes -> 0, numDeletionVectorsRemoved -> 0, numOutputRows -> 200000, numOutputBytes -> 4961004)",,Databricks-Runtime/17.3.x-photon-scala2.13


In [0]:
-- Show current retention-related table properties (if any)
DESCRIBE EXTENDED main_jcg.default.mortgage_loans_delta;


col_name,data_type,comment
loan_id,string,
origination_date,date,
first_payment_date,date,
maturity_date,date,
origination_unpaid_principal,double,
current_unpaid_principal,double,
interest_rate,double,
borrower_credit_score,int,
debt_to_income_ratio,double,
loan_to_value_ratio,double,


In [0]:
-- (Optional) Set explicit retention policies for this demo table.
-- deletedFileRetentionDuration controls how long data files are kept for time travel.
-- logRetentionDuration controls how long transaction log history is kept. 

ALTER TABLE main_jcg.default.mortgage_loans_delta
SET TBLPROPERTIES (
  delta.deletedFileRetentionDuration = '7 days',
  delta.logRetentionDuration = '30 days'
);


In [0]:
-- VACUUM removes data files older than the specified retention period.
-- After this, you cannot time travel to versions that depend on removed files.

-- For demo purposes, keep the default 7 days:
VACUUM main_jcg.default.mortgage_loans_delta;

-- Or be explicit:
-- VACUUM main_jcg.default.mortgage_loans_delta RETAIN 168 HOURS;


path
s3://databricks-e2demofieldengwest/b169b504-4c54-49f2-bc3a-adf4b128f36d/tables/9f6f4e4a-1e1b-47ec-9cf0-30a75b8b3dc7


## Quick Transaction Log Peek

These cells tie ACID and time travel back to the _delta_log folder and log metadata.

In [0]:
-- See detailed table metadata, including the storage location and file stats. 
DESCRIBE DETAIL main_jcg.default.mortgage_loans_delta;


format,id,name,description,location,createdAt,lastModified,partitionColumns,clusteringColumns,numFiles,sizeInBytes,properties,minReaderVersion,minWriterVersion,tableFeatures,statistics,clusterByAuto
delta,a397ba51-02f6-40a3-b851-ee44597ffee1,main_jcg.default.mortgage_loans_delta,,s3://databricks-e2demofieldengwest/b169b504-4c54-49f2-bc3a-adf4b128f36d/tables/9f6f4e4a-1e1b-47ec-9cf0-30a75b8b3dc7,2026-01-05T18:22:20.015Z,2026-01-05T18:27:06.000Z,List(),"List(property_state, delinquency_status)",1,4943100,"Map(delta.parquet.compression.codec -> zstd, delta.enableChangeDataFeed -> true, delta.deletedFileRetentionDuration -> 7 days, delta.enableDeletionVectors -> true, delta.logRetentionDuration -> 30 days, delta.enableRowTracking -> true, delta.checkpointPolicy -> v2, delta.rowTracking.materializedRowCommitVersionColumnName -> _row-commit-version-col-c566b37a-f0cd-4978-bac7-211ac67b839b, delta.rowTracking.materializedRowIdColumnName -> _row-id-col-5795c27d-bd49-4c2f-9416-d5fb41077097)",3,7,"List(appendOnly, changeDataFeed, clustering, deletionVectors, domainMetadata, invariants, rowTracking, v2Checkpoint)","Map(numRowsDeletedByDeletionVectors -> 0, numDeletionVectors -> 0)",False


In [0]:
%python
# Replace this with the actual 'location' value from DESCRIBE DETAIL

# The .json file is one committed transaction versino of the Delta table that lists all of the actions for that commit. For instance, that's files removed/added, metadata changes, protocol changes, commit info, etc. It's now Delta reconstructs the table state for ACID, time travel, and schema enforcement. 

# The .crc file is a checksum/validation file for the json log file that makes suer that the json file isn't corrupted and make sure that it's not partially written during an interrupted write. 

table_path = "s3://databricks-e2demofieldengwest/b169b504-4c54-49f2-bc3a-adf4b128f36d/tables/9f6f4e4a-1e1b-47ec-9cf0-30a75b8b3dc7"

display(dbutils.fs.ls(f"{table_path}/_delta_log"))


path,name,size,modificationTime
s3://databricks-e2demofieldengwest/b169b504-4c54-49f2-bc3a-adf4b128f36d/tables/9f6f4e4a-1e1b-47ec-9cf0-30a75b8b3dc7/_delta_log/00000000000000000000.crc,00000000000000000000.crc,20302,1767637343000
s3://databricks-e2demofieldengwest/b169b504-4c54-49f2-bc3a-adf4b128f36d/tables/9f6f4e4a-1e1b-47ec-9cf0-30a75b8b3dc7/_delta_log/00000000000000000000.json,00000000000000000000.json,19565,1767637343000
s3://databricks-e2demofieldengwest/b169b504-4c54-49f2-bc3a-adf4b128f36d/tables/9f6f4e4a-1e1b-47ec-9cf0-30a75b8b3dc7/_delta_log/00000000000000000001.crc,00000000000000000001.crc,20333,1767637344000
s3://databricks-e2demofieldengwest/b169b504-4c54-49f2-bc3a-adf4b128f36d/tables/9f6f4e4a-1e1b-47ec-9cf0-30a75b8b3dc7/_delta_log/00000000000000000001.json,00000000000000000001.json,886,1767637344000
s3://databricks-e2demofieldengwest/b169b504-4c54-49f2-bc3a-adf4b128f36d/tables/9f6f4e4a-1e1b-47ec-9cf0-30a75b8b3dc7/_delta_log/00000000000000000002.crc,00000000000000000002.crc,20834,1767637345000
s3://databricks-e2demofieldengwest/b169b504-4c54-49f2-bc3a-adf4b128f36d/tables/9f6f4e4a-1e1b-47ec-9cf0-30a75b8b3dc7/_delta_log/00000000000000000002.json,00000000000000000002.json,17468,1767637345000
s3://databricks-e2demofieldengwest/b169b504-4c54-49f2-bc3a-adf4b128f36d/tables/9f6f4e4a-1e1b-47ec-9cf0-30a75b8b3dc7/_delta_log/00000000000000000003.crc,00000000000000000003.crc,21333,1767637346000
s3://databricks-e2demofieldengwest/b169b504-4c54-49f2-bc3a-adf4b128f36d/tables/9f6f4e4a-1e1b-47ec-9cf0-30a75b8b3dc7/_delta_log/00000000000000000003.json,00000000000000000003.json,3284,1767637346000
s3://databricks-e2demofieldengwest/b169b504-4c54-49f2-bc3a-adf4b128f36d/tables/9f6f4e4a-1e1b-47ec-9cf0-30a75b8b3dc7/_delta_log/00000000000000000004.crc,00000000000000000004.crc,6656,1767637353000
s3://databricks-e2demofieldengwest/b169b504-4c54-49f2-bc3a-adf4b128f36d/tables/9f6f4e4a-1e1b-47ec-9cf0-30a75b8b3dc7/_delta_log/00000000000000000004.json,00000000000000000004.json,7491,1767637353000


If your cluster has permission to read Delta log files in the S3 bucket, you can peek at the latest JSON commit (transaction log entry) show atomic actions.

In the demo environment, we don't have access to do so but you can on your environment.