## How to See OPTIMIZE Doing Real Work
### Step 1: Write many files artificially

In [0]:
# Produce many tiny commits on purpose: every iteration appends a one-row
# DataFrame to the same Delta location as its own write.
employees_big_path = "/Volumes/workspace/default/tmp/employees_delta_big"
employee_columns = ("id", "name", "salary")

for i in range(20):
    employee_row = (i, f"name{i}", 10000 + i)
    df = spark.createDataFrame([employee_row], employee_columns)
    # mode("append") adds to the existing table instead of replacing it.
    df.write.format("delta").mode("append").save(employees_big_path)


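This writes **20 separate small files**: each loop iteration appends a single-row DataFrame as its own commit, so every write produces one tiny (~1 KB) data file.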

### Step 2: Run OPTIMIZE

In [0]:
%sql
-- Compact the small data files of the path-based Delta table written above.
-- The returned metrics column reports how many files were added and removed.
OPTIMIZE delta.`/Volumes/workspace/default/tmp/employees_delta_big`;


path,metrics
dbfs:/Volumes/workspace/default/tmp/employees_delta_big,"List(1, 20, List(1290, 1290, 1290.0, 1, 1290), List(1023, 1029, 1026.4, 20, 20528), 0, null, null, 0, 1, 20, 0, true, 0, 0, 1763563942711, 1763563945767, 8, 1, null, List(0, 0), null, 3, 3, 450, 0, null)"


### Visual Explanation
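**Before OPTIMIZE**

The table history lists one single-file `WRITE` commit per loop iteration, followed by the `OPTIMIZE` commit (version 20) that replaced the 20 small files with 1 compacted file. Reading an earlier version with time travel then shows the table as it was before compaction.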

In [0]:
%sql
-- List the table's commit log (newest version first): one row per operation,
-- with its parameters and operationMetrics such as numFiles / numRemovedFiles.
DESCRIBE HISTORY delta.`/Volumes/workspace/default/tmp/employees_delta_big`


version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
20,2025-11-19T14:52:26.000Z,76766366570240,geethasamynathan2011@gmail.com,OPTIMIZE,"Map(predicate -> [], auto -> false, clusterBy -> [], zOrderBy -> [], batchId -> 0)",,List(809526054020352),1119-144400-4j5d6qgh-v2n,19.0,SnapshotIsolation,False,"Map(numRemovedFiles -> 20, numRemovedBytes -> 20528, p25FileSize -> 1290, numDeletionVectorsRemoved -> 0, minFileSize -> 1290, numAddedFiles -> 1, maxFileSize -> 1290, p75FileSize -> 1290, p50FileSize -> 1290, numAddedBytes -> 1290)",,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13
19,2025-11-19T14:51:37.000Z,76766366570240,geethasamynathan2011@gmail.com,WRITE,"Map(mode -> Append, statsOnLoad -> false, partitionBy -> [])",,List(809526054020352),1119-144400-4j5d6qgh-v2n,18.0,WriteSerializable,True,"Map(numFiles -> 1, numOutputRows -> 1, numOutputBytes -> 1029)",,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13
18,2025-11-19T14:51:36.000Z,76766366570240,geethasamynathan2011@gmail.com,WRITE,"Map(mode -> Append, statsOnLoad -> false, partitionBy -> [])",,List(809526054020352),1119-144400-4j5d6qgh-v2n,17.0,WriteSerializable,True,"Map(numFiles -> 1, numOutputRows -> 1, numOutputBytes -> 1029)",,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13
17,2025-11-19T14:51:35.000Z,76766366570240,geethasamynathan2011@gmail.com,WRITE,"Map(mode -> Append, statsOnLoad -> false, partitionBy -> [])",,List(809526054020352),1119-144400-4j5d6qgh-v2n,16.0,WriteSerializable,True,"Map(numFiles -> 1, numOutputRows -> 1, numOutputBytes -> 1029)",,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13
16,2025-11-19T14:51:34.000Z,76766366570240,geethasamynathan2011@gmail.com,WRITE,"Map(mode -> Append, statsOnLoad -> false, partitionBy -> [])",,List(809526054020352),1119-144400-4j5d6qgh-v2n,15.0,WriteSerializable,True,"Map(numFiles -> 1, numOutputRows -> 1, numOutputBytes -> 1029)",,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13
15,2025-11-19T14:51:33.000Z,76766366570240,geethasamynathan2011@gmail.com,WRITE,"Map(mode -> Append, statsOnLoad -> false, partitionBy -> [])",,List(809526054020352),1119-144400-4j5d6qgh-v2n,14.0,WriteSerializable,True,"Map(numFiles -> 1, numOutputRows -> 1, numOutputBytes -> 1029)",,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13
14,2025-11-19T14:51:32.000Z,76766366570240,geethasamynathan2011@gmail.com,WRITE,"Map(mode -> Append, statsOnLoad -> false, partitionBy -> [])",,List(809526054020352),1119-144400-4j5d6qgh-v2n,13.0,WriteSerializable,True,"Map(numFiles -> 1, numOutputRows -> 1, numOutputBytes -> 1029)",,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13
13,2025-11-19T14:51:31.000Z,76766366570240,geethasamynathan2011@gmail.com,WRITE,"Map(mode -> Append, statsOnLoad -> false, partitionBy -> [])",,List(809526054020352),1119-144400-4j5d6qgh-v2n,12.0,WriteSerializable,True,"Map(numFiles -> 1, numOutputRows -> 1, numOutputBytes -> 1029)",,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13
12,2025-11-19T14:51:29.000Z,76766366570240,geethasamynathan2011@gmail.com,WRITE,"Map(mode -> Append, statsOnLoad -> false, partitionBy -> [])",,List(809526054020352),1119-144400-4j5d6qgh-v2n,11.0,WriteSerializable,True,"Map(numFiles -> 1, numOutputRows -> 1, numOutputBytes -> 1029)",,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13
11,2025-11-19T14:51:28.000Z,76766366570240,geethasamynathan2011@gmail.com,WRITE,"Map(mode -> Append, statsOnLoad -> false, partitionBy -> [])",,List(809526054020352),1119-144400-4j5d6qgh-v2n,10.0,WriteSerializable,True,"Map(numFiles -> 1, numOutputRows -> 1, numOutputBytes -> 1029)",,Databricks-Runtime/17.2.x-aarch64-photon-scala2.13


In [0]:
# Delta time travel: pin the reader to version 0, the table's first commit,
# so the result reflects the state before the later appends and OPTIMIZE.
version_0_reader = spark.read.format("delta").option("versionAsOf", 0)
df_v0 = version_0_reader.load("/Volumes/workspace/default/tmp/employees_delta_big")

display(df_v0)

id,name,salary
0,name0,10000
