# Delta Lake in Databricks
### Efficient Storage and Warehousing Capabilities

Delta Lake is an open-source storage layer that brings **reliability** and **performance** to data lakes, combining the best of data lakes and data warehouses. It helps solve common challenges related to:

1. **Data Quality**: Delta Lake provides ACID (Atomicity, Consistency, Isolation, Durability) transactions, ensuring data integrity and reliability.
2. **Efficient Storage**: Delta Lake uses efficient data compression and partitioning techniques to reduce storage costs and improve read/write performance.
3. **Data Versioning**: Delta Lake maintains a version history of your data, making it easy to perform time travel queries or roll back changes.
4. **Schema Enforcement and Evolution**: It provides automatic schema validation and supports schema changes without breaking existing data pipelines.
5. **Real-Time Analytics**: Delta supports both batch and streaming data, enabling near real-time analytics.
6. **Warehouse-Grade Performance**: Built-in optimizations ensure faster query performance, especially for large-scale datasets.

With these features, Delta Lake in Databricks enables scalable, cost-effective, and high-performance data lake storage while offering the governance and performance capabilities of a data warehouse.


#### Read data from an existing table

In [0]:
-- Preview the cleaned product table in the silver schema.
SELECT *
FROM psl_salesdev.silver.product_cleaned

productID,productName,subCategory,category,source,ingestionTimestamp
TEC-PH-10000486,Plantronics HL10 Handset Lifter,Phones,Technology,Retail CSV,2024-09-09T19:36:52.229Z
TEC-AC-10003832,Logitech P710e Mobile Speakerphone,Accessories,Technology,Retail CSV,2024-09-09T19:36:52.229Z
TEC-AC-10002076,Microsoft Natural Keyboard Elite,Accessories,Technology,Retail CSV,2024-09-09T19:36:52.229Z
TEC-PH-10004667,Cisco 8x8 Inc. 6753i IP Business Phone System,Phones,Technology,Retail CSV,2024-09-09T19:36:52.229Z
OFF-BI-10000545,GBC Ibimaster 500 Manual ProClick Binding System,Binders,Office Supplies,Retail CSV,2024-09-09T19:36:52.229Z
OFF-PA-10003543,Xerox 1985,Paper,Office Supplies,Retail CSV,2024-09-09T19:36:52.229Z
OFF-PA-10000994,Xerox 1915,Paper,Office Supplies,Retail CSV,2024-09-09T19:36:52.229Z
FUR-TA-10003008,"Lesro Round Back Collection Coffee Table, End Table",Tables,Furniture,Retail CSV,2024-09-09T19:36:52.229Z
OFF-PA-10002464,HP Office Recycled Paper (20Lb. and 87 Bright),Paper,Office Supplies,Retail CSV,2024-09-09T19:36:52.229Z
TEC-AC-10001109,Logitech Trackman Marble Mouse,Accessories,Technology,Retail CSV,2024-09-09T19:36:52.229Z


#### Create an external Delta table

In [0]:
-- Create (or replace) an external Delta table populated with a full copy of
-- silver.product_cleaned. The LOCATION clause names the storage path that holds
-- the table's data files and _delta_log; supplying an explicit path is what
-- makes the table external rather than managed.
CREATE OR REPLACE TABLE psl_salesdev.silver.product_cleaned_deltademo
USING DELTA
LOCATION 'abfss://sales@storageaccountname.dfs.core.windows.net/silver/product_cleaneddeltademo'
AS
SELECT *
FROM psl_salesdev.silver.product_cleaned

num_affected_rows,num_inserted_rows


#### INSERT

In [0]:
-- Insert a single demo row.
-- Columns are listed explicitly so the statement does not depend on the table's
-- column order, and productID is written as a string literal to match the
-- string IDs already in the table (e.g. 'TEC-PH-10000486') and the
-- productID = '1' predicates used by the cells that follow.
INSERT INTO psl_salesdev.silver.product_cleaned_deltademo
    (productID, productName, subCategory, category, source, ingestionTimestamp)
VALUES
    ('1', 'ProductDemo', 'subCategory1', 'category1', 'source1', current_timestamp())

num_affected_rows,num_inserted_rows
1,1


#### Check if the insert was successful

In [0]:
-- Look up the demo row by its product ID.
SELECT *
FROM psl_salesdev.silver.product_cleaned_deltademo
WHERE productID = '1'

productID,productName,subCategory,category,source,ingestionTimestamp
1,ProductDemo,subCategory1,category1,source1,2024-09-12T13:47:25.778Z


In [0]:
-- Total number of rows currently in the demo table.
SELECT COUNT(*)
FROM psl_salesdev.silver.product_cleaned_deltademo

count(1)
1894


In [0]:
-- Rename the demo product; the WHERE clause restricts the update to productID '1'.
UPDATE psl_salesdev.silver.product_cleaned_deltademo
SET productName = 'Product_updated1'
WHERE productID = '1'

num_affected_rows
1


In [0]:
-- Re-read the demo row to confirm the new productName.
SELECT *
FROM psl_salesdev.silver.product_cleaned_deltademo
WHERE productID = '1'

productID,productName,subCategory,category,source,ingestionTimestamp
1,Product_updated1,subCategory1,category1,source1,2024-09-12T13:47:25.778Z


#### Describe transaction history

In [0]:
-- Read one commit file from the table's _delta_log directory as JSON
-- (00000000000000000001.json is the log entry for table version 1).
SELECT *
FROM json.`abfss://sales@storageaccountname.dfs.core.windows.net/silver/product_cleaneddeltademo/_delta_log/00000000000000000001.json`

add,commitInfo
,"Map(txnId -> 0bcf955b-de30-4446-91be-1cd428b833eb, timestamp -> 1726148851349, tags -> Map(noRowsCopied -> true, restoresDeletedRows -> false), job -> Map(jobId -> , runId -> ), operationMetrics -> Map(numFiles -> 1, numOutputBytes -> 1816, numOutputRows -> 1), isolationLevel -> WriteSerializable, operationParameters -> Map(mode -> Append, partitionBy -> [], statsOnLoad -> false), engineInfo -> Databricks-Runtime/15.3.x-photon-scala2.12, operation -> WRITE, isBlindAppend -> true, userId -> 6408908189935837, readVersion -> 0, userName -> vikas.rajpoot96@outlook.com)"
"Map(stats -> {""numRecords"":1,""minValues"":{""productID"":""1"",""productName"":""ProductDemo"",""subCategory"":""subCategory1"",""category"":""category1"",""source"":""source1"",""ingestionTimestamp"":""2024-09-12T13:47:25.778Z""},""maxValues"":{""productID"":""1"",""productName"":""ProductDemo"",""subCategory"":""subCategory1"",""category"":""category1"",""source"":""source1"",""ingestionTimestamp"":""2024-09-12T13:47:25.778Z""},""nullCount"":{""productID"":0,""productName"":0,""subCategory"":0,""category"":0,""source"":0,""ingestionTimestamp"":0},""tightBounds"":true}, path -> part-00000-dd71a7f7-86a0-4137-b9b4-24347d50f2db.c000.snappy.parquet, size -> 1816, tags -> Map(INSERTION_TIME -> 1726148850000000, MAX_INSERTION_TIME -> 1726148850000000, MIN_INSERTION_TIME -> 1726148850000000, OPTIMIZE_TARGET_SIZE -> 268435456), modificationTime -> 1726148850000, dataChange -> true)",


In [0]:
-- List the table's commit history (version, timestamp, operation, metrics).
DESCRIBE HISTORY psl_salesdev.silver.product_cleaned_deltademo

version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
2,2024-09-12T13:50:25.000Z,6408908189935837,vikas.rajpoot96@outlook.com,UPDATE,"Map(predicate -> [""(productID#2320 = 1)""])","Map(jobName -> null, jobOwnerId -> null, jobId -> , jobRunId -> null, runId -> , triggerType -> null)",,,1.0,WriteSerializable,False,"Map(numRemovedFiles -> 1, numRemovedBytes -> 1816, numCopiedRows -> 0, numDeletionVectorsAdded -> 0, numDeletionVectorsRemoved -> 0, numAddedChangeFiles -> 0, executionTimeMs -> 6472, numDeletionVectorsUpdated -> 0, scanTimeMs -> 2859, numAddedFiles -> 1, numUpdatedRows -> 1, numAddedBytes -> 1879, rewriteTimeMs -> 3592)",,Databricks-Runtime/15.3.x-photon-scala2.12
1,2024-09-12T13:47:31.000Z,6408908189935837,vikas.rajpoot96@outlook.com,WRITE,"Map(mode -> Append, statsOnLoad -> false, partitionBy -> [])","Map(jobName -> null, jobOwnerId -> null, jobId -> , jobRunId -> null, runId -> , triggerType -> null)",,,0.0,WriteSerializable,True,"Map(numFiles -> 1, numOutputRows -> 1, numOutputBytes -> 1816)",,Databricks-Runtime/15.3.x-photon-scala2.12
0,2024-09-12T13:46:19.000Z,6408908189935837,vikas.rajpoot96@outlook.com,CREATE OR REPLACE TABLE AS SELECT,"Map(partitionBy -> [], clusterBy -> [], description -> null, isManaged -> false, properties -> {""delta.checkpoint.writeStatsAsStruct"":""true"",""delta.checkpoint.writeStatsAsJson"":""false"",""delta.enableDeletionVectors"":""true""}, statsOnLoad -> false)","Map(jobName -> null, jobOwnerId -> null, jobId -> , jobRunId -> null, runId -> , triggerType -> null)",,,,WriteSerializable,False,"Map(numFiles -> 1, numOutputRows -> 1893, numOutputBytes -> 63454)",,Databricks-Runtime/15.3.x-photon-scala2.12


#### Let us delete records

In [0]:
-- Remove the demo row; the WHERE clause restricts the delete to productID '1'.
DELETE FROM psl_salesdev.silver.product_cleaned_deltademo
WHERE productID = '1'

num_affected_rows
1


In [0]:
-- Confirm the demo row is no longer present in the current table version.
SELECT *
FROM psl_salesdev.silver.product_cleaned_deltademo
WHERE productID = '1'

productID,productName,subCategory,category,source,ingestionTimestamp


### Let's check the history log again

In [0]:
-- Show the commit history again; the DELETE should now appear as the newest version.
DESCRIBE HISTORY psl_salesdev.silver.product_cleaned_deltademo

version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
3,2024-09-12T14:07:54.000Z,6408908189935837,vikas.rajpoot96@outlook.com,DELETE,"Map(predicate -> [""(productID#1079 = 1)""])","Map(jobName -> null, jobOwnerId -> null, jobId -> , jobRunId -> null, runId -> , triggerType -> null)",,,2.0,WriteSerializable,False,"Map(numRemovedFiles -> 1, numRemovedBytes -> 1879, numCopiedRows -> 0, numDeletionVectorsAdded -> 0, numDeletionVectorsRemoved -> 0, numAddedChangeFiles -> 0, executionTimeMs -> 7073, numDeletionVectorsUpdated -> 0, numDeletedRows -> 1, scanTimeMs -> 6844, numAddedFiles -> 0, numAddedBytes -> 0, rewriteTimeMs -> 215)",,Databricks-Runtime/15.3.x-photon-scala2.12
2,2024-09-12T13:50:25.000Z,6408908189935837,vikas.rajpoot96@outlook.com,UPDATE,"Map(predicate -> [""(productID#2320 = 1)""])","Map(jobName -> null, jobOwnerId -> null, jobId -> , jobRunId -> null, runId -> , triggerType -> null)",,,1.0,WriteSerializable,False,"Map(numRemovedFiles -> 1, numRemovedBytes -> 1816, numCopiedRows -> 0, numDeletionVectorsAdded -> 0, numDeletionVectorsRemoved -> 0, numAddedChangeFiles -> 0, executionTimeMs -> 6472, numDeletionVectorsUpdated -> 0, scanTimeMs -> 2859, numAddedFiles -> 1, numUpdatedRows -> 1, numAddedBytes -> 1879, rewriteTimeMs -> 3592)",,Databricks-Runtime/15.3.x-photon-scala2.12
1,2024-09-12T13:47:31.000Z,6408908189935837,vikas.rajpoot96@outlook.com,WRITE,"Map(mode -> Append, statsOnLoad -> false, partitionBy -> [])","Map(jobName -> null, jobOwnerId -> null, jobId -> , jobRunId -> null, runId -> , triggerType -> null)",,,0.0,WriteSerializable,True,"Map(numFiles -> 1, numOutputRows -> 1, numOutputBytes -> 1816)",,Databricks-Runtime/15.3.x-photon-scala2.12
0,2024-09-12T13:46:19.000Z,6408908189935837,vikas.rajpoot96@outlook.com,CREATE OR REPLACE TABLE AS SELECT,"Map(partitionBy -> [], clusterBy -> [], description -> null, isManaged -> false, properties -> {""delta.checkpoint.writeStatsAsStruct"":""true"",""delta.checkpoint.writeStatsAsJson"":""false"",""delta.enableDeletionVectors"":""true""}, statsOnLoad -> false)","Map(jobName -> null, jobOwnerId -> null, jobId -> , jobRunId -> null, runId -> , triggerType -> null)",,,,WriteSerializable,False,"Map(numFiles -> 1, numOutputRows -> 1893, numOutputBytes -> 63454)",,Databricks-Runtime/15.3.x-photon-scala2.12


#### Let us do time travel

In [0]:
-- Baseline: the current table version no longer contains the demo row.
SELECT *
FROM psl_salesdev.silver.product_cleaned_deltademo
WHERE productID = '1'

productID,productName,subCategory,category,source,ingestionTimestamp


In [0]:
-- Time travel by version number.
-- Per the history log, version 3 is the DELETE commit (the current state), so
-- reading it returns nothing for this product. Version 2 is the UPDATE commit,
-- i.e. the last version in which the demo row still exists.
SELECT *
FROM psl_salesdev.silver.product_cleaned_deltademo VERSION AS OF 2
WHERE productID = '1'

productID,productName,subCategory,category,source,ingestionTimestamp
1,Product_updated1,subCategory1,category1,source1,2024-09-12T13:47:25.778Z


### Time travel with a specific timestamp

In [0]:
-- Time travel by timestamp.
-- A bare date ('2024-09-12') resolves to midnight at the start of that day,
-- which is earlier than the table's first commit (version 0 at 13:46:19Z), so
-- there is no table version to read at that instant. Use an instant inside the
-- table's history instead: 14:00:00Z falls after the UPDATE (version 2,
-- 13:50:25Z) and before the DELETE (version 3, 14:07:54Z), so the demo row is
-- still visible.
SELECT *
FROM psl_salesdev.silver.product_cleaned_deltademo TIMESTAMP AS OF '2024-09-12T14:00:00.000Z'
WHERE productID = '1'

<b>Do you want to remove the time travel feature?</b><br>


#### VACUUM command

In [0]:
-- Clean up data files that the table no longer references.
-- No RETAIN clause is given, so the table's default retention threshold applies.
VACUUM psl_salesdev.silver.product_cleaned_deltademo