## Make Data

In [0]:
# Sample employee rows laid out as (EmpID, EmpName, MgrID).
# MgrID is None for the employees that have no manager.
data = [
    (101, "Alice", None),
    (102, "Bob", None),
    (103, "Charlie", 101),
    (104, "David", 102),
    (105, "Eve", 101),
]

# Only column names are supplied; Spark infers the column types from the rows.
emp_columns = ['EmpID', 'EmpName', 'MgrID']
df = spark.createDataFrame(data, emp_columns)
df.show()

+-----+-------+-----+
|EmpID|EmpName|MgrID|
+-----+-------+-----+
|  101|  Alice| null|
|  102|    Bob| null|
|  103|Charlie|  101|
|  104|  David|  102|
|  105|    Eve|  101|
+-----+-------+-----+



In [0]:
# Persist the sample rows as the managed table 'EmpData'.
# - format("delta") is explicit because the following cells open this table
#   with DeltaTable.forName, which needs a Delta table.
# - mode("overwrite") lets this cell be re-run (e.g. Restart & Run All); the
#   default save mode raises an error once the table already exists.
(
    df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable('EmpData')
)

## Create a DeltaTable instance for the table

In [0]:
from delta.tables import *

In [0]:
# Look up the 'EmpData' table by name and keep a DeltaTable handle to it.
tbl_inst = DeltaTable.forName(spark, 'EmpData')

# Show the table's current contents as seen through that handle.
emp_current_df = tbl_inst.toDF()
emp_current_df.show()

+-----+-------+-----+
|EmpID|EmpName|MgrID|
+-----+-------+-----+
|  103|Charlie|  101|
|  104|  David|  102|
|  105|    Eve|  101|
|  101|  Alice| null|
|  102|    Bob| null|
+-----+-------+-----+



## Update through the DeltaTable instance, then check the results via both the instance and the table
--> Note: the update was applied on the DeltaTable instance (`tbl_inst`), not on a DataFrame

In [0]:
# Update the row(s) matching the condition through the DeltaTable handle.
# Values in `set` are SQL expression strings, so "103" is the literal 103.
tbl_inst.update(
    condition = "EmpName = 'Eve' ",
    set = {"MgrID": "103"}
)

# Read the updated row back two ways to show that both views agree.
tbl_inst.toDF().where("EmpName = 'Eve'").show()   ## read via the DeltaTable instance
print()
spark.sql("select * from empData where EmpName = 'Eve'").show()  ## read via SQL on the table

+-----+-------+-----+
|EmpID|EmpName|MgrID|
+-----+-------+-----+
|  105|    Eve|  103|
+-----+-------+-----+


+-----+-------+-----+
|EmpID|EmpName|MgrID|
+-----+-------+-----+
|  105|    Eve|  103|
+-----+-------+-----+



## Update the table with SQL, then check the results via both the DeltaTable instance and the table

In [0]:
# This time the update is issued as a SQL statement against the table itself,
# not through the DeltaTable handle.
spark.sql("update empData set EmpID=110 where EmpName='Eve' ")

# Read the updated row back two ways to show that both views agree.
tbl_inst.toDF().where("EmpName = 'Eve'").show()   ## read via the DeltaTable instance
print()
spark.sql("select * from empData where EmpName = 'Eve'").show()  ## read via SQL on the table

+-----+-------+-----+
|EmpID|EmpName|MgrID|
+-----+-------+-----+
|  110|    Eve|  103|
+-----+-------+-----+


+-----+-------+-----+
|EmpID|EmpName|MgrID|
+-----+-------+-----+
|  110|    Eve|  103|
+-----+-------+-----+



## Check the history

In [0]:
%sql
-- List the commit history of the table (one row per table version).
DESCRIBE HISTORY empData

version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
5,2025-04-09T07:23:43.000+0000,2187969817410935,jithinvyas2001@gmail.com,UPDATE,"Map(predicate -> [""(EmpName#6100 = Eve)""])",,List(2959953206238096),0409-055955-xy4nwb9w,4.0,WriteSerializable,False,"Map(numRemovedFiles -> 1, numRemovedBytes -> 1097, numCopiedRows -> 0, numDeletionVectorsAdded -> 0, numDeletionVectorsRemoved -> 0, numAddedChangeFiles -> 0, executionTimeMs -> 1676, scanTimeMs -> 804, numAddedFiles -> 1, numUpdatedRows -> 1, numAddedBytes -> 1097, rewriteTimeMs -> 871)",,Databricks-Runtime/12.2.x-scala2.12
4,2025-04-09T07:15:02.000+0000,2187969817410935,jithinvyas2001@gmail.com,UPDATE,"Map(predicate -> [""(EmpName#3413 = Eve)""])",,List(2959953206238096),0409-055955-xy4nwb9w,3.0,WriteSerializable,False,"Map(numRemovedFiles -> 1, numRemovedBytes -> 1097, numCopiedRows -> 0, numDeletionVectorsAdded -> 0, numDeletionVectorsRemoved -> 0, numAddedChangeFiles -> 0, executionTimeMs -> 1450, scanTimeMs -> 583, numAddedFiles -> 1, numUpdatedRows -> 1, numAddedBytes -> 1097, rewriteTimeMs -> 867)",,Databricks-Runtime/12.2.x-scala2.12
3,2025-04-09T07:13:14.000+0000,2187969817410935,jithinvyas2001@gmail.com,UPDATE,"Map(predicate -> [""(EmpName#3413 = Eve)""])",,List(2959953206238096),0409-055955-xy4nwb9w,2.0,WriteSerializable,False,"Map(numRemovedFiles -> 1, numRemovedBytes -> 1097, numCopiedRows -> 0, numDeletionVectorsAdded -> 0, numDeletionVectorsRemoved -> 0, numAddedChangeFiles -> 0, executionTimeMs -> 2340, scanTimeMs -> 1478, numAddedFiles -> 1, numUpdatedRows -> 1, numAddedBytes -> 1097, rewriteTimeMs -> 862)",,Databricks-Runtime/12.2.x-scala2.12
2,2025-04-09T07:11:45.000+0000,2187969817410935,jithinvyas2001@gmail.com,UPDATE,"Map(predicate -> [""(EmpName#3413 = Eve)""])",,List(2959953206238096),0409-055955-xy4nwb9w,1.0,WriteSerializable,False,"Map(numRemovedFiles -> 1, numRemovedBytes -> 1097, numCopiedRows -> 0, numDeletionVectorsAdded -> 0, numDeletionVectorsRemoved -> 0, numAddedChangeFiles -> 0, executionTimeMs -> 2364, scanTimeMs -> 1440, numAddedFiles -> 1, numUpdatedRows -> 1, numAddedBytes -> 1097, rewriteTimeMs -> 924)",,Databricks-Runtime/12.2.x-scala2.12
1,2025-04-09T07:11:28.000+0000,2187969817410935,jithinvyas2001@gmail.com,UPDATE,"Map(predicate -> [""(EmpName#3413 = Eve)""])",,List(2959953206238096),0409-055955-xy4nwb9w,0.0,WriteSerializable,False,"Map(numRemovedFiles -> 1, numRemovedBytes -> 1097, numCopiedRows -> 0, numDeletionVectorsAdded -> 0, numDeletionVectorsRemoved -> 0, numAddedChangeFiles -> 0, executionTimeMs -> 3307, scanTimeMs -> 1884, numAddedFiles -> 1, numUpdatedRows -> 1, numAddedBytes -> 1097, rewriteTimeMs -> 1349)",,Databricks-Runtime/12.2.x-scala2.12
0,2025-04-09T07:03:43.000+0000,2187969817410935,jithinvyas2001@gmail.com,CREATE TABLE AS SELECT,"Map(isManaged -> true, description -> null, partitionBy -> [], properties -> {})",,List(2959953206238096),0409-055955-xy4nwb9w,,WriteSerializable,True,"Map(numFiles -> 5, numOutputRows -> 5, numOutputBytes -> 5413)",,Databricks-Runtime/12.2.x-scala2.12


### Know about `_sqldf`
---> A temporary DataFrame object that holds the result of the most recently run `%sql` cell

In [0]:
# _sqldf is not assigned in any visible cell; presumably it holds the result
# of the `%sql describe history` cell, so run that cell first — TODO confirm.
temp_var = _sqldf.toPandas()

# Keep only the columns that identify each commit in the history.
history_columns = ['version', 'operation', 'timestamp']
temp_var[history_columns]

Unnamed: 0,version,operation,timestamp
0,5,UPDATE,2025-04-09 07:23:43
1,4,UPDATE,2025-04-09 07:15:02
2,3,UPDATE,2025-04-09 07:13:14
3,2,UPDATE,2025-04-09 07:11:45
4,1,UPDATE,2025-04-09 07:11:28
5,0,CREATE TABLE AS SELECT,2025-04-09 07:03:43
