In [0]:
# Inline sample product catalogue used as the raw input for this notebook.
csv_data = """id,name,category,price
1,Amit,Electronics,50000
2,Priya,Furniture,3000
3,Rahul,Stationery,200
4,Neha,Books,800
5,Karthik,Electronics,45000
"""

# Persist the CSV text to DBFS (not the driver's local disk).
# The third argument is the overwrite flag: without it, dbutils.fs.put raises
# once dbfs:/tmp/products.csv already exists, so the cell could not be re-run.
dbutils.fs.put("dbfs:/tmp/products.csv", csv_data, True)

Wrote 139 bytes.


True

In [0]:
# Load the products CSV from DBFS, treating its first line as column names.
# No schema is supplied or inferred, so every column is read as a string.
df = (
    spark.read
    .format("csv")
    .option("header", "true")
    .load("dbfs:/tmp/products.csv")
)
df.show()


+---+-------+-----------+-----+
| id|   name|   category|price|
+---+-------+-----------+-----+
|  1|   Amit|Electronics|50000|
|  2|  Priya|  Furniture| 3000|
|  3|  Rahul| Stationery|  200|
|  4|   Neha|      Books|  800|
|  5|Karthik|Electronics|45000|
+---+-------+-----------+-----+



In [0]:
# Persist the CSV DataFrame in Delta format, replacing whatever is already
# stored at the target path.
(
    df.write
    .format("delta")
    .mode("overwrite")
    .save("/temp/delta/products_table")
)

# Read the table back to confirm the round trip.
delta_df = (
    spark.read
    .format("delta")
    .load("/temp/delta/products_table")
)
delta_df.show()

+---+-------+-----------+-----+
| id|   name|   category|price|
+---+-------+-----------+-----+
|  1|   Amit|Electronics|50000|
|  2|  Priya|  Furniture| 3000|
|  3|  Rahul| Stationery|  200|
|  4|   Neha|      Books|  800|
|  5|Karthik|Electronics|45000|
+---+-------+-----------+-----+



In [0]:
from delta.tables import DeltaTable

# Bind a DeltaTable handle to the stored table so it can be modified in place.
delta_table = DeltaTable.forPath(spark, "/temp/delta/products_table")

# Set price to 3500 on the row(s) where id = 2. Both the condition and the
# new value are passed as SQL expression strings.
delta_table.update("id=2", {"price": "3500"})

# Show the table after the update.
delta_table.toDF().show()

+---+-------+-----------+-----+
| id|   name|   category|price|
+---+-------+-----------+-----+
|  1|   Amit|Electronics|50000|
|  3|  Rahul| Stationery|  200|
|  4|   Neha|      Books|  800|
|  5|Karthik|Electronics|45000|
|  2|  Priya|  Furniture| 3500|
+---+-------+-----------+-----+



In [0]:
# Rows to upsert: id 2 is already in the table (will be updated),
# id 6 is not (will be inserted).
new_data = [
    (2, "Priya", "Furniture", 4000),
    (6, "Sneha", "Kitchen", 1200),
]

update_df = spark.createDataFrame(
    new_data,
    ["id", "name", "category", "price"],
)
update_df.show()

# Merge keyed on id: matching target rows take every column from the source
# row; source rows with no match are inserted as new rows.
(
    delta_table.alias("target")
    .merge(update_df.alias("source"), "target.id=source.id")
    .whenMatchedUpdateAll()
    .whenNotMatchedInsertAll()
    .execute()
)

# Show the table after the merge.
delta_table.toDF().show()

+---+-----+---------+-----+
| id| name| category|price|
+---+-----+---------+-----+
|  2|Priya|Furniture| 4000|
|  6|Sneha|  Kitchen| 1200|
+---+-----+---------+-----+

+---+-------+-----------+-----+
| id|   name|   category|price|
+---+-------+-----------+-----+
|  1|   Amit|Electronics|50000|
|  3|  Rahul| Stationery|  200|
|  4|   Neha|      Books|  800|
|  5|Karthik|Electronics|45000|
|  6|  Sneha|    Kitchen| 1200|
|  2|  Priya|  Furniture| 4000|
+---+-------+-----------+-----+



Time travel: reading an earlier version of the Delta table

In [0]:
# Current contents of the table, shown first for comparison.
delta_table.toDF().show()

# Time travel: read the same path pinned to table version 0.
previous_df = (
    spark.read
    .format("delta")
    .option("versionAsOf", 0)
    .load("/temp/delta/products_table")
)
previous_df.show()

+---+-------+-----------+-----+
| id|   name|   category|price|
+---+-------+-----------+-----+
|  1|   Amit|Electronics|50000|
|  3|  Rahul| Stationery|  200|
|  4|   Neha|      Books|  800|
|  5|Karthik|Electronics|45000|
|  2|  Priya|  Furniture| 4000|
|  6|  Sneha|    Kitchen| 1200|
+---+-------+-----------+-----+

+---+-------+-----------+-----+
| id|   name|   category|price|
+---+-------+-----------+-----+
|  1|   Amit|Electronics|50000|
|  2|  Priya|  Furniture| 3000|
|  3|  Rahul| Stationery|  200|
|  4|   Neha|      Books|  800|
|  5|Karthik|Electronics|45000|
+---+-------+-----------+-----+



Saving the data as a Delta table partitioned by category

In [0]:
# Write the CSV DataFrame to a second Delta location, partitioned by the
# category column and replacing any existing contents at that path.
(
    df.write
    .format("delta")
    .mode("overwrite")
    .partitionBy("category")
    .save("/temp/delta/products_table_partitioned")
)


In [0]:
# Read the partitioned Delta table back and display its rows.
ff = (
    spark.read
    .format("delta")
    .load("/temp/delta/products_table_partitioned")
)
ff.show()

+---+-------+-----------+-----+
| id|   name|   category|price|
+---+-------+-----------+-----+
|  5|Karthik|Electronics|45000|
|  1|   Amit|Electronics|50000|
|  2|  Priya|  Furniture| 3000|
|  3|  Rahul| Stationery|  200|
|  4|   Neha|      Books|  800|
+---+-------+-----------+-----+

