https://databricks-prod-cloudfront.cloud.databricks.com/public/4027ec902e239c93eaaa8714f173bcfc/83657679869138/441500159838114/2187969817410935/latest.html

In [0]:
from delta.tables import *

Run this cleanup first: if the table is already registered in the metastore and still has files in DBFS, both must be removed before the table is re-created.

In [0]:
# Step 1: recursively delete the Delta files at the table's storage path.
# This permanently removes the data, but the metastore entry is left in place.
dbutils.fs.rm("dbfs:/FileStore/tables/CreateTable", recurse=True)

# Step 2: remove the table registration itself from the metastore.
spark.sql('drop table if exists deltatable_sample1')

Out[2]: DataFrame[]

## CREATE

In [0]:
# Define a two-column Delta table with the builder API and register it over
# an explicit DBFS path.
(
    DeltaTable.create(spark)
    .tableName("default.deltatable_sample1")  # "default" is the default schema name
    .addColumn("sid", "INT")
    .addColumn("Name", "string")
    .property("description", 'for sample demo only')
    .location("dbfs:/FileStore/tables/CreateTable")
    .execute()
)

# The table starts empty; its history holds a single CREATE TABLE entry (version 0).
spark.sql("select * from default.deltatable_sample1").display()
spark.sql("describe history default.deltatable_sample1").display()

sid,Name


version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
0,2025-03-06T07:16:53.000+0000,2187969817410935,jithinvyas2001@gmail.com,CREATE TABLE,"Map(isManaged -> false, description -> null, partitionBy -> [], properties -> {""description"":""for sample demo only""})",,List(441500159838114),0306-060324-uukowot1,,WriteSerializable,True,Map(),,Databricks-Runtime/12.2.x-scala2.12


## CREATE OR REPLACE

In [0]:
# Replace the table definition in place, adding a third column (Contact_No).
# The name is schema-qualified, matching the queries below, so the builder and
# the SELECT / DESCRIBE HISTORY always target the same table regardless of the
# session's current schema.
(
    DeltaTable.createOrReplace(spark)
    .tableName("default.deltatable_sample1")
    .addColumn("sid", "INT")
    .addColumn("Name", "string")
    .addColumn("Contact_No", "BIGINT")
    .execute()
)

# The replace is recorded as a new commit: history now shows version 1
# (CREATE OR REPLACE TABLE, with the extra column) on top of version 0.
spark.sql("SELECT * FROM default.deltatable_sample1").display()
spark.sql("DESCRIBE HISTORY default.deltatable_sample1").display()

sid,Name,Contact_No


version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
1,2025-03-06T07:17:35.000+0000,2187969817410935,jithinvyas2001@gmail.com,CREATE OR REPLACE TABLE,"Map(isManaged -> false, description -> null, partitionBy -> [], properties -> {})",,List(441500159838114),0306-060324-uukowot1,0.0,WriteSerializable,False,Map(),,Databricks-Runtime/12.2.x-scala2.12
0,2025-03-06T07:16:53.000+0000,2187969817410935,jithinvyas2001@gmail.com,CREATE TABLE,"Map(isManaged -> false, description -> null, partitionBy -> [], properties -> {""description"":""for sample demo only""})",,List(441500159838114),0306-060324-uukowot1,,WriteSerializable,True,Map(),,Databricks-Runtime/12.2.x-scala2.12


## CREATE TABLE IF NOT EXISTS

In [0]:
# Ask for the table to be created only if it is missing. The name is
# schema-qualified, matching the queries below, so the builder and the
# SELECT / DESCRIBE HISTORY always target the same table regardless of the
# session's current schema.
(
    DeltaTable.createIfNotExists(spark)
    .tableName("default.deltatable_sample1")
    .addColumn("sid", "INT")
    .addColumn("Name", "string")
    .addColumn("Contact_No", "BIGINT")
    .execute()
)

# The table already exists, so no new commit is written: the latest history
# entry is still version 1 (not 2).
spark.sql("SELECT * FROM default.deltatable_sample1").display()
spark.sql("DESCRIBE HISTORY default.deltatable_sample1").display()

sid,Name,Contact_No


version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
1,2025-03-06T07:17:35.000+0000,2187969817410935,jithinvyas2001@gmail.com,CREATE OR REPLACE TABLE,"Map(isManaged -> false, description -> null, partitionBy -> [], properties -> {})",,List(441500159838114),0306-060324-uukowot1,0.0,WriteSerializable,False,Map(),,Databricks-Runtime/12.2.x-scala2.12
0,2025-03-06T07:16:53.000+0000,2187969817410935,jithinvyas2001@gmail.com,CREATE TABLE,"Map(isManaged -> false, description -> null, partitionBy -> [], properties -> {""description"":""for sample demo only""})",,List(441500159838114),0306-060324-uukowot1,,WriteSerializable,True,Map(),,Databricks-Runtime/12.2.x-scala2.12


## DATA TYPES

In [0]:
# Column names and types as reported by Spark SQL's DESC TABLE statement.
(
    spark.sql("desc table deltatable_sample1")
    .show()
)

# The same schema as seen through the DataFrame API.
(
    spark.sql("select * from deltatable_sample1")
    .printSchema()
)

+----------+---------+-------+
|  col_name|data_type|comment|
+----------+---------+-------+
|       sid|      int|   null|
|      Name|   string|   null|
|Contact_No|   bigint|   null|
+----------+---------+-------+

root
 |-- sid: integer (nullable = true)
 |-- Name: string (nullable = true)
 |-- Contact_No: long (nullable = true)



## INSERT DATA
This is the PySpark way of inserting data: build a DataFrame and append it with `df.write.insertInto()`.

In [0]:
# Two sample rows; each tuple follows the order of the column list below.
data = [
    (101, "AAA", 9876543210),
    (102, "BBB", 8765432109),
]
columns = ["sid", "Name", "Contact_No"]
new_df = spark.createDataFrame(data, schema=columns)

# Append the rows to the existing table (overwrite=False keeps current data).
# NOTE: insertInto matches columns by position, not by name, so the DataFrame's
# column order must match the table's. This write becomes version 2.
new_df.write.insertInto("deltatable_sample1", overwrite=False)

In [0]:
# Show the table contents first, then its commit history, in that order.
for query in (
    "SELECT * FROM default.deltatable_sample1",
    "DESCRIBE HISTORY default.deltatable_sample1",
):
    spark.sql(query).display()

sid,Name,Contact_No
101,AAA,9876543210
102,BBB,8765432109


version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
2,2025-03-06T07:20:31.000+0000,2187969817410935,jithinvyas2001@gmail.com,WRITE,"Map(mode -> Append, partitionBy -> [])",,List(441500159838114),0306-060324-uukowot1,1.0,WriteSerializable,True,"Map(numFiles -> 2, numOutputRows -> 2, numOutputBytes -> 2143)",,Databricks-Runtime/12.2.x-scala2.12
1,2025-03-06T07:17:35.000+0000,2187969817410935,jithinvyas2001@gmail.com,CREATE OR REPLACE TABLE,"Map(isManaged -> false, description -> null, partitionBy -> [], properties -> {})",,List(441500159838114),0306-060324-uukowot1,0.0,WriteSerializable,False,Map(),,Databricks-Runtime/12.2.x-scala2.12
0,2025-03-06T07:16:53.000+0000,2187969817410935,jithinvyas2001@gmail.com,CREATE TABLE,"Map(isManaged -> false, description -> null, partitionBy -> [], properties -> {""description"":""for sample demo only""})",,List(441500159838114),0306-060324-uukowot1,,WriteSerializable,True,Map(),,Databricks-Runtime/12.2.x-scala2.12


## METADATA

In [0]:
# DESCRIBE DETAIL returns a single wide row; transpose it so every attribute
# (format, id, location, numFiles, ...) is shown on its own line.
(
    spark.sql("DESCRIBE DETAIL deltatable_sample1")
    .toPandas()
    .transpose()
)

Unnamed: 0,0
format,delta
id,bf5e2393-9c55-40cc-a87f-f78e5c76a481
name,spark_catalog.default.deltatable_sample1
description,
location,dbfs:/FileStore/tables/CreateTable
createdAt,2025-03-06 07:17:34.667000
lastModified,2025-03-06 07:20:31
partitionColumns,[]
numFiles,2
sizeInBytes,2143


In [0]:
# DESCRIBE FORMATTED lists the columns followed by table-level information.
# Index the result by attribute name and label that index "METADATA".
(
    spark.sql("DESCRIBE FORMATTED deltatable_sample1")
    .toPandas()
    .set_index("col_name")
    .rename_axis('METADATA')
)

Unnamed: 0_level_0,data_type,comment
METADATA,Unnamed: 1_level_1,Unnamed: 2_level_1
sid,int,
Name,string,
Contact_No,bigint,
,,
# Detailed Table Information,,
Catalog,spark_catalog,
Database,default,
Table,deltatable_sample1,
Created Time,Thu Mar 06 07:16:54 UTC 2025,
Last Access,UNKNOWN,


deltatable_sample1 is an **external table**, because it was created with an explicit `location`. Why does that matter?

- If Type = EXTERNAL, your data is safe after DROP TABLE (only metadata is removed).
- If Type = MANAGED, DROP TABLE deletes everything, including data files.

In [0]:
# Import only the helper this cell uses. A wildcard import of
# pyspark.sql.functions would shadow Python builtins such as sum, min, max
# and round for every cell that runs afterwards.
from pyspark.sql.functions import col

# Keep only the "Type" row of the DESCRIBE FORMATTED output.
(
    spark.sql("DESCRIBE FORMATTED deltatable_sample1")
    .filter(col("col_name") == 'Type')
    .toPandas()
)
# The table is external: its data lives at the user-supplied location,
# outside of Spark's managed storage.

Unnamed: 0,col_name,data_type,comment
0,Type,EXTERNAL,


## DELTA TABLE INSTANCE


In [0]:
# Obtain a DeltaTable handle by looking the table up by its metastore name.
delta_table = DeltaTable.forName(spark, "deltatable_sample1")

# detail() yields a one-row DataFrame; transpose it for a vertical view.
detail_df = delta_table.detail()
detail_df.toPandas().transpose()

Unnamed: 0,0
format,delta
id,bf5e2393-9c55-40cc-a87f-f78e5c76a481
name,
description,
location,dbfs:/FileStore/tables/CreateTable
createdAt,2025-03-06 07:17:34.667000
lastModified,2025-03-06 07:20:31
partitionColumns,[]
numFiles,2
sizeInBytes,2143


In [0]:
# Obtain the handle again, this time directly from the storage path
# instead of the metastore name.
delta_table = DeltaTable.forPath(spark, "dbfs:/FileStore/tables/CreateTable")

# Fetch only the two most recent commits and show one commit per column.
hist_df = delta_table.history(limit=2)
hist_df.toPandas().transpose()

Unnamed: 0,0,1
version,2,1
timestamp,2025-03-06 07:20:31,2025-03-06 07:17:35
userId,2187969817410935,2187969817410935
userName,jithinvyas2001@gmail.com,jithinvyas2001@gmail.com
operation,WRITE,CREATE OR REPLACE TABLE
operationParameters,"{'mode': 'Append', 'partitionBy': '[]'}","{'isManaged': 'false', 'description': None, 'p..."
job,,
notebook,{'notebookId': '441500159838114'},{'notebookId': '441500159838114'}
clusterId,0306-060324-uukowot1,0306-060324-uukowot1
readVersion,1,0


In [0]:
# Convert the DeltaTable handle to a DataFrame and render its rows.
(
    delta_table
    .toDF()
    .display()
)

sid,Name,Contact_No
101,AAA,9876543210
102,BBB,8765432109
