In [None]:
import os
import warnings

from pyspark.sql import SparkSession
from pyspark.sql.types import *
from pyspark.sql import functions as F
from pyspark.sql import Window
from delta.tables import DeltaTable

# Silence FutureWarning messages so cell outputs stay readable.
warnings.filterwarnings("ignore", category=FutureWarning)

# Build (or reuse) a SparkSession configured for Delta Lake: the delta-core
# package is requested and the Delta SQL extension and catalog are registered.
spark = (
    SparkSession.builder.appName("DeltaSession")
    .config("spark.jars.packages", "io.delta:delta-core_2.12:2.3.0")
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
    .getOrCreate()
)

In [None]:
# Column layout of the sample table: (name, type) pairs, every column nullable.
schema = StructType([
    StructField(column_name, column_type, True)
    for column_name, column_type in [
        ("id", IntegerType()),
        ("name", StringType()),
        ("age", IntegerType()),
        ("city", StringType()),
    ]
])

# Three sample rows, one tuple per row, in the same order as the schema columns.
data = [
    (1, "Alice", 25, "New York"),
    (2, "Bob", 30, "San Francisco"),
    (3, "Charlie", 35, "Chicago"),
]

df = spark.createDataFrame(data, schema)

# Convert to pandas so the cell displays the rows.
df.toPandas()

In [None]:
# Persist the sample DataFrame as a Delta table named "02deltatable".
# mode("overwrite") keeps the cell re-runnable: with the default save mode the
# write raises an error once the table already exists, which breaks
# Restart & Run All on any run after the first.
df.write.format("delta").mode("overwrite").saveAsTable("02deltatable")

# DeltaTable

## [forPath](https://docs.delta.io/latest/api/python/index.html#delta.tables.DeltaTable.forPath)

forPath(sparkSession: pyspark.sql.session.SparkSession, path: str, hadoopConf: Dict[str, str] = {}) → delta.tables.DeltaTable
Instantiate a DeltaTable object representing the data at the given path. If the given path is invalid (i.e. either no table exists or an existing table is not a Delta table), it throws a "not a Delta table" error.

Parameters:	
- sparkSession (pyspark.sql.SparkSession) – SparkSession to use for loading the table
- path (str) – path of the Delta table data to load
- hadoopConf (optional dict with str as key and str as value.) – Hadoop configuration starting with “fs.” or “dfs.” will be picked up by DeltaTable to access the file system when executing queries. Other configurations will not be allowed.

Returns:	
loaded Delta table

Return type:
DeltaTable

In [None]:
# Load the Delta table by file-system path rather than by catalog name.
# NOTE(review): the relative path presumably points at a table written by an
# earlier notebook — confirm it exists before running this cell.
dt = DeltaTable.forPath(spark, 'extract/01delta')

# Turn the table into a DataFrame and display its rows.
path_table_df = dt.toDF()
path_table_df.toPandas()

In [None]:
# Display the DeltaTable object itself (its repr), not its rows.
dt

## [forName](https://docs.delta.io/latest/api/python/index.html#delta.tables.DeltaTable.forName)

forName(sparkSession: pyspark.sql.session.SparkSession, tableOrViewName: str) → delta.tables.DeltaTable
Instantiate a DeltaTable object using the given table or view name. If the given tableOrViewName is invalid (i.e. either no table exists or an existing table is not a Delta table), it throws a "not a Delta table" error.

The given tableOrViewName can also be the absolute path of a delta datasource (i.e. delta.`path`). If so, it instantiates a DeltaTable object representing the data at the given path (consistent with forPath).

Parameters:	
- sparkSession – SparkSession to use for loading the table
- tableOrViewName – name of the table or view

Returns:	
loaded Delta table

Return type:	
DeltaTable

In [None]:
# Rebind dt to the catalog table "02deltatable", looked up by name this time.
dt = DeltaTable.forName(spark, "02deltatable")

# Turn the table into a DataFrame and display its rows.
named_table_df = dt.toDF()
named_table_df.toPandas()

In [None]:
# Run DESCRIBE on the table and display the result as a pandas frame.
describe_df = spark.sql("DESCRIBE 02deltatable")
describe_df.toPandas()

## [isDeltaTable](https://docs.delta.io/latest/api/python/index.html#delta.tables.DeltaTable.isDeltaTable)

isDeltaTable(sparkSession: pyspark.sql.session.SparkSession, identifier: str) → bool
Check if the provided identifier string, in this case a file path, is the root of a Delta table using the given SparkSession.

Parameters:	
- sparkSession – SparkSession to use to perform the check
- identifier – location of the table

Returns:	
Whether the table is a Delta table

Return type:	
bool

In [None]:
# Ask Delta whether the given path is the root of a Delta table; show the boolean.
is_delta = DeltaTable.isDeltaTable(spark, 'extract/01delta')
is_delta

## Show Tables

In [None]:
# List the tables known to the current catalog/database.
tables_df = spark.sql("SHOW TABLES")
tables_df.toPandas()

In [None]:
# Read every row of the catalog table through SQL.
rows_df = spark.sql("SELECT * FROM 02deltatable")
rows_df.toPandas()

In [None]:
# Shell escape: print the working directory.
!pwd

In [None]:
# Query the path-based Delta table through SQL's delta.`<path>` syntax.
# The absolute location is derived from the working directory instead of being
# hard-coded, so the notebook is not tied to one machine's home folder.
# Assumes the working directory is the folder that contains extract/ — the same
# assumption the relative-path DeltaTable.forPath calls in this notebook make.
delta_path = os.path.abspath('extract/01delta')
spark.sql(f"SELECT * FROM delta.`{delta_path}`").toPandas()

In [None]:
# Same rows as the SQL query by name, fetched through the DeltaTable API.
table_by_name = DeltaTable.forName(spark, "02deltatable")
table_by_name.toDF().toPandas()

In [None]:
# Rows of the path-based table, fetched through the DeltaTable API.
table_by_path = DeltaTable.forPath(spark, 'extract/01delta')
table_by_path.toDF().toPandas()

In [None]:
# Show the table properties of the path-based Delta table.
# The absolute location is derived from the working directory instead of being
# hard-coded, so the notebook is not tied to one machine's home folder.
# Assumes the working directory is the folder that contains extract/ — the same
# assumption the relative-path DeltaTable.forPath calls in this notebook make.
delta_path = os.path.abspath('extract/01delta')
spark.sql(f"SHOW TBLPROPERTIES delta.`{delta_path}`").toPandas()

## [detail](https://docs.delta.io/latest/api/python/index.html#delta.tables.DeltaTable.detail)

Get the details of a Delta table such as the format, name, and size.

In [None]:
# Table details via SQL.
detail_df = spark.sql("DESCRIBE DETAIL 02deltatable")
detail_df.toPandas()

In [None]:
# Table details via the DeltaTable API, using whichever table dt currently refers to.
detail_df = dt.detail()
detail_df.toPandas()

## [history](https://docs.delta.io/latest/api/python/index.html#delta.tables.DeltaTable.history)

Get the information of the latest `limit` commits on this table as a Spark DataFrame. The information is in reverse chronological order.

Parameters:	limit – Optional, number of latest commits to return in the history.

Returns:	Table’s commit history. See the online Delta Lake documentation for more details.

Return type:	pyspark.sql.DataFrame

In [None]:
# Commit history of the table via SQL, shown before the insert below.
history_df = spark.sql("DESCRIBE HISTORY 02deltatable")
history_df.toPandas()

In [None]:
# Append one more row to the table through SQL; the cells that follow
# re-read the table history.
insert_sql = """
INSERT INTO 02deltatable (id, name, age, city)
VALUES (1, 'Marcelo', 5, 'Sao Paulo')
"""
spark.sql(insert_sql)

In [None]:
# Commit history again, this time after the insert.
history_df = spark.sql("DESCRIBE HISTORY 02deltatable")
history_df.toPandas()

In [None]:
# History query with LIMIT 1 applied in SQL.
history_sql = """
DESCRIBE HISTORY 02deltatable LIMIT 1
"""
latest_commit_df = spark.sql(history_sql)
latest_commit_df.toPandas()

In [None]:
# Commit history via the DeltaTable API, with no limit argument.
history_df = dt.history()
history_df.toPandas()

In [None]:
# Commit history via the DeltaTable API, passing 1 as the limit.
latest_commit_df = dt.history(1)
latest_commit_df.toPandas()

# describe

In [None]:
# Plain DESCRIBE of the table.
describe_df = spark.sql("DESCRIBE 02deltatable")
describe_df.toPandas()

In [None]:
# DESCRIBE TABLE EXTENDED on the same table.
describe_extended_df = spark.sql("DESCRIBE TABLE EXTENDED 02deltatable")
describe_extended_df.toPandas()