In [None]:
import warnings

from delta.tables import DeltaTable
from pyspark.sql import SparkSession
from pyspark.sql import Window
from pyspark.sql import functions as F
from pyspark.sql.types import *
from pyspark.sql.utils import IllegalArgumentException

# Suppress FutureWarning messages so they do not clutter cell output.
warnings.filterwarnings("ignore", category=FutureWarning)

# Build (or reuse) the SparkSession with the Delta Lake settings:
#   - spark.jars.packages             -> the delta-core artifact to load
#   - spark.sql.extensions            -> Delta's SQL session extension
#   - spark.sql.catalog.spark_catalog -> Delta's catalog implementation
spark = (
    SparkSession.builder
    .appName("DeltaSession")
    .config("spark.jars.packages", "io.delta:delta-core_2.12:2.3.0")
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
    .getOrCreate()
)

In [None]:
# Bind a DeltaTable handle to the table directory (relative path) and
# display the table's current contents as a pandas frame.
delta_path = 'extract/01delta'
dt = DeltaTable.forPath(spark, delta_path)

current_sdf = dt.toDF()
current_sdf.toPandas()

In [None]:
# Show the table's commit history as a pandas frame.
history_sdf = dt.history()
history_sdf.toPandas()

## versionAsOf

In [None]:
# Print the notebook's working directory via the shell.
# NOTE(review): the SQL cells address the table by the absolute path
# /home/jovyan/delta/extract/01delta while the Python API cells use the
# relative path 'extract/01delta'; presumably both refer to the same table
# when this prints /home/jovyan/delta — confirm.
!pwd

In [None]:
# SQL time travel: query the table as of version 0.
version_zero_query = """
SELECT * FROM delta.`/home/jovyan/delta/extract/01delta` 
VERSION AS OF 0
"""
spark.sql(version_zero_query).toPandas()

In [None]:
# DataFrame-reader time travel: load the table as of version 0.
version_zero_reader = spark.read.format("delta").option("versionAsOf", 0)
version_zero_reader.load('extract/01delta').toPandas()

In [None]:
# DataFrame-reader time travel: load the table as of version 4.
version_four_reader = spark.read.format("delta").option("versionAsOf", 4)
version_four_reader.load('extract/01delta').toPandas()

## timestampAsOf

In [None]:
# SQL time travel by timestamp: query the table as of the given instant.
# NOTE(review): the timestamp literal is specific to this table's history;
# it presumably has to be adjusted for a freshly created table — confirm.
timestamp_query = """
SELECT * FROM delta.`/home/jovyan/delta/extract/01delta` 
TIMESTAMP AS OF '2023-05-02 21:19:30.502'
"""
spark.sql(timestamp_query).toPandas()

In [None]:
# DataFrame-reader time travel by timestamp (same instant as the SQL variant).
timestamp_reader = (
    spark.read
    .format("delta")
    .option("timestampAsOf", "2023-05-02 21:19:30.502")
)
timestamp_reader.load('extract/01delta').toPandas()

## restoreToVersion

In [None]:
# Re-bind the DeltaTable handle and display the table's contents
# before any restore is performed.
dt = DeltaTable.forPath(spark, 'extract/01delta')

before_restore_sdf = dt.toDF()
before_restore_sdf.toPandas()

In [None]:
# SQL variant: restore the table to version 1 and display the result
# returned by the RESTORE command.
restore_version_query = """
RESTORE TABLE delta.`/home/jovyan/delta/extract/01delta`
TO VERSION AS OF 1"""
spark.sql(restore_version_query).toPandas()

In [None]:
# Python API variant: restore the table to version 1 through the
# DeltaTable handle and display what the call returns.
restore_version_result = dt.restoreToVersion(1)
restore_version_result.toPandas()

In [None]:
# Open the table again from its path and display its contents.
table_after_restore = DeltaTable.forPath(spark, 'extract/01delta')
table_after_restore.toDF().toPandas()

## restoreToTimestamp

In [None]:
# SQL variant: restore the table to its state as of the given timestamp
# and display the result returned by the RESTORE command.
restore_timestamp_query = """
RESTORE TABLE delta.`/home/jovyan/delta/extract/01delta`
TO TIMESTAMP AS OF '2023-05-02 21:19:30.502'
"""
spark.sql(restore_timestamp_query).toPandas()

In [None]:
# SQL variant of the history lookup, limited to 5 entries.
describe_history_query = """
DESCRIBE HISTORY delta.`/home/jovyan/delta/extract/01delta` LIMIT 5
"""
spark.sql(describe_history_query).toPandas()

In [None]:
# Re-bind the DeltaTable handle and display the table's contents
# before the Python-API timestamp restore.
dt = DeltaTable.forPath(spark, 'extract/01delta')

before_ts_restore_sdf = dt.toDF()
before_ts_restore_sdf.toPandas()

In [None]:
# Python API variant: restore the table to its state as of the given
# timestamp and display what the call returns.
restore_timestamp_result = dt.restoreToTimestamp("2023-05-02 21:19:30.502")
restore_timestamp_result.toPandas()

In [None]:
# Show the table's commit history after the restore operations.
history_after_restore_sdf = dt.history()
history_after_restore_sdf.toPandas()

In [None]:
# Open the table again from its path and display its contents.
table_after_ts_restore = DeltaTable.forPath(spark, 'extract/01delta')
table_after_ts_restore.toDF().toPandas()

## vacuum

In [None]:
# List the table's properties before changing the retention settings.
show_properties_query = "SHOW TBLPROPERTIES delta.`/home/jovyan/delta/extract/01delta`"
spark.sql(show_properties_query).toPandas()

In [None]:
# Set the table's retention properties:
#   - delta.logRetentionDuration         -> "interval 30 days"
#   - delta.deletedFileRetentionDuration -> "interval 7 days"
set_retention_query = """
ALTER TABLE delta.`/home/jovyan/delta/extract/01delta`
SET TBLPROPERTIES(
 delta.logRetentionDuration = "interval 30 days",
 delta.deletedFileRetentionDuration = "interval 7 days"
)
"""
spark.sql(set_retention_query)

In [None]:
# List the table's properties again to see the retention settings applied.
properties_after_query = "SHOW TBLPROPERTIES delta.`/home/jovyan/delta/extract/01delta`"
spark.sql(properties_after_query).toPandas()

In [None]:
# Show the table's history, limited to 3 entries.
recent_history_sdf = dt.history(3)
recent_history_sdf.toPandas()

In [None]:
# sql
# Attempt a VACUUM that retains only 3 hours of removed files. This is below
# the table's deleted-file retention ("interval 7 days", set via
# TBLPROPERTIES), so while Delta's retention-duration safety check is enabled
# the command is rejected with an IllegalArgumentException. The rejection is
# the point of this cell, so catch exactly that exception and display its
# message: the notebook then survives "Restart & Run All", and the cell also
# works if the safety check has already been disabled in this session.
try:
    vacuum_result = spark.sql("VACUUM delta.`/home/jovyan/delta/extract/01delta` RETAIN 3 HOURS")
except IllegalArgumentException as err:
    vacuum_result = None
    print(f"VACUUM rejected by the retention-duration safety check:\n{err}")

vacuum_result

### spark.databricks.delta.retentionDurationCheck.enabled

In [None]:
# Set the retention-duration check flag to "false" for this Spark session.
#
# SQL equivalent:
#   spark.sql("SET spark.databricks.delta.retentionDurationCheck.enabled = false")
#
# Python API:
retention_check_key = "spark.databricks.delta.retentionDurationCheck.enabled"
spark.conf.set(retention_check_key, "false")

In [None]:
# SQL variant: run VACUUM with a 3-hour retention window and display the
# result returned by the command. This cell relies on the retention-duration
# check having been set to "false" on the session beforehand.
vacuum_query = "VACUUM delta.`/home/jovyan/delta/extract/01delta` RETAIN 3 HOURS"
spark.sql(vacuum_query).toPandas()

In [None]:
# Inspect the 5 most recent history entries after the VACUUM.
history_after_vacuum_query = """
DESCRIBE HISTORY delta.`/home/jovyan/delta/extract/01delta` LIMIT 5
"""
spark.sql(history_after_vacuum_query).toPandas()

In [None]:
# Re-bind the DeltaTable handle and display the table's contents
# before the Python-API vacuum.
dt = DeltaTable.forPath(spark, 'extract/01delta')

before_vacuum_sdf = dt.toDF()
before_vacuum_sdf.toPandas()

In [None]:
# Python API variant: vacuum with a retention window of 0 hours.
# NOTE(review): with zero retention, files no longer referenced by the
# current table version are presumably eligible for deletion immediately,
# which would break time travel to older versions — confirm.
zero_retention_hours = 0
dt.vacuum(retentionHours=zero_retention_hours)

In [None]:
# Show the table's full commit history after the vacuum.
history_post_vacuum_sdf = dt.history()
history_post_vacuum_sdf.toPandas()

In [None]:
# Try to time travel back to version 0 after the vacuum.
# NOTE(review): if the 0-hour vacuum removed data files that only version 0
# referenced, this read may fail with a missing-file error — confirm whether
# that failure is the intended demonstration.
post_vacuum_reader = spark.read.format("delta").option("versionAsOf", 0)
post_vacuum_reader.load('extract/01delta').toPandas()

In [None]:
# Try to restore the table to version 0 after the vacuum.
# NOTE(review): this may fail if the files backing version 0 were deleted
# by the 0-hour vacuum — confirm whether that is the intended demonstration.
restore_v0_result = dt.restoreToVersion(0)
restore_v0_result.toPandas()