## Quickstart

Code snippets are from [this guide](https://docs.delta.io/latest/quick-start.html).

In [32]:
import pyspark
from delta import *

In [33]:
# Name the application, then register Delta Lake's SQL extension and catalog
# implementation on the session builder, one setting at a time.
builder = pyspark.sql.SparkSession.builder.appName("MyApp")
builder = builder.config(
    "spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension"
)
builder = builder.config(
    "spark.sql.catalog.spark_catalog",
    "org.apache.spark.sql.delta.catalog.DeltaCatalog",
)

In [34]:
# Pass the builder through the delta helper (judging by its name, it wires in
# the pip-installed Delta Lake package -- confirm against the delta docs),
# then create the session or reuse an existing one.
spark = (
    configure_spark_with_delta_pip(builder)
    .getOrCreate()
)

## Create a table

In [35]:
# Start from a clean slate so the notebook survives "Restart & Run All":
# the write below uses the default save mode, which fails when the path
# already holds data, and the time-travel example further down expects
# version 0 of the table to be exactly the rows written here.
# NOTE(review): assumes the relative path resolves on the local filesystem
# (true for a default local Spark session) -- confirm for other setups.
import shutil

shutil.rmtree("tmp/delta-table", ignore_errors=True)

# Write the ids 0..4 as a new Delta table.
data = spark.range(0, 5)
data.write.format("delta").save("tmp/delta-table")

## Read a table

In [36]:
# Load the Delta table back from the path it was saved to.
df = (
    spark.read
    .format("delta")
    .load("tmp/delta-table")
)

In [37]:
# Print the table contents; note that the rows are not displayed in sorted order.
df.show()

+---+
| id|
+---+
|  4|
|  2|
|  0|
|  3|
|  1|
+---+



## Update table - overwrite

In [38]:
# Replace the table's contents with the ids 5..9 by writing to the same path
# in "overwrite" mode.
data = spark.range(5, 10)
(
    data.write
    .format("delta")
    .mode("overwrite")
    .save("tmp/delta-table")
)

In [39]:
# Re-read the table after the overwrite and print what it now contains.
df = (
    spark.read
    .format("delta")
    .load("tmp/delta-table")
)
df.show()

+---+
| id|
+---+
|  6|
|  5|
|  7|
|  8|
|  9|
+---+



## Conditional update without overwrite

In [40]:
from delta.tables import *
from pyspark.sql.functions import *

In [41]:
# Get a DeltaTable handle for the table stored at this path; the update,
# delete and merge cells below operate through it.
deltaTable = DeltaTable.forPath(
    spark,
    "tmp/delta-table",
)

In [42]:
# Add 100 to every even id; rows with odd ids do not match the condition
# and are left as they are.
deltaTable.update(
    condition=expr("id % 2 == 0"),
    set={"id": expr("id + 100")},
)

In [43]:
# Show the table after the update: the even ids have had 100 added.
deltaTable.toDF().show()

+---+
| id|
+---+
|  5|
|  7|
|108|
|106|
|  9|
+---+



In [44]:
# Remove all rows whose id is even.
deltaTable.delete(
    condition=expr("id % 2 == 0"),
)

                                                                                

In [45]:
# Show the table after the delete: only the odd ids remain.
deltaTable.toDF().show()

+---+
| id|
+---+
|  5|
|  7|
|  9|
+---+



In [46]:
# Upsert (merge) the ids 0..19 into the table: ids that already exist are
# updated from the new data, and ids that are missing are inserted.
newData = spark.range(0, 20)

(
    deltaTable.alias("oldData")
    .merge(newData.alias("newData"), "oldData.id = newData.id")
    .whenMatchedUpdate(set={"id": col("newData.id")})
    .whenNotMatchedInsert(values={"id": col("newData.id")})
    .execute()
)

In [47]:
# Show the table after the merge: it now holds every id from 0 to 19.
deltaTable.toDF().show()

+---+
| id|
+---+
|  0|
|  1|
|  2|
|  3|
|  4|
|  5|
|  6|
|  7|
|  8|
|  9|
| 10|
| 11|
| 12|
| 13|
| 14|
| 15|
| 16|
| 17|
| 18|
| 19|
+---+



## Read older versions of data using time travel

In [48]:
# Time travel: read the table as it was at version 0 (its first write)
# rather than its current state, then print it.
df = (
    spark.read
    .format("delta")
    .option("versionAsOf", 0)
    .load("tmp/delta-table")
)
df.show()

+---+
| id|
+---+
|  4|
|  2|
|  0|
|  3|
|  1|
+---+

