## Quickstart

The code snippets below are taken from the [Delta Lake quickstart guide](https://docs.delta.io/latest/quick-start.html).

In [1]:
import pyspark
from delta import *

In [2]:
# Assemble the session builder step by step: name the app, then register
# Delta Lake's SQL extension and catalog. No session is created here.
builder = pyspark.sql.SparkSession.builder.appName("MyApp")
builder = builder.config(
    "spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension"
)
builder = builder.config(
    "spark.sql.catalog.spark_catalog",
    "org.apache.spark.sql.delta.catalog.DeltaCatalog",
)

In [3]:
# Pass the builder through the delta helper (the log output below shows the
# io.delta package being resolved as a dependency), then obtain the session.
spark = configure_spark_with_delta_pip(builder).getOrCreate()

22/07/26 13:08:20 WARN Utils: Your hostname, Matthews-MacBook-Air.local resolves to a loopback address: 127.0.0.1; using 192.168.86.203 instead (on interface en0)
22/07/26 13:08:20 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address


:: loading settings :: url = jar:file:/Users/powers/.sdkman/candidates/spark/3.2.0/jars/ivy-2.5.0.jar!/org/apache/ivy/core/settings/ivysettings.xml


Ivy Default Cache set to: /Users/powers/.ivy2/cache
The jars for the packages stored in: /Users/powers/.ivy2/jars
io.delta#delta-core_2.12 added as a dependency
:: resolving dependencies :: org.apache.spark#spark-submit-parent-ec1e1f32-1c8c-4a4a-a270-cc9a7be9af47;1.0
	confs: [default]
	found io.delta#delta-core_2.12;2.0.0 in central
	found io.delta#delta-storage;2.0.0 in central
	found org.antlr#antlr4-runtime;4.8 in central
	found org.codehaus.jackson#jackson-core-asl;1.9.13 in central
downloading https://repo1.maven.org/maven2/io/delta/delta-core_2.12/2.0.0/delta-core_2.12-2.0.0.jar ...
	[SUCCESSFUL ] io.delta#delta-core_2.12;2.0.0!delta-core_2.12.jar (4236ms)
downloading https://repo1.maven.org/maven2/io/delta/delta-storage/2.0.0/delta-storage-2.0.0.jar ...
	[SUCCESSFUL ] io.delta#delta-storage;2.0.0!delta-storage.jar (104ms)
:: resolution report :: resolve 2432ms :: artifacts dl 4360ms
	:: modules in use:
	io.delta#delta-core_2.12;2.0.0 from central in [default]
	io.delta#delta-sto

## Create a table

In [5]:
# Build a single-column frame holding ids 0 through 4 and persist it in
# Delta format at a relative path.
data = spark.range(0, 5)
data.write.save("tmp/delta-table", format="delta")

                                                                                

## Read a table

In [6]:
# Load the Delta table from disk back into a DataFrame.
df = spark.read.load("tmp/delta-table", format="delta")

In [7]:
# Print the loaded rows; the output below is not in id order.
df.show()

+---+
| id|
+---+
|  1|
|  4|
|  2|
|  3|
|  0|
+---+



## Update table - overwrite

In [8]:
# Replace the table's contents with ids 5 through 9 by writing to the same
# path in overwrite mode.
data = spark.range(5, 10)
data.write.save("tmp/delta-table", format="delta", mode="overwrite")

                                                                                

In [9]:
# Re-read the table from the same path and print what it now holds.
df = spark.read.load("tmp/delta-table", format="delta")
df.show()

+---+
| id|
+---+
|  7|
|  6|
|  8|
|  9|
|  5|
+---+



## Conditional update without overwrite

In [10]:
from delta.tables import *
from pyspark.sql.functions import *

In [11]:
# Get a DeltaTable handle for the table stored at this path; the update,
# delete and merge cells below operate through it.
deltaTable = DeltaTable.forPath(spark, "tmp/delta-table")

In [12]:
# Add 100 to each row whose id is even; rows with odd ids are left as they are
deltaTable.update(
    condition=expr("id % 2 == 0"),
    set={
        "id": expr("id + 100"),
    },
)

                                                                                

In [13]:
# Print the table after the conditional update (in the output below, 6 and 8
# appear as 106 and 108).
deltaTable.toDF().show()

+---+
| id|
+---+
|  7|
|  9|
|108|
|106|
|  5|
+---+



In [None]:
# Remove the rows whose id is even
deltaTable.delete(expr("id % 2 == 0"))

In [45]:
# Print the table after the delete; only odd ids remain in the output below.
deltaTable.toDF().show()

+---+
| id|
+---+
|  5|
|  7|
|  9|
+---+



In [46]:
# Upsert ids 0 through 19 into the table: rows whose id already exists are
# updated from the incoming frame, every other incoming row is inserted.
newData = spark.range(0, 20)

(
    deltaTable.alias("oldData")
    .merge(newData.alias("newData"), "oldData.id = newData.id")
    .whenMatchedUpdate(set={"id": col("newData.id")})
    .whenNotMatchedInsert(values={"id": col("newData.id")})
    .execute()
)

In [47]:
# Print the table after the merge; the output below lists ids 0 through 19.
deltaTable.toDF().show()

+---+
| id|
+---+
|  0|
|  1|
|  2|
|  3|
|  4|
|  5|
|  6|
|  7|
|  8|
|  9|
| 10|
| 11|
| 12|
| 13|
| 14|
| 15|
| 16|
| 17|
| 18|
| 19|
+---+



## Read older versions of data using time travel

In [48]:
# Time travel: read the table as of version 0 and print it. The output below
# shows the ids from the initial write (0 through 4).
df = spark.read.load("tmp/delta-table", format="delta", versionAsOf=0)
df.show()

+---+
| id|
+---+
|  4|
|  2|
|  0|
|  3|
|  1|
+---+

