In [None]:
import os
import warnings

from pyspark.sql import SparkSession
from pyspark.sql.types import *
from pyspark.sql import functions as F
from pyspark.sql import Window
from delta.tables import DeltaTable

# Hide FutureWarning messages so they do not clutter the cell outputs.
warnings.filterwarnings("ignore", category=FutureWarning)

# Build (or reuse) a SparkSession configured with the Delta Lake package,
# the Delta SQL extension and the Delta catalog.
spark = (
    SparkSession.builder
    .appName("DeltaSession")
    .config("spark.jars.packages", "io.delta:delta-core_2.12:2.3.0")
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
    .getOrCreate()
)

In [None]:
# Open the Delta table stored at a path relative to the working directory,
# keep the handle in `dt` for later cells, and show its rows as a pandas DataFrame.
relative_path = 'extract/01delta'
dt = DeltaTable.forPath(spark, relative_path)
dt.toDF().toPandas()

## Delete 

In [None]:
# Delete the rows matching a predicate given as a SQL-formatted string.
# The predicate can also be a Column expression, e.g.:
#   dt.delete(F.col('id') == 3)
dt.delete(condition="id == 2")

In [None]:
# Shell escape: print the notebook's current working directory.
# NOTE(review): presumably used to find the absolute form of 'extract/01delta' — confirm.
!pwd

In [None]:
# Query the path-based Delta table through SQL. The absolute path is derived
# from the working directory so it points at the same 'extract/01delta'
# location the DeltaTable.forPath cells use, instead of a machine-specific
# hard-coded directory.
table_path = os.path.abspath('extract/01delta')
spark.sql(f"SELECT * FROM delta.`{table_path}`").toPandas()

In [None]:
# Re-open the Delta table at 'extract/01delta' and display its current rows.
(
    DeltaTable.forPath(spark, 'extract/01delta')
    .toDF()
    .toPandas()
)

In [None]:
# Display the commit history of the table referenced by `dt`.
history_df = dt.history()
history_df.toPandas()

## Append

In [None]:
# Append one row to the path-based Delta table via SQL. The absolute path is
# derived from the working directory (same 'extract/01delta' location the
# DeltaTable.forPath cells use) rather than hard-coded for one machine.
table_path = os.path.abspath('extract/01delta')
spark.sql(f"""
INSERT INTO delta.`{table_path}` (id, name, age, city)
VALUES (7, 'Marcelo', 5, 'Sao Paulo')
""")

In [None]:
# Re-open the Delta table at 'extract/01delta' and display its current rows.
(
    DeltaTable.forPath(spark, 'extract/01delta')
    .toDF()
    .toPandas()
)

In [None]:
# Delete the rows whose id is 7 from the table referenced by `dt`.
dt.delete(condition="id == 7")

In [None]:
# Re-open the Delta table at 'extract/01delta' and display its current rows.
(
    DeltaTable.forPath(spark, 'extract/01delta')
    .toDF()
    .toPandas()
)

In [None]:
# List the table properties of the path-based Delta table. The absolute path
# is derived from the working directory (same 'extract/01delta' location the
# DeltaTable.forPath cells use) rather than hard-coded for one machine.
table_path = os.path.abspath('extract/01delta')
spark.sql(f"SHOW TBLPROPERTIES delta.`{table_path}`").toPandas()

In [None]:
# Turn on the delta.appendOnly table property for the path-based Delta table.
# The absolute path is derived from the working directory (same
# 'extract/01delta' location the DeltaTable.forPath cells use) rather than
# hard-coded for one machine.
table_path = os.path.abspath('extract/01delta')
spark.sql(f"""
ALTER TABLE delta.`{table_path}`
SET TBLPROPERTIES (delta.appendOnly=true)
""")

In [None]:
# List the table properties of the path-based Delta table. The absolute path
# is derived from the working directory (same 'extract/01delta' location the
# DeltaTable.forPath cells use) rather than hard-coded for one machine.
table_path = os.path.abspath('extract/01delta')
spark.sql(f"SHOW TBLPROPERTIES delta.`{table_path}`").toPandas()

In [None]:
# Re-open the Delta table at 'extract/01delta' and display its current rows.
(
    DeltaTable.forPath(spark, 'extract/01delta')
    .toDF()
    .toPandas()
)

In [None]:
# An earlier cell set delta.appendOnly=true on this table, so Delta is expected
# to reject this delete. Catch and show the error instead of letting it abort
# a "Restart & Run All".
try:
    dt.delete("id == 1")
except Exception as exc:  # exact type depends on how PySpark/py4j surface the JVM error
    # Print only the first lines; the full message may carry a long JVM stack trace.
    print("Delete rejected:", type(exc).__name__)
    print("\n".join(str(exc).splitlines()[:3]))

In [None]:
# Re-open the Delta table at 'extract/01delta' and display its current rows.
(
    DeltaTable.forPath(spark, 'extract/01delta')
    .toDF()
    .toPandas()
)

### New Table

In [None]:
# Set the session config that supplies the default `appendOnly` table property
# to "true" (intended to apply to Delta tables created afterwards in this session).
# SQL alternative:
#   spark.sql("""
#   SET spark.databricks.delta.properties.defaults.appendOnly = true
#   """)
append_only_default_key = "spark.databricks.delta.properties.defaults.appendOnly"
spark.conf.set(append_only_default_key, "true")

In [None]:
# Create the managed Delta table 04deltatable if it does not exist yet.
create_table_sql = """
CREATE TABLE IF NOT EXISTS 04deltatable (
  id INT,
  name STRING,
  age INT,
  city STRING
) USING DELTA
"""
spark.sql(create_table_sql)

In [None]:
# List the table properties of 04deltatable.
(
    spark.sql("SHOW TBLPROPERTIES 04deltatable")
    .toPandas()
)

In [None]:
# Append a single row (id 1) to 04deltatable.
insert_sql = """
INSERT INTO 04deltatable (id, name, age, city)
VALUES (1, 'Marcelo', 5, 'Sao Paulo')
"""
spark.sql(insert_sql)

In [None]:
# Display all rows of 04deltatable.
(
    spark.sql("SELECT * FROM 04deltatable")
    .toPandas()
)

In [None]:
# 04deltatable was created while the session default for appendOnly was "true",
# so this DELETE is expected to be rejected. Catch and show the error instead of
# letting it abort a "Restart & Run All". If the delete does succeed (e.g. the
# table already existed with appendOnly disabled), its result is displayed.
delete_result = None
try:
    delete_result = spark.sql("DELETE FROM 04deltatable WHERE id = 1").toPandas()
except Exception as exc:  # exact type depends on how PySpark/py4j surface the JVM error
    # Print only the first lines; the full message may carry a long JVM stack trace.
    print("Delete rejected:", type(exc).__name__)
    print("\n".join(str(exc).splitlines()[:3]))
delete_result

In [None]:
# Switch the delta.appendOnly property of 04deltatable to false.
alter_sql = """
ALTER TABLE 04deltatable 
SET TBLPROPERTIES (delta.appendOnly=false)
"""
spark.sql(alter_sql)

In [None]:
# List the table properties of 04deltatable.
(
    spark.sql("SHOW TBLPROPERTIES 04deltatable")
    .toPandas()
)

In [None]:
# Delete the row with id 1 from 04deltatable and display the statement's result.
(
    spark.sql("DELETE FROM 04deltatable WHERE id = 1")
    .toPandas()
)

In [None]:
# Display all rows of 04deltatable.
(
    spark.sql("SELECT * FROM 04deltatable")
    .toPandas()
)

## Update

In [None]:
# List the table properties of 04deltatable.
(
    spark.sql("SHOW TBLPROPERTIES 04deltatable")
    .toPandas()
)

In [None]:
# Append a single row (id 2) to 04deltatable.
insert_sql = """
INSERT INTO 04deltatable (id, name, age, city)
VALUES (2, 'Marcelo', 5, 'Sao Paulo')
"""
spark.sql(insert_sql)

In [None]:
# Display all rows of 04deltatable.
(
    spark.sql("SELECT * FROM 04deltatable")
    .toPandas()
)

In [None]:
# Three ways to express an update; only the last one (plain SQL) is executed.
#
# 1) DeltaTable API, condition as a SQL-formatted string:
# dt.update(
#     condition = "id = '1'",
#     set = { "city": "'Xaxim'" } )
#
# 2) DeltaTable API, condition built with Spark SQL functions:
# dt.update(
#     condition = F.col("id") == "1",
#     set = { "city": F.lit("Xaxim")}
# )
#
# 3) Plain SQL: set city to 'Xaxim' for the row with id 2 in 04deltatable.
update_sql = """
UPDATE 04deltatable 
SET city = 'Xaxim'
WHERE id = 2
"""
spark.sql(update_sql)

In [None]:
# Display all rows of 04deltatable.
(
    spark.sql("SELECT * FROM 04deltatable")
    .toPandas()
)

## New Table and Update

In [None]:
# Set the session config that supplies the default `appendOnly` table property
# back to "false" (Python API form).
append_only_default_key = "spark.databricks.delta.properties.defaults.appendOnly"
spark.conf.set(append_only_default_key, "false")

In [None]:
# Create the managed Delta table 04deltatable2 if it does not exist yet.
create_table_sql = """
CREATE TABLE IF NOT EXISTS 04deltatable2 (
  id INT,
  name STRING,
  age INT,
  city STRING
) USING DELTA
"""
spark.sql(create_table_sql)

In [None]:
# List the table properties of 04deltatable2.
(
    spark.sql("SHOW TBLPROPERTIES 04deltatable2")
    .toPandas()
)

In [None]:
# Display all rows of 04deltatable2.
(
    spark.sql("SELECT * FROM 04deltatable2")
    .toPandas()
)

In [None]:
# Append four sample rows (ids 10-13) to 04deltatable2.
insert_rows_sql = """
INSERT INTO 04deltatable2 (id, name, age, city)
VALUES
(10, 'Carlos',15, 'Sao Paulo'),
(11, 'Maria', 42, 'Cuiaba'),
(12, 'Jorge', 75, 'Manaus'),
(13, 'Lucia', 34, 'Brasilia')
"""
spark.sql(insert_rows_sql)

In [None]:
# Display all rows of 04deltatable2, sorted by id.
(
    spark.sql("SELECT * FROM 04deltatable2")
    .orderBy('id')
    .toPandas()
)

In [None]:
# Rebind `dt` to the catalog table 04deltatable2 (it previously referred to the
# path-based table) and show its rows as a pandas DataFrame.
table_name = '04deltatable2'
dt = DeltaTable.forName(spark, table_name)
dt.toDF().toPandas()

In [None]:
# For every row whose id is even, add 100 to the id.
is_even_id = F.expr("id % 2 == 0")
new_values = {"id": F.expr("id + 100")}
dt.update(condition=is_even_id, set=new_values)

In [None]:
# Look up 04deltatable2 by name again and display its current rows.
(
    DeltaTable.forName(spark, '04deltatable2')
    .toDF()
    .toPandas()
)

In [None]:
# Remove every row whose id is even.
is_even_id = F.expr("id % 2 == 0")
dt.delete(condition=is_even_id)

In [None]:
# Look up 04deltatable2 by name again and display its current rows.
(
    DeltaTable.forName(spark, '04deltatable2')
    .toDF()
    .toPandas()
)