In [None]:
%pip install pyspark

In [None]:
import configparser
from os import environ
from pyspark import sql
from pyspark.sql.functions import *
import time

# Read connection settings from config.ini.
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed; fail fast with a clear message instead of
# hitting a bare KeyError on the lookups below.
if not config.read('config.ini'):
    raise FileNotFoundError("config.ini not found or unreadable")
warehouse = config['DEFAULT']['warehouse']
secret = config['DEFAULT']['secret']
storageAccountName = config['DEFAULT']['storageAccountName']
hive_uri = config['DEFAULT']['hive_uri']


# Maven coordinates for Iceberg and the Azure storage connectors.
ICEBERG_VERSION = "0.12.0"
DEPENDENCIES = "org.apache.iceberg:iceberg-spark3-runtime:{}".format(ICEBERG_VERSION)
DEPENDENCIES += ",org.apache.hadoop:hadoop-azure:3.2.0"
DEPENDENCIES += ",com.microsoft.azure:azure-storage:7.0.0"
DEPENDENCIES += ",org.apache.hadoop:hadoop-azure-datalake:3.2.0"

# Make the same package list available to the JVM launched by PySpark.
environ['PYSPARK_SUBMIT_ARGS'] = '--packages {} pyspark-shell'.format(DEPENDENCIES)

# Create the Spark session with the jars above.
# The Iceberg SQL extensions are required for the UPDATE statement (and
# row-level DELETE) used later in this notebook. `spark.sql.extensions` is a
# static setting, so it must be supplied on the builder; it only takes effect
# when the session is first created (restart the kernel if one already exists).
spark = (
    sql.SparkSession.builder
    .master("local[8]")
    .config('spark.jars.packages', DEPENDENCIES)
    .config(
        'spark.sql.extensions',
        'org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions',
    )
    .getOrCreate()
)

# Storage credentials: account key for the blob endpoint of the storage account.
spark.conf.set("fs.azure.account.key." + storageAccountName + ".blob.core.windows.net", secret)

# Iceberg catalog settings: replace the built-in session catalog with Iceberg's
# SparkSessionCatalog, backed by the Hive metastore at `hive_uri`.
spark.conf.set("spark.sql.catalog.spark_catalog", "org.apache.iceberg.spark.SparkSessionCatalog")
spark.conf.set("spark.sql.catalog.spark_catalog.type", "hive")
spark.conf.set("spark.sql.catalog.spark_catalog.uri", hive_uri)
spark.conf.set("spark.sql.catalog.spark_catalog.warehouse", warehouse)

In [None]:
# (Re)create the demo Iceberg table `default.sample`, partitioned on `category`.
create_sample_sql = """
CREATE OR REPLACE TABLE default.sample (
    id bigint,
    data string,
    category string)
USING iceberg
PARTITIONED BY (category)"""

spark.sql(create_sample_sql)

In [None]:
# Append two rows to the table, one in each of two categories.
insert_rows_sql = """INSERT INTO default.sample VALUES (1, 'a', 'orders'), (2, 'b', 'product')"""

spark.sql(insert_rows_sql)

In [None]:
# Overwrite `data` for every row whose category is 'orders'.
update_orders_sql = """
UPDATE default.sample
SET data = 'updated_data'
WHERE category = 'orders'"""

spark.sql(update_orders_sql)

In [None]:
# Remove every row in the 'orders' category.
delete_orders_sql = """DELETE FROM default.sample WHERE category = 'orders'"""

spark.sql(delete_orders_sql)

In [None]:
# Dropping the demo table created by this notebook.
# IF EXISTS keeps the cell re-runnable when the table has already been removed.
spark.sql("DROP TABLE IF EXISTS default.sample")

# Writing with DataFrames
See the [Apache Iceberg documentation](https://iceberg.apache.org/spark-writes/#writing-with-dataframes) on writing with DataFrames:
<ul>
    <li>df.writeTo(t).create() is equivalent to CREATE TABLE AS SELECT</li>
    <li>df.writeTo(t).replace() is equivalent to REPLACE TABLE AS SELECT</li>
    <li>df.writeTo(t).append() is equivalent to INSERT INTO</li>
    <li>df.writeTo(t).overwritePartitions() is equivalent to dynamic INSERT OVERWRITE</li>
</ul>
