In [None]:
import pyspark
from pyspark.conf import SparkConf
from pyspark.sql import SparkSession
import pandas as pd

# Endpoint of the Iceberg REST catalog; handed to Spark as the catalog URI.
CATALOG_URL = "http://server:8080/catalog"
# Management endpoint on the same host (not referenced by the cells visible here).
MANAGEMENT_URL = "http://server:8080/management"
# Warehouse name passed to the catalog configuration.
DEMO_WAREHOUSE = "demo"
# Version string of the installed PySpark package.
SPARK_VERSION = pyspark.__version__
# First two dot-separated components of the Spark version ("major.minor");
# used to select the matching iceberg-spark-runtime artifact.
SPARK_MINOR_VERSION = '.'.join(SPARK_VERSION.split('.')[:2])
# Iceberg release used for both the Spark runtime and the AWS bundle packages.
ICEBERG_VERSION = "1.5.2"

# Connect with Spark

In [None]:
# Spark settings that register an Iceberg REST catalog named "demo-catalog"
# and make it the session default.
config = {
    # Catalog implementation and how to reach it.
    "spark.sql.catalog.demo-catalog": "org.apache.iceberg.spark.SparkCatalog",
    "spark.sql.catalog.demo-catalog.type": "rest",
    "spark.sql.catalog.demo-catalog.uri": CATALOG_URL,
    "spark.sql.catalog.demo-catalog.warehouse": DEMO_WAREHOUSE,
    # File IO implementation used by the catalog (S3-based).
    "spark.sql.catalog.demo-catalog.io-impl": "org.apache.iceberg.aws.s3.S3FileIO",
    # Iceberg SQL extensions and the default catalog for unqualified names.
    "spark.sql.extensions": "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions",
    "spark.sql.defaultCatalog": "demo-catalog",
    # Maven coordinates, comma-separated: the Iceberg Spark runtime matching
    # the installed Spark minor version, plus the Iceberg AWS bundle.
    "spark.jars.packages": ",".join([
        f"org.apache.iceberg:iceberg-spark-runtime-{SPARK_MINOR_VERSION}_2.12:{ICEBERG_VERSION}",
        f"org.apache.iceberg:iceberg-aws-bundle:{ICEBERG_VERSION}",
    ]),
}


In [None]:
# Build a local-mode SparkConf and apply every entry of `config` to it in one call.
spark_config = (
    SparkConf()
    .setMaster('local')
    .setAppName("Iceberg-REST")
    .setAll(list(config.items()))
)

# Obtain a session built from this configuration (getOrCreate returns an
# already-running session if one exists).
spark = SparkSession.builder.config(conf=spark_config).getOrCreate()

## Read and Write Tables

In [None]:
# Create the demo namespace only if it is missing, so this cell is safe to re-run.
spark.sql("CREATE NAMESPACE IF NOT EXISTS spark_demo")
# List the namespaces; converting to pandas makes this the cell's rendered output.
spark.sql("SHOW NAMESPACES").toPandas()

In [None]:
# One-row sample frame with an integer, a string and a float column.
data = pd.DataFrame({
    'id': [1],
    'strings': ['a-string'],
    'floats': [2.2],
})
# Convert the pandas frame into a Spark DataFrame for writing.
sdf = spark.createDataFrame(data)

In [None]:
# Write the sample data to spark_demo.my_table; createOrReplace makes the cell
# re-runnable because an existing table is replaced rather than raising.
sdf.writeTo("spark_demo.my_table").createOrReplace()

In [None]:
# Read the table back and convert it to pandas for display as the cell output.
spark.sql("SELECT * FROM spark_demo.my_table").toPandas()