In [None]:
from pyspark.sql import SparkSession
import os

# --- Configuration -----------------------------------------------------------
# Secrets are read from the environment so they are not stored in the notebook.
#   POLARIS_CREDENTIAL : "<client_id>:<client_secret>" for the Polaris principal
#                        (required; there is intentionally no default).
#   MINIO_ACCESS_KEY / MINIO_SECRET_KEY : object-store keys; they default to
#                        "minioadmin", the value this notebook used previously.
POLARIS_CREDENTIAL = os.environ.get("POLARIS_CREDENTIAL")
if not POLARIS_CREDENTIAL:
    raise RuntimeError(
        "POLARIS_CREDENTIAL is not set. Export it as "
        "'<client_id>:<client_secret>' before running this cell."
    )

MINIO_ACCESS_KEY = os.environ.get("MINIO_ACCESS_KEY", "minioadmin")
MINIO_SECRET_KEY = os.environ.get("MINIO_SECRET_KEY", "minioadmin")

# Set AWS credentials and region for SDK v2.
# These are assigned unconditionally (as before) so that unrelated AWS
# credentials already present in the environment are not used here.
os.environ['AWS_REGION'] = 'us-east-1'
os.environ['AWS_ACCESS_KEY_ID'] = MINIO_ACCESS_KEY
os.environ['AWS_SECRET_ACCESS_KEY'] = MINIO_SECRET_KEY

# --- Spark session: Polaris + Iceberg with MinIO -----------------------------
# Parenthesised chain instead of backslash continuations: a stray trailing
# space after a backslash would otherwise be a syntax error.
spark = (
    SparkSession.builder
    .appName("Polaris-Iceberg-MinIO")
    .config(
        "spark.jars.packages",
        "org.apache.iceberg:iceberg-spark-runtime-3.4_2.12:1.4.3,"
        "org.apache.hadoop:hadoop-aws:3.3.4,"
        "com.amazonaws:aws-java-sdk-bundle:1.12.262,"
        "org.apache.iceberg:iceberg-aws-bundle:1.4.3",
    )
    # Catalog named "polaris", served through the Iceberg REST catalog client.
    .config("spark.sql.catalog.polaris", "org.apache.iceberg.spark.SparkCatalog")
    .config("spark.sql.catalog.polaris.catalog-impl", "org.apache.iceberg.rest.RESTCatalog")
    .config("spark.sql.catalog.polaris.uri", "http://host.docker.internal:8181/api/catalog")
    .config("spark.sql.catalog.polaris.credential", POLARIS_CREDENTIAL)
    .config("spark.sql.catalog.polaris.warehouse", "my_catalog")
    .config("spark.sql.catalog.polaris.scope", "PRINCIPAL_ROLE:ALL")
    # Hadoop S3A settings pointing at the MinIO endpoint; the keys come from
    # the same variables as the AWS_* environment entries above.
    .config("spark.hadoop.fs.s3.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")
    .config("spark.hadoop.fs.s3a.endpoint", "http://minio:9000")
    .config("spark.hadoop.fs.s3a.access.key", MINIO_ACCESS_KEY)
    .config("spark.hadoop.fs.s3a.secret.key", MINIO_SECRET_KEY)
    .config("spark.hadoop.fs.s3a.path.style.access", "true")
    .config("spark.hadoop.fs.s3a.connection.ssl.enabled", "false")
    .config("spark.hadoop.fs.s3a.aws.credentials.provider", "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider")
    .getOrCreate()
)

print("✓ Spark session created!")
print("✓ Connected to Polaris REST catalog")
print()

# --- Smoke test against the catalog ------------------------------------------
# Show namespaces
print("=== Current Namespaces ===")
spark.sql("SHOW NAMESPACES IN polaris").show()

# Create namespace (no-op when it already exists)
spark.sql("CREATE NAMESPACE IF NOT EXISTS polaris.test_db")
print("✓ Namespace 'test_db' created!")
print()

# Create table (no-op when it already exists)
spark.sql("""
    CREATE TABLE IF NOT EXISTS polaris.test_db.test_table (
        id INT,
        name STRING
    ) USING iceberg
""")
print("✓ Table 'test_table' created!")
print()

# Insert data.
# INSERT OVERWRITE rather than INSERT INTO: a plain append adds two more rows
# every time this cell is re-run, so the query below would stop matching the
# expected two-row result. Overwriting keeps the cell idempotent.
spark.sql("INSERT OVERWRITE polaris.test_db.test_table VALUES (1, 'Hello'), (2, 'World')")
print("✓ Data inserted!")
print()

# Query data
print("=== Query Results ===")
spark.sql("SELECT * FROM polaris.test_db.test_table").show()

✓ Spark session created!
✓ Connected to Polaris REST catalog

=== Current Namespaces ===
+---------+
|namespace|
+---------+
|  test_db|
+---------+

✓ Namespace 'test_db' created!

✓ Table 'test_table' created!

✓ Data inserted!

=== Query Results ===
+---+-----+
| id| name|
+---+-----+
|  1|Hello|
|  2|World|
+---+-----+



In [4]:
# Inspect polaris.test_db.test_table: row count, extended description, and the
# Iceberg `history` / `snapshots` metadata tables. Each query result is named
# first and then printed.

# Total number of rows
row_count_df = spark.sql("SELECT COUNT(*) as total FROM polaris.test_db.test_table")
row_count_df.show()

# Table metadata — up to 100 rows, values not truncated
table_description_df = spark.sql("DESCRIBE EXTENDED polaris.test_db.test_table")
table_description_df.show(n=100, truncate=False)

# Table history (Iceberg time travel feature)
table_history_df = spark.sql("SELECT * FROM polaris.test_db.test_table.history")
table_history_df.show(truncate=False)

# Snapshots
table_snapshots_df = spark.sql("SELECT * FROM polaris.test_db.test_table.snapshots")
table_snapshots_df.show(truncate=False)

+-----+
|total|
+-----+
|    2|
+-----+

+----------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------+-------+
|col_name                    |data_type                                                                                                                                                       |comment|
+----------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------+-------+
|id                          |int                                                                                                                                                             |NULL   |
|name                        |string                                                                                                                           