# Test Nessie Catalog

This notebook tests if the Nessie catalog is properly configured and accessible.


In [1]:
# Test basic imports
print("Testing basic imports...")
try:
    from pyspark.sql import SparkSession
    print("✅ SparkSession imported")
    print("✅ Basic imports successful!")
except Exception as e:
    print(f"❌ Import error: {e}")


Testing basic imports...
✅ SparkSession imported
✅ Basic imports successful!


In [2]:
# Set AWS environment variables for MinIO access
import os
os.environ['AWS_ACCESS_KEY_ID'] = 'minio'
os.environ['AWS_SECRET_ACCESS_KEY'] = 'minio12345'
os.environ['AWS_DEFAULT_REGION'] = 'us-east-1'
os.environ['AWS_ENDPOINT_URL'] = 'http://minio:9000'
os.environ['AWS_ENDPOINT_URL_S3'] = 'http://minio:9000'
print("✅ AWS environment variables set for MinIO access")


✅ AWS environment variables set for MinIO access


In [3]:
# Initialize Spark session with Iceberg and Nessie support
from pyspark.sql import SparkSession
print("Initializing Spark session...")

spark = SparkSession.builder \
    .appName("NessieCatalogTest") \
    .config("spark.sql.extensions", "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions,org.projectnessie.spark.extensions.NessieSparkSessionExtensions") \
    .config("spark.sql.catalog.nessie", "org.apache.iceberg.spark.SparkCatalog") \
    .config("spark.sql.catalog.nessie.catalog-impl", "org.apache.iceberg.nessie.NessieCatalog") \
    .config("spark.sql.catalog.nessie.uri", "http://nessie:19120/api/v2") \
    .config("spark.sql.catalog.nessie.ref", "main") \
    .config("spark.sql.catalog.nessie.warehouse", "s3a://warehouse/") \
    .config("spark.sql.catalog.nessie.io-impl", "org.apache.iceberg.aws.s3.S3FileIO") \
    .config("spark.sql.catalog.nessie.s3.endpoint", "http://minio:9000") \
    .config("spark.sql.catalog.nessie.s3.path-style-access", "true") \
    .config("spark.hadoop.fs.s3a.access.key", "minio") \
    .config("spark.hadoop.fs.s3a.secret.key", "minio12345") \
    .config("spark.hadoop.fs.s3a.endpoint", "http://minio:9000") \
    .config("spark.hadoop.fs.s3a.path.style.access", "true") \
    .config("spark.hadoop.fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem") \
    .config("spark.hadoop.fs.s3a.connection.ssl.enabled", "false") \
    .getOrCreate()

print("✅ Spark session initialized successfully!")
print(f"Spark version: {spark.version}")


Initializing Spark session...
✅ Spark session initialized successfully!
Spark version: 3.5.0


In [4]:
# Test catalog availability
print("Available catalogs:")
spark.sql("SHOW CATALOGS").show()

print("\nTesting Nessie catalog access:")
try:
    spark.sql("SHOW DATABASES IN nessie").show()
    print("✅ Nessie catalog is accessible!")
except Exception as e:
    print(f"❌ Nessie catalog error: {e}")


Available catalogs:
+-------------+
|      catalog|
+-------------+
|spark_catalog|
+-------------+


Testing Nessie catalog access:
❌ Nessie catalog error: An error occurred while calling o65.sql.
: org.projectnessie.client.http.NessieApiCompatibilityException: API version mismatch, check URI prefix (expected: 1, actual: 2)
	at org.projectnessie.client.http.NessieApiCompatibilityFilter.check(NessieApiCompatibilityFilter.java:78)
	at org.projectnessie.client.http.NessieApiCompatibilityFilter.filter(NessieApiCompatibilityFilter.java:43)
	at org.projectnessie.client.http.impl.BaseHttpRequest.lambda$prepareRequest$0(BaseHttpRequest.java:68)
	at java.base/java.util.Collections$SingletonList.forEach(Collections.java:4966)
	at org.projectnessie.client.http.impl.BaseHttpRequest.prepareRequest(BaseHttpRequest.java:68)
	at org.projectnessie.client.http.impl.jdk11.JavaRequest.executeRequest(JavaRequest.java:95)
	at org.projectnessie.client.http.HttpRequest.get(HttpRequest.java:80)
	at org.projec

In [5]:
# Test creating a simple database
print("Testing database creation:")
try:
    spark.sql("CREATE DATABASE IF NOT EXISTS nessie.test_db")
    print("✅ Database created successfully!")
    
    # Show databases again
    print("\nAvailable databases in nessie catalog:")
    spark.sql("SHOW DATABASES IN nessie").show()
    
except Exception as e:
    print(f"❌ Database creation error: {e}")


Testing database creation:
❌ Database creation error: An error occurred while calling o65.sql.
: org.projectnessie.client.rest.NessieServiceException: Not Found (HTTP/404): 

Additionally, the client-side exception below was caught while decoding the HTTP response:
org.apache.iceberg.shaded.com.fasterxml.jackson.databind.exc.MismatchedInputException: Cannot deserialize value of type `org.projectnessie.error.ImmutableNessieError` from [Unavailable value] (token `JsonToken.NOT_AVAILABLE`)
 at [Source: UNKNOWN; byte offset: #UNKNOWN]
	at org.apache.iceberg.shaded.com.fasterxml.jackson.databind.exc.MismatchedInputException.from(MismatchedInputException.java:59)
	at org.apache.iceberg.shaded.com.fasterxml.jackson.databind.DeserializationContext.reportInputMismatch(DeserializationContext.java:1752)
	at org.apache.iceberg.shaded.com.fasterxml.jackson.databind.DeserializationContext.handleUnexpectedToken(DeserializationContext.java:1526)
	at org.apache.iceberg.shaded.com.fasterxml.jackson.data

In [6]:
# Clean up
spark.stop()
print("✅ Spark session stopped")


✅ Spark session stopped
