In [None]:
from pyspark.sql import SparkSession
import logging
from typing import List
import os

# Logging setup: grab this module's logger, then ask the root logger to
# report records at INFO level and above.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# JARs handed to the Spark session: the Kyuubi artifacts (common, server
# plugin, Spark SQL engine) together with the Delta Lake ones (delta-spark,
# delta-storage).
kyuubi_jars = [
    "/workspace/delta-jars/kyuubi-common_2.12-1.10.0.jar",
    "/workspace/delta-jars/delta-spark_2.12-3.3.0.jar",
    "/workspace/delta-jars/delta-storage-3.3.0.jar",
    "/workspace/delta-jars/kyuubi-server-plugin-1.10.0.jar",
    "/workspace/delta-jars/kyuubi-spark-sql-engine_2.12-1.10.0.jar",
]

# Drop duplicate paths while keeping the order declared above. Going through
# a plain ``set`` would reorder the entries (string hashing is randomised per
# interpreter run), so the resulting jar order would differ between runs.
unique_jars = list(dict.fromkeys(kyuubi_jars))

# Comma-separated form of the de-duplicated list.
jars = ",".join(unique_jars)


# ``spark.jars`` takes a comma-separated list, but the extraClassPath settings
# are Java classpaths, whose entries are separated by ``os.pathsep``. Passing
# the comma-joined string there would be read as one single, non-existent
# path, so derive a properly separated classpath from ``jars``.
extra_classpath = os.pathsep.join(jars.split(","))

# Builder for a local ("local[*]") Spark session named "DeltaExample",
# configured with the Kyuubi/Delta jars, Kyuubi settings and an S3A endpoint
# pointing at http://minio:9000. The session itself is created later.
builder = (SparkSession.builder
           .appName("DeltaExample")
           .master("local[*]")
           # Debug-oriented S3A settings: one connection, one attempt,
           # per-filesystem caching disabled, detailed exceptions enabled.
           .config("spark.hadoop.fs.s3a.connection.maximum", "1")
           .config("spark.hadoop.fs.s3a.attempts.maximum", "1")
           .config("spark.hadoop.fs.s3a.connection.timeout", "5000")
           .config("spark.hadoop.fs.s3a.impl.disable.cache", "true")
           .config("spark.hadoop.fs.s3a.debug.detailed.exceptions", "true")
           # Add jars directly: comma-separated for spark.jars,
           # os.pathsep-separated for the driver/executor classpaths.
           .config("spark.jars", jars)
           .config("spark.driver.extraClassPath", extra_classpath)
           .config("spark.executor.extraClassPath", extra_classpath)
           # Delta Lake configurations. DeltaCatalog is meant to be used
           # together with the Delta session extension, so register it next
           # to the Kyuubi extension that was already configured.
           # NOTE(review): confirm the jar that provides
           # KyuubiSparkSQLExtension is among the jars listed above.
           .config("spark.sql.extensions",
                   "io.delta.sql.DeltaSparkSessionExtension,"
                   "org.apache.spark.sql.kyuubi.extension.KyuubiSparkSQLExtension")
           .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
           # Kyuubi settings, passed through as Spark configuration entries.
           .config("spark.kyuubi.session.engine.submit.timeout", "300000")
           .config("spark.kyuubi.session.engine.idle.timeout", "3600000")
           .config("spark.kyuubi.frontend.thrift.binary.bind.host", "0.0.0.0")
           .config("spark.kyuubi.frontend.thrift.binary.bind.port", "10009")
           .config("spark.kyuubi.session.engine.initialize.timeout", "600000")
           .config("spark.kyuubi.engine.share.level", "CONNECTION")
           .config("spark.kyuubi.engine.default.database", "default")
           .config("spark.kyuubi.engine.connection.url", "jdbc:hive2://localhost:10009")
           # S3/MinIO configurations.
           # NOTE(review): the access/secret keys are hard-coded; acceptable
           # only for a local sandbox -- load them from the environment or a
           # secret store for anything shared.
           .config("spark.hadoop.fs.s3a.access.key", "minioadmin")
           .config("spark.hadoop.fs.s3a.secret.key", "minioadmin")
           .config("spark.hadoop.fs.s3a.endpoint", "http://minio:9000")
           .config("spark.hadoop.fs.s3a.path.style.access", "true")
           .config("spark.hadoop.fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")
           .config("spark.hadoop.fs.s3a.aws.credentials.provider", "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider")
           .config("spark.hadoop.fs.s3a.connection.ssl.enabled", "false")
           # Additional Delta Lake configurations
           .config("spark.delta.logStore.class", "io.delta.storage.S3SingleDriverLogStore")
           .config("spark.hadoop.fs.s3a.fast.upload", "true")
           .config("spark.hadoop.fs.s3a.multipart.size", "104857600")
           .config("spark.sql.warehouse.dir", "/tmp/spark-warehouse"))

# If a session object named ``spark`` is left over in this namespace,
# shut it down before a new one is created.
if "spark" in locals():
    spark.stop()


# Create (or fetch) the session from the builder, with Hive support enabled.
spark = (
    builder
    .enableHiveSupport()
    .getOrCreate()
)


# Try a simple operation
# print("Testing Spark Connection...")
# test_df = spark.sql("SELECT 1 + 1 as result")
# print(test_df.collect())

# Smoke test: build a two-row DataFrame with columns "id" and "name" and
# print it. The session is stopped in ``finally`` so that a failure while
# creating or showing the DataFrame does not leave the Spark session running.
try:
    data = [(1, "John"), (2, "Jane")]
    df = spark.createDataFrame(data, ["id", "name"])
    df.show()
finally:
    spark.stop()