In [None]:
from pyspark.sql import SparkSession
import logging
from typing import List
import os

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def get_jar_files(jars_home: str) -> List[str]:
    """
    Recursively find all JAR files in the specified directory.

    File names are matched case-insensitively against the ``.jar`` suffix.
    Every match is logged at INFO level; a WARNING is logged when nothing
    is found.

    Args:
        jars_home (str): Root directory to search for JAR files

    Returns:
        List[str]: Sorted list of full paths to JAR files (empty if none)

    Raises:
        FileNotFoundError: If jars_home does not exist or is not a directory
        PermissionError: If a directory in the tree cannot be listed
    """
    def _propagate(error: OSError) -> None:
        # os.walk() swallows listing errors unless an onerror callback is
        # supplied; re-raise so an unreadable directory is reported instead
        # of being silently treated as "no JAR files here".
        raise error

    try:
        # isdir (not exists): a regular file at this path would make
        # os.walk() yield nothing and hide the misconfiguration.
        if not os.path.isdir(jars_home):
            raise FileNotFoundError(f"Directory not found: {jars_home}")

        jar_files = []

        # Walk through directory tree
        for root, _, files in os.walk(jars_home, onerror=_propagate):
            for file in files:
                if file.lower().endswith('.jar'):
                    full_path = os.path.join(root, file)
                    jar_files.append(full_path)
                    logger.info("Found JAR file: %s", full_path)

        if not jar_files:
            logger.warning("No JAR files found in %s", jars_home)

        return sorted(jar_files)  # Sort for consistent ordering

    except PermissionError as e:
        logger.error("Permission denied accessing %s: %s", jars_home, e)
        raise
    except Exception as e:
        # Log-and-re-raise boundary: callers still receive the original error.
        logger.error("Error while scanning for JAR files: %s", e)
        raise
    
# Root directory that is scanned for JAR files
jars_home = '/workspace/delta-jars'

try:
    # Collect every JAR under jars_home; the resulting list is available
    # afterwards for Spark configuration.
    jars = get_jar_files(jars_home)
except Exception as e:
    # Record the failure at this level too, then let it propagate.
    logger.error(f"Failed to load JAR files: {str(e)}")
    raise
else:
    # Report how many JARs were discovered in total
    logger.info(f"Found {len(jars)} JAR files in total")

# Create SparkSession with Kyuubi configurations
# NOTE(review): the extension class below could not be loaded in the recorded
# run (ClassNotFoundException). Kyuubi's documentation names its extension
# `org.apache.kyuubi.sql.KyuubiSparkSQLExtension` - confirm which class the
# JARs under jars_home actually provide before relying on this setting.
builder = (
    SparkSession.builder
    .appName("KyuubiTest")
    .config("spark.sql.extensions", "org.apache.spark.sql.kyuubi.extension.KyuubiSparkSQLExtension")
    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.kyuubi.SparkCatalog")
    .config("spark.kyuubi.session.engine.submit.timeout", "300000")
    .config("spark.kyuubi.session.engine.idle.timeout", "3600000")
    .config("spark.kyuubi.frontend.thrift.binary.bind.host", "0.0.0.0")
    .config("spark.kyuubi.frontend.thrift.binary.bind.port", "10009")
)

# Put the discovered JARs on the driver/executor classpath. Without this the
# list built above is never used and extension classes cannot be resolved.
# spark.jars takes a comma-separated list; it only takes effect when
# getOrCreate() starts a new JVM, not when it reuses an existing session.
if jars:
    builder = builder.config("spark.jars", ",".join(jars))

spark = builder.getOrCreate()

try:
    # Try a simple operation
    # NOTE(review): a Py4JError "Method sql([String, Object[]]) does not exist"
    # here presumably means the installed pyspark package is newer than the
    # Spark JVM it connects to - verify that both versions match.
    print("Testing Spark Connection...")
    test_df = spark.sql("SELECT 1 + 1 as result")
    print(test_df.collect())
finally:
    # Always release the session, even when the test query fails.
    spark.stop()

Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
25/02/20 21:05:17 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
25/02/20 21:05:18 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041.
25/02/20 21:05:19 WARN SparkSession: Cannot use org.apache.spark.sql.kyuubi.extension.KyuubiSparkSQLExtension to configure session extensions.
java.lang.ClassNotFoundException: org.apache.spark.sql.kyuubi.extension.KyuubiSparkSQLExtension
	at java.base/java.net.URLClassLoader.findClass(URLClassLoader.java:445)
	at java.base/java.lang.ClassLoader.loadClass(ClassLoader.java:592)
	at java.base/java.lang.ClassLoader.loadClass(ClassLoader.java:525)
	at java.base/java.lang.Class.forName0(Native Method)
	at java.base/java.lang.Class.forName(Class.java:467)
	at org.apache.spark.util.Utils$.classForName(Utils.scala:225)
	at org.a

Testing Spark Connection...


Py4JError: An error occurred while calling o36.sql. Trace:
py4j.Py4JException: Method sql([class java.lang.String, class [Ljava.lang.Object;]) does not exist
	at py4j.reflection.ReflectionEngine.getMethod(ReflectionEngine.java:321)
	at py4j.reflection.ReflectionEngine.getMethod(ReflectionEngine.java:329)
	at py4j.Gateway.invoke(Gateway.java:274)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
	at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
	at java.base/java.lang.Thread.run(Thread.java:840)

