In [1]:
from pyspark.sql import SparkSession
import sys

# Build (or reuse) the Spark session used for the executor-side import probe.
#
# NOTE: Spark defines no "spark.executor.extraPythonPackages" or
# "spark.driver.extraPythonPackages" property; setting them installed nothing
# and only gave the impression that Spark would ship vaderSentiment. They have
# been dropped. The package must already exist in each executor's Python
# environment, or be shipped through a documented mechanism such as
# "spark.archives" (a packed conda/venv environment) or "spark.submit.pyFiles".
spark = (
    SparkSession.builder
    .appName("Vader Test")
    .config("spark.executor.memory", "4g")
    .config("spark.driver.memory", "4g")
    .config("spark.executor.cores", "3")
    .config("spark.executor.instances", "4")
    # Hand the driver's import path to the executor Python workers. This only
    # helps when those directories exist at the same paths on the worker
    # hosts (presumably a shared filesystem or identical images; verify).
    .config("spark.executorEnv.PYTHONPATH", ":".join(sys.path))
    .getOrCreate()
)

# Check VaderSentiment availability
def check_vader_availability(_):
    """Report whether VADER can be imported and initialised in this worker.

    Intended to be passed to ``RDD.mapPartitions``; the partition iterator
    argument is ignored because only the worker's environment is inspected.

    Returns:
        A one-element list holding ``"Available"`` on success, or
        ``"Not Available: <reason>"`` when the import or the analyzer
        construction fails.
    """
    try:
        from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

        # Construct the analyzer too, not just import it: the constructor
        # loads the package's lexicon data, so an install that imports but
        # lacks readable data files (OSError) is reported instead of failing
        # the Spark task and aborting the whole job.
        SentimentIntensityAnalyzer()
    except (ImportError, OSError) as e:
        return [f"Not Available: {e}"]
    return ["Available"]

# Run the probe once per partition. Four partitions mirror the four requested
# executors, but Spark does not guarantee one partition per executor, so this
# shows per-task availability rather than a strict per-executor inventory.
try:
    rdd = spark.sparkContext.parallelize(range(4), 4)
    result = rdd.mapPartitions(check_vader_availability).collect()

    # collect() returns results in partition order, so `i` is the partition
    # index; the "Executor" label is kept to match the existing output.
    for i, res in enumerate(result):
        print(f"Executor {i}: {res}")
finally:
    # Always release the session (and its executors), even if the job fails.
    spark.stop()


                                                                                

Executor 0: Available
Executor 1: Available
Executor 2: Available
Executor 3: Available
