## Asegurarnos de que Spark y los workers usen exactamente el mismo intérprete de Python

In [None]:
# 1_forzar_python_pyspark.py
# Point PySpark's driver and worker interpreters at the Python running this
# kernel, then (re)create a small local SparkSession bound to `spark`.
import os
import sys

# Force the workers to use the same Python as the kernel.
os.environ["PYSPARK_PYTHON"] = sys.executable
os.environ["PYSPARK_DRIVER_PYTHON"] = sys.executable

# Handy options for local debugging.
os.environ["PYSPARK_SUBMIT_ARGS"] = "--conf spark.sql.shuffle.partitions=2 pyspark-shell"

print("PYSPARK_PYTHON:", os.environ["PYSPARK_PYTHON"])
print("python executable:", sys.executable)

# Safely restart/create the SparkSession.
try:
    from pyspark.sql import SparkSession

    # Stop a session left over from a previous run of this cell, if any.
    if "spark" in globals():
        try:
            spark.stop()
        except Exception as stop_err:
            # Best effort: a failed stop must not prevent creating the new
            # session, but report it instead of hiding it, because a session
            # that was not stopped may be reused by getOrCreate() below.
            print("AVISO: no se pudo detener la SparkSession previa:", repr(stop_err))

    spark = (
        SparkSession.builder
        .master("local[2]")
        .appName("debug_pyspark")
        .config("spark.driver.memory", "2g")
        .config("spark.executor.memory", "2g")
        .config("spark.sql.shuffle.partitions", "2")
        .getOrCreate()
    )
    spark.sparkContext.setLogLevel("WARN")
    print("Spark started:", spark.version, spark.sparkContext.master)
except Exception as e:
    # Surface the failure in the cell output, then re-raise so the notebook
    # still shows the full traceback.
    print("ERROR al crear SparkSession:", repr(e))
    raise


In [None]:
## Opcional 3b) Forzamos la sesión en localhost

In [None]:
# Run this in a new cell after restarting the kernel.
# Creates a local SparkSession whose driver is bound to 127.0.0.1.
import os
import sys

from pyspark import SparkConf
from pyspark.sql import SparkSession

# A kernel restart discards environment variables set earlier in the process,
# so pin the driver and worker interpreters to this kernel's Python again
# before the session is created.
os.environ["PYSPARK_PYTHON"] = sys.executable
os.environ["PYSPARK_DRIVER_PYTHON"] = sys.executable

# If a SparkSession is already open, stop it first. Only the name lookup is
# guarded, so an error raised by stop() itself is not silently swallowed.
try:
    spark
except NameError:
    pass
else:
    spark.stop()

conf = SparkConf()
conf.set("spark.sql.shuffle.partitions", "8")
conf.set("spark.driver.memory", "2g")

# IMPORTANT: forcing the driver host/bind address to localhost avoids
# firewall/hostname problems.
conf.set("spark.driver.host", "127.0.0.1")
conf.set("spark.driver.bindAddress", "127.0.0.1")

spark = (
    SparkSession.builder
    .appName("PySpark-Notebook-Fix")
    .master("local[*]")
    .config(conf=conf)
    .getOrCreate()
)

spark.sparkContext.setLogLevel("WARN")  # use "DEBUG" for more detail
print("Spark creado:", spark.sparkContext.master, "version:", spark.version)
