# Spark quick test
This notebook runs quick checks inside the `quay.io/jupyter/pyspark-notebook` container: it prints the Python and PySpark versions, creates a SparkSession, builds and displays a small DataFrame, and writes it as a CSV into the mounted `./docker_portal` folder (`/project/docker_portal` inside the container) so you can verify the host mount. The last cell lists the Jupyter runtime directories.

In [1]:
# Basic environment info
import os
import sys

# Collapse any newline in sys.version so the interpreter banner prints on one line.
# (Replacing the empty string '' would instead insert a space between every character.)
python_version = sys.version.replace('\n', ' ')
print('Python:', python_version)
print('PYTHONPATH=', os.environ.get('PYTHONPATH'))
print('TZ=', os.environ.get('TZ'))
print('Working dir:', os.getcwd())

# List mounted project directory (first 50 entries, sorted by name)
project_dir = '/project'
print('Mounted project dir listing:')
if os.path.isdir(project_dir):
    for p in sorted(os.listdir(project_dir))[:50]:
        print(' -', p)
else:
    # Report the problem and keep going instead of raising FileNotFoundError
    print(' !', project_dir, 'is not an existing directory')

Python:  3 . 1 3 . 7   |   p a c k a g e d   b y   c o n d a - f o r g e   |   ( m a i n ,   S e p     3   2 0 2 5 ,   1 4 : 3 0 : 3 5 )   [ G C C   1 4 . 3 . 0 ] 
PYTHONPATH= /usr/local/spark/python/lib/py4j-0.10.9.9-src.zip:/usr/local/spark/python:/app
TZ= None
Working dir: /home/jovyan
Mounted project dir listing:
 - .env
 - .git
 - .gitignore
 - LICENSE
 - README.md
 - docker-compose.yml
 - docker_portal
 - project
 - spark_test.ipynb


In [2]:
# Confirm that pyspark (and its SparkSession entry point) can be imported,
# and report the installed version. Any failure is printed rather than raised
# so the remaining cells can still be inspected.
try:
    import pyspark
    from pyspark.sql import SparkSession

    print('pyspark version:', pyspark.__version__)
except Exception as import_problem:
    print('pyspark import failed:', import_problem)

pyspark version: 4.0.1


In [3]:
# Obtain a SparkSession (creating one if none exists) and run a tiny DataFrame round-trip
from pyspark.sql import SparkSession

spark = (
    SparkSession.builder
    .appName('test')
    .getOrCreate()
)

rows = [(1, 'a'), (2, 'b'), (3, 'c')]
df = spark.createDataFrame(rows, schema=['id', 'val'])
df.show()
print('Count =', df.count())

# Write the frame as CSV (single partition, with a header row) into the
# mounted folder so the host mount can be verified from outside the container.
# NOTE: `os` comes from the first cell's imports.
out_dir = '/project/docker_portal/spark_test_output'
os.makedirs(out_dir, exist_ok=True)
csv_writer = df.coalesce(1).write.mode('overwrite').option('header', True)
csv_writer.csv(out_dir)
print('Wrote CSV to', out_dir)

+---+---+
| id|val|
+---+---+
|  1|  a|
|  2|  b|
|  3|  c|
+---+---+

Count = 3
Wrote CSV to /project/docker_portal/spark_test_output


In [4]:
# Print Jupyter runtime info (where token/logs live)
# The notebook server writes its runtime files under one of these directories;
# any directory that does not exist is skipped silently.
runtime_dirs = (
    '/home/jovyan/.local/share/jupyter/runtime',
    '/home/jovyan/.jupyter',
)
for base in runtime_dirs:
    if not os.path.exists(base):
        continue
    print('Contents of', base, ':', os.listdir(base))

Contents of /home/jovyan/.local/share/jupyter/runtime : ['jpserver-7-open.html', 'jupyter_cookie_secret', 'jpserver-7.json', 'kernel-bdf80f13-57c3-4806-9ae1-e406b0810d22.json']
Contents of /home/jovyan/.jupyter : ['jupyter_server_config.py']
