In [1]:
from pyspark.sql import SparkSession

spark = SparkSession.builder\
    .master("local")\
        .appName("test")\
            .getOrCreate()
            
data = [
    ('James', '', 'Smith', '1991-04-01', 'M', 3000),
    ('Michael', 'Rose', '', '2000-05-19', 'M', 4000),
    ('Robert', '', 'Williams', '1978-09-05', 'M', 4000),
    ('Maria', 'Anne', 'Jones', '1967-12-01', 'F', 4000),
    ('Jen', 'Mary', 'Brown', '1980-02-17', 'F', -1)
]


columns = ["First Name", "Middle Name", "Last Name", "Dob", "Gender", "Salary"]

df = spark.createDataFrame(data = data, schema = columns)

print(df.printSchema())

root
 |-- First Name: string (nullable = true)
 |-- Middle Name: string (nullable = true)
 |-- Last Name: string (nullable = true)
 |-- Dob: string (nullable = true)
 |-- Gender: string (nullable = true)
 |-- Salary: long (nullable = true)

None


In [2]:
import os
import sys
import subprocess
from pyspark.sql import SparkSession

def check_python():
    print("Python Executable:", sys.executable)
    print("Python Version:", sys.version)

def check_java():
    try:
        # Run 'java -version' command and capture output
        result = subprocess.run(["java", "-version"], capture_output=True, text=True)
        # Java version info is sent to stderr
        if result.stderr:
            print("Java Version Info:")
            print(result.stderr.strip())
        else:
            print("Java version info not found.")
    except FileNotFoundError:
        print("Java is not installed or 'java' command is not in PATH.")

def check_spark():
    try:
        spark = SparkSession.builder.appName("DependencyCheck").getOrCreate()
        print("PySpark Version:", spark.sparkContext.pythonVer)
        print("Spark Version:", spark.version)
        spark.stop()
    except Exception as e:
        print("SparkSession could not be initialized:", str(e))

if __name__ == "__main__":
    print("===== Checking Python =====")
    check_python()
    print("\n===== Checking Java =====")
    check_java()
    print("\n===== Checking Spark and PySpark =====")
    check_spark()


===== Checking Python =====
Python Executable: C:\Users\ASUS\anaconda3\python.exe
Python Version: 3.11.7 | packaged by Anaconda, Inc. | (main, Dec 15 2023, 18:05:47) [MSC v.1916 64 bit (AMD64)]

===== Checking Java =====
Java Version Info:
java version "11.0.27" 2025-04-15 LTS
Java(TM) SE Runtime Environment 18.9 (build 11.0.27+8-LTS-232)
Java HotSpot(TM) 64-Bit Server VM 18.9 (build 11.0.27+8-LTS-232, mixed mode)

===== Checking Spark and PySpark =====
PySpark Version: 3.11
Spark Version: 3.5.6


In [3]:
import os

print("JAVA_HOME:", os.environ.get("JAVA_HOME"))
print("SPARK_HOME:", os.environ.get("SPARK_HOME"))
print("PATH:", os.environ.get("PATH"))

JAVA_HOME: C:\Program Files\Java\jdk-11
SPARK_HOME: C:\spark\spark-3.5.6-bin-hadoop3
PATH: C:\Users\ASUS\anaconda3;C:\Users\ASUS\anaconda3\Library\mingw-w64\bin;C:\Users\ASUS\anaconda3\Library\usr\bin;C:\Users\ASUS\anaconda3\Library\bin;C:\Users\ASUS\anaconda3\Scripts;C:\Program Files\Common Files\Oracle\Java\javapath;C:\Program Files\Microsoft MPI\Bin\;C:\Program Files\NVIDIA\CUDNN\v9.8\bin;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8\bin;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8\libnvvp;C:\Windows\system32;C:\Windows;C:\Windows\System32\Wbem;C:\Windows\System32\WindowsPowerShell\v1.0\;C:\Windows\System32\OpenSSH\;C:\Program Files (x86)\NVIDIA Corporation\PhysX\Common;C:\Program Files\NVIDIA Corporation\NVIDIA NvDLISR;C:\Program Files\dotnet\;C:\Program Files\Git\cmd;C:\WINDOWS\system32;C:\WINDOWS;C:\WINDOWS\System32\Wbem;C:\WINDOWS\System32\WindowsPowerShell\v1.0\;C:\WINDOWS\System32\OpenSSH\;C:\Program Files\Docker\Docker\resources\bin;C:\Program Files\NVI

In [4]:
!pip show pyspark

Name: pyspark
Version: 3.5.1
Summary: Apache Spark Python API
Home-page: https://github.com/apache/spark/tree/master/python
Author: Spark Developers
Author-email: dev@spark.apache.org
License: http://www.apache.org/licenses/LICENSE-2.0
Location: C:\Users\ASUS\anaconda3\Lib\site-packages
Requires: py4j
Required-by: 
