In [2]:
from pyspark.sql import SparkSession

# Entry point for everything below. getOrCreate() hands back the session
# that is already running in this kernel, if any, instead of starting another.
spark = (
    SparkSession.builder
    .appName("SparkSQLExample")
    .getOrCreate()
)

In [10]:
from pyspark.sql import Row

# Sample employee data, kept as a compact table: one tuple per employee,
# with values in the same order as `employee_fields`.
employee_fields = ("EmpID", "Name", "Department", "Salary")
employee_records = [
    (101, "Ravi", "Sales", 50000),
    (102, "Sneha", "Engineering", 80000),
    (103, "Kabir", "HR", 45000),
    (104, "Anita", "Engineering", 85000),
    (105, "Amit", "Sales", 55000),
]

# Turn each tuple into a keyword-built Row so the column names travel with
# the values and Spark can infer the schema from them.
data = [Row(**dict(zip(employee_fields, record))) for record in employee_records]

df = spark.createDataFrame(data)
df.show()

+-----+-----+-----------+------+
|EmpID| Name| Department|Salary|
+-----+-----+-----------+------+
|  101| Ravi|      Sales| 50000|
|  102|Sneha|Engineering| 80000|
|  103|Kabir|         HR| 45000|
|  104|Anita|Engineering| 85000|
|  105| Amit|      Sales| 55000|
+-----+-----+-----------+------+



In [11]:
# Expose `df` to the SQL engine under the name "employees" so the
# spark.sql(...) queries below can refer to it like a table.
df.createOrReplaceTempView(name="employees")

In [12]:
# Sanity check: read every row and column back through the SQL interface.
all_employees = spark.sql("SELECT * FROM employees")
all_employees.show()

+-----+-----+-----------+------+
|EmpID| Name| Department|Salary|
+-----+-----+-----------+------+
|  101| Ravi|      Sales| 50000|
|  102|Sneha|Engineering| 80000|
|  103|Kabir|         HR| 45000|
|  104|Anita|Engineering| 85000|
|  105| Amit|      Sales| 55000|
+-----+-----+-----------+------+



In [13]:
# Projection + filter: names and salaries of employees earning above 60000.
high_earners = spark.sql("SELECT Name, Salary FROM employees WHERE Salary > 60000")
high_earners.show()

+-----+------+
| Name|Salary|
+-----+------+
|Sneha| 80000|
|Anita| 85000|
+-----+------+



In [14]:
# Aggregation: one output row per department with its mean salary.
avg_salary_by_department = spark.sql(
    "SELECT Department, AVG(Salary) AS AvgSalary FROM employees GROUP BY Department"
)
avg_salary_by_department.show()

+-----------+---------+
| Department|AvgSalary|
+-----------+---------+
|      Sales|  52500.0|
|Engineering|  82500.0|
|         HR|  45000.0|
+-----------+---------+



In [15]:
# Sort all employees from highest to lowest salary.
# The column is referenced with its declared spelling (`Salary`): a lowercase
# `salary` only resolves while spark.sql.caseSensitive is left at its default
# of false, and fails analysis if that setting is turned on.
spark.sql("select * from employees order by Salary desc").show()

+-----+-----+-----------+------+
|EmpID| Name| Department|Salary|
+-----+-----+-----------+------+
|  104|Anita|Engineering| 85000|
|  102|Sneha|Engineering| 80000|
|  105| Amit|      Sales| 55000|
|  101| Ravi|      Sales| 50000|
|  103|Kabir|         HR| 45000|
+-----+-----+-----------+------+



# Global temporary view (registered in the `global_temp` database)

In [16]:
# Register `df` as a global temporary view. Unlike a plain temp view it must
# be addressed through the `global_temp` database when queried.
df.createOrReplaceGlobalTempView(name="employees_global")

In [17]:
# Query the global view through its `global_temp` qualifier, keeping only
# employees earning above 60000.
# `Salary` is spelled as declared in the schema so the query does not depend
# on spark.sql.caseSensitive being false.
spark.sql("SELECT * FROM global_temp.employees_global where Salary>60000").show()

+-----+-----+-----------+------+
|EmpID| Name| Department|Salary|
+-----+-----+-----------+------+
|  102|Sneha|Engineering| 80000|
|  104|Anita|Engineering| 85000|
+-----+-----+-----------+------+



# New Spark Session

In [18]:
# Create a genuinely separate session. SparkSession.builder...getOrCreate()
# would simply return the session that is already running (`spark`), so it
# cannot demonstrate cross-session behaviour. newSession() gives a session
# with its own temp views and SQL configuration on the same SparkContext.
new_spark = spark.newSession()

# Global temp views belong to the application rather than to one session, so
# the view registered through `spark` is visible from `new_spark` as well.
new_spark.sql("SELECT * FROM global_temp.employees_global").show()

+-----+-----+-----------+------+
|EmpID| Name| Department|Salary|
+-----+-----+-----------+------+
|  101| Ravi|      Sales| 50000|
|  102|Sneha|Engineering| 80000|
|  103|Kabir|         HR| 45000|
|  104|Anita|Engineering| 85000|
|  105| Amit|      Sales| 55000|
+-----+-----+-----------+------+

