In [1]:
from pyspark.sql import SparkSession


In [2]:
spark = (
    SparkSession.builder
    .appName("Sorting_Query_Result")
    .config("spark.jars", "postgresql-42.2.20.jar")
    .getOrCreate()
)
emp_data = (
    spark.read.format("jdbc")
    .option("url", "jdbc:postgresql://localhost:5432/sqla")
    .option("dbtable", "emp")
    .option("user", "postgres")
    .option("password", "admin123")
    .option("driver", "org.postgresql.Driver").load()
)

In [3]:
emp_data.createOrReplaceTempView("emp")

In [6]:
# You want to display the names, job, and salaries of employees in department 10 
# in order based on their salary (from lowest to highest)
spark.sql(
    """
    select ename, job, sal 
    from emp
    
    order by sal asc
    """
).show()

+--------+---------+-------+
|   ename|      job|    sal|
+--------+---------+-------+
|Jonathan|   Editor|   null|
|   SMITH|    CLERK| 880.00|
|   JAMES|    CLERK| 950.00|
|   ADAMS|    CLERK|1210.00|
|    WARD| SALESMAN|1250.00|
|  MARTIN| SALESMAN|1250.00|
|  MILLER|    CLERK|1300.00|
|  TURNER| SALESMAN|1500.00|
|   ALLEN| SALESMAN|1600.00|
|   CLARK|  MANAGER|2450.00|
|   BLAKE|  MANAGER|2850.00|
|   JONES|  MANAGER|3272.50|
|   SCOTT|  ANALYST|3300.00|
|    FORD|  ANALYST|3300.00|
|    KING|PRESIDENT|5000.00|
+--------+---------+-------+



In [8]:
# Sorting by Multiple Fields
spark.sql(
    """
    select empno, deptno, sal, ename, job
    from emp
    order by deptno asc, sal desc
    """
).show()

+-----+------+-------+--------+---------+
|empno|deptno|    sal|   ename|      job|
+-----+------+-------+--------+---------+
|    1|  null|   null|Jonathan|   Editor|
| 7839|    10|5000.00|    KING|PRESIDENT|
| 7782|    10|2450.00|   CLARK|  MANAGER|
| 7934|    10|1300.00|  MILLER|    CLERK|
| 7788|    20|3300.00|   SCOTT|  ANALYST|
| 7902|    20|3300.00|    FORD|  ANALYST|
| 7566|    20|3272.50|   JONES|  MANAGER|
| 7876|    20|1210.00|   ADAMS|    CLERK|
| 7369|    20| 880.00|   SMITH|    CLERK|
| 7698|    30|2850.00|   BLAKE|  MANAGER|
| 7499|    30|1600.00|   ALLEN| SALESMAN|
| 7844|    30|1500.00|  TURNER| SALESMAN|
| 7521|    30|1250.00|    WARD| SALESMAN|
| 7654|    30|1250.00|  MARTIN| SALESMAN|
| 7900|    30| 950.00|   JAMES|    CLERK|
+-----+------+-------+--------+---------+



In [16]:
v_views = spark.sql(
    """
    select 
    replace(translate(data, '0123456789', '#'),'#','') as ename,
    replace(data, replace(translate(data, '0123456789', '#'),'#',''), '') as number
    from(
        select ename || ' ' || deptno as data 
        from emp
    ) x 
    """
).show()


+-------+------+
|  ename|number|
+-------+------+
| ALLEN |    30|
|  WARD |    30|
|MARTIN |    30|
| BLAKE |    30|
| CLARK |    10|
|  KING |    10|
|TURNER |    30|
| JAMES |    30|
|MILLER |    10|
|   null|  null|
| SMITH |    20|
| JONES |    20|
| SCOTT |    20|
| ADAMS |    20|
|  FORD |    20|
+-------+------+



In [25]:
# You want to sort results from EMP by COMM, but the field is nullable. 
# You need a way to specify whether nulls sort last:
spark.sql(
    """

    select ename, sal, coalesce(comm,0)
    from 
    (
        select ename, sal, comm, 
        case
            when comm is null then 0
            when comm is not null then 1
            end as checker
        from emp
        order by checker desc, comm asc
        
    ) x
    """
).show()

+--------+-------+---------------------------------------------------------------+
|   ename|    sal|coalesce(CAST(comm AS DECIMAL(12,2)), CAST(0 AS DECIMAL(12,2)))|
+--------+-------+---------------------------------------------------------------+
|  TURNER|1500.00|                                                           0.00|
|   ALLEN|1600.00|                                                         300.00|
|    WARD|1250.00|                                                         500.00|
|  MARTIN|1250.00|                                                        1400.00|
|   BLAKE|2850.00|                                                           0.00|
|   JAMES| 950.00|                                                           0.00|
|   CLARK|2450.00|                                                           0.00|
|  MILLER|1300.00|                                                           0.00|
|Jonathan|   null|                                                           0.00|
|   

In [30]:
# Problem
# You want to sort based on some conditional logic. For example: if JOB is “SALESMAN” 
# you want to sort on COMM; otherwise, you want to sort by SAL. You want to return the following result set:
spark.sql(
    """
    select ename, sal, job, comm, 	     case when job = 'SALESMAN' then comm else sal end as ordered
    from 
    (
        select ename, sal, job, comm,
        case
            when comm is null then 0 
            when comm is not null then 1
            end as checker
        from emp
        order by case when job == 'SALESMAN' then checker asc
    ) x
    """
).show()

ParseException: 
missing ')' at 'order'(line 11, pos 8)

== SQL ==

    select ename, sal, job, comm
    from 
    (
        select ename, sal, job, comm,
        case
            when comm is null then 0 
            when comm is not null then 1
            end as checker
        from emp
        order by case when job == 'SALESMAN' then checker asc
--------^^^
    ) x
    
