In [1]:
# Run the shared setup notebook before anything else in this notebook.
# NOTE(review): the text after the notebook path ("import display_table") is
# passed to %run as arguments — presumably the helper is what makes
# display_table available to later cells; confirm in helper/setup_notebook.ipynb.
%run helper/setup_notebook.ipynb import display_table

Successfully connected to leetcode50 database.


In [2]:
# Preview the two input tables before writing any queries.
tables = ["Employee", "Department"]
for table_name in tables:
    display_table(table_name)

+----+-------+--------+--------------+
| id |  name | salary | departmentId |
+----+-------+--------+--------------+
| 1  |  Joe  | 85000  |      1       |
| 2  | Henry | 80000  |      2       |
| 3  |  Sam  | 60000  |      2       |
| 4  |  Max  | 90000  |      1       |
| 5  | Janet | 69000  |      1       |
| 6  | Randy | 85000  |      1       |
| 7  |  Will | 70000  |      1       |
+----+-------+--------+--------------+
+----+-------+
| id |  name |
+----+-------+
| 1  |   IT  |
| 2  | Sales |
+----+-------+


#### *A company's executives are interested in seeing who earns the most money in each of the company's departments. A high earner in a department is an employee who has a salary in the top three unique salaries for that department.*

### Write an SQL query to find the employees who are high earners in each of the departments.

```
+------------+----------+--------+
| Department | Employee | Salary |
+------------+----------+--------+
| IT         | Max      | 90000  |
| IT         | Joe      | 85000  |
| IT         | Randy    | 85000  |
| IT         | Will     | 70000  |
| Sales      | Henry    | 80000  |
| Sales      | Sam      | 60000  |
+------------+----------+--------+
Explanation: 
In the IT department:
- Max earns the highest unique salary
- Both Randy and Joe earn the second-highest unique salary
- Will earns the third-highest unique salary

In the Sales department:
- Henry earns the highest salary
- Sam earns the second-highest salary
- There is no third-highest salary as there are only two employees
```

In [3]:
%%sql

-- Baseline: pair every employee with the name of their department.
SELECT
    dept.name  AS Department,
    emp.name   AS Employee,
    emp.salary AS Salary
FROM Employee AS emp
INNER JOIN Department AS dept
    ON dept.id = emp.departmentId

Department,Employee,Salary
IT,Joe,85000
Sales,Henry,80000
Sales,Sam,60000
IT,Max,90000
IT,Janet,69000
IT,Randy,85000
IT,Will,70000


In [4]:
%%sql

-- Every salary paid in the IT department (duplicates kept).
SELECT
    emp.salary
FROM Employee AS emp
INNER JOIN Department AS dept
    ON dept.id = emp.departmentId
WHERE dept.name = 'IT'

salary
85000
90000
69000
85000
70000


In [5]:
%%sql

-- The three highest distinct salaries in IT.
SELECT DISTINCT
    emp.salary
FROM Employee AS emp
INNER JOIN Department AS dept
    ON dept.id = emp.departmentId
WHERE dept.name = 'IT'
ORDER BY emp.salary DESC
LIMIT 3

salary
90000
85000
70000


In [6]:
%%sql

-- The three highest distinct salaries in Sales (fewer rows come back
-- when the department has fewer than three distinct salaries).
SELECT DISTINCT
    emp.salary
FROM Employee AS emp
INNER JOIN Department AS dept
    ON dept.id = emp.departmentId
WHERE dept.name = 'Sales'
ORDER BY emp.salary DESC
LIMIT 3

salary
80000
60000


In [7]:
%%sql

-- IT high earners: keep everyone paid at least the third-highest
-- distinct IT salary (LIMIT 1 OFFSET 2 skips the top two values).
SELECT
    dept.name  AS Department,
    emp.name   AS Employee,
    emp.salary AS Salary
FROM Employee AS emp
INNER JOIN Department AS dept
    ON dept.id = emp.departmentId
WHERE dept.name = 'IT'
  AND emp.salary >= (
        SELECT DISTINCT
            peer.salary
        FROM Employee AS peer
        INNER JOIN Department AS peer_dept
            ON peer_dept.id = peer.departmentId
        WHERE peer_dept.name = 'IT'
        ORDER BY peer.salary DESC
        LIMIT 1 OFFSET 2
      )
ORDER BY emp.salary DESC;


Department,Employee,Salary
IT,Max,90000
IT,Joe,85000
IT,Randy,85000
IT,Will,70000


In [8]:
%%sql

-- Sales high earners: the cutoff is the second-highest distinct Sales
-- salary (LIMIT 1 OFFSET 1). The subquery uses its own aliases so it
-- no longer shadows the outer query's names.
SELECT
    dept.name  AS Department,
    emp.name   AS Employee,
    emp.salary AS Salary
FROM Employee AS emp
INNER JOIN Department AS dept
    ON dept.id = emp.departmentId
WHERE dept.name = 'Sales'
  AND emp.salary >= (
        SELECT DISTINCT
            peer.salary
        FROM Employee AS peer
        INNER JOIN Department AS peer_dept
            ON peer_dept.id = peer.departmentId
        WHERE peer_dept.name = 'Sales'
        ORDER BY peer.salary DESC
        LIMIT 1 OFFSET 1
      )
ORDER BY emp.salary DESC

Department,Employee,Salary
Sales,Henry,80000
Sales,Sam,60000


In [9]:
%%sql

-- Solution failing test case.
-- NOTE(review): presumably it fails because the department names and the
-- per-department OFFSET are hard-coded to this sample data. A scalar subquery
-- whose OFFSET skips past the last distinct salary yields NULL, and
-- `salary >= NULL` keeps no rows, which is why Sales needs OFFSET 1 here.
SELECT
    dept.name  AS Department,
    emp.name   AS Employee,
    emp.salary AS Salary
FROM Employee AS emp
INNER JOIN Department AS dept
    ON dept.id = emp.departmentId
WHERE (
        dept.name = 'IT'
    AND emp.salary >= (
            SELECT DISTINCT
                it_emp.salary
            FROM Employee AS it_emp
            INNER JOIN Department AS it_dept
                ON it_dept.id = it_emp.departmentId
            WHERE it_dept.name = 'IT'
            ORDER BY it_emp.salary DESC
            LIMIT 1 OFFSET 2
        )
)
OR (
        dept.name = 'Sales'
    AND emp.salary >= (
            SELECT DISTINCT
                sales_emp.salary
            FROM Employee AS sales_emp
            INNER JOIN Department AS sales_dept
                ON sales_dept.id = sales_emp.departmentId
            WHERE sales_dept.name = 'Sales'
            ORDER BY sales_emp.salary DESC
            LIMIT 1 OFFSET 1
        )
)
ORDER BY emp.salary DESC;


Department,Employee,Salary
IT,Max,90000
IT,Joe,85000
IT,Randy,85000
Sales,Henry,80000
IT,Will,70000
Sales,Sam,60000


## Solution using DENSE_RANK (CTE and subquery variants)

In [10]:
%%sql

-- Rank salaries inside each department. DENSE_RANK gives tied salaries the
-- same rank and leaves no gaps, so rank <= 3 means "top three distinct salaries".
-- Inner join (same as the CTE solution built from this query) so employees
-- without a matching Department row are not ranked under a NULL department.
SELECT
    d.name AS Department,
    e.name AS Employee,
    e.salary AS Salary,
    DENSE_RANK() OVER (PARTITION BY e.departmentId ORDER BY e.salary DESC) AS salary_rank
FROM Employee e
JOIN Department d ON e.departmentId = d.id

Department,Employee,Salary,salary_rank
IT,Max,90000,1
IT,Joe,85000,2
IT,Randy,85000,2
IT,Will,70000,3
IT,Janet,69000,4
Sales,Henry,80000,1
Sales,Sam,60000,2


In [11]:
%%sql

-- Rank each employee's salary within their department (ties share a rank),
-- then keep only ranks 1 through 3.
WITH salary_rank_cte AS (
    SELECT
        dept.name  AS Department,
        emp.name   AS Employee,
        emp.salary AS Salary,
        DENSE_RANK() OVER (
            PARTITION BY emp.departmentId
            ORDER BY emp.salary DESC
        ) AS salary_rank
    FROM Employee AS emp
    INNER JOIN Department AS dept
        ON dept.id = emp.departmentId
)
SELECT
    Department,
    Employee,
    Salary
FROM salary_rank_cte
WHERE salary_rank <= 3;

Department,Employee,Salary
IT,Max,90000
IT,Joe,85000
IT,Randy,85000
IT,Will,70000
Sales,Henry,80000
Sales,Sam,60000


In [12]:
%%sql
-- Without using CTE: the same ranking query wrapped in a derived table.
-- Uses an inner join, like the CTE version, so both variants return the same
-- rows even if an employee has no matching Department row (a LEFT JOIN would
-- emit those employees with a NULL Department).
SELECT
    Department,
    Employee,
    Salary
FROM (
    SELECT
        d.name AS Department,
        e.name AS Employee,
        e.salary AS Salary,
        -- DENSE_RANK: tied salaries share a rank and no ranks are skipped.
        DENSE_RANK() OVER (PARTITION BY e.departmentId ORDER BY e.salary DESC) AS salary_rank
    FROM Employee e
    JOIN Department d ON e.departmentId = d.id
) AS inner_table
WHERE salary_rank <= 3

Department,Employee,Salary
IT,Max,90000
IT,Joe,85000
IT,Randy,85000
IT,Will,70000
Sales,Henry,80000
Sales,Sam,60000


# Using Pandas

In [None]:
# Fetch both tables through the %sql line magic and convert each result
# with its .DataFrame() method.
# NOTE(review): everything after `%sql` on the line — including the trailing
# `# type: ignore` — is handed to the magic as its argument rather than parsed
# as a Python comment; confirm the connected database accepts `#` comments.
employee_query = %sql SELECT * FROM Employee # type: ignore
department_query = %sql SELECT * FROM Department #type: ignore 
employee_df = employee_query.DataFrame()
department_df = department_query.DataFrame()

# Show the two frames one after the other.
display(employee_df, department_df)