### Nth Highest Salary

In [1]:
import pandas as pd
import pandasql as ps

In [2]:
# Sample Employee table for the examples below: one [id, salary] pair per row.
data = [
    [1, 100],
    [2, 200],
    [3, 300],
]
Employee = pd.DataFrame(data=data, columns=('id', 'salary'))
print(Employee)

   id  salary
0   1     100
1   2     200
2   3     300


In [3]:
class SalaryAnalyzer:
    """Answer ranking questions about the ``salary`` column of a DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to analyse; it must contain a ``salary`` column.
    """

    def __init__(self, df):
        self.df = df

    @staticmethod
    def ordinal(n):
        """Return ``n`` followed by its English ordinal suffix.

        Examples: ``1 -> '1st'``, ``2 -> '2nd'``, ``3 -> '3rd'``,
        ``4 -> '4th'``, ``11 -> '11th'``.
        """
        # Anything whose last two digits fall in 10..20 takes 'th'; this is
        # what keeps 11, 12 and 13 from becoming '11st', '12nd', '13rd'.
        if 10 <= n % 100 <= 20:
            suffix = 'th'
        else:
            suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(n % 10, 'th')
        return f"{n}{suffix}"

    def nth_highest_salary(self, n):
        """Return the n-th highest *distinct* salary as a one-row DataFrame.

        Parameters
        ----------
        n : int
            1-based rank among the distinct salary values (1 = highest).

        Returns
        -------
        pandas.DataFrame
            A single row and a single column named
            ``'<ordinal(n)>_highest_salary'`` (e.g. ``'2nd_highest_salary'``).
            The cell holds the salary, or ``None`` when ``n`` is smaller
            than 1 or larger than the number of distinct salaries.
        """
        column_name = f"{self.ordinal(n)}_highest_salary"

        # Rank distinct, non-missing values only: tied salaries must share a
        # single rank, otherwise [300, 300, 200] would report 300 as "2nd".
        distinct_salaries = self.df['salary'].dropna().drop_duplicates()

        # n < 1 would make nlargest() return an empty Series and .iloc[-1]
        # raise IndexError; treat it like any other rank that does not exist.
        if n < 1 or n > len(distinct_salaries):
            return pd.DataFrame({column_name: [None]})

        nth_salary = distinct_salaries.nlargest(n).iloc[-1]
        return pd.DataFrame({column_name: [nth_salary]})
        
# Demo: wrap the sample Employee table and print its 2nd highest salary.
analyzer = SalaryAnalyzer(Employee)        
print(analyzer.nth_highest_salary(2))

   2nd_highest_salary
0                 200


In [4]:
# Rank to look up (1 = highest). Change this one value to query another rank;
# it is interpolated into the inner LIMIT clause below.
NTH = 2

# How the query works:
#   * the inner SELECT lists the distinct salaries plus one extra NULL row,
#     sorted from highest to lowest, and keeps the first NTH rows;
#   * the outer SELECT takes the smallest of those rows.
# This relies on NULL sorting below every real salary (SQLite's ordering,
# which is pandasql's default backend -- confirm if another engine is used):
# when fewer than NTH distinct salaries exist, the NULL row survives the inner
# LIMIT and becomes the answer, so the result is NULL rather than an empty set.
# int() keeps anything other than a whole number out of the SQL text.
# NOTE(review): NTH is expected to be >= 1; SQLite treats LIMIT 0 as "no rows"
# and a negative LIMIT as "no limit".
query = f'''
select salary as nth_highest_salary
from(
    select distinct salary
    from Employee
    union
    select null
    order by salary desc
    limit {int(NTH)}
) as subquery
order by nth_highest_salary asc
limit 1;
'''

print(ps.sqldf(query, locals()))

   nth_highest_salary
0                 200


### Second Highest Salary

In [35]:
# Rebuild the sample Employee table, pinning both columns to int64.
data = [
    [1, 100],
    [2, 200],
    [3, 300],
]
Employee = pd.DataFrame(data, columns=['id', 'salary'])
Employee = Employee.astype({'id': 'int64', 'salary': 'int64'})

In [31]:
def second_highest_salary(df, n=2):
    """Return the n-th highest *distinct* salary of ``df`` (second by default).

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing a ``salary`` column.
    n : int, default 2
        1-based rank among the distinct salary values (1 = highest).

    Returns
    -------
    pandas.DataFrame
        One row and one column named ``'second_highest_salary'``. The cell
        holds the salary, or ``None`` when ``n`` is smaller than 1 or larger
        than the number of distinct salaries.
    """
    # The output column is always named after this function, whatever n is.
    column_name = second_highest_salary.__name__

    # Tied salaries must share one rank, so rank distinct non-missing values.
    distinct_salaries = df['salary'].dropna().drop_duplicates()

    # n < 1 would leave nlargest() empty and make .iloc[-1] raise IndexError.
    if n < 1 or n > len(distinct_salaries):
        return pd.DataFrame({column_name: [None]})

    nth_salary = distinct_salaries.nlargest(n).iloc[-1]
    return pd.DataFrame({column_name: [nth_salary]})

In [32]:
# Show the one-row result for n=2 on the sample Employee table.
second_highest_salary(Employee,2)

Unnamed: 0,second_highest_salary
0,200


In [38]:
# SQL version of the second-highest-salary lookup, run through pandasql.
#   * The inner SELECT lists the distinct salaries plus one extra NULL row,
#     sorted from highest to lowest, and keeps the first two rows.
#   * The outer SELECT returns the smaller of those two rows.
# NOTE(review): this relies on NULL sorting below every real salary, which is
# how SQLite (pandasql's default backend) orders NULLs -- confirm if another
# engine is used. Under that ordering a table with a single distinct salary
# yields NULL instead of an empty result.
query = '''
select salary as second_highest_salary
from(
    select distinct salary
    from Employee
    union
    select null
    order by salary desc
    limit 2
) as subquery
order by second_highest_salary asc
limit 1;
'''
print(ps.sqldf(query, locals()))

   second_highest_salary
0                    200


### Department Highest Salary

In [46]:
# Employee rows: [id, name, salary, departmentId].
data = [
    [1, 'Joe', 70000, 1],
    [2, 'Jim', 90000, 1],
    [3, 'Henry', 80000, 2],
    [4, 'Sam', 60000, 2],
    [5, 'Max', 90000, 1],
]
Employee = pd.DataFrame(
    data,
    columns=['id', 'name', 'salary', 'departmentId'],
).astype({
    'id': 'Int64',
    'name': 'object',
    'salary': 'Int64',
    'departmentId': 'Int64',
})

# Department rows: [id, name].
data = [
    [1, 'IT'],
    [2, 'Sales'],
]
Department = pd.DataFrame(
    data,
    columns=['id', 'name'],
).astype({
    'id': 'Int64',
    'name': 'object',
})

print(Employee)
print(Department)

   id   name  salary  departmentId
0   1    Joe   70000             1
1   2    Jim   90000             1
2   3  Henry   80000             2
3   4    Sam   60000             2
4   5    Max   90000             1
   id   name
0   1     IT
1   2  Sales


In [83]:
# Attach the matching Department row to every employee; Department columns
# whose names clash with Employee's receive the '_department' suffix.
merge_df = Employee.merge(
    Department,
    left_on='departmentId',
    right_on='id',
    suffixes=('', '_department'),
)

# Top salary per department, flattened back into (departmentId, salary) columns.
max_salary = (
    merge_df
    .groupby('departmentId')['salary']
    .max()
    .reset_index()
)

# Joining on both keys keeps only the employees who earn their department's
# maximum; employees tied for the top salary are all retained.
result = merge_df.merge(max_salary, on=['departmentId', 'salary'])
print(result[['name_department', 'name', 'salary']])

  name_department   name  salary
0              IT    Jim   90000
1              IT    Max   90000
2           Sales  Henry   80000


In [87]:
# SQL version of "highest-paid employee(s) per department", run through pandasql.
#   * Department is inner-joined to Employee on the department id.
#   * The WHERE clause compares the (departmentId, salary) pair of each joined
#     row against the (department, max salary) pairs produced by the grouped
#     subquery, so every employee tied for a department's top salary is kept.
# NOTE(review): the subquery spells the column `departmentID` while the frame
# column is `departmentId`; this presumably works because the backend matches
# column names case-insensitively -- confirm if the SQL engine is changed.
query = '''
select D.name as Department, E.name, salary
from Department D
inner join Employee E
on D.id = E.departmentId
where (E.departmentId, E.salary) in (
    select 
    departmentID, max(salary) as salary
    from Employee
    group by departmentID
)
'''

# Last expression of the cell, so the notebook displays the resulting frame.
ps.sqldf(query, locals())

Unnamed: 0,Department,name,salary
0,IT,Jim,90000
1,Sales,Henry,80000
2,IT,Max,90000
