# Solution: groupby with LAMBDA function

In [74]:
import pandas as pd

def department_highest_salary_sol(employee: pd.DataFrame, department: pd.DataFrame) -> pd.DataFrame:
    """Return the top-paid employee(s) of every department.

    Args:
        employee: Frame with the columns 'name', 'salary' and 'departmentId'
            (an 'id' column may be present as well).
        department: Frame with the columns 'id' and 'name'.

    Returns:
        A frame with the columns 'Department', 'Employee' and 'Salary' and a
        fresh 0..n-1 index. Rows are ordered by 'departmentId'; employees
        tied for the highest salary in a department are all kept, in the
        order they appear in the merged frame.
    """
    output_columns = ['Department', 'Employee', 'Salary']

    if employee.empty or department.empty:
        return pd.DataFrame(columns=output_columns)

    # Merge the employee and department DataFrames on 'departmentId' and 'id' columns
    merged_df = employee.merge(
        department,
        left_on='departmentId',
        right_on='id',
        suffixes=('_employee', '_department'),
    )
    if merged_df.empty:
        # No employee references an existing department.
        return pd.DataFrame(columns=output_columns)

    # Group by 'departmentId' and use a lambda to broadcast each group's
    # highest salary back onto its rows. The result shares merged_df's index,
    # so the comparison below is row-aligned, and no version-specific
    # `apply` keyword is required.
    top_salary = merged_df.groupby('departmentId')['salary'].transform(lambda s: s.max())
    highest_salary_df = merged_df[merged_df['salary'] == top_salary]

    # A stable sort keeps the per-department row order while arranging the
    # departments by id; then renumber the rows from zero.
    highest_salary_df = (
        highest_salary_df
        .sort_values('departmentId', kind='mergesort')
        .reset_index(drop=True)
    )

    # Rename to the required labels and keep only the required columns
    renamed = highest_salary_df.rename(columns={
        'name_department': 'Department',
        'name_employee': 'Employee',
        'salary': 'Salary',
    })
    return renamed[output_columns]


## My solution

In [75]:
import pandas as pd

def department_highest_salary(employee: pd.DataFrame, department: pd.DataFrame) -> pd.DataFrame:
    """Return the employee(s) earning the highest salary in each department.

    Args:
        employee: Frame with the columns 'name', 'salary' and 'departmentId'
            (an 'id' column may be present as well).
        department: Frame with the columns 'id' and 'name'.

    Returns:
        A frame with the columns 'Department', 'Employee' and 'Salary'.
        Ties are all kept. Rows keep the order and index labels of the
        merged employee/department frame.
    """
    output_columns = ['Department', 'Employee', 'Salary']

    if employee.empty or department.empty:
        return pd.DataFrame(columns=output_columns)

    emp = employee.merge(department, left_on='departmentId', right_on='id', suffixes=('_emp', '_dep'))
    emp = emp.rename(columns={'name_dep': 'Department', 'name_emp': 'Employee', 'salary': 'Salary'})

    # Broadcast each department's maximum salary onto its own rows. The
    # result shares emp's index, so the mask is guaranteed to line up with
    # the rows it filters (no reliance on a second merge keeping row order).
    # Grouping by id rather than by name keeps two departments that happen
    # to share a name separate.
    max_salary = emp.groupby('departmentId')['Salary'].transform('max')

    return emp.loc[emp['Salary'] == max_salary, output_columns]

In [76]:
# Sample data: five employees spread over two departments. Jim and Max tie
# for the highest salary in department 1.
employee = pd.DataFrame(
    [
        [1, 'Joe', 70000, 1],
        [2, 'Jim', 90000, 1],
        [3, 'Henry', 80000, 2],
        [4, 'Sam', 60000, 2],
        [5, 'Max', 90000, 1],
    ],
    columns=['id', 'name', 'salary', 'departmentId'],
)
department = pd.DataFrame(
    [
        [1, 'IT'],
        [2, 'Sales'],
    ],
    columns=['id', 'name'],
)

In [77]:
# Run the groupby-based solution on the sample `employee` / `department` frames.
department_highest_salary_sol(employee, department)

Unnamed: 0,Department,Employee,Salary
0,IT,Jim,90000
1,IT,Max,90000
2,Sales,Henry,80000
