In [1]:
%run helper/setup_notebook.ipynb import display_table

Successfully connected to leetcode50 database.


In [2]:
display_table('Employees')

+-------------+---------+------------+-----+
| employee_id |   name  | reports_to | age |
+-------------+---------+------------+-----+
|      2      | Winston |    None    |  37 |
|      4      |   Bob   |     9      |  36 |
|      6      |  Alice  |     9      |  41 |
|      9      |  Hercy  |    None    |  43 |
+-------------+---------+------------+-----+


#### *For this problem, we will consider a manager an employee who has at least 1 other employee reporting to them.*
### Write an SQL query to report the ids and the names of all managers, the number of employees who report directly to them, and the average age of the reports rounded to the nearest integer. 

#### *Return the result table ordered by `employee_id`.*

```
+-------------+-------+---------------+-------------+
| employee_id | name  | reports_count | average_age |
+-------------+-------+---------------+-------------+
| 9           | Hercy | 2             | 39          |
+-------------+-------+---------------+-------------+
Explanation: Hercy has 2 people report directly to him, Alice and Bob. 
Their average age is (41+36)/2 = 38.5, which is 39 after rounding it to the nearest integer.
```

In [3]:
%%sql 

SELECT 
    COUNT(reports_to) AS reports_count,
    AVG(age) AS average_age
FROM Employees
WHERE reports_to IS NOT NULL
GROUP BY reports_to;

reports_count,average_age
2,38.5


In [4]:
%%sql 

SELECT 
    COUNT(reports_to) AS reports_count,
    ROUND(AVG(age)) AS average_age
FROM Employees
WHERE reports_to IS NOT NULL
GROUP BY reports_to;

reports_count,average_age
2,39


In [5]:
%%sql

SELECT 
    employee_id, 
    name,
    inner_table.reports_count,
    inner_table.average_age
FROM Employees
JOIN 
    (SELECT 
        reports_to,
        COUNT(reports_to) AS reports_count, 
        ROUND(AVG(age)) AS average_age
    FROM Employees 
    WHERE reports_to IS NOT NULL
    GROUP BY reports_to ) AS inner_table ON employee_id = inner_table.reports_to
ORDER BY employee_id;


employee_id,name,reports_count,average_age
9,Hercy,2,39


# Using Pandas

In [6]:
import pandas as pd 
import numpy as np

In [7]:
employees_query = %sql SELECT * FROM Employees # type: ignore 
employees_df = employees_query.DataFrame()

display(employees_df)

Unnamed: 0,employee_id,name,reports_to,age
0,2,Winston,,37
1,4,Bob,9.0,36
2,6,Alice,9.0,41
3,9,Hercy,,43


In [8]:
filtered_df = employees_df.query("~reports_to.isna()")
filtered_df

Unnamed: 0,employee_id,name,reports_to,age
1,4,Bob,9.0,36
2,6,Alice,9.0,41


In [9]:
filtered_df.agg(average_age=('age', 'mean'))

Unnamed: 0,age
average_age,38.5


In [10]:
filtered_df.agg(average_age=('age', 'mean')).round()

Unnamed: 0,age
average_age,38.0


#### In pandas, there is no specific `ceil` function directly available for DataFrames. However, you can use the `apply` function along with the `math.ceil` function to achieve the rounding up behavior on a DataFrame column. Here's an example:

```py
import math

# Sample DataFrame
df = pd.DataFrame({'col1': [1.2, 2.7, 3.5, 4.9, 5.1]})

# Round up to the next integer using apply and math.ceil
df['rounded_up'] = df['col1'].apply(lambda x: math.ceil(x))
```

Output:
```
   col1  rounded_up
0   1.2           2
1   2.7           3
2   3.5           4
3   4.9           5
4   5.1           6
```

In [11]:
# Need to use numpy ceil() as there is no ceil method in pandas
average_age = filtered_df.agg(average_age=('age', 'mean'))
np.ceil(average_age)
average_age

Unnamed: 0,age
average_age,38.5


In [12]:
# Calculate the reports count and average age per manager
filtered_df.groupby('reports_to').agg(
    reports_count=('reports_to', 'size'),
    average_age=('age', 'mean')
)

Unnamed: 0_level_0,reports_count,average_age
reports_to,Unnamed: 1_level_1,Unnamed: 2_level_1
9.0,2,38.5


In [13]:
inner_table_df = filtered_df.groupby('reports_to').agg(
    reports_count=('reports_to', 'size'),
    average_age=('age', 'mean')
)
inner_table_df

Unnamed: 0_level_0,reports_count,average_age
reports_to,Unnamed: 1_level_1,Unnamed: 2_level_1
9.0,2,38.5


In [14]:
inner_table_df['average_age'] = np.ceil(inner_table_df['average_age'])
inner_table_df

Unnamed: 0_level_0,reports_count,average_age
reports_to,Unnamed: 1_level_1,Unnamed: 2_level_1
9.0,2,39.0


In [15]:
# Because employee_id is not present in the inner_table DF, 
# we need to specify explicitly specify the columns 
# in the left and right DataFrames to merge on.
employees_df.merge(inner_table_df, 
                   left_on='employee_id', 
                   right_on='reports_to', 
                   how='inner')

Unnamed: 0,employee_id,name,reports_to,age,reports_count,average_age
0,9,Hercy,,43,2,39.0


In [16]:
merged_df = employees_df.merge(inner_table_df, 
                            left_on='employee_id', 
                            right_on='reports_to', 
                            how='inner')
merged_df.sort_values('employee_id')

Unnamed: 0,employee_id,name,reports_to,age,reports_count,average_age
0,9,Hercy,,43,2,39.0


In [17]:
merged_df = merged_df.sort_values('employee_id')
merged_df[['employee_id', 'name', 'reports_count', 'average_age']]

Unnamed: 0,employee_id,name,reports_count,average_age
0,9,Hercy,2,39.0
