In [1]:
# Execute the shared setup notebook; the output below shows it connects to the leetcode50 database.
# NOTE(review): display_table is presumably defined by that notebook, and the trailing
# "import display_table" appears to be handed to %run as arguments rather than run as an import - confirm.
%run helper/setup_notebook.ipynb import display_table

Successfully connected to leetcode50 database.


In [2]:
# Show the contents of the Accounts table (columns: account_id, income).
display_table('Accounts')

+------------+--------+
| account_id | income |
+------------+--------+
|     3      | 108939 |
|     2      | 12747  |
|     8      | 87709  |
|     6      | 91796  |
+------------+--------+


### Calculate the number of bank accounts for each salary category. The salary categories are:

- "Low Salary": All the salaries strictly less than $20000.
- "Average Salary": All the salaries in the inclusive range [$20000, $50000].
- "High Salary": All the salaries strictly greater than $50000.
#### *The result table must contain all three categories. If there are no accounts in a category, then report 0.*

```
+----------------+----------------+
| category       | accounts_count |
+----------------+----------------+
| Low Salary     | 1              |
| Average Salary | 0              |
| High Salary    | 3              |
+----------------+----------------+
Explanation: 
Low Salary: Account 2.
Average Salary: No accounts.
High Salary: Accounts 3, 6, and 8.
```

In [3]:
%%sql 

-- Label each account with its salary category.
-- Average Salary is the inclusive range [20000, 50000], so an income of exactly
-- 50000 must not fall through to High Salary.
SELECT
    CASE
        WHEN income < 20000 THEN 'Low Salary'
        WHEN income BETWEEN 20000 AND 50000 THEN 'Average Salary'
        ELSE 'High Salary'
    END AS category
FROM Accounts

category
High Salary
Low Salary
High Salary
High Salary


In [4]:
%%sql 

-- Count accounts per salary category. Categories with no accounts are absent
-- from this result.
SELECT
    category,
    COUNT(category) AS accounts_count
FROM (
    SELECT
        CASE
            WHEN income < 20000 THEN 'Low Salary'
            -- Inclusive on both ends, per the problem statement.
            WHEN income BETWEEN 20000 AND 50000 THEN 'Average Salary'
            ELSE 'High Salary'
        END AS category
    FROM Accounts
) AS count_table
GROUP BY category;


category,accounts_count
High Salary,3
Low Salary,1


In [5]:
%%sql 

-- Build the fixed list of category labels as a one-column result set.
-- The column name is taken from the first SELECT of the UNION.
SELECT 'Low Salary' AS category
UNION SELECT 'Average Salary'
UNION SELECT 'High Salary'

category
Low Salary
Average Salary
High Salary


In [6]:
%%sql 

-- Same label list, wrapped as a derived table so it can be selected from.
SELECT definitions_table.category
FROM (
    SELECT 'Low Salary' AS category
    UNION SELECT 'Average Salary'
    UNION SELECT 'High Salary'
) AS definitions_table;

category
Low Salary
Average Salary
High Salary


In [7]:
%%sql

-- Left-join the full label list to the per-category counts so that every
-- category appears. Categories with no accounts get NULL at this stage.
SELECT
    category_table.category,
    count_table.accounts_count
FROM (
    SELECT 'Low Salary' AS category
    UNION
    SELECT 'Average Salary' AS category
    UNION
    SELECT 'High Salary' AS category
) AS category_table
LEFT JOIN (
    SELECT
        category,
        COUNT(category) AS accounts_count
    FROM (
        SELECT
            CASE
                WHEN income < 20000 THEN 'Low Salary'
                -- Inclusive on both ends, per the problem statement.
                WHEN income BETWEEN 20000 AND 50000 THEN 'Average Salary'
                ELSE 'High Salary'
            END AS category
        FROM Accounts
    ) AS t2
    GROUP BY category
) AS count_table
ON category_table.category = count_table.category

category,accounts_count
Low Salary,1.0
Average Salary,
High Salary,3.0


In [8]:
%%sql

-- Final subquery-based answer. Every category is listed, and IFNULL turns the
-- NULL count of a category with no accounts into 0.
SELECT
    category_table.category,
    IFNULL(count_table.accounts_count, 0) AS accounts_count
FROM (
    SELECT 'Low Salary' AS category
    UNION
    SELECT 'Average Salary' AS category
    UNION
    SELECT 'High Salary' AS category
) AS category_table
LEFT JOIN (
    SELECT
        category,
        COUNT(category) AS accounts_count
    FROM (
        SELECT
            CASE
                WHEN income < 20000 THEN 'Low Salary'
                -- Inclusive on both ends, per the problem statement.
                WHEN income BETWEEN 20000 AND 50000 THEN 'Average Salary'
                ELSE 'High Salary'
            END AS category
        FROM Accounts
    ) AS t2
    GROUP BY category
) AS count_table
ON category_table.category = count_table.category

category,accounts_count
Low Salary,1
Average Salary,0
High Salary,3


# Using CTE

In [9]:
%%sql 

WITH category_cte AS (
    -- Every label the result must contain.
    SELECT 'Low Salary' AS category
    UNION
    SELECT 'Average Salary' AS category
    UNION
    SELECT 'High Salary' AS category
),
count_cte AS (
    -- Accounts per category. Categories with no accounts are absent here.
    SELECT
        category,
        COUNT(category) AS accounts_count
    FROM (
        SELECT
            CASE
                WHEN income < 20000 THEN 'Low Salary'
                -- Inclusive on both ends, per the problem statement.
                WHEN income BETWEEN 20000 AND 50000 THEN 'Average Salary'
                ELSE 'High Salary'
            END AS category
        FROM Accounts
    ) t2
    GROUP BY t2.category
)
-- The left join keeps every label. Unmatched labels get a NULL count at this stage.
SELECT
    category_cte.category,
    count_cte.accounts_count
FROM category_cte
LEFT JOIN count_cte ON category_cte.category = count_cte.category;

category,accounts_count
Low Salary,1.0
Average Salary,
High Salary,3.0


In [10]:
%%sql 

WITH category_cte AS (
    -- Every label the result must contain.
    SELECT 'Low Salary' AS category
    UNION
    SELECT 'Average Salary' AS category
    UNION
    SELECT 'High Salary' AS category
),
count_cte AS (
    -- Accounts per category. Categories with no accounts are absent here.
    SELECT
        category,
        COUNT(category) AS accounts_count
    FROM (
        SELECT
            CASE
                WHEN income < 20000 THEN 'Low Salary'
                -- Inclusive on both ends, per the problem statement.
                WHEN income BETWEEN 20000 AND 50000 THEN 'Average Salary'
                ELSE 'High Salary'
            END AS category
        FROM Accounts
    ) t2
    GROUP BY t2.category
)
-- The left join keeps every label, and IFNULL reports 0 for unmatched labels.
SELECT
    category_cte.category,
    IFNULL(count_cte.accounts_count, 0) AS accounts_count
FROM category_cte
LEFT JOIN count_cte ON category_cte.category = count_cte.category;

category,accounts_count
Low Salary,1
Average Salary,0
High Salary,3


# Using Pandas

In [11]:
import pandas as pd 
import numpy as np

In [12]:
# Fetch the whole Accounts table through the %sql line magic, then convert the result to a DataFrame.
accounts_query = %sql SELECT * FROM Accounts # type: ignore
accounts_df = accounts_query.DataFrame()

# Show the loaded frame (columns: account_id, income).
display(accounts_df)

Unnamed: 0,account_id,income
0,3,108939
1,2,12747
2,8,87709
3,6,91796


In [13]:
def create_category(df):
    """Return the salary category label for a single account row.

    Args:
        df: One row exposing the account's ``'income'`` by key, e.g. the
            Series handed over by ``DataFrame.apply(..., axis=1)``.

    Returns:
        ``'Low Salary'`` for income strictly below 20000, ``'Average Salary'``
        for income in the inclusive range [20000, 50000], and
        ``'High Salary'`` otherwise.
    """
    salary = df['income']
    if salary < 20_000:
        return 'Low Salary'
    # Both bounds are inclusive: exactly 20000 and exactly 50000 are "Average".
    if salary <= 50_000:
        return 'Average Salary'
    return 'High Salary'

# Label every account by applying create_category row-wise (axis=1);
# this adds a 'category' column to accounts_df itself.
accounts_df['category'] = accounts_df.apply(create_category, axis=1)
accounts_df

Unnamed: 0,account_id,income,category
0,3,108939,High Salary
1,2,12747,Low Salary
2,8,87709,High Salary
3,6,91796,High Salary


In [14]:
# Number of accounts per category; categories with no accounts do not appear here.
(
    accounts_df
    .groupby('category')
    .agg(accounts_count=pd.NamedAgg(column='category', aggfunc='count'))
)

Unnamed: 0_level_0,accounts_count
category,Unnamed: 1_level_1
High Salary,3
Low Salary,1


In [15]:
# Reference frame holding every label the final result must contain.
category_df = pd.DataFrame(
    ['Low Salary', 'Average Salary', 'High Salary'],
    columns=['category'],
)
category_df

Unnamed: 0,category
0,Low Salary
1,Average Salary
2,High Salary


In [16]:
# Left join keeps every category label; labels with no matching account get NaN account fields.
merged_df = pd.merge(category_df, accounts_df, how='left', on='category')
merged_df

Unnamed: 0,category,account_id,income
0,Low Salary,2.0,12747.0
1,Average Salary,,
2,High Salary,3.0,108939.0
3,High Salary,8.0,87709.0
4,High Salary,6.0,91796.0


In [17]:
# Replace the NaN account fields of unmatched categories with 0.
# Reassigning instead of using inplace=True avoids mutating the frame object in place.
merged_df = merged_df.fillna(0)

In [18]:
def find_count(df):
    """Return 0 for a row whose ``'income'`` equals 0, otherwise 1.

    NOTE(review): this treats an income of 0 as "no account", which matches the
    0 used to fill unmatched categories; a real account with income 0 would
    also yield 0 - confirm that cannot occur in the data.
    """
    if df['income'] == 0:
        return 0
    return 1

# Flag each merged row with 1 or 0 by applying find_count row-wise (axis=1).
merged_df['accounts_count'] = merged_df.apply(find_count, axis=1)
merged_df

Unnamed: 0,category,account_id,income,accounts_count
0,Low Salary,2.0,12747.0,1
1,Average Salary,0.0,0.0,0
2,High Salary,3.0,108939.0,1
3,High Salary,8.0,87709.0,1
4,High Salary,6.0,91796.0,1


In [19]:
# Same 0/1 flag as accounts_count, derived from the income column alone.
merged_df['accounts_count2'] = merged_df['income'].apply(
    lambda income: 1 if income != 0 else 0
)
merged_df

Unnamed: 0,category,account_id,income,accounts_count,accounts_count2
0,Low Salary,2.0,12747.0,1,1
1,Average Salary,0.0,0.0,0,0
2,High Salary,3.0,108939.0,1,1
3,High Salary,8.0,87709.0,1,1
4,High Salary,6.0,91796.0,1,1


In [20]:
# Sum the 0/1 flags per category to get the final account counts, including zeros.
(
    merged_df
    .groupby('category')
    .agg(accounts_count=pd.NamedAgg(column='accounts_count', aggfunc='sum'))
)

Unnamed: 0_level_0,accounts_count
category,Unnamed: 1_level_1
Average Salary,0
High Salary,3
Low Salary,1
