# SQL 50 in Pandas

In [1]:
from typing import List
from typing import Optional
import pandas as pd
import numpy as np

### 1757. Recyclable and Low Fat Products [E]

Write a solution to find the ids of products that are both low fat and recyclable.

Return the result table in any order.

In [13]:
def find_products(products: pd.DataFrame) -> pd.DataFrame:
    """Return the ids of products that are both low fat and recyclable.

    Args:
        products: Table with ``product_id``, ``low_fats`` and ``recyclable``
            columns; a flag counts as set when its value equals ``'Y'``.

    Returns:
        A one-column DataFrame (``product_id``) with the matching rows,
        keeping their original index labels.
    """
    is_low_fat = products['low_fats'] == 'Y'
    is_recyclable = products['recyclable'] == 'Y'
    return products.loc[is_low_fat & is_recyclable, ['product_id']]

In [2]:
# Sample Products table: ids are given as strings and cast to int64,
# the two Y/N flags are stored as categoricals.
data = [
    ['0', 'Y', 'N'],
    ['1', 'Y', 'Y'],
    ['2', 'N', 'Y'],
    ['3', 'Y', 'Y'],
    ['4', 'N', 'N'],
]
products = pd.DataFrame(
    data,
    columns=['product_id', 'low_fats', 'recyclable'],
).astype(
    {'product_id': 'int64', 'low_fats': 'category', 'recyclable': 'category'}
)

In [14]:
# Run the solution on the sample `products` table and display the result.
find_products(products)

Unnamed: 0,product_id
1,1
3,3


### 2877. Create a DataFrame from List [E]

Write a solution to create a DataFrame from a 2D list called student_data. This 2D list contains the IDs and ages of some students.

The DataFrame should have two columns, student_id and age, and be in the same order as the original 2D list.

In [16]:
# Sample student data: one [student_id, age] pair per student.
input_list = [[1, 15], [2, 11], [3, 11], [4, 20]]

In [23]:
def createDataframe(student_data: List[List[int]]) -> pd.DataFrame:
    """Build a student DataFrame from a 2D list.

    Args:
        student_data: Rows of ``[student_id, age]`` values.

    Returns:
        A DataFrame with columns ``student_id`` and ``age`` whose rows follow
        the order of ``student_data``.
    """
    column_names = ['student_id', 'age']
    return pd.DataFrame(data=student_data, columns=column_names)

In [24]:
# Build the DataFrame from the sample `input_list` and display it.
createDataframe(input_list)

Unnamed: 0,student_id,age
0,1,15
1,2,11
2,3,11
3,4,20


### 176. Second Highest Salary [M]

Write a solution to find the second highest distinct salary from the Employee table. If there is no second highest salary, return null (return None in Pandas).

The result format is in the following example.

In [64]:
def second_highest_salary(employee: pd.DataFrame) -> pd.DataFrame:
    """Return the second highest distinct salary.

    Args:
        employee: Table with a ``salary`` column.

    Returns:
        A one-row DataFrame with the single column ``SecondHighestSalary``.
        The value is ``None`` when there are fewer than two distinct salaries.
    """
    distinct_desc = employee['salary'].drop_duplicates().sort_values(ascending=False)
    # Position 1 of the descending distinct values is the runner-up, if any.
    runner_up = distinct_desc.iloc[1] if len(distinct_desc) >= 2 else None
    return pd.DataFrame([runner_up], columns=['SecondHighestSalary'])

In [25]:
# Sample Employee table: one [id, salary] pair per row.
data = [
    [1, 100],
    [2, 200],
    [3, 300],
]
employee = pd.DataFrame(
    data,
    columns=['id', 'salary'],
).astype({'id': 'int64', 'salary': 'int64'})

In [67]:
# Run the solution on the sample `employee` table and display the result.
second_highest_salary(employee)

Unnamed: 0,SecondHighestSalary
0,200


### 584. Find Customer Referee [E]

Find the names of the customers who are not referred by the customer with id = 2.

Return the result table in any order.

In [105]:
def find_customer_referee(customer: pd.DataFrame) -> pd.DataFrame:
    """Return the names of customers not referred by the customer with id 2.

    Customers with no referee (null ``referee_id``) are included.

    Args:
        customer: Table with ``name`` and ``referee_id`` columns.

    Returns:
        A one-column DataFrame (``name``) with the matching rows, keeping
        their original index labels.
    """
    referee = customer['referee_id']
    # A null referee must be kept explicitly: on a nullable column the
    # comparison alone yields a missing value rather than True.
    not_referred_by_two = referee.ne(2) | referee.isna()
    return customer.loc[not_referred_by_two, ['name']]

In [80]:
# Sample Customer table: [id, name, referee_id]; None marks "no referee".
data = [
    [1, 'Will', None],
    [2, 'Jane', None],
    [3, 'Alex', 2],
    [4, 'Bill', None],
    [5, 'Zack', 1],
    [6, 'Mark', 2],
]
customer = pd.DataFrame(
    data,
    columns=['id', 'name', 'referee_id'],
).astype({'id': 'Int64', 'name': 'object', 'referee_id': 'Int64'})

In [106]:
# Run the solution on the sample `customer` table and display the result.
# NOTE(review): the recorded output below lists a name ('Tse') and an index
# label (6) that do not occur in the sample table above — it looks stale;
# re-run this cell to refresh it.
find_customer_referee(customer)

Unnamed: 0,name
0,Will
1,Jane
3,Bill
4,Zack
6,Tse


### 570. Managers with at Least 5 Direct Reports [M]

Write a solution to find managers with at least five direct reports.

Return the result table in any order.

In [129]:
def find_managers(employee: pd.DataFrame) -> pd.DataFrame:
    """Return the names of managers with at least five direct reports.

    Args:
        employee: Table with ``id``, ``name`` and ``managerId`` columns.

    Returns:
        A one-column DataFrame (``name``); empty when no manager qualifies.
    """
    # Number of employees reporting to each managerId.
    report_counts = employee.groupby('managerId', as_index=False).agg({'id': 'count'})
    busy_managers = report_counts[report_counts['id'] >= 5].rename(columns={'id': 'count'})

    if busy_managers.empty:
        return pd.DataFrame(None, columns=['name'])

    # Look up each qualifying managerId among the employee ids to get the name.
    matched = employee.merge(
        busy_managers, how='inner', left_on='id', right_on='managerId'
    )
    return matched[['name']]

In [107]:
# Sample Employee table: [id, name, department, managerId]; None marks an
# employee without a manager.
data = [
    [101, 'John', 'A', None],
    [102, 'Dan', 'A', 101],
    [103, 'James', 'A', 101],
    [104, 'Amy', 'A', 101],
    [105, 'Anne', 'A', 101],
    [106, 'Ron', 'B', 101],
]
employee = pd.DataFrame(
    data,
    columns=['id', 'name', 'department', 'managerId'],
).astype(
    {'id': 'Int64', 'name': 'object', 'department': 'object', 'managerId': 'Int64'}
)

In [130]:
# Run the solution on the sample `employee` table and display the result.
# NOTE(review): the sample above has five employees reporting to id 101, so
# one manager would be expected, yet the recorded output below is empty —
# presumably stale; re-run this cell to confirm.
find_managers(employee)

Unnamed: 0,name


### 185. Department Top Three Salaries [H]

A company's executives are interested in seeing who earns the most money in each of the company's departments. A high earner in a department is an employee who has a salary in the top three unique salaries for that department.

Write a solution to find the employees who are high earners in each of the departments.

Return the result table in any order.

The result format is in the following example.

In [198]:
# Beats 69.03%

def top_three_salaries(employee: pd.DataFrame, department: pd.DataFrame) -> pd.DataFrame:
    department2 = department.rename(columns={'id':'departmentId','name':'Department'})
    employee2 = employee.rename(columns={'name':'Employee','salary':'Salary'})
    df = pd.merge(employee2, department2, how='left', on='departmentId') # id Employee salary departnemtnId Department

    if df.shape[0] < 1:
        return pd.DataFrame(None, columns=['Department','Employee','Salary'])

    count = 0
    for dept in department2['departmentId'].unique():
        df_loop = employee2[ employee2['departmentId']==dept ].sort_values('Salary', ascending=False)[['Salary','departmentId']].drop_duplicates().head(3)

        if count == 0:
            df_top3 = df_loop.copy()
        else:
            df_top3 = pd.concat([df_top3, df_loop], axis=0)
        count += 1

    df2 = pd.merge(df, df_top3, how='inner', on=['Salary', 'departmentId'])[['Department','Employee','Salary']]
    return df2

In [199]:
# Sample Employee table: [id, name, salary, departmentId].
data = [
    [1, 'Joe', 85000, 1],
    [2, 'Henry', 80000, 2],
    [3, 'Sam', 60000, 2],
    [4, 'Max', 90000, 1],
    [5, 'Janet', 69000, 1],
    [6, 'Randy', 85000, 1],
    [7, 'Will', 70000, 1],
]
employee = pd.DataFrame(
    data,
    columns=['id', 'name', 'salary', 'departmentId'],
).astype(
    {'id': 'Int64', 'name': 'object', 'salary': 'Int64', 'departmentId': 'Int64'}
)

# Sample Department table: [id, name]. `data` is reused for the second table.
data = [
    [1, 'IT'],
    [2, 'Sales'],
]
department = pd.DataFrame(
    data,
    columns=['id', 'name'],
).astype({'id': 'Int64', 'name': 'object'})

In [200]:
# Run the solution on the sample `employee` and `department` tables and
# display the result.
top_three_salaries(employee, department)

Unnamed: 0,Department,Employee,Salary
0,IT,Joe,85000
1,Sales,Henry,80000
2,Sales,Sam,60000
3,IT,Max,90000
4,IT,Randy,85000
5,IT,Will,70000


### 197. Rising Temperature [E]

Write a solution to find the id of every date whose temperature is higher than that of the previous day (yesterday).

Return the result table in any order.


In [248]:
def rising_temperature(weather: pd.DataFrame) -> pd.DataFrame:
    """Return the ids of days that were warmer than the day before.

    The rows are sorted by ``recordDate`` and each row is compared with the
    one preceding it. A row qualifies only when that preceding row is exactly
    one day earlier and its temperature is lower.

    Args:
        weather: Table with ``id``, ``recordDate`` and ``temperature`` columns.
            The date gap is compared against a ``pd.Timedelta``, so
            ``recordDate`` is assumed to already hold datetime values.

    Returns:
        A one-column DataFrame (``id``) with the qualifying rows.
    """
    by_date = weather.sort_values('recordDate').reset_index(drop=True)
    follows_previous_day = by_date['recordDate'].diff() == pd.Timedelta(days=1)
    is_warmer = by_date['temperature'].diff() > 0
    return by_date.loc[follows_previous_day & is_warmer, ['id']]

In [267]:
def rising_temperature(weather: pd.DataFrame) -> pd.DataFrame:
    """Return the ids of days that were warmer than the day before.

    Implemented as a self-join: a copy of the table with every date shifted
    forward by one day is joined back on ``recordDate``, which puts each day's
    temperature next to the previous calendar day's temperature.

    The input frame is left untouched; date parsing happens on a copy.

    Args:
        weather: Table with ``id``, ``recordDate`` and ``temperature`` columns.
            ``recordDate`` may hold datetimes or values that
            ``pd.to_datetime`` can parse.

    Returns:
        A one-column DataFrame (``id``) with the qualifying rows.
    """
    # assign() returns new frames, so the caller's recordDate column is not
    # overwritten by the datetime conversion.
    today = weather.assign(recordDate=pd.to_datetime(weather['recordDate']))
    # Shifting dates forward one day aligns yesterday's row with today's date.
    yesterday = today.assign(recordDate=today['recordDate'] + pd.Timedelta(days=1))

    paired = today.merge(
        yesterday, how='left', on='recordDate', suffixes=('_today', '_yesterday')
    )
    # Days without a recorded previous day have a missing temperature on the
    # right side and therefore never satisfy the comparison.
    is_warmer = paired['temperature_today'] - paired['temperature_yesterday'] > 0
    return paired[is_warmer][['id_today']].rename(columns={'id_today': 'id'})


In [272]:
# Sample Weather table: [id, recordDate, temperature] for four consecutive days.
data = [
    [1, '2015-01-01', 10],
    [2, '2015-01-02', 25],
    [3, '2015-01-03', 20],
    [4, '2015-01-04', 30],
]
# Alternative two-day sample:
# data = [[1, '2015-01-01', 10], [2, '2015-01-02', 25]]
weather = pd.DataFrame(
    data,
    columns=['id', 'recordDate', 'temperature'],
).astype(
    {'id': 'Int64', 'recordDate': 'datetime64[ns]', 'temperature': 'Int64'}
)

In [271]:
# Run the solution on the sample `weather` table and display the result.
# NOTE(review): the recorded output below shows only id 2, which matches the
# commented-out two-row sample rather than the four-row one — presumably
# stale; re-run this cell to confirm.
rising_temperature(weather)

Unnamed: 0,id
1,2
