# Задачи с сайта leetcode.com


In [1]:
import sqlite3

import pandas as pd

con = sqlite3.connect("sqlite.db")
cur = con.cursor()

In [2]:
def select(sql):
    """Run a read-only SQL query on the notebook connection.

    Args:
        sql: SQL text to execute.

    Returns:
        Whatever ``pd.read_sql`` returns for the query on ``con``.
    """
    frame = pd.read_sql(sql=sql, con=con)
    return frame


def crud(sql):
    """Execute a (possibly multi-statement) SQL script and commit it.

    Args:
        sql: SQL script text passed to the shared cursor's ``executescript``.
    """
    cur.executescript(sql)
    # Commit explicitly on the shared connection after the script has run.
    con.commit()


def execute_formatted_sqlite(sql_schema):
    """Run a schema dump that holds exactly one SQL statement per line.

    Each non-blank line is treated as a complete statement and terminated
    with ``;`` before the whole script is handed to ``crud``. Blank lines
    (including the leading/trailing ones of a triple-quoted literal) are
    skipped so no empty statements are generated, and a line that already
    ends with ``;`` is not given a second terminator.

    Args:
        sql_schema: Newline-separated SQL statements without terminators.
    """
    stripped_lines = (line.strip() for line in sql_schema.splitlines())
    # Normalise the terminator: drop any existing trailing ';' and add one back.
    statements = (line.rstrip(";").rstrip() for line in stripped_lines if line)
    sql = "".join(f"{statement};\n" for statement in statements if statement)
    crud(sql)


### 175. Combine Two Tables (Easy)

Write a solution to report the first name, last name, city, and state of each person in the Person table. If the address of a personId is not present in the Address table, report null instead.

Return the result table in any order.


In [3]:
# SQLite3 does not know TRUNCATE TABLE, so those commands have to be removed from the script.
# Add PRIMARY KEY to the Id columns and OR REPLACE or IGNORE to INSERT so that rows are not duplicated
# on re-run. Or simply drop the table if it already exists (DROP TABLE IF EXISTS table).

sql_schema = """
Drop table If Exists Person
Create table If Not Exists Person (personId int, firstName varchar(255), lastName varchar(255))
insert into Person (personId, lastName, firstName) values ('1', 'Wang', 'Allen')
insert into Person (personId, lastName, firstName) values ('2', 'Alice', 'Bob')
Drop table If Exists Address
Create table If Not Exists Address (addressId int, personId int, city varchar(255), state varchar(255))
insert into Address (addressId, personId, city, state) values ('1', '2', 'New York City', 'New York')
insert into Address (addressId, personId, city, state) values ('2', '3', 'Leetcode', 'California')
"""

# Also put ';' after every line.
sql = ";\n".join(sql_schema.split("\n"))
print(sql)

# There are several commands, so executescript() is needed rather than execute().
cur.executescript(sql)

# SQLite3 in Python has no autocommit (the CLI does), therefore:
con.commit()

;
Drop table If Exists Person;
Create table If Not Exists Person (personId int, firstName varchar(255), lastName varchar(255));
insert into Person (personId, lastName, firstName) values ('1', 'Wang', 'Allen');
insert into Person (personId, lastName, firstName) values ('2', 'Alice', 'Bob');
Drop table If Exists Address;
Create table If Not Exists Address (addressId int, personId int, city varchar(255), state varchar(255));
insert into Address (addressId, personId, city, state) values ('1', '2', 'New York City', 'New York');
insert into Address (addressId, personId, city, state) values ('2', '3', 'Leetcode', 'California');



In [4]:
# LEFT JOIN keeps every Person row; city and state come back as NULL
# when no Address row has the same personId.
sql = """--sql
SELECT
    p.firstName,
    p.lastName,
    a.city,
    a.state
FROM
    Person p
    LEFT JOIN Address a ON p.personId = a.personId;
"""
select(sql)

Unnamed: 0,firstName,lastName,city,state
0,Allen,Wang,,
1,Bob,Alice,New York City,New York


### 176. Second Highest Salary (Med.)

Write a solution to find the second highest distinct salary from the Employee table. If there is no second highest salary, return null (return None in Pandas).


In [5]:
# Recreate the Employee (id, salary) table with sample rows for problem 176.
sql_schema = """
Drop table If Exists Employee
Create table If Not Exists Employee (id int, salary int)
insert into Employee (id, salary) values ('1', '100')
insert into Employee (id, salary) values ('2', '200')
insert into Employee (id, salary) values ('3', '300')
"""
execute_formatted_sqlite(sql_schema)

In [6]:
# If the table holds only one salary, OFFSET 1 finds no rows and the query returns nothing at all.
# Wrapping the query in SELECT (...) makes it return a row even when the inner query finds nothing.

sql = """--sql
SELECT
    (
        SELECT DISTINCT
            e.salary
        FROM
            Employee e
        ORDER BY
            e.salary DESC
        LIMIT
            1
        OFFSET
            1
    ) AS SecondHighestSalary;
"""
select(sql)

Unnamed: 0,SecondHighestSalary
0,200


### 177. Nth Highest Salary (Med.)

Write a solution to find the nth highest distinct salary from the Employee table. If there are fewer than n distinct salaries, return null.


In [7]:
# Recreate the Employee (Id, Salary) table with sample rows for problem 177.
sql_schema = """
Drop table If Exists Employee
Create table If Not Exists Employee (Id int, Salary int)
insert into Employee (id, salary) values ('1', '100')
insert into Employee (id, salary) values ('2', '200')
insert into Employee (id, salary) values ('3', '300')
"""
execute_formatted_sqlite(sql_schema)

In [8]:
# Для PostgreSQL:

# CREATE
# OR REPLACE FUNCTION NthHighestSalary (n INT) RETURNS INT AS $$ -- начало тела функции
# SELECT
#     CASE
#         WHEN n < 1 THEN NULL
#         ELSE (
#             SELECT
#                 Salary
#             FROM
#                 (
#                     SELECT DISTINCT
#                         e.Salary
#                     FROM
#                         Employee e
#                     ORDER BY
#                         e.Salary DESC
#                     LIMIT
#                         1 OFFSET n -1
#                 ) AS result
#         )
#     END;

# $$ LANGUAGE SQL; -- конец тела функции с указанием используемого диалекта (чистый SQL)

# SELECT
#     NthHighestSalary (2);


# Имитация функции getNthHighestSalary для SQLite
def getNthHighestSalary(n):
    """Emulate the LeetCode ``getNthHighestSalary(n)`` SQL function on SQLite.

    Args:
        n: Rank of the distinct salary to fetch (1 = highest).

    Returns:
        The result of ``select`` for a one-column query named
        ``getNthHighestSalary(<n>)``. The value is NULL when ``n`` is not at
        least 1 or when the inner query finds no row at that offset.
    """
    # Guard first: anything that is not >= 1 short-circuits to a NULL result.
    if not n >= 1:
        return select(f"""--sql
        SELECT
            NULL AS "getNthHighestSalary({n})";
        """)

    # The scalar subquery yields NULL when OFFSET skips past the last row.
    query = f"""--sql
        SELECT
            (
                SELECT DISTINCT
                    e.Salary
                FROM
                    Employee e
                ORDER BY
                    e.Salary desc
                LIMIT
                    1
                OFFSET
                    {n - 1}
            ) AS "getNthHighestSalary({n})";
        """
    return select(query)


# n = -1 is below 1, so this exercises the branch that returns NULL.
getNthHighestSalary(-1)

Unnamed: 0,getNthHighestSalary(-1)
0,


### 178. Rank Scores (Med.)

Write a solution to find the rank of the scores. The ranking should be calculated according to the following rules:

The scores should be ranked from the highest to the lowest.
If there is a tie between two scores, both should have the same ranking.
After a tie, the next ranking number should be the next consecutive integer value. In other words, there should be no holes between ranks.
Return the result table ordered by score in descending order.


In [9]:
# Recreate the Scores (id, score) table with sample rows, including tied scores.
sql_schema = """
Drop table If Exists Scores
Create table If Not Exists Scores (id int, score DECIMAL(3,2))
insert into Scores (id, score) values ('1', '3.5')
insert into Scores (id, score) values ('2', '3.65')
insert into Scores (id, score) values ('3', '4.0')
insert into Scores (id, score) values ('4', '3.85')
insert into Scores (id, score) values ('5', '4.0')
insert into Scores (id, score) values ('6', '3.65')
"""
execute_formatted_sqlite(sql_schema)

In [10]:
# DENSE_RANK gives tied scores the same rank and leaves no gaps after a tie.
# The ORDER BY inside OVER (...) only defines how ranks are assigned; the
# outer ORDER BY is what guarantees the required "score descending" row
# order of the result.
sql = """--sql
SELECT
    s.score,
    DENSE_RANK() OVER (
        ORDER BY
            s.score DESC
    ) AS RANK
FROM
    Scores s
ORDER BY
    s.score DESC;
"""
select(sql)

Unnamed: 0,score,RANK
0,4.0,1
1,4.0,1
2,3.85,2
3,3.65,3
4,3.65,3
5,3.5,4


### 180. Consecutive Numbers (Med.)

Find all numbers that appear at least three times consecutively.


In [11]:
# Recreate the Logs (id, num) table with sample rows for problem 180.
sql_schema = """
Drop table If Exists Logs
Create table If Not Exists Logs (id int, num int)
insert into Logs (id, num) values ('1', '1')
insert into Logs (id, num) values ('2', '1')
insert into Logs (id, num) values ('3', '1')
insert into Logs (id, num) values ('4', '2')
insert into Logs (id, num) values ('5', '1')
insert into Logs (id, num) values ('6', '2')
insert into Logs (id, num) values ('7', '2')
"""
execute_formatted_sqlite(sql_schema)

In [12]:
# For every row, LAG fetches num from the previous row and from the row two
# back (ordered by id); a number qualifies when it equals both of them.
# NOTE(review): rows are compared by id order only; this assumes ids have no gaps.
sql = """--sql
SELECT DISTINCT
    t.num AS ConsecutiveNums
FROM
    (
        SELECT
            l.num,
            LAG(l.num, 1) OVER (
                ORDER BY
                    id
            ) AS prev_num,
            LAG(l.num, 2) OVER (
                ORDER BY
                    id
            ) AS sec_prev_num
        FROM
            Logs l
    ) t
WHERE
    t.num = t.prev_num
    AND t.num = t.sec_prev_num;
"""
select(sql)

Unnamed: 0,ConsecutiveNums
0,1


### 181. Employees Earning More Than Their Managers (Easy)

Write a solution to find the employees who earn more than their managers.


In [13]:
# Recreate the Employee (id, name, salary, managerId) table; managers have NULL managerId.
sql_schema = """
Drop table If Exists Employee
Create table If Not Exists Employee (id int, name varchar(255), salary int, managerId int)
insert into Employee (id, name, salary, managerId) values ('1', 'Joe', '70000', '3')
insert into Employee (id, name, salary, managerId) values ('2', 'Henry', '80000', '4')
insert into Employee (id, name, salary, managerId) values ('3', 'Sam', '60000', NULL)
insert into Employee (id, name, salary, managerId) values ('4', 'Max', '90000', NULL)
"""
execute_formatted_sqlite(sql_schema)

In [14]:
# The correlated subquery looks up the salary of each employee's manager.
# For employees without a manager the subquery yields NULL, the comparison
# is not true, and the row is dropped.
sql = """--sql
SELECT
    e.name AS Employee
FROM
    Employee e
WHERE
    e.salary > (
        SELECT
            m.salary
        FROM
            Employee m
        WHERE
            m.id = e.managerId
    );
"""
select(sql)

Unnamed: 0,Employee
0,Joe


### 182. Duplicate Emails (Easy)

Write a solution to report all the duplicate emails. Note that it's guaranteed that the email field is not NULL.


In [15]:
# Recreate the Person (id, email) table with one duplicated email.
sql_schema = """
Drop table If Exists Person
Create table If Not Exists Person (id int, email varchar(255))
insert into Person (id, email) values ('1', 'a@b.com')
insert into Person (id, email) values ('2', 'c@d.com')
insert into Person (id, email) values ('3', 'a@b.com')
"""
execute_formatted_sqlite(sql_schema)

In [16]:
# Count rows per email in the subquery, then keep the emails seen more than once.
sql = """--sql
SELECT
    e.email AS Email
FROM
    (
        SELECT
            p.email,
            COUNT(p.email) dupl
        FROM
            Person p
        GROUP BY
            p.email
    ) e
WHERE
    e.dupl > 1;
"""
select(sql)

Unnamed: 0,Email
0,a@b.com


### 183. Customers Who Never Order (Easy)

Write a solution to find all customers who never order anything.


In [17]:
# Recreate Customers (id, name) and Orders (id, customerId) with sample rows.
sql_schema = """
Drop table If Exists Customers
Create table If Not Exists Customers (id int, name varchar(255))
insert into Customers (id, name) values ('1', 'Joe')
insert into Customers (id, name) values ('2', 'Henry')
insert into Customers (id, name) values ('3', 'Sam')
insert into Customers (id, name) values ('4', 'Max')
Drop table If Exists Orders
Create table If Not Exists Orders (id int, customerId int)
insert into Orders (id, customerId) values ('1', '3')
insert into Orders (id, customerId) values ('2', '1')
"""
execute_formatted_sqlite(sql_schema)

In [18]:
# Customers whose id never appears in Orders.customerId.
# NULLs are filtered out of the subquery on purpose: `x NOT IN (..., NULL)`
# is never true, so a single order with a NULL customerId would otherwise
# make the whole query return no rows.
sql = """--sql
SELECT
    c.name AS Customers
FROM
    Customers c
WHERE
    c.id NOT IN (
        SELECT
            o.customerId
        FROM
            Orders o
        WHERE
            o.customerId IS NOT NULL
    );
"""
select(sql)

Unnamed: 0,Customers
0,Henry
1,Max


### 184. Department Highest Salary (Med.)

Write a solution to find employees who have the highest salary in each of the departments.


In [19]:
# Recreate Employee (id, name, salary, departmentId) and Department (id, name);
# two IT employees share the top salary.
sql_schema = """
Drop table If Exists Employee
Create table If Not Exists Employee (id int, name varchar(255), salary int, departmentId int)
insert into Employee (id, name, salary, departmentId) values ('1', 'Joe', '70000', '1')
insert into Employee (id, name, salary, departmentId) values ('2', 'Jim', '90000', '1')
insert into Employee (id, name, salary, departmentId) values ('3', 'Henry', '80000', '2')
insert into Employee (id, name, salary, departmentId) values ('4', 'Sam', '60000', '2')
insert into Employee (id, name, salary, departmentId) values ('5', 'Max', '90000', '1')
Drop table If Exists Department
Create table If Not Exists Department (id int, name varchar(255))
insert into Department (id, name) values ('1', 'IT')
insert into Department (id, name) values ('2', 'Sales')
"""
execute_formatted_sqlite(sql_schema)

In [20]:
# Rank salaries inside each department (highest first) with DENSE_RANK and
# keep rank 1, so every employee tied for the top salary is returned.
sql = """--sql
SELECT
    d.name AS Department,
    e.name AS Employee,
    e.salary AS Salary
FROM
    (
        SELECT
            e.*,
            DENSE_RANK() OVER (
                PARTITION BY
                    e.departmentId
                ORDER BY
                    e.salary DESC
            ) AS RANK
        FROM
            Employee e
    ) e
    LEFT JOIN Department d ON e.departmentId = d.id
WHERE
    e.rank = 1
ORDER BY
    d.name,
    e.name;
"""
select(sql)

Unnamed: 0,Department,Employee,Salary
0,IT,Jim,90000
1,IT,Max,90000
2,Sales,Henry,80000


### 185. Department Top Three Salaries (Hard)

A company's executives are interested in seeing who earns the most money in each of the company's departments. A high earner in a department is an employee who has a salary in the top three unique salaries for that department.

Write a solution to find the employees who are high earners in each of the departments.


In [21]:
# Recreate Employee (id, name, salary, departmentId) and Department (id, name)
# with sample rows for problem 185.
sql_schema = """
Drop table If Exists Employee
Create table If Not Exists Employee (id int, name varchar(255), salary int, departmentId int)
insert into Employee (id, name, salary, departmentId) values ('1', 'Joe', '85000', '1')
insert into Employee (id, name, salary, departmentId) values ('2', 'Henry', '80000', '2')
insert into Employee (id, name, salary, departmentId) values ('3', 'Sam', '60000', '2')
insert into Employee (id, name, salary, departmentId) values ('4', 'Max', '90000', '1')
insert into Employee (id, name, salary, departmentId) values ('5', 'Janet', '69000', '1')
insert into Employee (id, name, salary, departmentId) values ('6', 'Randy', '85000', '1')
insert into Employee (id, name, salary, departmentId) values ('7', 'Will', '70000', '1')
Drop table If Exists Department
Create table If Not Exists Department (id int, name varchar(255))
insert into Department (id, name) values ('1', 'IT')
insert into Department (id, name) values ('2', 'Sales')
"""
execute_formatted_sqlite(sql_schema)

In [22]:
# DENSE_RANK numbers the distinct salaries of each department from the top,
# so rank <= 3 keeps everyone earning one of the three highest distinct salaries.
sql = """--sql
SELECT
    d.name AS Department,
    e.name AS Employee,
    e.salary AS Salary
FROM
    (
        SELECT
            e.*,
            DENSE_RANK() OVER (
                PARTITION BY
                    e.departmentId
                ORDER BY
                    e.salary DESC
            ) AS RANK
        FROM
            Employee e
    ) e
    LEFT JOIN Department d ON e.departmentId = d.id
WHERE
    e.rank <= 3
ORDER BY
    d.name,
    e.salary DESC;
"""
select(sql)

Unnamed: 0,Department,Employee,Salary
0,IT,Max,90000
1,IT,Joe,85000
2,IT,Randy,85000
3,IT,Will,70000
4,Sales,Henry,80000
5,Sales,Sam,60000


### 196. Delete Duplicate Emails (Easy)

Write a solution to delete all duplicate emails, keeping only one unique email with the smallest id.

For SQL users, please note that you are supposed to write a DELETE statement and not a SELECT one.

For Pandas users, please note that you are supposed to modify Person in place.

After running your script, the answer shown is the Person table. The driver will first compile and run your piece of code and then show the Person table. The final order of the Person does not matter.


In [23]:
# Recreate the Person (Id, Email) table with one duplicated email.
sql_schema = """
Drop table If Exists Person
Create table If Not Exists Person (Id int, Email varchar(255))
insert into Person (id, email) values ('1', 'john@example.com')
insert into Person (id, email) values ('2', 'bob@example.com')
insert into Person (id, email) values ('3', 'john@example.com')
"""
execute_formatted_sqlite(sql_schema)

In [24]:
# Keep the row with the smallest id for every email and delete all others.
# This is a write, so it goes through crud() rather than select().
sql = """--sql
DELETE FROM Person
WHERE
    id NOT IN (
        SELECT
            MIN(p.id)
        FROM
            Person p
        GROUP BY
            p.email
    );
"""
crud(sql)

### 197. Rising Temperature (Easy)

Write a solution to find the id of every date whose temperature is higher than the temperature of the previous date (yesterday).


In [25]:
# Recreate the Weather (id, recordDate, temperature) table with sample rows.
sql_schema = """
Drop table If Exists Weather
Create table If Not Exists Weather (id int, recordDate date, temperature int)
insert into Weather (id, recordDate, temperature) values ('1', '2015-01-01', '10')
insert into Weather (id, recordDate, temperature) values ('2', '2015-01-02', '25')
insert into Weather (id, recordDate, temperature) values ('3', '2015-01-03', '20')
insert into Weather (id, recordDate, temperature) values ('4', '2015-01-04', '30')
"""
execute_formatted_sqlite(sql_schema)

In [26]:
# For PostgreSQL: `ON w1.recordDate = w2.recordDate + INTERVAL '1 day'`
# For MySQL: `ON w1.recordDate = DATE_ADD(w2.recordDate, INTERVAL 1 DAY)`
# Here the self-join pairs each day (w1) with the day before it (w2)
# using SQLite's DATE(..., '+1 day').
sql = """--sql
SELECT
    w1.id
FROM
    Weather w1
    INNER JOIN Weather w2 ON w1.recordDate = DATE (w2.recordDate, '+1 day')
WHERE
    w1.temperature > w2.temperature
ORDER BY
    w1.id;
"""
select(sql)

Unnamed: 0,id
0,2
1,4


### 262. Trips and Users (Hard)

The cancellation rate is computed by dividing the number of canceled (by client or driver) requests with unbanned users by the total number of requests with unbanned users on that day.

Write a solution to find the cancellation rate of requests with unbanned users (both client and driver must not be banned) each day between "2013-10-01" and "2013-10-03" with at least one trip. Round Cancellation Rate to two decimal points.


In [27]:
# ENUM is an enumerated type with a fixed list of STRING values stored in the DB schema.
# SQLite does not support ENUM, but the same restriction can be expressed with CHECK.
# Replace ENUM('completed', 'cancelled_by_driver', 'cancelled_by_client') with TEXT plus CHECK:
# TEXT CHECK(status IN ('completed', 'cancelled_by_driver', 'cancelled_by_client'))
sql_schema = """
Drop table If Exists Trips
Create table If Not Exists Trips (id int, client_id int, driver_id int, city_id int, status text CHECK(status In ('completed', 'cancelled_by_driver', 'cancelled_by_client')), request_at varchar(50))
insert into Trips (id, client_id, driver_id, city_id, status, request_at) values ('1', '1', '10', '1', 'completed', '2013-10-01')
insert into Trips (id, client_id, driver_id, city_id, status, request_at) values ('2', '2', '11', '1', 'cancelled_by_driver', '2013-10-01')
insert into Trips (id, client_id, driver_id, city_id, status, request_at) values ('3', '3', '12', '6', 'completed', '2013-10-01')
insert into Trips (id, client_id, driver_id, city_id, status, request_at) values ('4', '4', '13', '6', 'cancelled_by_client', '2013-10-01')
insert into Trips (id, client_id, driver_id, city_id, status, request_at) values ('5', '1', '10', '1', 'completed', '2013-10-02')
insert into Trips (id, client_id, driver_id, city_id, status, request_at) values ('6', '2', '11', '6', 'completed', '2013-10-02')
insert into Trips (id, client_id, driver_id, city_id, status, request_at) values ('7', '3', '12', '6', 'completed', '2013-10-02')
insert into Trips (id, client_id, driver_id, city_id, status, request_at) values ('8', '2', '12', '12', 'completed', '2013-10-03')
insert into Trips (id, client_id, driver_id, city_id, status, request_at) values ('9', '3', '10', '12', 'completed', '2013-10-03')
insert into Trips (id, client_id, driver_id, city_id, status, request_at) values ('10', '4', '13', '12', 'cancelled_by_driver', '2013-10-03')
Drop table If Exists Users
Create table If Not Exists Users (users_id int, banned varchar(50), role text CHECK(role In ('client', 'driver', 'partner')))
insert into Users (users_id, banned, role) values ('1', 'No', 'client')
insert into Users (users_id, banned, role) values ('2', 'Yes', 'client')
insert into Users (users_id, banned, role) values ('3', 'No', 'client')
insert into Users (users_id, banned, role) values ('4', 'No', 'client')
insert into Users (users_id, banned, role) values ('10', 'No', 'driver')
insert into Users (users_id, banned, role) values ('11', 'No', 'driver')
insert into Users (users_id, banned, role) values ('12', 'No', 'driver')
insert into Users (users_id, banned, role) values ('13', 'No', 'driver')
"""
execute_formatted_sqlite(sql_schema)

In [28]:
# In SQLite 0 / 2 = 0 and 1 / 2 = 0 when both operands are INTEGER, so `* 1.0` is added explicitly.
# P.S. Single quotes (') cannot be used to name columns: they are interpreted as string literals.
# For compatibility with other DBMSs use double quotes (").
# In PostgreSQL SUM(...) cannot be applied directly to a BOOLEAN (CASE WHEN is needed), and a
# full subquery is required instead of `NOT IN blocked_user`.
sql = """--sql
WITH
    blocked_user AS (
        SELECT
            u.users_id
        FROM
            Users u
        WHERE
            u.banned = 'Yes'
    )
SELECT
    t.request_at AS DAY,
    ROUND(SUM(t.status != 'completed') * 1.0 / COUNT(*), 2) AS "Cancellation Rate"
FROM
    Trips t
WHERE
    t.client_id NOT IN blocked_user
    AND t.driver_id NOT IN blocked_user
    AND t.request_at BETWEEN '2013-10-01' AND '2013-10-03'
GROUP BY
    t.request_at
ORDER BY
    DAY;
"""
select(sql)

Unnamed: 0,DAY,Cancellation Rate
0,2013-10-01,0.33
1,2013-10-02,0.0
2,2013-10-03,0.5


### 511. Game Play Analysis I (Easy)

Write a solution to find the first login date for each player.


In [29]:
# Recreate the Activity (player_id, device_id, event_date, games_played) table for problem 511.
sql_schema = """
Drop table If Exists Activity
Create table If Not Exists Activity (player_id int, device_id int, event_date date, games_played int)
insert into Activity (player_id, device_id, event_date, games_played) values ('1', '2', '2016-03-01', '5')
insert into Activity (player_id, device_id, event_date, games_played) values ('1', '2', '2016-05-02', '6')
insert into Activity (player_id, device_id, event_date, games_played) values ('2', '3', '2017-06-25', '1')
insert into Activity (player_id, device_id, event_date, games_played) values ('3', '1', '2016-03-02', '0')
insert into Activity (player_id, device_id, event_date, games_played) values ('3', '4', '2018-07-03', '5')
"""
execute_formatted_sqlite(sql_schema)

In [30]:
# The earliest event_date per player is that player's first login.
sql = """--sql
SELECT
    a.player_id,
    MIN(a.event_date) AS first_login
FROM
    Activity a
GROUP BY
    a.player_id;
"""
select(sql)

Unnamed: 0,player_id,first_login
0,1,2016-03-01
1,2,2017-06-25
2,3,2016-03-02


### 550. Game Play Analysis IV (Med.)

Write a solution to report the fraction of players that logged in again on the day after the day they first logged in, rounded to 2 decimal places. In other words, you need to determine the number of players who logged in on the day immediately following their initial login, and divide it by the number of total players.


In [31]:
# Recreate the Activity table for problem 550; player 1 logs in again the day after the first login.
sql_schema = """
Drop table If Exists Activity
Create table If Not Exists Activity (player_id int, device_id int, event_date date, games_played int)
insert into Activity (player_id, device_id, event_date, games_played) values ('1', '2', '2016-03-01', '5')
insert into Activity (player_id, device_id, event_date, games_played) values ('1', '2', '2016-03-02', '6')
insert into Activity (player_id, device_id, event_date, games_played) values ('2', '3', '2017-06-25', '1')
insert into Activity (player_id, device_id, event_date, games_played) values ('3', '1', '2016-03-02', '0')
insert into Activity (player_id, device_id, event_date, games_played) values ('3', '4', '2018-07-03', '5')
"""
execute_formatted_sqlite(sql_schema)

In [32]:
# In PostgreSQL: a.event_date - INTERVAL '1 day'
# Instead of CAST(COUNT(r.player_id) AS real), multiplying by 1.0 is enough.
# first_login: earliest date per player; reentry_next_day: flags players that
# have an Activity row exactly one day after that first login.
sql = """--sql
WITH
    first_login AS (
        SELECT
            a.player_id,
            MIN(a.event_date) AS first_login
        FROM
            Activity a
        GROUP BY
            a.player_id
    ),
    reentry_next_day AS (
        SELECT
            f.*,
            CASE
                WHEN a.event_date IS NOT NULL THEN 1
                ELSE 0
            END AS reentry_login
        FROM
            first_login f
            LEFT JOIN Activity a ON f.player_id = a.player_id
            AND f.first_login = DATE (a.event_date, '-1 day')
    )
SELECT
    ROUND(
        1.0 * SUM(r.reentry_login) / COUNT(r.player_id),
        2
    ) AS fraction
FROM
    reentry_next_day r;
"""
select(sql)

Unnamed: 0,fraction
0,0.33


### 570. Managers with at Least 5 Direct Reports (Med.)

Write a solution to find managers with at least five direct reports.


In [33]:
# Recreate the Employee (id, name, department, managerId) table; employee 101 has five direct reports.
sql_schema = """
Drop table If Exists Employee
Create table If Not Exists Employee (id int, name varchar(255), department varchar(255), managerId int)
insert into Employee (id, name, department, managerId) values ('101', 'John', 'A', NULL)
insert into Employee (id, name, department, managerId) values ('102', 'Dan', 'A', '101')
insert into Employee (id, name, department, managerId) values ('103', 'James', 'A', '101')
insert into Employee (id, name, department, managerId) values ('104', 'Amy', 'A', '101')
insert into Employee (id, name, department, managerId) values ('105', 'Anne', 'A', '101')
insert into Employee (id, name, department, managerId) values ('106', 'Ron', 'B', '101')
"""
execute_formatted_sqlite(sql_schema)

In [34]:
# Author's note: the JOIN variant runs faster than the WHERE one (WHERE e.id IN (SELECT ...)),
# since the SQL optimizer of a number of DBMSs rewrites the latter into EXISTS or JOIN anyway.
# COUNT(e.managerId) can be moved from SELECT into HAVING, leaving only e.managerId in SELECT
# (a ready-made list of ids without an extra subquery).
sql = """--sql
SELECT
    e.name
FROM
    Employee e
    INNER JOIN (
        SELECT
            e.managerId
        FROM
            Employee e
        GROUP BY
            e.managerId
        HAVING
            COUNT(e.managerId) >= 5
    ) m ON e.id = m.managerId;
"""
select(sql)

Unnamed: 0,name
0,John


### 577. Employee Bonus (Easy)

Write a solution to report the name and bonus amount of each employee with a bonus less than 1000.


In [35]:
# Recreate Employee (empId, name, supervisor, salary) and Bonus (empId, bonus); not every employee has a bonus row.
sql_schema = """
Drop table If Exists Employee
Create table If Not Exists Employee (empId int, name varchar(255), supervisor int, salary int)
insert into Employee (empId, name, supervisor, salary) values ('3', 'Brad', NULL, '4000')
insert into Employee (empId, name, supervisor, salary) values ('1', 'John', '3', '1000')
insert into Employee (empId, name, supervisor, salary) values ('2', 'Dan', '3', '2000')
insert into Employee (empId, name, supervisor, salary) values ('4', 'Thomas', '3', '4000')
Drop table If Exists Bonus
Create table If Not Exists Bonus (empId int, bonus int)
insert into Bonus (empId, bonus) values ('2', '500')
insert into Bonus (empId, bonus) values ('4', '2000')
"""
execute_formatted_sqlite(sql_schema)

In [36]:
# An employee without a bonus row is treated as having a bonus of 0, so such employees are included
# in the result, i.e. the filter is equivalent to WHERE COALESCE(b.bonus, 0) < 1000.
sql = """--sql
SELECT
    e.name,
    b.bonus
FROM
    Employee e
    LEFT JOIN Bonus b ON e.empId = b.empId
WHERE
    b.bonus < 1000
    OR b.bonus IS NULL
ORDER BY
    b.bonus,
    e.name;
"""
select(sql)

Unnamed: 0,name,bonus
0,Brad,
1,John,
2,Dan,500.0


### 584. Find Customer Referee (Easy)

Find the names of the customers that are either:

1. referred by any customer with id != 2.
2. not referred by any customer.


In [37]:
# Recreate the Customer (id, name, referee_id) table; referee_id is NULL for customers nobody referred.
sql_schema = """
Drop table If Exists Customer
Create table If Not Exists Customer (id int, name varchar(25), referee_id int)
insert into Customer (id, name, referee_id) values ('1', 'Will', NULL)
insert into Customer (id, name, referee_id) values ('2', 'Jane', NULL)
insert into Customer (id, name, referee_id) values ('3', 'Alex', '2')
insert into Customer (id, name, referee_id) values ('4', 'Bill', NULL)
insert into Customer (id, name, referee_id) values ('5', 'Zack', '1')
insert into Customer (id, name, referee_id) values ('6', 'Mark', '2')
"""
execute_formatted_sqlite(sql_schema)

In [38]:
# `referee_id != 2` is not true for NULL, so customers without a referee
# need the explicit IS NULL branch.
sql = """--sql
SELECT
    c.name
FROM
    Customer c
WHERE
    c.referee_id != 2
    OR c.referee_id IS NULL
ORDER BY
    c.name;
"""
select(sql)

Unnamed: 0,name
0,Bill
1,Jane
2,Will
3,Zack


### 585. Investments in 2016 (Med.)

Write a solution to report the sum of all total investment values in 2016 tiv_2016, for all policyholders who:

- have the same tiv_2015 value as one or more other policyholders, and
- are not located in the same city as any other policyholder (i.e., the (lat, lon) attribute pairs must be unique).

Round tiv_2016 to two decimal places.


In [39]:
# Recreate the Insurance (pid, tiv_2015, tiv_2016, lat, lon) table with sample rows.
sql_schema = """
Drop table If Exists Insurance
Create Table If Not Exists Insurance (pid int, tiv_2015 float, tiv_2016 float, lat float, lon float)
insert into Insurance (pid, tiv_2015, tiv_2016, lat, lon) values ('1', '10', '5', '10', '10')
insert into Insurance (pid, tiv_2015, tiv_2016, lat, lon) values ('2', '20', '20', '20', '20')
insert into Insurance (pid, tiv_2015, tiv_2016, lat, lon) values ('3', '10', '30', '20', '20')
insert into Insurance (pid, tiv_2015, tiv_2016, lat, lon) values ('4', '10', '40', '40', '40')
"""
execute_formatted_sqlite(sql_schema)

In [40]:
# Again, in PostgreSQL tiv_2016 would have to be cast to numeric, because SUM() returns
# double precision by default while ROUND() with a precision works only on numeric:
# ROUND(SUM(i.tiv_2016)::numeric, 2).
# Author's note: JOIN is faster than IN, so it is preferred over WHERE ... IN here.
# INNER JOIN is the logical counterpart of WHERE ... IN.
# i1 keeps tiv_2015 values shared by more than one row; i2 keeps (lat, lon) pairs that occur once.
sql = """--sql
SELECT
    ROUND(SUM(i.tiv_2016), 2) AS tiv_2016
FROM
    Insurance i
    INNER JOIN (
        SELECT
            tiv_2015
        FROM
            Insurance
        GROUP BY
            tiv_2015
        HAVING
            COUNT(*) > 1
    ) i1 ON i.tiv_2015 = i1.tiv_2015
    INNER JOIN (
        SELECT
            i2.lat,
            i2.lon
        FROM
            Insurance i2
        GROUP BY
            i2.lat,
            i2.lon
        HAVING
            COUNT(*) = 1
    ) i2 ON i.lat = i2.lat
    AND i.lon = i2.lon;
"""
select(sql)

Unnamed: 0,tiv_2016
0,45.0


### 586. Customer Placing the Largest Number of Orders (Easy)

Write a solution to find the customer_number for the customer who has placed the largest number of orders.

The test cases are generated so that exactly one customer will have placed more orders than any other customer.


In [41]:
# Recreate the orders (order_number, customer_number) table with sample rows.
sql_schema = """
Drop table If Exists orders
Create table If Not Exists orders (order_number int, customer_number int)
insert into orders (order_number, customer_number) values ('1', '1')
insert into orders (order_number, customer_number) values ('2', '2')
insert into orders (order_number, customer_number) values ('3', '3')
insert into orders (order_number, customer_number) values ('4', '3')
"""
execute_formatted_sqlite(sql_schema)

In [42]:
# Group orders by customer, sort the groups by order count (largest first)
# and keep only the first one.
sql = """--sql
SELECT
    o.customer_number
FROM
    orders o
GROUP BY
    o.customer_number
ORDER BY
    COUNT(o.order_number) DESC
LIMIT
    1;
"""
select(sql)

Unnamed: 0,customer_number
0,3


### 595. Big Countries (Easy)

A country is big if:

- it has an area of at least three million (i.e., 3000000 km2), or
- it has a population of at least twenty-five million (i.e., 25000000).

Write a solution to find the name, population, and area of the big countries.


In [43]:
# Recreate the World (name, continent, area, population, gdp) table with sample rows.
sql_schema = """
Drop table If Exists World
Create table If Not Exists World (name varchar(255), continent varchar(255), area int, population int, gdp bigint)
insert into World (name, continent, area, population, gdp) values ('Afghanistan', 'Asia', '652230', '25500100', '20343000000')
insert into World (name, continent, area, population, gdp) values ('Albania', 'Europe', '28748', '2831741', '12960000000')
insert into World (name, continent, area, population, gdp) values ('Algeria', 'Africa', '2381741', '37100000', '188681000000')
insert into World (name, continent, area, population, gdp) values ('Andorra', 'Europe', '468', '78115', '3712000000')
insert into World (name, continent, area, population, gdp) values ('Angola', 'Africa', '1246700', '20609294', '100990000000')
"""
execute_formatted_sqlite(sql_schema)

In [44]:
# A country is selected when either threshold is met: area or population.
sql = """--sql
SELECT
    w.name,
    w.population,
    w.area
FROM
    World w
WHERE
    w.area >= 3000000
    OR w.population >= 25000000
ORDER BY
    w.name;
"""
select(sql)

Unnamed: 0,name,population,area
0,Afghanistan,25500100,652230
1,Algeria,37100000,2381741


### 596. Classes With at Least 5 Students (Easy)

Write a solution to find all the classes that have at least five students.


In [45]:
# Recreate the Courses (student, class) table with sample rows.
sql_schema = """
Drop table If Exists Courses
Create table If Not Exists Courses (student varchar(255), class varchar(255))
insert into Courses (student, class) values ('A', 'Math')
insert into Courses (student, class) values ('B', 'English')
insert into Courses (student, class) values ('C', 'Math')
insert into Courses (student, class) values ('D', 'Biology')
insert into Courses (student, class) values ('E', 'Math')
insert into Courses (student, class) values ('F', 'Computer')
insert into Courses (student, class) values ('G', 'Math')
insert into Courses (student, class) values ('H', 'Math')
insert into Courses (student, class) values ('I', 'Math')
"""
execute_formatted_sqlite(sql_schema)

In [46]:
# One group per class; HAVING keeps the groups with at least five rows.
# NOTE(review): COUNT(*) counts rows, so this assumes each (student, class)
# pair appears at most once in Courses - confirm against the task constraints.
sql = """--sql
SELECT
    c.class
FROM
    Courses c
GROUP BY
    c.class
HAVING
    COUNT(*) >= 5
ORDER BY
    c.class;
"""
select(sql)

Unnamed: 0,class
0,Math


### 601. Human Traffic of Stadium (Hard)

Write a solution to display the records with three or more rows with consecutive id's, and the number of people is greater than or equal to 100 for each.

Return the result table ordered by visit_date in ascending order.


In [47]:
sql_schema = """
Drop table If Exists Stadium
Create table If Not Exists Stadium (id int, visit_date DATE NULL, people int)
insert into Stadium (id, visit_date, people) values ('1', '2017-01-01', '10')
insert into Stadium (id, visit_date, people) values ('2', '2017-01-02', '109')
insert into Stadium (id, visit_date, people) values ('3', '2017-01-03', '150')
insert into Stadium (id, visit_date, people) values ('4', '2017-01-04', '99')
insert into Stadium (id, visit_date, people) values ('5', '2017-01-05', '145')
insert into Stadium (id, visit_date, people) values ('6', '2017-01-06', '1455')
insert into Stadium (id, visit_date, people) values ('7', '2017-01-07', '199')
insert into Stadium (id, visit_date, people) values ('8', '2017-01-09', '188')
"""
execute_formatted_sqlite(sql_schema)

In [48]:
# "Gaps and islands" approach:
# 1. The CTE keeps only rows with people >= 100 and computes
#    rnk = id - ROW_NUMBER() (numbered in id order). Inside a run of
#    consecutive ids both terms grow by 1 per row, so rnk stays constant;
#    a gap in the ids changes it. rnk therefore labels each streak.
# 2. The outer query keeps the rows whose streak label occurs at least
#    three times, i.e. streaks of three or more consecutive ids.
# Consecutiveness is judged by id, not by visit_date, which is why id 8
# (2017-01-09) belongs to the 5-6-7-8 streak.
sql = """--sql
WITH
    stadium_rank AS (
        SELECT
            s.*,
            s.id - ROW_NUMBER() OVER (
                ORDER BY
                    s.id
            ) AS rnk
        FROM
            Stadium s
        WHERE
            s.people >= 100
    )
SELECT
    sr.id,
    sr.visit_date,
    sr.people
FROM
    stadium_rank sr
WHERE
    sr.rnk IN (
        SELECT
            rnk
        FROM
            stadium_rank
        GROUP BY
            rnk
        HAVING
            COUNT(*) >= 3
    )
ORDER BY
    sr.visit_date;
"""
select(sql)

Unnamed: 0,id,visit_date,people
0,5,2017-01-05,145
1,6,2017-01-06,1455
2,7,2017-01-07,199
3,8,2017-01-09,188


### 602. Friend Requests II: Who Has the Most Friends (Med.)

Write a solution to find the person who has the most friends and the number of friends they have.

The test cases are generated so that only one person has the most friends.


In [49]:
sql_schema = """
Drop table If Exists RequestAccepted
Create table If Not Exists RequestAccepted (requester_id int not null, accepter_id int null, accept_date date null)
insert into RequestAccepted (requester_id, accepter_id, accept_date) values ('1', '2', '2016/06/03')
insert into RequestAccepted (requester_id, accepter_id, accept_date) values ('1', '3', '2016/06/08')
insert into RequestAccepted (requester_id, accepter_id, accept_date) values ('2', '3', '2016/06/08')
insert into RequestAccepted (requester_id, accepter_id, accept_date) values ('3', '4', '2016/06/09')
"""
execute_formatted_sqlite(sql_schema)

In [50]:
# Every accepted request adds one friend to BOTH participants, so the
# requester ids and the accepter ids are stacked into a single column.
# UNION ALL (not UNION) is required: duplicates must be kept, because each
# occurrence of an id is one friendship to count.
# The task guarantees a single winner, so ORDER BY num DESC LIMIT 1 suffices.
sql = """--sql
SELECT
    u.id,
    COUNT(*) AS num
FROM
    (
        SELECT
            r.requester_id AS id
        FROM
            RequestAccepted r
        UNION ALL
        SELECT
            r.accepter_id AS id
        FROM
            RequestAccepted r
    ) u
GROUP BY
    u.id
ORDER BY
    num DESC
LIMIT
    1;
"""
select(sql)

Unnamed: 0,id,num
0,3,3


### 607. Sales Person (Easy)

Write a solution to find the names of all the salespersons who did not have any orders related to the company with the name "RED".


In [51]:
sql_schema = """
Drop table If Exists SalesPerson
Create table If Not Exists SalesPerson (sales_id int, name varchar(255), salary int, commission_rate int, hire_date date)
insert into SalesPerson (sales_id, name, salary, commission_rate, hire_date) values ('1', 'John', '100000', '6', '4/1/2006')
insert into SalesPerson (sales_id, name, salary, commission_rate, hire_date) values ('2', 'Amy', '12000', '5', '5/1/2010')
insert into SalesPerson (sales_id, name, salary, commission_rate, hire_date) values ('3', 'Mark', '65000', '12', '12/25/2008')
insert into SalesPerson (sales_id, name, salary, commission_rate, hire_date) values ('4', 'Pam', '25000', '25', '1/1/2005')
insert into SalesPerson (sales_id, name, salary, commission_rate, hire_date) values ('5', 'Alex', '5000', '10', '2/3/2007')
Drop table If Exists Company
Create table If Not Exists Company (com_id int, name varchar(255), city varchar(255))
insert into Company (com_id, name, city) values ('1', 'RED', 'Boston')
insert into Company (com_id, name, city) values ('2', 'ORANGE', 'New York')
insert into Company (com_id, name, city) values ('3', 'YELLOW', 'Boston')
insert into Company (com_id, name, city) values ('4', 'GREEN', 'Austin')
Drop table If Exists Orders
Create table If Not Exists Orders (order_id int, order_date date, com_id int, sales_id int, amount int)
insert into Orders (order_id, order_date, com_id, sales_id, amount) values ('1', '1/1/2014', '3', '4', '10000')
insert into Orders (order_id, order_date, com_id, sales_id, amount) values ('2', '2/1/2014', '4', '5', '5000')
insert into Orders (order_id, order_date, com_id, sales_id, amount) values ('3', '3/1/2014', '1', '1', '50000')
insert into Orders (order_id, order_date, com_id, sales_id, amount) values ('4', '4/1/2014', '1', '4', '25000')
"""
execute_formatted_sqlite(sql_schema)

In [52]:
# Anti-join: keep a salesperson only if NO order exists that both belongs
# to them (correlated on sales_id) and was placed with the company named
# 'RED'. Salespersons without any orders at all also pass the filter.
sql = """--sql
SELECT
    s.name
FROM
    SalesPerson s
WHERE
    NOT EXISTS (
        SELECT
            1
        FROM
            Orders o
            INNER JOIN Company c ON o.com_id = c.com_id
        WHERE
            c.name = 'RED'
            AND o.sales_id = s.sales_id
    )
ORDER BY
    s.name;
"""
select(sql)

Unnamed: 0,name
0,Alex
1,Amy
2,Mark


### 608. Tree Node (Med.)

Each node in the tree can be one of three types:

- "Leaf": if the node is a leaf node.
- "Root": if the node is the root of the tree.
- "Inner": If the node is neither a leaf node nor a root node.

Write a solution to report the type of each node in the tree.


In [53]:
sql_schema = """
Drop table If Exists Tree
Create table If Not Exists Tree (id int, p_id int)
insert into Tree (id, p_id) values ('1', NULL)
insert into Tree (id, p_id) values ('2', '1')
insert into Tree (id, p_id) values ('3', '1')
insert into Tree (id, p_id) values ('4', '2')
insert into Tree (id, p_id) values ('5', '2')
"""
execute_formatted_sqlite(sql_schema)

In [54]:
# Classify every node of Tree:
#   - no parent (p_id IS NULL)            -> 'Root'
#   - some other row names it as a parent -> 'Inner'
#   - otherwise                           -> 'Leaf'
# The branch order matters: the root usually has children too, so the
# NULL-parent test must come before the "has children" test.
# The result column is aliased as lowercase `type`, as the task names it
# (the SQL formatter had split the alias onto its own line and upper-cased it).
sql = """--sql
SELECT
    t.id,
    CASE
        WHEN t.p_id IS NULL THEN 'Root'
        WHEN EXISTS (
            SELECT
                1
            FROM
                Tree c
            WHERE
                c.p_id = t.id
        ) THEN 'Inner'
        ELSE 'Leaf'
    END AS type
FROM
    Tree t;
"""
select(sql)

Unnamed: 0,id,TYPE
0,1,Root
1,2,Inner
2,3,Leaf
3,4,Leaf
4,5,Leaf


### 610. Triangle Judgement (Easy)

Report for every three line segments whether they can form a triangle.


In [55]:
sql_schema = """
Drop table If Exists Triangle
Create table If Not Exists Triangle (x int, y int, z int)
insert into Triangle (x, y, z) values ('13', '15', '30')
insert into Triangle (x, y, z) values ('10', '20', '15')
"""
execute_formatted_sqlite(sql_schema)

In [56]:
# Triangle inequality: three segments form a triangle only if EACH side is
# strictly shorter than the sum of the other two, so all three comparisons
# are required and the comparison is strict (<, not <=).
sql = """--sql
SELECT
    t.x,
    t.y,
    t.z,
    CASE
        WHEN t.x < t.y + t.z
        AND t.y < t.x + t.z
        AND t.z < t.x + t.y THEN 'Yes'
        ELSE 'No'
    END AS triangle
FROM
    Triangle t;
"""
select(sql)

Unnamed: 0,x,y,z,triangle
0,13,15,30,No
1,10,20,15,Yes


### 619. Biggest Single Number (Easy)

A single number is a number that appeared only once in the MyNumbers table.

Find the largest single number. If there is no single number, report null.


In [57]:
sql_schema = """
Drop table If Exists MyNumbers
Create table If Not Exists MyNumbers (num int)
insert into MyNumbers (num) values ('8')
insert into MyNumbers (num) values ('8')
insert into MyNumbers (num) values ('3')
insert into MyNumbers (num) values ('3')
insert into MyNumbers (num) values ('1')
insert into MyNumbers (num) values ('4')
insert into MyNumbers (num) values ('5')
insert into MyNumbers (num) values ('6')
"""
execute_formatted_sqlite(sql_schema)

In [58]:
# An aggregate over a subquery is needed here: when there are no unique values,
# MAX() over the empty set still yields one row containing NULL. Simply picking
# a row (e.g. ORDER BY ... DESC LIMIT 1) would return an empty result set
# instead of the NULL value the task asks for.
sql = """--sql
SELECT
    MAX(m.num) AS num
FROM
    (
        SELECT
            n.num
        FROM
            MyNumbers n
        GROUP BY
            n.num
        HAVING
            COUNT(*) = 1
    ) m;
"""
select(sql)

Unnamed: 0,num
0,6


### 620. Not Boring Movies (Easy)

Write a solution to report the movies with an odd-numbered ID and a description that is not "boring".

Return the result table ordered by rating in descending order.


In [59]:
sql_schema = """
Drop table If Exists cinema
Create table If Not Exists cinema (id int, movie varchar(255), description varchar(255), rating float(2, 1))
insert into cinema (id, movie, description, rating) values ('1', 'War', 'great 3D', '8.9')
insert into cinema (id, movie, description, rating) values ('2', 'Science', 'fiction', '8.5')
insert into cinema (id, movie, description, rating) values ('3', 'irish', 'boring', '6.2')
insert into cinema (id, movie, description, rating) values ('4', 'Ice song', 'Fantacy', '8.6')
insert into cinema (id, movie, description, rating) values ('5', 'House card', 'Interesting', '9.1')
"""
execute_formatted_sqlite(sql_schema)

In [60]:
# Odd ids (id % 2 != 0) whose description is not exactly 'boring',
# best-rated first as the task requires.
sql = """--sql
SELECT
    c.*
FROM
    cinema c
WHERE
    c.id % 2 != 0
    AND c.description != 'boring'
ORDER BY
    c.rating DESC;
"""
select(sql)

Unnamed: 0,id,movie,description,rating
0,5,House card,Interesting,9.1
1,1,War,great 3D,8.9


### 626. Exchange Seats (Med.)

Write a solution to swap the seat id of every two consecutive students. If the number of students is odd, the id of the last student is not swapped.

Return the result table ordered by id in ascending order.


In [61]:
sql_schema = """
Drop table If Exists Seat
Create table If Not Exists Seat (id int, student varchar(255))
insert into Seat (id, student) values ('1', 'Abbot')
insert into Seat (id, student) values ('2', 'Doris')
insert into Seat (id, student) values ('3', 'Emerson')
insert into Seat (id, student) values ('4', 'Green')
insert into Seat (id, student) values ('5', 'Jeames')
"""
execute_formatted_sqlite(sql_schema)

In [62]:
# The CASE must compare against a scalar subquery for MAX(id), so that the true
# table-wide maximum is fetched and used in the comparison. Writing
# `AND s.id != MAX(s.id)` instead would, without GROUP BY, turn the whole query
# into an aggregate one: the result collapses to a single row (the one with the
# maximum id).
# Logic: an odd id moves to id + 1 unless it is the last seat; an even id moves
# to id - 1; the remaining case (odd and last) keeps its id.
sql = """--sql
SELECT
    CASE
        WHEN s.id % 2 = 1
        AND s.id != (
            SELECT
                MAX(id)
            FROM
                Seat
        ) THEN s.id + 1
        WHEN s.id % 2 = 0 THEN s.id - 1
        ELSE s.id
    END AS id,
    s.student
FROM
    Seat s
ORDER BY
    id;
"""
select(sql)

Unnamed: 0,id,student
0,1,Doris
1,2,Abbot
2,3,Green
3,4,Emerson
4,5,Jeames


### 627. Swap Salary (Easy)

Write a solution to swap all 'f' and 'm' values (i.e., change all 'f' values to 'm' and vice versa) with a single update statement and no intermediate temporary tables.

Note that you must write a single update statement, do not write any select statement for this problem.


In [63]:
sql_schema = """
Drop table If Exists Salary
Create table If Not Exists Salary (id int, name varchar(100), sex char(1), salary int)
insert into Salary (id, name, sex, salary) values ('1', 'A', 'm', '2500')
insert into Salary (id, name, sex, salary) values ('2', 'B', 'f', '1500')
insert into Salary (id, name, sex, salary) values ('3', 'C', 'm', '5500')
insert into Salary (id, name, sex, salary) values ('4', 'D', 'f', '500')
"""
execute_formatted_sqlite(sql_schema)

In [64]:
# Swap 'f' <-> 'm' in one UPDATE, as the task requires (no SELECT, no temp table).
# Both directions are spelled out and every other value is left untouched:
# the local Salary.sex column is a plain char(1) with no CHECK constraint, so
# a bare `ELSE 'f'` would silently overwrite NULLs or unexpected values with 'f'.
sql = """--sql
UPDATE Salary
SET
    sex = CASE sex
        WHEN 'f' THEN 'm'
        WHEN 'm' THEN 'f'
        ELSE sex
    END;
"""
crud(sql)

### 1045. Customers Who Bought All Products (Med.)

Write a solution to report the customer ids from the Customer table that bought all the products in the Product table.


In [65]:
sql_schema = """
Drop table If Exists Customer
Create table If Not Exists Customer (customer_id int, product_key int)
insert into Customer (customer_id, product_key) values ('1', '5')
insert into Customer (customer_id, product_key) values ('2', '6')
insert into Customer (customer_id, product_key) values ('3', '5')
insert into Customer (customer_id, product_key) values ('3', '6')
insert into Customer (customer_id, product_key) values ('1', '6')
Drop table If Exists Product
Create table Product (product_key int)
insert into Product (product_key) values ('5')
insert into Product (product_key) values ('6')
"""
execute_formatted_sqlite(sql_schema)

In [66]:
# A customer bought everything when the number of DISTINCT products they
# bought equals the total number of rows in Product. DISTINCT is needed
# because the same product may be bought more than once.
# NOTE(review): assumes every Customer.product_key exists in Product and that
# Product.product_key is unique - confirm against the task constraints.
sql = """--sql
SELECT
    c.customer_id
FROM
    Customer c
GROUP BY
    c.customer_id
HAVING
    COUNT(DISTINCT c.product_key) = (
        SELECT
            COUNT(*)
        FROM
            Product
    );
"""
select(sql)

Unnamed: 0,customer_id
0,1
1,3


### 1050. Actors and Directors Who Cooperated At Least Three Times (Easy)

Write a solution to find all the pairs (actor_id, director_id) where the actor has cooperated with the director at least three times.


In [67]:
sql_schema = """
Drop table If Exists ActorDirector
Create table If Not Exists ActorDirector (actor_id int, director_id int, timestamp int)
insert into ActorDirector (actor_id, director_id, timestamp) values ('1', '1', '0')
insert into ActorDirector (actor_id, director_id, timestamp) values ('1', '1', '1')
insert into ActorDirector (actor_id, director_id, timestamp) values ('1', '1', '2')
insert into ActorDirector (actor_id, director_id, timestamp) values ('1', '2', '3')
insert into ActorDirector (actor_id, director_id, timestamp) values ('1', '2', '4')
insert into ActorDirector (actor_id, director_id, timestamp) values ('2', '1', '5')
insert into ActorDirector (actor_id, director_id, timestamp) values ('2', '1', '6')
"""
execute_formatted_sqlite(sql_schema)

In [68]:
# Group by the (actor, director) pair - each row of ActorDirector is one
# cooperation - and keep the pairs that occur at least three times.
sql = """--sql
SELECT
    ad.actor_id,
    ad.director_id
FROM
    ActorDirector ad
GROUP BY
    ad.actor_id,
    ad.director_id
HAVING
    COUNT(*) >= 3;
"""
select(sql)

Unnamed: 0,actor_id,director_id
0,1,1


### 1068. Product Sales Analysis I (Easy)

Write a solution to report the product_name, year, and price for each sale_id in the Sales table.


In [69]:
sql_schema = """
Drop table If Exists Sales
Create table If Not Exists Sales (sale_id int, product_id int, year int, quantity int, price int)
insert into Sales (sale_id, product_id, year, quantity, price) values ('1', '100', '2008', '10', '5000')
insert into Sales (sale_id, product_id, year, quantity, price) values ('2', '100', '2009', '12', '5000')
insert into Sales (sale_id, product_id, year, quantity, price) values ('7', '200', '2011', '15', '9000')
Drop table If Exists Product
Create table If Not Exists Product (product_id int, product_name varchar(10))
insert into Product (product_id, product_name) values ('100', 'Nokia')
insert into Product (product_id, product_name) values ('200', 'Apple')
insert into Product (product_id, product_name) values ('300', 'Samsung')
"""
execute_formatted_sqlite(sql_schema)

In [70]:
# One output row per sale. Sales is the driving table; LEFT JOIN keeps a sale
# even if its product_id has no match in Product (product_name is then NULL).
# The ORDER BY is only for a stable display order.
sql = """--sql
SELECT
    p.product_name,
    s.year,
    s.price
FROM
    Sales s
    LEFT JOIN Product p ON s.product_id = p.product_id
ORDER BY
    s.year DESC,
    s.price DESC,
    p.product_name;
"""
select(sql)

Unnamed: 0,product_name,year,price
0,Apple,2011,9000
1,Nokia,2009,5000
2,Nokia,2008,5000


### 1070. Product Sales Analysis III (Med.)

Write a solution to find all sales that occurred in the first year each product was sold.

- For each product_id, identify the earliest year it appears in the Sales table.

- Return all sales entries for that product in that year.

Return a table with the following columns: product_id, first_year, quantity, and price.


In [71]:
sql_schema = """
Drop table If Exists Sales
Create table If Not Exists Sales (sale_id int, product_id int, year int, quantity int, price int)
insert into Sales (sale_id, product_id, year, quantity, price) values ('1', '100', '2008', '10', '5000')
insert into Sales (sale_id, product_id, year, quantity, price) values ('2', '100', '2009', '12', '5000')
insert into Sales (sale_id, product_id, year, quantity, price) values ('7', '200', '2011', '15', '9000')
"""
execute_formatted_sqlite(sql_schema)

In [72]:
# Concise variant, fine for relatively small data:
# the subquery yields one (product_id, first year) pair per product, and the
# row-value IN keeps every sale whose (product_id, year) matches such a pair.
sql = """--sql
SELECT
    s.product_id,
    s.year AS first_year,
    s.quantity,
    s.price
FROM
    Sales s
WHERE
    (s.product_id, s.year) IN (
        SELECT
            product_id,
            MIN(YEAR) AS min_year
        FROM
            Sales
        GROUP BY
            product_id
    );
"""
select(sql)

Unnamed: 0,product_id,first_year,quantity,price
0,100,2008,10,5000
1,200,2011,15,9000


In [73]:
# Same result via a JOIN against the per-product minimum year instead of a
# tuple comparison through IN.
# Author's note: expected to be faster, especially on large data
# (NOTE(review): not benchmarked in this notebook).
sql = """--sql
SELECT
    s.product_id,
    s.year AS first_year,
    s.quantity,
    s.price
FROM
    Sales s
    INNER JOIN (
        SELECT
            product_id,
            MIN(YEAR) AS min_year
        FROM
            Sales
        GROUP BY
            product_id
    ) m ON s.product_id = m.product_id
    AND s.year = m.min_year;
"""
select(sql)

Unnamed: 0,product_id,first_year,quantity,price
0,100,2008,10,5000
1,200,2011,15,9000


### 1075. Project Employees I (Easy)

Write an SQL query that reports the average experience years of all the employees for each project, rounded to 2 digits.


In [74]:
sql_schema = """
Drop table If Exists Project
Create table If Not Exists Project (project_id int, employee_id int)
insert into Project (project_id, employee_id) values ('1', '1')
insert into Project (project_id, employee_id) values ('1', '2')
insert into Project (project_id, employee_id) values ('1', '3')
insert into Project (project_id, employee_id) values ('2', '1')
insert into Project (project_id, employee_id) values ('2', '4')
Drop table If Exists Employee
Create table If Not Exists Employee (employee_id int, name varchar(10), experience_years int)
insert into Employee (employee_id, name, experience_years) values ('1', 'Khaled', '3')
insert into Employee (employee_id, name, experience_years) values ('2', 'Ali', '2')
insert into Employee (employee_id, name, experience_years) values ('3', 'John', '1')
insert into Employee (employee_id, name, experience_years) values ('4', 'Doe', '2')
"""
execute_formatted_sqlite(sql_schema)

In [75]:
# Attach each project member's experience via the join on employee_id, then
# average per project and round to 2 digits as the task requires.
sql = """--sql
SELECT
    p.project_id,
    ROUND(AVG(e.experience_years), 2) AS average_years
FROM
    Project p
    INNER JOIN Employee e ON p.employee_id = e.employee_id
GROUP BY
    p.project_id;
"""
select(sql)

Unnamed: 0,project_id,average_years
0,1,2.0
1,2,2.5


### 1084. Sales Analysis III (Easy)

Write a solution to report the products that were only sold in the first quarter of 2019. That is, between 2019-01-01 and 2019-03-31 inclusive.


In [76]:
sql_schema = """
Drop table If Exists Product
Create table If Not Exists Product (product_id int, product_name varchar(10), unit_price int)
insert into Product (product_id, product_name, unit_price) values ('1', 'S8', '1000')
insert into Product (product_id, product_name, unit_price) values ('2', 'G4', '800')
insert into Product (product_id, product_name, unit_price) values ('3', 'iPhone', '1400')
Drop table If Exists Sales
Create table If Not Exists Sales (seller_id int, product_id int, buyer_id int, sale_date date, quantity int, price int)
insert into Sales (seller_id, product_id, buyer_id, sale_date, quantity, price) values ('1', '1', '1', '2019-01-21', '2', '2000')
insert into Sales (seller_id, product_id, buyer_id, sale_date, quantity, price) values ('1', '2', '2', '2019-02-17', '1', '800')
insert into Sales (seller_id, product_id, buyer_id, sale_date, quantity, price) values ('2', '2', '3', '2019-06-02', '1', '800')
insert into Sales (seller_id, product_id, buyer_id, sale_date, quantity, price) values ('3', '3', '4', '2019-05-13', '2', '2800')
"""
execute_formatted_sqlite(sql_schema)

In [77]:
# `HAVING s.sale_date BETWEEN '2019-01-01' AND '2019-03-31'` may look like the obvious
# condition, but it would match any product sold AT LEAST ONCE in the first quarter.
# We need products sold ONLY in the first quarter, i.e. those whose MIN sale date is
# on or after 2019-01-01 and whose MAX sale date is on or before 2019-03-31.
# P.S. When the task logic allows it, prefer INNER JOIN over LEFT JOIN - it is usually
# faster, because LEFT JOIN forces the engine to keep unmatched rows and pad them with
# NULLs, which is extra work.
# Also note: PostgreSQL would require p.product_name to be added to GROUP BY or wrapped
# in an aggregate function in SELECT; SQLite accepts the bare column.
sql = """--sql
SELECT
    s.product_id,
    p.product_name
FROM
    Sales s
    INNER JOIN Product p ON s.product_id = p.product_id
GROUP BY
    s.product_id
HAVING
    MIN(s.sale_date) >= '2019-01-01'
    AND MAX(s.sale_date) <= '2019-03-31';
"""
select(sql)

Unnamed: 0,product_id,product_name
0,1,S8


### 1141. User Activity for the Past 30 Days I (Easy)

Write a solution to find the daily active user count for a period of 30 days ending 2019-07-27 inclusively. A user was active on someday if they made at least one activity on that day.


In [78]:
# SQLite has no ENUM type (only the basic types), but it can be emulated with CHECK()
# after declaring the column as TEXT:
# instead of  activity_type ENUM('open_session', 'end_session', 'scroll_down', 'send_message')
# use         activity_type text CHECK(activity_type IN ('open_session', 'end_session', 'scroll_down', 'send_message'))
sql_schema = """
Drop table If Exists Activity
Create table If Not Exists Activity (user_id int, session_id int, activity_date date, activity_type text CHECK(activity_type IN ('open_session', 'end_session', 'scroll_down', 'send_message')))
insert into Activity (user_id, session_id, activity_date, activity_type) values ('1', '1', '2019-07-20', 'open_session')
insert into Activity (user_id, session_id, activity_date, activity_type) values ('1', '1', '2019-07-20', 'scroll_down')
insert into Activity (user_id, session_id, activity_date, activity_type) values ('1', '1', '2019-07-20', 'end_session')
insert into Activity (user_id, session_id, activity_date, activity_type) values ('2', '4', '2019-07-20', 'open_session')
insert into Activity (user_id, session_id, activity_date, activity_type) values ('2', '4', '2019-07-21', 'send_message')
insert into Activity (user_id, session_id, activity_date, activity_type) values ('2', '4', '2019-07-21', 'end_session')
insert into Activity (user_id, session_id, activity_date, activity_type) values ('3', '2', '2019-07-21', 'open_session')
insert into Activity (user_id, session_id, activity_date, activity_type) values ('3', '2', '2019-07-21', 'send_message')
insert into Activity (user_id, session_id, activity_date, activity_type) values ('3', '2', '2019-07-21', 'end_session')
insert into Activity (user_id, session_id, activity_date, activity_type) values ('4', '3', '2019-06-25', 'open_session')
insert into Activity (user_id, session_id, activity_date, activity_type) values ('4', '3', '2019-06-25', 'end_session')
"""
execute_formatted_sqlite(sql_schema)

In [79]:
# Daily active users for the 30-day window ending 2019-07-27 inclusive,
# i.e. 2019-06-28 .. 2019-07-27 (end date minus 29 days; BETWEEN is inclusive).
# The same lower bound in other dialects:
#   MySQL:      activity_date BETWEEN DATE_SUB('2019-07-27', INTERVAL 29 DAY) AND '2019-07-27'
#   PostgreSQL: activity_date BETWEEN '2019-07-27'::date - INTERVAL '29 days' AND '2019-07-27'
# COUNT(DISTINCT ...) counts a user once per day no matter how many activities
# they logged that day.
# The date column is aliased as lowercase `day`, as the task names it
# (the SQL formatter had upper-cased it as if it were a keyword).
sql = """--sql
SELECT
    a.activity_date AS day,
    COUNT(DISTINCT a.user_id) AS active_users
FROM
    Activity a
WHERE
    a.activity_date BETWEEN DATE('2019-07-27', '-29 day') AND '2019-07-27'
GROUP BY
    a.activity_date;
"""
select(sql)

Unnamed: 0,DAY,active_users
0,2019-07-20,2
1,2019-07-21,2


### 1148. Article Views I (Easy)

Write a solution to find all the authors that viewed at least one of their own articles.

Return the result table sorted by id in ascending order.


In [80]:
sql_schema = """
Drop table If Exists Views
Create table If Not Exists Views (article_id int, author_id int, viewer_id int, view_date date)
insert into Views (article_id, author_id, viewer_id, view_date) values ('1', '3', '5', '2019-08-01')
insert into Views (article_id, author_id, viewer_id, view_date) values ('1', '3', '6', '2019-08-02')
insert into Views (article_id, author_id, viewer_id, view_date) values ('2', '7', '7', '2019-08-01')
insert into Views (article_id, author_id, viewer_id, view_date) values ('2', '7', '6', '2019-08-02')
insert into Views (article_id, author_id, viewer_id, view_date) values ('4', '7', '1', '2019-07-22')
insert into Views (article_id, author_id, viewer_id, view_date) values ('3', '4', '4', '2019-07-21')
insert into Views (article_id, author_id, viewer_id, view_date) values ('3', '4', '4', '2019-07-21')
"""
execute_formatted_sqlite(sql_schema)

In [81]:
# Authors who viewed their own article: rows where author and viewer match.
# DISTINCT is needed because the same author can view their own articles
# several times (author 4 does so twice in the sample data).
# The table is referenced as `Views`, exactly as it was created: SQLite is
# case-insensitive here, but engines with case-sensitive table names
# (e.g. MySQL on Linux) would not resolve `VIEWS`.
sql = """--sql
SELECT DISTINCT
    v.author_id AS id
FROM
    Views v
WHERE
    v.author_id = v.viewer_id
ORDER BY
    v.author_id;
"""
select(sql)

Unnamed: 0,id
0,4
1,7


### 1158. Market Analysis I (Med.)

Write a solution to find for each user, the join date and the number of orders they made as a buyer in 2019.


In [82]:
sql_schema = """
Drop table If Exists Users
Create table If Not Exists Users (user_id int, join_date date, favorite_brand varchar(10))
insert into Users (user_id, join_date, favorite_brand) values ('1', '2018-01-01', 'Lenovo')
insert into Users (user_id, join_date, favorite_brand) values ('2', '2018-02-09', 'Samsung')
insert into Users (user_id, join_date, favorite_brand) values ('3', '2018-01-19', 'LG')
insert into Users (user_id, join_date, favorite_brand) values ('4', '2018-05-21', 'HP')
Drop table If Exists Orders
Create table If Not Exists Orders (order_id int, order_date date, item_id int, buyer_id int, seller_id int)
insert into Orders (order_id, order_date, item_id, buyer_id, seller_id) values ('1', '2019-08-01', '4', '1', '2')
insert into Orders (order_id, order_date, item_id, buyer_id, seller_id) values ('2', '2018-08-02', '2', '1', '3')
insert into Orders (order_id, order_date, item_id, buyer_id, seller_id) values ('3', '2019-08-03', '3', '2', '3')
insert into Orders (order_id, order_date, item_id, buyer_id, seller_id) values ('4', '2018-08-04', '1', '4', '2')
insert into Orders (order_id, order_date, item_id, buyer_id, seller_id) values ('5', '2018-08-04', '1', '3', '4')
insert into Orders (order_id, order_date, item_id, buyer_id, seller_id) values ('6', '2019-08-05', '2', '2', '4')
Drop table If Exists Items
Create table If Not Exists Items (item_id int, item_brand varchar(10))
insert into Items (item_id, item_brand) values ('1', 'Samsung')
insert into Items (item_id, item_brand) values ('2', 'Lenovo')
insert into Items (item_id, item_brand) values ('3', 'LG')
insert into Items (item_id, item_brand) values ('4', 'HP')
"""
execute_formatted_sqlite(sql_schema)

In [83]:
# Variant 1: pre-aggregate the 2019 orders per buyer in a derived table, then
# LEFT JOIN it to Users. Users with no 2019 orders get NULL from the join,
# which COALESCE() turns into 0.
# Author's note: when the query logic allows it, a form without subqueries is
# preferable (fewer steps for the optimizer).
sql = """--sql
SELECT
    u.user_id AS buyer_id,
    u.join_date,
    COALESCE(c.orders_in_2019, 0) AS orders_in_2019
FROM
    Users u
    LEFT JOIN (
        SELECT
            COUNT(*) AS orders_in_2019,
            o.buyer_id
        FROM
            Orders o
        WHERE
            o.order_date >= '2019-01-01'
            AND o.order_date <= '2019-12-31'
        GROUP BY
            o.buyer_id
    ) c ON u.user_id = c.buyer_id
ORDER BY
    u.user_id;
"""
select(sql)

Unnamed: 0,buyer_id,join_date,orders_in_2019
0,1,2018-01-01,1
1,2,2018-02-09,2
2,3,2018-01-19,0
3,4,2018-05-21,0


In [84]:
# Variant 2: a plain LEFT JOIN - more concise and, per the author, faster
# (NOTE(review): not benchmarked in this notebook).
# The year filter lives in the ON clause, not in WHERE, so users without 2019
# orders are still kept by the LEFT JOIN.
# COALESCE() is not needed here: COUNT(o.order_id) ignores NULLs, so a user
# whose only joined row is the NULL-padded one gets 0.
sql = """--sql
SELECT
    u.user_id AS buyer_id,
    u.join_date,
    COUNT(o.order_id) AS orders_in_2019
FROM
    Users u
    LEFT JOIN Orders o ON u.user_id = o.buyer_id
    AND o.order_date BETWEEN '2019-01-01' AND '2019-12-31'
GROUP BY
    u.user_id
ORDER BY
    u.user_id;
"""
select(sql)

Unnamed: 0,buyer_id,join_date,orders_in_2019
0,1,2018-01-01,1
1,2,2018-02-09,2
2,3,2018-01-19,0
3,4,2018-05-21,0


### 1164. Product Price at a Given Date (Med.)

Initially, all products have price 10.

Write a solution to find the prices of all products on the date 2019-08-16.


In [85]:
sql_schema = """
Drop table If Exists Products
Create table If Not Exists Products (product_id int, new_price int, change_date date)
insert into Products (product_id, new_price, change_date) values ('1', '20', '2019-08-14')
insert into Products (product_id, new_price, change_date) values ('2', '50', '2019-08-14')
insert into Products (product_id, new_price, change_date) values ('1', '30', '2019-08-15')
insert into Products (product_id, new_price, change_date) values ('1', '35', '2019-08-16')
insert into Products (product_id, new_price, change_date) values ('2', '65', '2019-08-17')
insert into Products (product_id, new_price, change_date) values ('3', '20', '2019-08-18')
"""
execute_formatted_sqlite(sql_schema)

In [86]:
# Straightforward ("brute-force") solution built from two disjoint parts:
# 1. Products with at least one price change on or before 2019-08-16: take the
#    row at their latest such change_date (row-value IN against the
#    per-product MAX).
# 2. Products whose earliest change is after 2019-08-16: they still have the
#    initial price of 10.
# The trailing ORDER BY applies to the combined UNION result.
sql = """--sql
SELECT
    p2.product_id,
    p2.new_price AS price
FROM
    Products p2
WHERE
    (p2.product_id, p2.change_date) IN (
        SELECT
            p1.product_id,
            MAX(p1.change_date)
        FROM
            Products p1
        WHERE
            p1.change_date <= '2019-08-16'
        GROUP BY
            p1.product_id
    )
UNION
SELECT
    p3.product_id,
    10 AS price
FROM
    Products p3
GROUP BY
    p3.product_id
HAVING
    MIN(p3.change_date) > '2019-08-16'
ORDER BY
    product_id;
"""
select(sql)

Unnamed: 0,product_id,price
0,1,35
1,2,50
2,3,10


In [87]:
# A more optimal solution (per the author) using a correlated subquery with
# COALESCE() - in effect an imitation of a LEFT JOIN.
# For each distinct product the subquery fetches the price of the latest change
# on or before 2019-08-16; MAX(change_date) is emulated with
# ORDER BY ... DESC LIMIT 1. When no such change exists the subquery yields
# NULL and COALESCE() falls back to the initial price of 10.
sql = """--sql
SELECT
    p1.product_id,
    COALESCE(
        (
            SELECT
                p2.new_price
            FROM
                Products p2
            WHERE
                p2.product_id = p1.product_id
                AND p2.change_date <= '2019-08-16'
            ORDER BY
                p2.change_date DESC
            LIMIT
                1
        ),
        10
    ) AS price
FROM
    (
        SELECT DISTINCT
            p.product_id
        FROM
            Products p
    ) p1
ORDER BY
    p1.product_id;
"""
select(sql)

Unnamed: 0,product_id,price
0,1,35
1,2,50
2,3,10


### 1174. Immediate Food Delivery II (Med.)

If the customer's preferred delivery date is the same as the order date, then the order is called immediate; otherwise, it is called scheduled.

The first order of a customer is the order with the earliest order date that the customer made. It is guaranteed that a customer has precisely one first order.

Write a solution to find the percentage of immediate orders in the first orders of all customers, rounded to 2 decimal places.

The result format is in the following example.


In [88]:
# Recreate the Delivery table from scratch and load the sample rows.
# One statement per line: execute_formatted_sqlite() joins the lines with ';'
# before running the script and committing.
sql_schema = """
Drop table If Exists Delivery
Create table If Not Exists Delivery (delivery_id int, customer_id int, order_date date, customer_pref_delivery_date date)
insert into Delivery (delivery_id, customer_id, order_date, customer_pref_delivery_date) values ('1', '1', '2019-08-01', '2019-08-02')
insert into Delivery (delivery_id, customer_id, order_date, customer_pref_delivery_date) values ('2', '2', '2019-08-02', '2019-08-02')
insert into Delivery (delivery_id, customer_id, order_date, customer_pref_delivery_date) values ('3', '1', '2019-08-11', '2019-08-12')
insert into Delivery (delivery_id, customer_id, order_date, customer_pref_delivery_date) values ('4', '3', '2019-08-24', '2019-08-24')
insert into Delivery (delivery_id, customer_id, order_date, customer_pref_delivery_date) values ('5', '3', '2019-08-21', '2019-08-22')
insert into Delivery (delivery_id, customer_id, order_date, customer_pref_delivery_date) values ('6', '2', '2019-08-11', '2019-08-13')
insert into Delivery (delivery_id, customer_id, order_date, customer_pref_delivery_date) values ('7', '4', '2019-08-09', '2019-08-09')
"""
execute_formatted_sqlite(sql_schema)

In [89]:
# The CTE numbers each customer's orders by order_date (ROW_NUMBER() partitioned
# by customer_id), so num_order = 1 marks the customer's first order.
# The outer query counts first orders whose order_date equals the preferred
# delivery date and divides by the number of first orders; multiplying by 100.0
# keeps the division in real numbers before rounding to 2 decimal places.
sql = """--sql
WITH
    all_orders AS (
        SELECT
            d.order_date,
            d.customer_pref_delivery_date,
            ROW_NUMBER() OVER (
                PARTITION BY
                    d.customer_id
                ORDER BY
                    d.order_date
            ) AS num_order
        FROM
            Delivery d
    )
SELECT
    ROUND(
        100.0 * SUM(
            CASE
                WHEN a.order_date = a.customer_pref_delivery_date THEN 1
                ELSE 0
            END
        ) / COUNT(*),
        2
    ) AS immediate_percentage
FROM
    all_orders a
WHERE
    a.num_order = 1;
"""
select(sql)

Unnamed: 0,immediate_percentage
0,50.0


### 1179. Reformat Department Table (Easy)

Reformat the table such that there is a department id column and a revenue column for each month.


In [90]:
# Recreate the Department table (one row per department id and month) and load
# the sample rows; execute_formatted_sqlite() adds the ';' separators.
sql_schema = """
Drop table If Exists Department
Create table If Not Exists Department (id int, revenue int, month varchar(5))
insert into Department (id, revenue, month) values ('1', '8000', 'Jan')
insert into Department (id, revenue, month) values ('2', '9000', 'Jan')
insert into Department (id, revenue, month) values ('3', '10000', 'Feb')
insert into Department (id, revenue, month) values ('1', '7000', 'Feb')
insert into Department (id, revenue, month) values ('1', '6000', 'Mar')
"""
execute_formatted_sqlite(sql_schema)

In [91]:
# Once again, standard SQL requires that when grouping is used, every other
# column (one that is not part of the GROUP BY) goes through an aggregate
# function so the engine knows which of the rows to pick.
# The aggregate itself is not part of the solution logic - any of them would do
# (MAX() or MIN()).
# Pivot: each CASE yields revenue only for its own month (NULL otherwise), so
# every month becomes a separate <Mon>_Revenue column per department id.
sql = """--sql
SELECT
    d.id,
    MIN(
        CASE
            WHEN d.month = 'Jan' THEN revenue
        END
    ) AS Jan_Revenue,
    MIN(
        CASE
            WHEN d.month = 'Feb' THEN revenue
        END
    ) AS Feb_Revenue,
    MIN(
        CASE
            WHEN d.month = 'Mar' THEN revenue
        END
    ) AS Mar_Revenue,
    MIN(
        CASE
            WHEN d.month = 'Apr' THEN revenue
        END
    ) AS Apr_Revenue,
    MIN(
        CASE
            WHEN d.month = 'May' THEN revenue
        END
    ) AS May_Revenue,
    MIN(
        CASE
            WHEN d.month = 'Jun' THEN revenue
        END
    ) AS Jun_Revenue,
    MIN(
        CASE
            WHEN d.month = 'Jul' THEN revenue
        END
    ) AS Jul_Revenue,
    MIN(
        CASE
            WHEN d.month = 'Aug' THEN revenue
        END
    ) AS Aug_Revenue,
    MIN(
        CASE
            WHEN d.month = 'Sep' THEN revenue
        END
    ) AS Sep_Revenue,
    MIN(
        CASE
            WHEN d.month = 'Oct' THEN revenue
        END
    ) AS Oct_Revenue,
    MIN(
        CASE
            WHEN d.month = 'Nov' THEN revenue
        END
    ) AS Nov_Revenue,
    MIN(
        CASE
            WHEN d.month = 'Dec' THEN revenue
        END
    ) AS Dec_Revenue
FROM
    Department d
GROUP BY
    d.id
ORDER BY
    d.id;
"""
select(sql)

Unnamed: 0,id,Jan_Revenue,Feb_Revenue,Mar_Revenue,Apr_Revenue,May_Revenue,Jun_Revenue,Jul_Revenue,Aug_Revenue,Sep_Revenue,Oct_Revenue,Nov_Revenue,Dec_Revenue
0,1,8000.0,7000.0,6000.0,,,,,,,,,
1,2,9000.0,,,,,,,,,,,
2,3,,10000.0,,,,,,,,,,


### 1193. Monthly Transactions I (Med.)

Write an SQL query to find for each month and country, the number of transactions and their total amount, the number of approved transactions and their total amount.


In [92]:
# Once again: ENUM is a MySQL-specific data type (SQLite has no built-in
# enumerated types and stores every value as TEXT, INTEGER, REAL, BLOB or NULL),
# so it is replaced here with a TEXT column constrained by CHECK().
sql_schema = """
Drop table If Exists Transactions
Create table If Not Exists Transactions (id int, country varchar(4), state text CHECK(state IN ('approved', 'declined')), amount int, trans_date date)
insert into Transactions (id, country, state, amount, trans_date) values ('121', 'US', 'approved', '1000', '2018-12-18')
insert into Transactions (id, country, state, amount, trans_date) values ('122', 'US', 'declined', '2000', '2018-12-19')
insert into Transactions (id, country, state, amount, trans_date) values ('123', 'US', 'approved', '2000', '2019-01-01')
insert into Transactions (id, country, state, amount, trans_date) values ('124', 'DE', 'approved', '2000', '2019-01-07')
"""
execute_formatted_sqlite(sql_schema)

In [93]:
# Month bucket: strftime('%Y-%m', ...) is the SQLite form; the PostgreSQL
# equivalent is TO_CHAR(t.trans_date, 'YYYY-MM') and the MySQL one is
# DATE_FORMAT(t.trans_date, '%Y-%m').
# MySQL also has no COUNT(*) FILTER (...), so the approved-only figures use
# SUM() over a CASE WHEN block - the portable variant.
# The table is referenced with the same spelling it was created with
# (Transactions): SQLite ignores identifier case, but MySQL treats table names
# as case-sensitive on case-sensitive file systems, where TRANSACTIONS would
# not resolve.
sql = """--sql
SELECT
    strftime ('%Y-%m', t.trans_date) AS MONTH,
    t.country,
    COUNT(*) AS trans_count,
    SUM(
        CASE
            WHEN t.state = 'approved' THEN 1
            ELSE 0
        END
    ) AS approved_count,
    SUM(t.amount) AS trans_total_amount,
    SUM(
        CASE
            WHEN t.state = 'approved' THEN t.amount
            ELSE 0
        END
    ) AS approved_total_amount
FROM
    Transactions t
GROUP BY
    MONTH,
    t.country
ORDER BY
    MONTH,
    t.country;
"""
select(sql)

Unnamed: 0,MONTH,country,trans_count,approved_count,trans_total_amount,approved_total_amount
0,2018-12,US,2,1,3000,1000
1,2019-01,DE,1,1,2000,2000
2,2019-01,US,1,1,2000,2000


### 1204. Last Person to Fit in the Bus (Med.)

There is a queue of people waiting to board a bus. However, the bus has a weight limit of 1000 kilograms, so there may be some people who cannot board.

Write a solution to find the person_name of the last person that can fit on the bus without exceeding the weight limit. The test cases are generated such that the first person does not exceed the weight limit.

Note that only one person can board the bus at any given turn.


In [94]:
# Recreate the Queue table and load the sample rows; the rows are inserted out
# of boarding order, the turn column carries the actual order.
sql_schema = """
Drop table If Exists Queue
Create table If Not Exists Queue (person_id int, person_name varchar(30), weight int, turn int)
insert into Queue (person_id, person_name, weight, turn) values ('5', 'Alice', '250', '1')
insert into Queue (person_id, person_name, weight, turn) values ('4', 'Bob', '175', '5')
insert into Queue (person_id, person_name, weight, turn) values ('3', 'Alex', '350', '2')
insert into Queue (person_id, person_name, weight, turn) values ('6', 'John Cena', '400', '3')
insert into Queue (person_id, person_name, weight, turn) values ('1', 'Winston', '500', '6')
insert into Queue (person_id, person_name, weight, turn) values ('2', 'Marie', '200', '4')
"""
execute_formatted_sqlite(sql_schema)

In [95]:
# N.B.: with a window function SUM() can be applied not to the whole table but
# to each row along a given column - i.e. over each successive window, which
# gives a running total of weight in turn order.
# The outer query keeps the rows whose running total is at most 1000 and takes
# the one with the largest total, i.e. the last person who still fits.
sql = """--sql
SELECT
    t.person_name
FROM
    (
        SELECT
            q.person_name,
            SUM(q.weight) OVER (
                ORDER BY
                    q.turn
            ) AS total_weight
        FROM
            Queue q
    ) t
WHERE
    t.total_weight <= 1000
ORDER BY
    t.total_weight DESC
LIMIT
    1;
"""
select(sql)

Unnamed: 0,person_name
0,John Cena


### 1211. Queries Quality and Percentage (Easy)

We define query quality as:

> The average of the ratio between query rating and its position.

We also define poor query percentage as:

> The percentage of all queries with rating less than 3.

Write a solution to find each query_name, the quality and poor_query_percentage.

Both quality and poor_query_percentage should be rounded to 2 decimal places.


In [96]:
# Recreate the Queries table and load the sample rows;
# execute_formatted_sqlite() adds the ';' separators and commits.
sql_schema = """
Drop table If Exists Queries
Create table If Not Exists Queries (query_name varchar(30), result varchar(50), position int, rating int)
insert into Queries (query_name, result, position, rating) values ('Dog', 'Golden Retriever', '1', '5')
insert into Queries (query_name, result, position, rating) values ('Dog', 'German Shepherd', '2', '5')
insert into Queries (query_name, result, position, rating) values ('Dog', 'Mule', '200', '1')
insert into Queries (query_name, result, position, rating) values ('Cat', 'Shirazi', '5', '2')
insert into Queries (query_name, result, position, rating) values ('Cat', 'Siamese', '3', '3')
insert into Queries (query_name, result, position, rating) values ('Cat', 'Sphynx', '7', '4')
"""
execute_formatted_sqlite(sql_schema)

In [97]:
# quality: average of rating / position per query_name; the 1.0 factor keeps
# the integer columns from being divided as integers.
# poor_query_percentage: share of rows with rating < 3, counted with the
# aggregate FILTER clause and scaled by 100.0; both values are rounded to
# 2 decimal places.
sql = """--sql
SELECT
    q.query_name,
    ROUND(SUM(1.0 * q.rating / q.position) / COUNT(*), 2) AS quality,
    ROUND(
        100.0 * COUNT(*) FILTER (
            WHERE
                q.rating < 3
        ) / COUNT(*),
        2
    ) AS poor_query_percentage
FROM
    Queries q
GROUP BY
    q.query_name;
"""
select(sql)

Unnamed: 0,query_name,quality,poor_query_percentage
0,Cat,0.66,33.33
1,Dog,2.5,33.33


### 1251. Average Selling Price (Easy)

Write a solution to find the average selling price for each product. average_price should be rounded to 2 decimal places. If a product does not have any sold units, its average selling price is assumed to be 0.


In [98]:
# Recreate the Prices table (a price per product and date range) and the
# UnitsSold table (units per product and purchase date), then load the sample
# rows; execute_formatted_sqlite() adds the ';' separators and commits.
sql_schema = """
Drop table If Exists Prices
Create table If Not Exists Prices (product_id int, start_date date, end_date date, price int)
insert into Prices (product_id, start_date, end_date, price) values ('1', '2019-02-17', '2019-02-28', '5')
insert into Prices (product_id, start_date, end_date, price) values ('1', '2019-03-01', '2019-03-22', '20')
insert into Prices (product_id, start_date, end_date, price) values ('2', '2019-02-01', '2019-02-20', '15')
insert into Prices (product_id, start_date, end_date, price) values ('2', '2019-02-21', '2019-03-31', '30')
Drop table If Exists UnitsSold
Create table If Not Exists UnitsSold (product_id int, purchase_date date, units int)
insert into UnitsSold (product_id, purchase_date, units) values ('1', '2019-02-25', '100')
insert into UnitsSold (product_id, purchase_date, units) values ('1', '2019-03-01', '15')
insert into UnitsSold (product_id, purchase_date, units) values ('2', '2019-02-10', '200')
insert into UnitsSold (product_id, purchase_date, units) values ('2', '2019-03-22', '30')
"""
execute_formatted_sqlite(sql_schema)