# **<font color='crimson'>SQL. Merging tables</font>**

---

In [None]:
###

In [None]:
--1
-- Pair every user action with the profile of the user who performed it.
-- INNER JOIN keeps only the actions whose user_id is present in users.
SELECT
    ua.user_id AS user_id_left
    ,usr.user_id AS user_id_right
    ,ua.order_id
    ,ua.time
    ,ua.action
    ,usr.sex
    ,usr.birth_date
FROM
    user_actions AS ua
    INNER JOIN users AS usr
        ON usr.user_id = ua.user_id
ORDER BY
    user_id_left;

In [None]:
###

In [None]:
--2
-- Number of distinct users that have at least one action
-- and a matching row in users.
SELECT
    COUNT(DISTINCT ua.user_id) AS users_count
FROM
    user_actions AS ua
    INNER JOIN users AS usr
        ON usr.user_id = ua.user_id;

In [None]:
###

In [None]:
--3
-- Every user action with the performer's profile attached when one exists.
-- LEFT JOIN keeps all rows of user_actions; the users columns are NULL
-- for actions whose user_id has no match in users.
SELECT
    ua.user_id AS user_id_left
    ,usr.user_id AS user_id_right
    ,ua.order_id
    ,ua.time
    ,ua.action
    ,usr.sex
    ,usr.birth_date
FROM
    user_actions AS ua
    LEFT JOIN users AS usr
        ON usr.user_id = ua.user_id
ORDER BY
    user_id_left;

In [None]:
###

In [None]:
--4
-- Number of distinct user_id values in user_actions. The LEFT JOIN never
-- drops a user_actions row, so users missing from users are counted as well.
SELECT
    COUNT(DISTINCT ua.user_id) AS users_count
FROM
    user_actions AS ua
    LEFT JOIN users AS usr
        ON usr.user_id = ua.user_id;

In [None]:
###

In [None]:
--5
-- Same LEFT JOIN as in query 3, followed by a filter that discards the rows
-- where no users record was found (the right-hand key is NULL there).
SELECT
    ua.user_id AS user_id_left
    ,usr.user_id AS user_id_right
    ,ua.order_id
    ,ua.time
    ,ua.action
    ,usr.sex
    ,usr.birth_date
FROM
    user_actions AS ua
    LEFT JOIN users AS usr
        ON usr.user_id = ua.user_id
WHERE
    usr.user_id IS NOT NULL
ORDER BY
    user_id_left;

In [None]:
###

In [None]:
--6
-- Birth-date counts of users and couriers placed side by side.
WITH users_by_birth_date AS (
    -- number of users per known birth date
    SELECT
        usr.birth_date
        ,COUNT(usr.user_id) AS users_count
    FROM
        users AS usr
    WHERE
        usr.birth_date IS NOT NULL
    GROUP BY
        usr.birth_date
),
couriers_by_birth_date AS (
    -- number of couriers per known birth date
    SELECT
        cr.birth_date
        ,COUNT(cr.courier_id) AS couriers_count
    FROM
        couriers AS cr
    WHERE
        cr.birth_date IS NOT NULL
    GROUP BY
        cr.birth_date
)
-- FULL JOIN keeps dates that occur on one side only;
-- the columns of the other side are NULL on such rows.
SELECT
    ub.birth_date AS users_birth_date
    ,ub.users_count
    ,cb.birth_date AS couriers_birth_date
    ,cb.couriers_count
FROM
    users_by_birth_date AS ub
    FULL JOIN couriers_by_birth_date AS cb
        ON ub.birth_date = cb.birth_date
ORDER BY
    users_birth_date
    ,couriers_birth_date;

In [None]:
###

In [None]:
--7
-- Number of distinct birth dates that occur among users or couriers.
WITH all_birth_dates AS (
    -- UNION (without ALL) already removes duplicate dates,
    -- and both branches filter out NULL birth dates
    SELECT
        usr.birth_date
    FROM
        users AS usr
    WHERE
        usr.birth_date IS NOT NULL
    UNION
    SELECT
        cr.birth_date
    FROM
        couriers AS cr
    WHERE
        cr.birth_date IS NOT NULL
)
-- every row of the CTE is a unique non-NULL date, so a plain row count
-- is enough; a second DISTINCT pass would only repeat the de-duplication
SELECT
    COUNT(*) AS dates_count
FROM
    all_birth_dates;

In [None]:
###

In [None]:
--8
-- Every combination of the first 100 users and all product names.
WITH first_users AS (
    -- ids of the 100 users with the smallest user_id; the ORDER BY makes
    -- "first 100" well defined - LIMIT alone returns an arbitrary subset
    SELECT
        usr.user_id
    FROM
        users AS usr
    ORDER BY
        usr.user_id ASC
    LIMIT 100
)
-- CROSS JOIN pairs each selected user with each product
SELECT
    fu.user_id
    ,p.name
FROM
    first_users AS fu
    CROSS JOIN products AS p
ORDER BY
    fu.user_id ASC
    ,p.name ASC;

In [None]:
###

In [None]:
--9
-- User actions with the product list of the referenced order attached
-- (product_ids is NULL when the order_id has no match in orders).
-- Only the first 1000 rows of the sorted result are returned.
SELECT
    ua.user_id
    ,ua.order_id
    ,ord.product_ids
FROM
    user_actions AS ua
    LEFT JOIN orders AS ord
        ON ord.order_id = ua.order_id
ORDER BY
    ua.user_id
    ,ord.order_id
LIMIT 1000;

In [None]:
###

In [None]:
--10
-- User actions for orders that were never cancelled, with the product list
-- of each order attached. Only the first 1000 sorted rows are returned.
WITH orders_cancel AS (
    -- ids of orders that have a cancel_order action
    SELECT
        act.order_id AS orders_id_canceled
    FROM
        user_actions AS act
    WHERE
        act.action = 'cancel_order'
)
SELECT
    ua.user_id
    ,ua.order_id
    ,ord.product_ids
FROM
    user_actions AS ua
    LEFT JOIN orders AS ord
        ON ua.order_id = ord.order_id
WHERE
    -- NOT EXISTS instead of NOT IN: NOT IN yields no rows at all as soon as
    -- the subquery returns a single NULL, NOT EXISTS has no such trap
    NOT EXISTS (
        SELECT 1
        FROM orders_cancel AS oc
        WHERE oc.orders_id_canceled = ua.order_id
    )
ORDER BY
    ua.user_id ASC
    ,ord.order_id ASC
LIMIT 1000;

In [None]:
###

In [None]:
--11
-- Average number of products per non-cancelled order for every user,
-- rounded to 2 decimals. Only the first 1000 users (by user_id) are returned.
WITH orders_cancel AS (
    -- ids of orders that have a cancel_order action
    SELECT
        act.order_id AS orders_id_canceled
    FROM
        user_actions AS act
    WHERE
        act.action = 'cancel_order'
)
SELECT
    ua.user_id
    -- ARRAY_LENGTH(..., 1) is the number of elements in product_ids
    ,ROUND(AVG(ARRAY_LENGTH(ord.product_ids, 1)), 2) AS avg_order_size
FROM
    user_actions AS ua
    LEFT JOIN orders AS ord
        ON ua.order_id = ord.order_id
WHERE
    -- NOT EXISTS instead of NOT IN: NOT IN yields no rows at all as soon as
    -- the subquery returns a single NULL, NOT EXISTS has no such trap
    NOT EXISTS (
        SELECT 1
        FROM orders_cancel AS oc
        WHERE oc.orders_id_canceled = ua.order_id
    )
GROUP BY
    ua.user_id
ORDER BY
    ua.user_id ASC
LIMIT 1000;

In [None]:
###

In [None]:
--12
-- One row per product entry of every order, together with the product price.
WITH order_items AS (
    -- expand the product_ids array: one output row per array element
    SELECT
        ord.order_id
        ,UNNEST(ord.product_ids) AS product_id
    FROM
        orders AS ord
)
-- look up the price of every expanded product id in products
SELECT
    items.order_id
    ,items.product_id
    ,p.price
FROM
    order_items AS items
    LEFT JOIN products AS p
        ON p.product_id = items.product_id
ORDER BY
    items.order_id
    ,items.product_id
LIMIT 1000;

In [None]:
###

In [None]:
--13
-- Total price of every order: the sum of the prices of all its product entries.
WITH order_items AS (
    -- expand the product_ids array: one output row per array element
    SELECT
        ord.order_id
        ,UNNEST(ord.product_ids) AS product_id
    FROM
        orders AS ord
)
-- attach prices from products and add them up per order
SELECT
    items.order_id
    ,SUM(p.price) AS order_price
FROM
    order_items AS items
    LEFT JOIN products AS p
        ON p.product_id = items.product_id
GROUP BY
    items.order_id
ORDER BY
    items.order_id
LIMIT 1000;

In [None]:
###

In [None]:
--14
-- Per-user order statistics (count, average size, total / average / min / max
-- value) computed over orders that were never cancelled.
WITH orders_cancel AS (
    -- ids of orders that have a cancel_order action
    SELECT
        act.order_id AS orders_id_canceled
    FROM
        user_actions AS act
    WHERE
        act.action = 'cancel_order'
),
unnest_table AS (
    -- one row per product entry of every non-cancelled order
    SELECT
        ord.order_id
        ,UNNEST(ord.product_ids) AS product_id
    FROM
        orders AS ord
    WHERE
        -- NOT EXISTS instead of NOT IN: NOT IN yields no rows at all as soon
        -- as the subquery returns a single NULL
        NOT EXISTS (
            SELECT 1
            FROM orders_cancel AS oc
            WHERE oc.orders_id_canceled = ord.order_id
        )
),
info_per_order AS (
    -- price and number of product entries of each order, labelled with the
    -- user taken from user_actions; no ORDER BY here because row order
    -- inside a CTE does not carry over to the outer query
    SELECT
        ua.user_id
        ,ua.order_id
        ,SUM(p.price) AS order_price
        ,COUNT(items.product_id) AS order_size
    FROM
        unnest_table AS items
        LEFT JOIN products AS p
            ON items.product_id = p.product_id
        LEFT JOIN user_actions AS ua
            ON items.order_id = ua.order_id
    GROUP BY
        ua.user_id
        ,ua.order_id
)
-- aggregate the per-order figures for every user
SELECT
    ipo.user_id
    ,COUNT(ipo.order_id) AS orders_count
    ,ROUND(AVG(ipo.order_size), 2) AS avg_order_size
    ,SUM(ipo.order_price) AS sum_order_value
    ,ROUND(AVG(ipo.order_price), 2) AS avg_order_value
    ,MIN(ipo.order_price) AS min_order_value
    ,MAX(ipo.order_price) AS max_order_value
FROM
    info_per_order AS ipo
GROUP BY
    ipo.user_id
ORDER BY
    ipo.user_id ASC
LIMIT 1000;

In [None]:
###

In [None]:
--15
-- Daily revenue: the summed price of all product entries in non-cancelled
-- orders, grouped by the calendar date on which the order was created.
WITH orders_cancel AS (
    -- ids of orders that have a cancel_order action
    SELECT
        act.order_id AS orders_id_canceled
    FROM
        user_actions AS act
    WHERE
        act.action = 'cancel_order'
),
unnest_table AS (
    -- one row per product entry of every non-cancelled order,
    -- tagged with the order's creation date
    SELECT
        ord.order_id
        ,ord.creation_time::DATE AS date
        ,UNNEST(ord.product_ids) AS product_id
    FROM
        orders AS ord
    WHERE
        -- NOT EXISTS instead of NOT IN: NOT IN yields no rows at all as soon
        -- as the subquery returns a single NULL
        NOT EXISTS (
            SELECT 1
            FROM orders_cancel AS oc
            WHERE oc.orders_id_canceled = ord.order_id
        )
)
-- attach prices from products and sum them per date
SELECT
    items.date
    ,SUM(p.price)::DECIMAL AS revenue
FROM
    unnest_table AS items
    LEFT JOIN products AS p
        ON items.product_id = p.product_id
GROUP BY
    items.date
ORDER BY
    items.date ASC;

In [None]:
###

In [None]:
--16
-- The 10 products that appear in the largest number of orders delivered
-- in September 2022. A product is counted once per order, however many
-- times it is listed in that order.
WITH orders_september AS (
    -- ids of orders with a deliver_order action in September 2022;
    -- half-open range: BETWEEN is inclusive on both ends and would also
    -- pick up a delivery stamped exactly 2022-10-01 00:00:00
    SELECT
        ca.order_id
    FROM
        courier_actions AS ca
    WHERE
        ca.action = 'deliver_order'
        AND ca.time >= '2022-09-01'
        AND ca.time < '2022-10-01'
),
unnest_table AS (
    -- distinct (order, product) pairs of the selected orders;
    -- DISTINCT alone removes repeated products within one order
    SELECT DISTINCT
        ord.order_id
        ,UNNEST(ord.product_ids) AS product_id
    FROM
        orders AS ord
    WHERE
        ord.order_id IN (
            SELECT os.order_id
            FROM orders_september AS os
        )
)
-- count the orders per product and keep the 10 most frequent ones
SELECT
    p.name
    ,COUNT(items.product_id) AS times_purchased
FROM
    unnest_table AS items
    LEFT JOIN products AS p
        ON items.product_id = p.product_id
GROUP BY
    p.name
    ,items.product_id
ORDER BY
    COUNT(items.product_id) DESC
    -- tie-breaker so that the cut made by LIMIT is reproducible
    ,p.name ASC
LIMIT 10;

In [None]:
###

In [None]:
--17
-- Average per-user cancel rate, broken down by the user's sex.
WITH users_cancel_rate AS (
    -- cancel rate of each user: cancel_order actions divided by create_order
    -- actions, rounded to 3 decimals. NULLIF turns a zero divisor into NULL,
    -- so a user without create_order rows gets a NULL rate (ignored by AVG
    -- below) instead of raising a division-by-zero error. No ORDER BY here:
    -- row order inside a CTE does not carry over to the outer query.
    SELECT
        ua.user_id
        ,ROUND(
            COUNT(ua.order_id) FILTER (WHERE ua.action = 'cancel_order') * 1.0
            / NULLIF(COUNT(ua.order_id) FILTER (WHERE ua.action = 'create_order'), 0)
            , 3) AS cancel_rate
    FROM
        user_actions AS ua
    GROUP BY
        ua.user_id
),
users_sex AS (
    -- attach the sex from users; it stays NULL for users without a match
    SELECT
        ucr.user_id
        ,ucr.cancel_rate
        ,usr.sex
    FROM
        users_cancel_rate AS ucr
        LEFT JOIN users AS usr
            ON ucr.user_id = usr.user_id
)
-- average the per-user rates within each sex; a NULL sex is reported
-- as 'unknown'
SELECT
    COALESCE(us.sex, 'unknown') AS sex
    ,ROUND(AVG(us.cancel_rate), 3) AS avg_cancel_rate
FROM
    users_sex AS us
GROUP BY
    us.sex
ORDER BY
    -- sorts by the raw column, not by the COALESCE-d output value
    us.sex ASC;

In [None]:
###

In [None]:
--18
-- Ids of the 10 orders with the longest interval between
-- order creation and delivery.
WITH deliver_orders AS (
    -- delivery timestamp of every order that has a deliver_order action
    SELECT
        ca.order_id
        ,ca.time AS time_deliver
    FROM
        courier_actions AS ca
    WHERE
        ca.action = 'deliver_order'
)
-- match deliveries with their orders and rank by elapsed time, longest first
SELECT
    d.order_id
FROM
    deliver_orders AS d
    INNER JOIN orders AS ord
        ON ord.order_id = d.order_id
ORDER BY
    (d.time_deliver - ord.creation_time) DESC
LIMIT 10;

In [None]:
###

In [None]:
--19
-- For every order, the list of product names that corresponds to its
-- product_ids. Only the first 1000 orders (by order_id) are returned.
WITH product_id_list AS (
    -- expand the product_ids array: one output row per array element
    SELECT
        ord.order_id
        ,UNNEST(ord.product_ids) AS product_id
    FROM
        orders AS ord
)
-- look up each product name and collect the names back into one array
-- per order; NOTE: ARRAY_AGG has no ORDER BY, so the order of names inside
-- an array is not guaranteed to follow the order of the original ids
SELECT
    items.order_id
    ,ARRAY_AGG(p.name) AS product_names
FROM
    product_id_list AS items
    LEFT JOIN products AS p
        ON items.product_id = p.product_id
GROUP BY
    items.order_id
ORDER BY
    -- without a sort, LIMIT would return an arbitrary set of 1000 orders
    items.order_id ASC
LIMIT 1000;

In [None]:
###

In [None]:
--20
-- For the orders with the largest number of products: who ordered them and
-- who delivered them, with ages in full years measured at the latest
-- timestamp found in user_actions.
WITH last_date AS (
    -- latest timestamp recorded in user_actions, used as "now" for ages
    SELECT
        MAX(ua.time) AS max_time
    FROM
        user_actions AS ua
),
max_size_order AS (
    -- number of products in the largest order. MAX skips NULLs, whereas
    -- ORDER BY ... DESC LIMIT 1 would return NULL first in PostgreSQL
    -- (ARRAY_LENGTH is NULL for an empty or NULL array) and empty the result
    SELECT
        MAX(ARRAY_LENGTH(ord.product_ids, 1)) AS max_size
    FROM
        orders AS ord
),
orders_with_max_size AS (
    -- ids of all orders whose product count equals that maximum
    SELECT
        ord.order_id AS max_orders_id
    FROM
        orders AS ord
    WHERE
        ARRAY_LENGTH(ord.product_ids, 1) = (
            SELECT ms.max_size
            FROM max_size_order AS ms)
)
-- delivered largest orders with the ordering user and the delivering courier
SELECT
    ca.order_id
    ,usr.user_id
    ,DATE_PART(
        'year', AGE(
            (SELECT ld.max_time FROM last_date AS ld),
            usr.birth_date))::INT AS user_age
    ,cr.courier_id
    ,DATE_PART(
        'year', AGE(
            (SELECT ld.max_time FROM last_date AS ld),
            cr.birth_date))::INT AS courier_age
FROM
    courier_actions AS ca
    -- user_actions links the order to the user who placed it
    LEFT JOIN user_actions AS ua
        ON ca.order_id = ua.order_id
    LEFT JOIN users AS usr
        ON usr.user_id = ua.user_id
    LEFT JOIN couriers AS cr
        ON cr.courier_id = ca.courier_id
WHERE
    ca.action = 'deliver_order'
    AND ca.order_id IN (
        SELECT mx.max_orders_id
        FROM orders_with_max_size AS mx
    )
ORDER BY
    ca.order_id;

In [None]:
###