# **<font color='crimson'>SQL. Data Aggregation and Grouping</font>**

---

# **Data Aggregation**

---

In [None]:
###

In [None]:
-- 1. Every different user_id present in user_actions, smallest first.
--    Grouping on the single selected column collapses duplicates.
SELECT
    ua.user_id
FROM
    user_actions AS ua
GROUP BY
    ua.user_id
ORDER BY
    ua.user_id;

In [None]:
###

In [None]:
-- 2. Different (courier_id, order_id) pairs found in courier_actions,
--    sorted by courier and then by order.
SELECT DISTINCT
    ca.courier_id,
    ca.order_id
FROM courier_actions AS ca
ORDER BY
    ca.courier_id,
    ca.order_id;

In [None]:
###

In [None]:
-- 3. Highest and lowest value of price across the products table.
SELECT
    MAX(p.price) AS max_price,
    MIN(p.price) AS min_price
FROM products AS p;

In [None]:
###

In [None]:
-- 4. Number of rows in users next to the number of rows with a birth_date.
--    COUNT(*) counts every row; COUNT(column) skips rows where it is NULL.
SELECT
    COUNT(*) AS dates,
    COUNT(u.birth_date) AS dates_not_null
FROM
    users AS u;

In [None]:
###

In [None]:
-- 5. Count of non-NULL user_id values in user_actions compared with the
--    count of different user_id values.
SELECT
    COUNT(ua.user_id) AS users,
    COUNT(DISTINCT ua.user_id) AS unique_users
FROM
    user_actions AS ua;

In [None]:
###

In [None]:
-- 6. Number of couriers whose sex is 'female'.
--    The condition is attached to the aggregate itself via FILTER.
SELECT
    COUNT(c.courier_id) FILTER (WHERE c.sex = 'female') AS couriers
FROM couriers AS c;

In [None]:
###

In [None]:
-- 7. Timestamps of the earliest and the latest 'deliver_order' action
--    in courier_actions.
-- "=" is used instead of LIKE: inside a LIKE pattern the underscore is a
-- single-character wildcard, so LIKE 'deliver_order' would also accept a
-- value such as 'deliverXorder'.
SELECT
    MIN(ca.time) AS first_delivery,
    MAX(ca.time) AS last_delivery
FROM
    courier_actions AS ca
WHERE
    ca.action = 'deliver_order';

In [None]:
###

In [None]:
-- 8. Sum of price over the products carrying one of three exact names,
--    written as equality tests chained with OR.
-- The names contain no wildcard characters, so plain "=" states the
-- exact-match intent directly where LIKE was used before.
SELECT
    SUM(p.price) AS order_price
FROM
    products AS p
WHERE
    p.name = 'сухарики'
    OR p.name = 'чипсы'
    OR p.name = 'энергетический напиток';

In [None]:
-- Sum of price over the products whose name is in a fixed list of three.
SELECT
    SUM(p.price) AS order_price
FROM products AS p
WHERE p.name IN ('сухарики', 'чипсы', 'энергетический напиток');

In [None]:
###

In [None]:
-- 9. Number of orders holding nine or more items, where the item count is
--    the length of product_ids along its first dimension.
SELECT
    COUNT(o.order_id) AS orders
FROM orders AS o
WHERE ARRAY_LENGTH(o.product_ids, 1) > 8;

In [None]:
###

In [None]:
-- 10. Smallest age, measured against today's date, among couriers whose sex
--     is 'male'; the interval is rendered as text.
-- "=" replaces a wildcard-free LIKE so the filter reads as the exact match
-- it is, in line with the other sex filters in this file.
SELECT
    MIN(AGE(CURRENT_DATE, c.birth_date))::VARCHAR AS min_age
FROM
    couriers AS c
WHERE
    c.sex = 'male';

In [None]:
###

In [None]:
-- 11. Weighted total of price for three products: the first is taken
--     three times, the second twice, the third once.
SELECT
    SUM(
        CASE p.name
            WHEN 'сухарики' THEN p.price * 3
            WHEN 'чипсы' THEN p.price * 2
            WHEN 'энергетический напиток' THEN p.price
        END
    ) AS order_price
FROM products AS p
-- Only the three listed names reach the CASE, so every row hits a branch.
WHERE p.name IN ('сухарики', 'чипсы', 'энергетический напиток');

In [None]:
###

In [None]:
-- 12. Average price, rounded to two decimals, of products whose name
--     contains 'чай' or 'кофе', leaving out names containing 'иван-чай'
--     and the product named exactly 'чайный гриб'.
SELECT
    ROUND(AVG(p.price), 2) AS avg_price
FROM
    products AS p
WHERE
    (p.name LIKE '%чай%' OR p.name LIKE '%кофе%')
    AND p.name NOT LIKE '%иван-чай%'
    -- Exact-name exclusion: there is no wildcard, so "<>" replaces NOT LIKE.
    AND p.name <> 'чайный гриб';

In [None]:
-- 13. Interval between the earliest and the latest birth_date among users
--     whose sex is 'male', rendered as text.
SELECT
    CAST(AGE(MAX(u.birth_date), MIN(u.birth_date)) AS VARCHAR) AS age_diff
FROM users AS u
WHERE u.sex = 'male';

In [None]:
###

In [None]:
-- 14. Mean number of items per order, rounded to two decimals, for orders
--     created on a Saturday or a Sunday.
--     With 'isodow' Saturday is 6 and Sunday is 7.
SELECT
    ROUND(AVG(ARRAY_LENGTH(o.product_ids, 1)), 2) AS avg_order_size
FROM orders AS o
WHERE DATE_PART('isodow', o.creation_time) IN (6, 7);

In [None]:
###

In [None]:
-- 15. Number of different users, number of different orders, and the
--     orders-per-user ratio rounded to two decimals, over user_actions.
SELECT
    COUNT(DISTINCT ua.user_id) AS unique_users,
    COUNT(DISTINCT ua.order_id) AS unique_orders,
    ROUND(
        -- "* 1.0" forces numeric division instead of integer division.
        COUNT(DISTINCT ua.order_id) * 1.0
        -- NULLIF turns a zero user count (e.g. an empty table) into NULL, so
        -- the ratio becomes NULL instead of raising a division-by-zero error.
        / NULLIF(COUNT(DISTINCT ua.user_id), 0),
        2
    ) AS orders_per_user
FROM
    user_actions AS ua;

In [None]:
###

In [None]:
-- 16. Users with no cancellation: all different users minus the different
--     users that have at least one 'cancel_order' action.
SELECT
    COUNT(DISTINCT ua.user_id)
    - COUNT(DISTINCT ua.user_id) FILTER (WHERE ua.action = 'cancel_order')
        AS users_count
FROM user_actions AS ua;

In [None]:
###

In [None]:
-- 17. Total orders, orders with five or more items, and the share of the
--     latter in the total, rounded to two decimals.
SELECT
    COUNT(o.order_id) AS orders,
    COUNT(o.order_id)
        FILTER (WHERE ARRAY_LENGTH(o.product_ids, 1) >= 5) AS large_orders,
    ROUND(
        -- "* 1.0" forces numeric division instead of integer division.
        COUNT(o.order_id)
            FILTER (WHERE ARRAY_LENGTH(o.product_ids, 1) >= 5) * 1.0
        -- NULLIF makes the share NULL rather than a division-by-zero error
        -- when there are no orders to divide by.
        / NULLIF(COUNT(o.order_id), 0),
        2
    ) AS large_orders_share
FROM
    orders AS o;

# **Data Grouping**

---

In [None]:
###

In [None]:
-- 1. Number of couriers for each value of sex, smallest group first.
SELECT
    c.sex,
    COUNT(c.courier_id) AS couriers_count
FROM couriers AS c
GROUP BY c.sex
ORDER BY couriers_count;

In [None]:
###

In [None]:
-- 2. Number of rows in user_actions for each action value,
--    least frequent action first.
SELECT
    ua.action,
    COUNT(ua.action) AS orders_count
FROM user_actions AS ua
GROUP BY ua.action
ORDER BY orders_count;

In [None]:
###

In [None]:
-- 3. Number of orders per calendar month of creation_time, in
--    chronological order. DATE_TRUNC maps each timestamp to the start
--    of its month.
SELECT
    DATE_TRUNC('month', o.creation_time) AS month,
    COUNT(o.order_id) AS orders_count
FROM orders AS o
GROUP BY DATE_TRUNC('month', o.creation_time)
ORDER BY month;

In [None]:
###

In [None]:
-- 4. Number of order_id entries per calendar month and action in
--    user_actions, sorted by month and then by action.
SELECT
    DATE_TRUNC('month', ua.time) AS month,
    ua.action,
    COUNT(ua.order_id) AS orders_count
FROM user_actions AS ua
GROUP BY
    DATE_TRUNC('month', ua.time),
    ua.action
ORDER BY
    month,
    ua.action;

In [None]:
###

In [None]:
-- 5. For each sex, the largest month number (as an integer) seen among
--    the birth dates: the month is extracted per row, then maximised.
SELECT
    u.sex,
    CAST(MAX(DATE_PART('month', u.birth_date)) AS INTEGER) AS max_month
FROM users AS u
GROUP BY u.sex
ORDER BY u.sex;

In [None]:
###

In [None]:
-- 6. For each sex, the month number (as an integer) of the latest
--    birth_date: the date is maximised first, then its month is extracted.
SELECT
    u.sex,
    CAST(DATE_PART('month', MAX(u.birth_date)) AS INTEGER) AS max_month
FROM users AS u
GROUP BY u.sex
ORDER BY u.sex;

In [None]:
###

In [None]:
-- 7. For each sex, the age in whole years (as of today) of the user with
--    the earliest birth_date, lowest age first.
SELECT
    u.sex,
    CAST(
        DATE_PART('year', AGE(CURRENT_DATE, MIN(u.birth_date))) AS INTEGER
    ) AS max_age
FROM users AS u
GROUP BY u.sex
ORDER BY max_age;

In [None]:
###

In [None]:
-- 8. Number of users for each age in whole years (as of today), youngest
--    first. There is no birth_date filter, so rows without a birth_date
--    form a group whose age is NULL.
SELECT
    CAST(DATE_PART('year', AGE(CURRENT_DATE, u.birth_date)) AS INTEGER) AS age,
    COUNT(u.user_id) AS users_count
FROM users AS u
GROUP BY
    CAST(DATE_PART('year', AGE(CURRENT_DATE, u.birth_date)) AS INTEGER)
ORDER BY age;

In [None]:
###

In [None]:
-- 9. Number of users per age in whole years (as of today) and sex, for
--    users that have a birth_date; sorted by age and then by sex.
SELECT
    CAST(DATE_PART('year', AGE(CURRENT_DATE, u.birth_date)) AS INTEGER) AS age,
    u.sex,
    COUNT(u.user_id) AS users_count
FROM users AS u
WHERE u.birth_date IS NOT NULL
GROUP BY
    CAST(DATE_PART('year', AGE(CURRENT_DATE, u.birth_date)) AS INTEGER),
    u.sex
ORDER BY
    age,
    u.sex;

In [None]:
###

In [None]:
-- 10. Number of orders per order size (length of product_ids) for orders
--     created from 2022-08-29 up to, but not including, 2022-09-05;
--     sorted by size.
-- A half-open range replaces BETWEEN: BETWEEN's closed upper bound also
-- matched a creation_time of exactly 2022-09-05 00:00:00.
-- NOTE(review): assumes creation_time is a timestamp, not a date - confirm.
SELECT
    ARRAY_LENGTH(o.product_ids, 1) AS order_size,
    COUNT(*) AS orders_count
FROM
    orders AS o
WHERE
    o.creation_time >= '2022-08-29'
    AND o.creation_time < '2022-09-05'
GROUP BY
    ARRAY_LENGTH(o.product_ids, 1)
ORDER BY
    order_size;

In [None]:
###

In [None]:
-- 11. Order sizes (length of product_ids) that occur in more than 2000
--     orders created Monday through Friday, sorted by size.
--     With 'isodow' Monday is 1 and Sunday is 7, so 1..5 are the weekdays.
SELECT
    ARRAY_LENGTH(o.product_ids, 1) AS order_size,
    COUNT(*) AS orders_count
FROM orders AS o
WHERE DATE_PART('isodow', o.creation_time) BETWEEN 1 AND 5
GROUP BY ARRAY_LENGTH(o.product_ids, 1)
HAVING COUNT(*) > 2000
ORDER BY order_size;

In [None]:
###

In [None]:
-- 12. The five users with the most 'create_order' actions in August 2022;
--     ties are broken by the smaller user_id.
SELECT
    ua.user_id,
    COUNT(*) AS created_orders
FROM user_actions AS ua
WHERE
    ua.action = 'create_order'
    -- Truncating to the month start keeps exactly the August 2022 rows.
    AND DATE_TRUNC('month', ua.time) = '2022-08-01'
GROUP BY ua.user_id
ORDER BY
    created_orders DESC,
    ua.user_id
LIMIT 5;

In [None]:
###

In [None]:
-- 13. Couriers that delivered exactly one different order in September
--     2022, sorted by courier_id.
SELECT
    ca.courier_id
FROM courier_actions AS ca
WHERE
    ca.action = 'deliver_order'
    -- September 2022 expressed as a half-open range on the timestamp.
    AND ca.time >= '2022-09-01'
    AND ca.time < '2022-10-01'
GROUP BY ca.courier_id
HAVING COUNT(DISTINCT ca.order_id) = 1
ORDER BY ca.courier_id;

In [None]:
###

In [None]:
-- 14. Users whose most recent action other than 'cancel_order' happened
--     before 2022-09-08, sorted by user_id.
SELECT
    ua.user_id
FROM user_actions AS ua
WHERE ua.action <> 'cancel_order'
GROUP BY ua.user_id
HAVING MAX(ua.time) < '2022-09-08'
ORDER BY ua.user_id;

In [None]:
###

In [None]:
-- 15. Number of orders per size category, least common category first.
WITH sized_orders AS (
    -- Numeric bucket per order: 1 for 1-3 items, 2 for 4-6 items,
    -- 3 for 7 or more, 0 for anything else (e.g. a NULL array length).
    SELECT
        o.order_id,
        CASE
            WHEN ARRAY_LENGTH(o.product_ids, 1) BETWEEN 1 AND 3 THEN 1
            WHEN ARRAY_LENGTH(o.product_ids, 1) BETWEEN 4 AND 6 THEN 2
            WHEN ARRAY_LENGTH(o.product_ids, 1) >= 7 THEN 3
            ELSE 0
        END AS bucket
    FROM orders AS o
)

SELECT
    CASE s.bucket
        WHEN 1 THEN 'Малый'
        WHEN 2 THEN 'Средний'
        -- Every remaining bucket, including the fallback 0, lands here.
        ELSE 'Большой'
    END AS order_size,
    COUNT(s.order_id) AS orders_count
FROM sized_orders AS s
GROUP BY order_size
ORDER BY orders_count;

In [None]:
-- Number of orders per size category, least common category first.
-- Categories use cumulative upper bounds: up to 3 items, up to 6 items,
-- and everything else.
WITH order_lengths AS (
    -- Item count of each order, computed once.
    SELECT
        o.order_id,
        ARRAY_LENGTH(o.product_ids, 1) AS items
    FROM orders AS o
)

SELECT
    CASE
        WHEN ol.items <= 3 THEN 'Малый'
        WHEN ol.items <= 6 THEN 'Средний'
        ELSE 'Большой'
    END AS order_size,
    COUNT(ol.order_id) AS orders_count
FROM order_lengths AS ol
GROUP BY order_size
ORDER BY orders_count;

In [None]:
-- Number of orders per size category, least common category first.
-- Categories are explicit ranges: 1-3 items, 4-6 items, and everything else.
SELECT
    CASE
        WHEN ARRAY_LENGTH(o.product_ids, 1) IN (1, 2, 3) THEN 'Малый'
        WHEN ARRAY_LENGTH(o.product_ids, 1) IN (4, 5, 6) THEN 'Средний'
        ELSE 'Большой'
    END AS order_size,
    COUNT(o.order_id) AS orders_count
FROM orders AS o
GROUP BY order_size
ORDER BY orders_count;

In [None]:
-- Number of orders per size category, least common category first.
-- Categories use descending lower bounds: 7 or more items, 4 or more,
-- and everything else.
SELECT
    CASE
        WHEN ARRAY_LENGTH(o.product_ids, 1) > 6 THEN 'Большой'
        WHEN ARRAY_LENGTH(o.product_ids, 1) > 3 THEN 'Средний'
        ELSE 'Малый'
    END AS order_size,
    COUNT(o.order_id) AS orders_count
FROM orders AS o
GROUP BY order_size
ORDER BY orders_count;

In [None]:
###

In [None]:
-- 16. Number of different users per age group, for users that have a
--     birth_date; sorted by group label.
WITH user_ages AS (
    -- Age in whole years as of today.
    SELECT
        u.user_id,
        DATE_PART('year', AGE(CURRENT_DATE, u.birth_date)) AS age
    FROM users AS u
    WHERE u.birth_date IS NOT NULL
)

SELECT
    CASE
        -- Only upper bounds are tested, so any age of 24 or less
        -- falls into the first group.
        WHEN a.age <= 24 THEN '18-24'
        WHEN a.age <= 29 THEN '25-29'
        WHEN a.age <= 35 THEN '30-35'
        ELSE '36+'
    END AS group_age,
    COUNT(DISTINCT a.user_id) AS users_count
FROM user_ages AS a
GROUP BY group_age
ORDER BY group_age;

In [None]:
-- Number of different users per age group, for users that have a
-- birth_date; sorted by group label.
WITH user_ages AS (
    -- Age in whole years as of today, computed once per user row.
    SELECT
        u.user_id,
        DATE_PART('year', AGE(CURRENT_DATE, u.birth_date)) AS age
    FROM users AS u
    WHERE u.birth_date IS NOT NULL
)

SELECT
    CASE
        WHEN a.age BETWEEN 18 AND 24 THEN '18-24'
        WHEN a.age BETWEEN 25 AND 29 THEN '25-29'
        WHEN a.age BETWEEN 30 AND 35 THEN '30-35'
        -- Explicit lower bound instead of a catch-all ELSE: with ELSE, an
        -- age below 18 was labelled '36+'. Such ages now fall through to a
        -- NULL group and stay visible instead of inflating '36+'.
        WHEN a.age >= 36 THEN '36+'
    END AS group_age,
    COUNT(DISTINCT a.user_id) AS users_count
FROM user_ages AS a
GROUP BY group_age
ORDER BY group_age;

In [None]:
###

In [None]:
-- 17. Mean number of items per order, rounded to two decimals, for orders
--     created on weekends versus weekdays; smaller average first.
--     With 'isodow' Saturday is 6 and Sunday is 7.
SELECT
    CASE
        WHEN DATE_PART('isodow', o.creation_time) IN (6, 7) THEN 'weekend'
        ELSE 'weekdays'
    END AS week_part,
    ROUND(AVG(ARRAY_LENGTH(o.product_ids, 1)), 2) AS avg_order_size
FROM orders AS o
GROUP BY week_part
ORDER BY avg_order_size;

In [None]:
###

In [None]:
-- 18. Users with more than three 'create_order' actions whose ratio of
--     'cancel_order' to 'create_order' actions is at least 0.5.
--     Returns the user, the created-order count and the ratio rounded to
--     two decimals, sorted by user_id.
SELECT
    ua.user_id,
    COUNT(ua.order_id)
        FILTER (WHERE ua.action = 'create_order') AS orders_count,
    ROUND(
        COUNT(ua.order_id) FILTER (WHERE ua.action = 'cancel_order') * 1.0
        / NULLIF(
            COUNT(ua.order_id) FILTER (WHERE ua.action = 'create_order'), 0
        ),
        2
    ) AS cancel_rate
FROM
    user_actions AS ua
GROUP BY
    ua.user_id
HAVING
    COUNT(ua.order_id) FILTER (WHERE ua.action = 'create_order') > 3
    -- The two conditions are not guaranteed to be evaluated left to right,
    -- so the "> 3" test cannot be relied on to protect the division.
    -- NULLIF makes the ratio NULL (group rejected) for a user with no
    -- 'create_order' rows instead of raising a division-by-zero error.
    AND COUNT(ua.order_id) FILTER (WHERE ua.action = 'cancel_order') * 1.0
        / NULLIF(
            COUNT(ua.order_id) FILTER (WHERE ua.action = 'create_order'), 0
        ) >= 0.5
ORDER BY
    ua.user_id;

In [None]:
###

In [None]:
-- 19. Per day of the week, for actions from 2022-08-24 through 2022-09-06:
--     different orders created, different orders cancelled, their
--     difference, and that difference as a share of created orders
--     (three decimals). Sorted by ISO weekday number (Monday = 1).
WITH weekday_counts AS (
    -- The two base counts are computed once here; the outer query derives
    -- the remaining columns from them instead of repeating the aggregates.
    SELECT
        DATE_PART('isodow', ua.time)::INT AS weekday_number,
        TO_CHAR(ua.time, 'Dy') AS weekday,
        COUNT(DISTINCT ua.order_id)
            FILTER (WHERE ua.action = 'create_order') AS created_orders,
        COUNT(DISTINCT ua.order_id)
            FILTER (WHERE ua.action = 'cancel_order') AS canceled_orders
    FROM
        user_actions AS ua
    WHERE
        -- Half-open range covering whole days 2022-08-24 .. 2022-09-06;
        -- same rows as truncating time to the day and comparing with
        -- BETWEEN, without wrapping the column in a function.
        ua.time >= '2022-08-24'
        AND ua.time < '2022-09-07'
    GROUP BY
        DATE_PART('isodow', ua.time),
        TO_CHAR(ua.time, 'Dy')
)

SELECT
    wc.weekday_number,
    wc.weekday,
    wc.created_orders,
    wc.canceled_orders,
    wc.created_orders - wc.canceled_orders AS actual_orders,
    ROUND(
        (wc.created_orders - wc.canceled_orders) * 1.0
        -- NULLIF yields a NULL rate, not a division-by-zero error, for a
        -- weekday that has cancellations but no created orders.
        / NULLIF(wc.created_orders, 0),
        3
    ) AS success_rate
FROM
    weekday_counts AS wc
ORDER BY
    wc.weekday_number;