Task 4

Overview: Create the following reports based on the gold layer data:
- Create a report that shows summarized number of sales and revenue for
each state
- Create a report that shows top 3 customers for each state based on the
spent amount and their most expensive item bought
- Create a report that shows sales by product category per month for all
customers who live in the state of NY for 3 different age group 0-30yo, 30-
60yo and above 60 yo

In [0]:
%sql

---------------
---- Task 4 ---
---------------

-- SUM num of sales and revenue by state
SELECT 
    c.cust_address_state_province AS state,
    COUNT(f.order_id) AS total_sales,
    SUM(f.unit_price * f.quantity) AS total_revenue
FROM de_pyspark_training_catalog.buddy_group_1.amanolov_gold_fact_orders_exam f
JOIN de_pyspark_training_catalog.buddy_group_1.amanolov_gold_dim_customers_exam c
    ON f.cust_sk = c.cust_sk
GROUP BY c.cust_address_state_province
ORDER BY total_sales DESC;

state,total_sales,total_revenue
WI,14,112335.07
NY,7,1074.67
IA,4,5565.02
MI,2,416.58
IN,2,14138.88
MN,1,115.62
PA,1,105.0


In [0]:
%sql

---------------
---- Task 4 ---
---------------

-- Top 3 customers for each state based on the spent amount and their most expensive item bought
WITH customer_sales AS (
    SELECT 
        c.cust_address_state_province AS state,
        c.cust_sk,
        c.cust_first_name,
        c.cust_last_name,
        SUM(f.unit_price * f.quantity) AS total_spent,
        MAX(f.unit_price) AS most_expensive_item
    FROM de_pyspark_training_catalog.buddy_group_1.amanolov_gold_fact_orders_exam f
    JOIN de_pyspark_training_catalog.buddy_group_1.amanolov_gold_dim_customers_exam c
        ON f.cust_sk = c.cust_sk
    GROUP BY c.cust_address_state_province, c.cust_sk, c.cust_first_name, c.cust_last_name
),

ranked_customers AS (
    SELECT 
        state,
        cust_first_name,
        cust_last_name,
        total_spent,
        most_expensive_item,
        ROW_NUMBER() OVER (PARTITION BY state ORDER BY total_spent DESC) AS rank
    FROM customer_sales
)

SELECT 
    state,
    cust_first_name,
    cust_last_name,
    total_spent,
    most_expensive_item
FROM ranked_customers
WHERE rank <= 3
ORDER BY state, rank;

state,cust_first_name,cust_last_name,total_spent,most_expensive_item
IA,Sivaji,Landis,5565.02,74.0
IN,Harrison,Pacino,14080.0,880.0
IN,Constantin,Welles,58.88,14.72
MI,Meg,Derek,227.2,45.44
MI,Kyle,Schneider,189.38,94.69
MN,Dheeraj,Alexander,115.62,19.27
NY,Lauren,Hershey,584.9,72.5
NY,Harry dean,Forrest,349.65,48.5
NY,Blake,Seignier,81.30000000000001,13.55
PA,Fred,Lithgow,105.0,35.0


In [0]:
%sql

---------------
---- Task 4 ---
---------------

-- Sales by product category per month for all customers
-- who live in the state of NY for 3 different age group 0-30yo, 30-60yo and above 60 yo

WITH customers_ny AS (
    SELECT 
        cust_sk,
        cust_first_name,
        cust_last_name,
        date_of_birth,
        cust_address_state_province,
        FLOOR(DATEDIFF(CURRENT_DATE, date_of_birth) / 365.25) AS age
    FROM de_pyspark_training_catalog.buddy_group_1.amanolov_gold_dim_customers_exam
    WHERE cust_address_state_province = 'NY'
),

customers_with_age_group AS (
    SELECT *,
           CASE 
               WHEN age <= 30 THEN '0-30'
               WHEN age > 30 AND age <= 60 THEN '31-60'
               ELSE '61+'
           END AS age_group
    FROM customers_ny
),

sales_per_category AS (
    SELECT 
        DATE_FORMAT(f.order_date, 'yyyy-MM') AS order_month,
        p.category_name,
        c.age_group,
        SUM(CAST(f.unit_price AS DOUBLE) * CAST(f.quantity AS DOUBLE)) AS total_sales
    FROM de_pyspark_training_catalog.buddy_group_1.amanolov_gold_fact_orders_exam f
    JOIN de_pyspark_training_catalog.buddy_group_1.amanolov_gold_dim_products_exam p
        ON f.product_sk = p.product_sk
    JOIN customers_with_age_group c
        ON f.cust_sk = c.cust_sk
    WHERE f.order_date IS NOT NULL 
    GROUP BY order_month, p.category_name, c.age_group
)

SELECT * 
FROM sales_per_category
ORDER BY order_month, category_name, age_group;

order_month,category_name,age_group,total_sales
2022-07,hardware3,61+,81.30000000000001
2022-09,hardware3,61+,155.65
2024-04,hardware1,61+,45.08
2024-08,hardware3,61+,149.9
2024-10,hardware2,61+,13.74
2025-01,hardware3,61+,435.0
2025-04,hardware2,61+,194.0
