In [1]:
%%sql
-- 1) Daily sales per city, plus a trailing average of those daily totals.
--    Step 1 collapses the fact table to one row per (order_date, city).
--    Step 2 averages each daily total with up to six preceding rows of the
--    same city.
--    NOTE(review): the frame counts rows, not calendar days, so it covers
--    exactly 7 days only if a city has a row for every date -- confirm.
WITH daily_city_sales AS (
    SELECT
        s.order_date,
        loc.city,
        SUM(s.sales_amount) AS total_sales
    FROM sweetcoffeetree.facts_sales_3b_rows AS s
    INNER JOIN sweetcoffeetree.dim_locations AS loc
        ON loc.location_id = s.location_id
    GROUP BY
        s.order_date,
        loc.city
)
SELECT
    order_date,
    city,
    total_sales,
    AVG(total_sales) OVER city_trailing_rows AS rolling_7day_avg
FROM daily_city_sales
WINDOW city_trailing_rows AS (
    PARTITION BY city
    ORDER BY order_date
    ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
)
ORDER BY
    city,
    order_date;

StatementMeta(, d3f32bae-668f-4766-8172-75d986a2197d, 2, Finished, Available, Finished)

<Spark SQL result set with 1000 rows and 4 fields>

In [2]:
%%sql
-- 2) Monthly product leaderboard: within each calendar month, products are
--    ranked by their summed sales_amount, rank 1 being the largest total.
--    RANK() gives tied products the same rank and skips the ranks after them.
WITH product_month_totals AS (
    -- One row per (month, product) carrying that month's summed sales.
    SELECT
        DATE_TRUNC('month', s.order_date) AS sales_month,
        s.product_name,
        SUM(s.sales_amount) AS total_sales
    FROM sweetcoffeetree.facts_sales_3b_rows AS s
    GROUP BY
        DATE_TRUNC('month', s.order_date),
        s.product_name
)
SELECT
    sales_month,
    product_name,
    total_sales,
    RANK() OVER month_by_sales_desc AS sales_rank
FROM product_month_totals
WINDOW month_by_sales_desc AS (
    PARTITION BY sales_month
    ORDER BY total_sales DESC
)
ORDER BY
    sales_month,
    sales_rank;

StatementMeta(, d3f32bae-668f-4766-8172-75d986a2197d, 3, Finished, Available, Finished)

<Spark SQL result set with 312 rows and 4 fields>

In [3]:
%%sql
-- 3) Per season, the (city, state) pairs with the highest average
--    discount_percentage, keeping dense ranks 1 through 3.
--    DENSE_RANK() assigns tied averages the same rank, so a season can
--    return more than three rows when averages tie.
WITH location_season_discount AS (
    -- Average discount for every (city, state, season) combination.
    SELECT
        loc.city,
        loc.state,
        s.season,
        AVG(s.discount_percentage) AS avg_discount
    FROM sweetcoffeetree.facts_sales_3b_rows AS s
    INNER JOIN sweetcoffeetree.dim_locations AS loc
        ON loc.location_id = s.location_id
    GROUP BY
        loc.city,
        loc.state,
        s.season
),
ranked_within_season AS (
    -- Rank the locations inside each season, largest average first.
    SELECT
        city,
        state,
        season,
        avg_discount,
        DENSE_RANK() OVER (
            PARTITION BY season
            ORDER BY avg_discount DESC
        ) AS discount_rank
    FROM location_season_discount
)
SELECT
    city,
    state,
    season,
    avg_discount,
    discount_rank
FROM ranked_within_season
WHERE discount_rank <= 3
ORDER BY
    season,
    discount_rank;

StatementMeta(, d3f32bae-668f-4766-8172-75d986a2197d, 4, Finished, Available, Finished)

<Spark SQL result set with 12 rows and 5 fields>

In [4]:
%%sql
-- 4) Daily quantity and sales per product next to the product's standard
--    price and cost, plus a theoretical margin.
--    Each sale is matched to the dim_products row whose name equals the
--    sale's product_name and whose from_date..to_date range contains the
--    order date (both ends inclusive).
--    theoretical_margin = (standard_price - standard_cost) * units sold; it
--    is derived from the standard figures, not from sales_amount.
SELECT
    s.order_date,
    s.product_name,
    prod.standard_price,
    prod.standard_cost,
    SUM(s.quantity) AS total_quantity_sold,
    SUM(s.sales_amount) AS total_sales_amount,
    (prod.standard_price - prod.standard_cost) * SUM(s.quantity) AS theoretical_margin
FROM sweetcoffeetree.facts_sales_3b_rows AS s
INNER JOIN sweetcoffeetree.dim_products AS prod
    ON prod.name = s.product_name
    AND s.order_date >= prod.from_date
    AND s.order_date <= prod.to_date
GROUP BY
    s.order_date,
    s.product_name,
    prod.standard_price,
    prod.standard_cost
ORDER BY
    s.order_date,
    s.product_name;

StatementMeta(, d3f32bae-668f-4766-8172-75d986a2197d, 5, Finished, Available, Finished)

<Spark SQL result set with 1000 rows and 7 fields>

In [5]:
%%sql
-- 5) Rolling quantity sold per city.
--    Step 1 sums quantity to one row per (order_date, city).
--    Step 2 adds each daily quantity to the 29 preceding rows of that city.
--    NOTE(review): the frame is row-based, so it equals 30 calendar days
--    only if every city has a row for every date -- confirm.
WITH city_daily_units AS (
    SELECT
        s.order_date,
        loc.city,
        SUM(s.quantity) AS daily_qty
    FROM sweetcoffeetree.facts_sales_3b_rows AS s
    INNER JOIN sweetcoffeetree.dim_locations AS loc
        ON loc.location_id = s.location_id
    GROUP BY
        s.order_date,
        loc.city
)
SELECT
    order_date,
    city,
    daily_qty,
    SUM(daily_qty) OVER city_trailing_30_rows AS rolling_30day_qty
FROM city_daily_units
WINDOW city_trailing_30_rows AS (
    PARTITION BY city
    ORDER BY order_date
    ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
)
ORDER BY
    city,
    order_date;

StatementMeta(, d3f32bae-668f-4766-8172-75d986a2197d, 6, Finished, Available, Finished)

<Spark SQL result set with 1000 rows and 4 fields>

In [6]:
%%sql
-- 6) (Re)build a table holding monthly revenue per product category.
--    Sales are matched to the dim_products row with the same product name
--    whose from_date..to_date range contains the order date (inclusive),
--    then summed per (calendar month, category).
--    Output columns: sales_month, category, monthly_revenue.
--    NOTE(review): statement 10 in this notebook writes to the same table
--    name, so running it replaces this result -- confirm that is intended.
CREATE OR REPLACE TABLE sweetcoffeetree.snowflake_comparison_3b_large AS
SELECT
    DATE_TRUNC('month', s.order_date) AS sales_month,
    prod.category,
    SUM(s.sales_amount) AS monthly_revenue
FROM sweetcoffeetree.facts_sales_3b_rows AS s
INNER JOIN sweetcoffeetree.dim_products AS prod
    ON prod.name = s.product_name
    AND s.order_date >= prod.from_date
    AND s.order_date <= prod.to_date
GROUP BY
    DATE_TRUNC('month', s.order_date),
    prod.category;

StatementMeta(, d3f32bae-668f-4766-8172-75d986a2197d, 7, Finished, Available, Finished)

<Spark SQL result set with 0 rows and 0 fields>

In [7]:
%%sql
-- 7) Compare total sales per (city, state) in 2023 vs. 2024.
--    yearly_sales    : sales summed per location and calendar year, limited
--                      to the two years being compared.
--    city_year_pivot : one row per (city, state) with a column per year.
--    Final select    : adds yoy_diff = sales_2024 - sales_2023.
--
--    The half-open date range restricts the fact scan to 2023-2024 instead
--    of aggregating every year in the table and then discarding the rest.
--    The predicate is on the bare column (not YEAR(order_date)) so the
--    engine can use it for partition / file pruning where available.
--    Consequence: a (city, state) with no sales in either year is no longer
--    returned as an all-zero row.
WITH yearly_sales AS (
    SELECT
        loc.location_id,
        loc.city,
        loc.state,
        YEAR(s.order_date) AS sales_year,
        SUM(s.sales_amount) AS total_sales_year
    FROM sweetcoffeetree.facts_sales_3b_rows AS s
    INNER JOIN sweetcoffeetree.dim_locations AS loc
        ON loc.location_id = s.location_id
    WHERE s.order_date >= DATE '2023-01-01'
        AND s.order_date < DATE '2025-01-01'
    GROUP BY
        loc.location_id,
        loc.city,
        loc.state,
        YEAR(s.order_date)
),
city_year_pivot AS (
    -- ELSE 0 keeps a year's column at 0 (not NULL) when only the other
    -- year has rows for that city/state.
    SELECT
        city,
        state,
        SUM(CASE WHEN sales_year = 2023 THEN total_sales_year ELSE 0 END) AS sales_2023,
        SUM(CASE WHEN sales_year = 2024 THEN total_sales_year ELSE 0 END) AS sales_2024
    FROM yearly_sales
    GROUP BY
        city,
        state
)
SELECT
    city,
    state,
    sales_2023,
    sales_2024,
    sales_2024 - sales_2023 AS yoy_diff
FROM city_year_pivot
ORDER BY
    city,
    state;

StatementMeta(, d3f32bae-668f-4766-8172-75d986a2197d, 8, Finished, Available, Finished)

<Spark SQL result set with 4 rows and 5 fields>

In [8]:
%%sql
-- 8) Subcategory ranking inside every (city, quarter).
--    Sales are joined to their location (for city) and to the dim_products
--    row with the same product name whose from_date..to_date range contains
--    the order date (inclusive), then summed per city, quarter, subcategory.
--    RANK() orders subcategories by that total, largest first; ties share a
--    rank and the following ranks are skipped.
WITH subcat_sales_by_city_quarter AS (
    SELECT
        loc.city,
        DATE_TRUNC('quarter', s.order_date) AS sales_quarter,
        prod.subcategory,
        SUM(s.sales_amount) AS total_sales
    FROM sweetcoffeetree.facts_sales_3b_rows AS s
    INNER JOIN sweetcoffeetree.dim_locations AS loc
        ON loc.location_id = s.location_id
    INNER JOIN sweetcoffeetree.dim_products AS prod
        ON prod.name = s.product_name
        AND s.order_date >= prod.from_date
        AND s.order_date <= prod.to_date
    GROUP BY
        loc.city,
        DATE_TRUNC('quarter', s.order_date),
        prod.subcategory
)
SELECT
    city,
    sales_quarter,
    subcategory,
    total_sales,
    RANK() OVER city_quarter_by_sales_desc AS subcat_rank
FROM subcat_sales_by_city_quarter
WINDOW city_quarter_by_sales_desc AS (
    PARTITION BY city, sales_quarter
    ORDER BY total_sales DESC
)
ORDER BY
    city,
    sales_quarter,
    subcat_rank;

StatementMeta(, d3f32bae-668f-4766-8172-75d986a2197d, 9, Finished, Available, Finished)

<Spark SQL result set with 96 rows and 5 fields>

In [9]:
%%sql
-- 9) Average discount per city and day, with a running average per city.
--    Step 1 averages discount_percentage per (city, order_date).
--    Step 2 averages those daily values from the city's first row up to and
--    including the current row.
--    The running figure is an average of daily averages: every day counts
--    equally, however many fact rows it has.
WITH city_daily_discount AS (
    SELECT
        loc.city,
        s.order_date,
        AVG(s.discount_percentage) AS avg_discount
    FROM sweetcoffeetree.facts_sales_3b_rows AS s
    INNER JOIN sweetcoffeetree.dim_locations AS loc
        ON loc.location_id = s.location_id
    GROUP BY
        loc.city,
        s.order_date
)
SELECT
    city,
    order_date,
    avg_discount,
    AVG(avg_discount) OVER city_to_date AS cumulative_avg_discount
FROM city_daily_discount
WINDOW city_to_date AS (
    PARTITION BY city
    ORDER BY order_date
    ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
)
ORDER BY
    city,
    order_date;

StatementMeta(, d3f32bae-668f-4766-8172-75d986a2197d, 10, Finished, Available, Finished)

<Spark SQL result set with 1000 rows and 4 fields>

In [10]:
%%sql
-- 10) (Re)build a table with a rolling count of orders per city.
--     Step 1 counts distinct order_id values per (order_date, city).
--     Step 2 sums each daily count with the 89 preceding rows of that city.
--     Output columns: order_date, city, daily_distinct_orders,
--     rolling_90d_distinct_orders.
--     NOTE(review): the frame is row-based (90 rows, not 90 calendar days),
--     and the rolling value is a sum of daily distinct counts -- an order_id
--     appearing on two dates would be counted twice. Confirm both are
--     acceptable.
--     NOTE(review): statement 6 in this notebook creates a table with this
--     same name; this statement replaces it -- confirm that is intended.
CREATE OR REPLACE TABLE sweetcoffeetree.snowflake_comparison_3b_large AS
WITH city_daily_orders AS (
    SELECT
        s.order_date,
        loc.city,
        COUNT(DISTINCT s.order_id) AS daily_distinct_orders
    FROM sweetcoffeetree.facts_sales_3b_rows AS s
    INNER JOIN sweetcoffeetree.dim_locations AS loc
        ON loc.location_id = s.location_id
    GROUP BY
        s.order_date,
        loc.city
)
SELECT
    order_date,
    city,
    daily_distinct_orders,
    SUM(daily_distinct_orders) OVER city_trailing_90_rows AS rolling_90d_distinct_orders
FROM city_daily_orders
WINDOW city_trailing_90_rows AS (
    PARTITION BY city
    ORDER BY order_date
    ROWS BETWEEN 89 PRECEDING AND CURRENT ROW
)
ORDER BY
    city,
    order_date;

StatementMeta(, d3f32bae-668f-4766-8172-75d986a2197d, 11, Finished, Available, Finished)

<Spark SQL result set with 0 rows and 0 fields>