* (1) [Session Analysis](./SQL/SessionAnalysis.sql): Summarize how customers interact with pages and products during their sessions.
* (2) [Product Performance](./SQL/ProductPerformance.sql): Summarize how customers interact with different product categories and individual products, which can be used to support marketing strategies, inventory management, and product development.
* (3) [Trend Analysis](./SQL/TrendAnalysis.sql): Track changes in the popularity of different product categories / products and colors to plan inventory and marketing for the season.



**(1) Session Analysis**

This section focuses on understanding how customers interact with pages and products during their sessions.

Key questions:

- On average, how many pages do users visit in a session?
- On average, how many products do users visit in a session?
- What percentage of sessions are single-page sessions?
- Which products are frequently viewed together? (Find products that are frequently viewed in the same session.)
- How does product price affect session counts?
- How does model photography affect session counts?


In [None]:
-- On average, how many pages do users visit in a session?
-- Step 1 (session_pages): one row per session with the number of non-NULL
-- page_num rows recorded for it.
-- Step 2: average those per-session counts.
-- NOTE(review): COUNT(page_num) counts rows, not distinct pages -- confirm
-- whether COUNT(DISTINCT page_num) is what the question intends.
WITH session_pages AS (
    SELECT
        session_id,
        COUNT(page_num) AS page_count
    FROM clickstream
    GROUP BY session_id
)
SELECT AVG(page_count) AS avg_page_count
FROM session_pages;

In [None]:
-- On average, how many products do users visit in a session?
-- Step 1 (session_products): one row per session with the number of distinct
-- products it touched.
-- Step 2: average those per-session counts.
-- No ORDER BY in step 1: row order cannot affect an average.
WITH session_products AS (
    SELECT
        session_id,
        COUNT(DISTINCT product_id) AS product_count
    FROM clickstream
    GROUP BY session_id
)
SELECT AVG(product_count) AS avg_product_count
FROM session_products;

In [None]:
-- What percentage of sessions are single-page sessions?
-- Step 1 (session_pages): one row per session with its non-NULL page_num row
-- count.
-- Step 2: share of sessions whose count is exactly 1, as a percentage.
-- A single pass over session_pages is used on purpose: joining a
-- "single-page" set to an "all sessions" set without a join condition would
-- multiply both counts by the same factor and always yield 100.
-- NULLIF guards the division when clickstream is empty (result is NULL).
WITH session_pages AS (
    SELECT
        session_id,
        COUNT(page_num) AS page_count
    FROM clickstream
    GROUP BY session_id
)
SELECT
    SUM(CASE WHEN page_count = 1 THEN 1 ELSE 0 END) * 100.0
        / NULLIF(COUNT(*), 0) AS percentage_single_page_sessions
FROM session_pages;

In [None]:
-- Which products are frequently viewed together?
-- (Find products that are frequently viewed in the same session.)
-- Step 1 (session_products): de-duplicate to one row per (session, product)
-- so repeated rows for the same product inside a session do not multiply the
-- pair count.
-- Step 2: self-join within a session; a.product_id < b.product_id keeps each
-- unordered pair once and drops self-pairs. frequency is therefore the number
-- of sessions containing both products.
-- The extra ORDER BY keys make the top-10 cut deterministic when frequencies tie.
WITH session_products AS (
    SELECT DISTINCT
        session_id,
        product_id
    FROM clickstream
)
SELECT
    a.product_id AS product1,
    b.product_id AS product_2,
    COUNT(*) AS frequency
FROM session_products AS a
INNER JOIN session_products AS b
    ON a.session_id = b.session_id
    AND a.product_id < b.product_id
GROUP BY a.product_id, b.product_id
ORDER BY frequency DESC, product1, product_2
LIMIT 10;

In [None]:
-- Impact of product price on session count
-- For every (product_id, price) combination, report how many clickstream rows
-- carry it, largest first. Note that this counts rows, not distinct sessions.
WITH price_views AS (
    SELECT
        product_id,
        price,
        COUNT(*) AS counts
    FROM clickstream
    GROUP BY
        product_id,
        price
)
SELECT
    product_id,
    price,
    counts
FROM price_views
ORDER BY counts DESC

In [None]:
-- Impact of model photography on session count
-- For every model_photography_id, report how many clickstream rows carry it,
-- largest first. Note that this counts rows, not distinct sessions.
WITH photography_views AS (
    SELECT
        model_photography_id,
        COUNT(*) AS counts
    FROM clickstream
    GROUP BY model_photography_id
)
SELECT
    model_photography_id,
    counts
FROM photography_views
ORDER BY counts DESC

**(2) Product Performance**

This analysis helps understand how customers interact with different product categories and individual products, which can be used to support marketing strategies, inventory management, and product development.

Key questions:

- What's the most popular product category?
- What's the most popular product category in each country?
- What's the most popular product?
- What's the most popular product in each country?
- For each product category, which product is most popular? 
- How does the color preference vary by product category?



In [None]:
-- What's the most popular (most frequently viewed) product category?
-- Ten category_ids with the most clickstream rows, highest count first.
SELECT
    category_id,
    COUNT(*) AS category_count
FROM
    clickstream
GROUP BY
    category_id
ORDER BY
    category_count DESC
LIMIT 10;

In [None]:
-- What's the most popular product category in each country?
-- country_category_counts: clickstream rows per (country, category).
WITH country_category_counts AS (
    SELECT
        country_id,
        category_id,
        COUNT(*) AS category_count
    FROM clickstream
    GROUP BY
        country_id,
        category_id
),
-- ranked_categories: rank categories inside each country by row count.
-- RANK() gives tied categories the same rank, so ties for first all survive.
ranked_categories AS (
    SELECT
        country_id,
        category_id,
        category_count,
        RANK() OVER (
            PARTITION BY country_id
            ORDER BY category_count DESC
        ) AS category_rank
    FROM country_category_counts
)
-- Inner joins: rows whose ids are missing from either dimension table are
-- dropped from the result.
SELECT
    con.country,
    cat.category,
    rc.category_count
FROM ranked_categories AS rc
INNER JOIN category_dim AS cat
    ON cat.category_id = rc.category_id
INNER JOIN country_dim AS con
    ON con.country_id = rc.country_id
WHERE rc.category_rank = 1
ORDER BY con.country;


In [None]:
-- What's the most popular product?
-- Ten product_ids with the most clickstream rows, highest count first.
WITH product_views AS (
    SELECT
        product_id,
        COUNT(*) AS product_count
    FROM clickstream
    GROUP BY product_id
)
SELECT
    product_id,
    product_count
FROM product_views
ORDER BY product_count DESC
LIMIT 10;

In [None]:
-- What's the most popular product in each country?
-- ranked_products: clickstream rows per (country, product), ranked inside each
-- country by that count. The window function is evaluated after GROUP BY, so
-- it can order by the aggregate directly. RANK() keeps ties for first place.
WITH ranked_products AS (
    SELECT
        country_id,
        product_id,
        COUNT(*) AS product_count,
        RANK() OVER (
            PARTITION BY country_id
            ORDER BY COUNT(*) DESC
        ) AS popularity_rank
    FROM clickstream
    GROUP BY
        country_id,
        product_id
)
-- LEFT JOIN keeps a winner even when its country_id has no country_dim row
-- (country is then NULL).
SELECT
    con.country,
    rp.product_id,
    rp.product_count
FROM ranked_products AS rp
LEFT JOIN country_dim AS con
    ON con.country_id = rp.country_id
WHERE rp.popularity_rank = 1;

In [None]:
-- For each product category, which product is most popular?
-- ranked_products: clickstream rows per (category, product), ranked inside
-- each category by that count. RANK() keeps ties for first place.
WITH ranked_products AS (
    SELECT
        category_id,
        product_id,
        COUNT(*) AS product_count,
        RANK() OVER (
            PARTITION BY category_id
            ORDER BY COUNT(*) DESC
        ) AS popularity_rank
    FROM clickstream
    GROUP BY
        category_id,
        product_id
)
-- LEFT JOIN keeps a winner even when its category_id has no category_dim row
-- (category is then NULL).
SELECT
    cat.category,
    rp.product_id,
    rp.product_count
FROM ranked_products AS rp
LEFT JOIN category_dim AS cat
    ON cat.category_id = rp.category_id
WHERE rp.popularity_rank = 1

In [None]:
-- How does the color preference vary by product category?
-- Returns the most-viewed color (by clickstream row count) of every category.
-- cat_color_counts: rows per (category, color). No ORDER BY here: ordering
-- inside a CTE does not carry through to the final result.
WITH cat_color_counts AS (
    SELECT
        category_id,
        color_id,
        COUNT(*) AS total_counts
    FROM clickstream
    GROUP BY category_id, color_id
),
-- cat_color_counts_rank: rank colors inside each category by row count.
-- RANK() gives tied colors the same rank, so ties for first all survive.
cat_color_counts_rank AS (
    SELECT
        category_id,
        color_id,
        total_counts,
        RANK() OVER (
            PARTITION BY category_id
            ORDER BY total_counts DESC
        ) AS color_rank
    FROM cat_color_counts
)
-- LEFT JOINs keep a winner even when an id is missing from a dimension table.
-- The final ORDER BY makes the output order deterministic.
SELECT
    cat.category,
    col.color,
    r.total_counts
FROM cat_color_counts_rank AS r
LEFT JOIN category_dim AS cat
    ON r.category_id = cat.category_id
LEFT JOIN color_dim AS col
    ON r.color_id = col.color_id
WHERE r.color_rank = 1
ORDER BY cat.category, col.color;


**(3) Trend Analysis**

Trend Analysis tracks changes in the popularity of different product categories / products and colors to plan inventory and marketing for the season.

Key questions:

- Monthly session trends (Number of website visits per month)
- Daily session trends (Number of website visits per day)
- Are there specific days of the week when session activity peaks?
- How do product preferences change from April to August?
- How do product preferences change daily?



In [None]:
-- Monthly session trends (number of website visits per month)
-- One row per month with the number of distinct session_ids seen in it.
-- The aggregate is aliased so the column label does not depend on the engine.
SELECT
    month,
    COUNT(DISTINCT session_id) AS session_count
FROM clickstream
GROUP BY month
ORDER BY month;

In [None]:
-- Daily session trends (number of website visits per day)
-- One row per date_id with the number of distinct session_ids seen on it.
-- The aggregate is aliased so the column label does not depend on the engine.
SELECT
    date_id,
    COUNT(DISTINCT session_id) AS session_count
FROM clickstream
GROUP BY date_id
ORDER BY date_id;

In [None]:
-- Are there specific days of the week when session activity peaks?
-- Every clickstream row is kept and matched to calendar on date_id; rows are
-- then counted per day_of_week, busiest day first.
-- COUNT(day_of_week) skips NULLs, so clickstream rows without a calendar
-- match form a NULL group whose count is 0.
-- NOTE(review): day_of_week is presumably a calendar column -- confirm.
SELECT
    day_of_week,
    COUNT(day_of_week) AS day_of_week_count
FROM clickstream
LEFT JOIN calendar
    ON calendar.date_id = clickstream.date_id
GROUP BY
    day_of_week
ORDER BY
    day_of_week_count DESC

In [None]:
-- How do product preferences change from April to August? (long format)
-- One row per (month, product) with its clickstream row count, restricted to
-- months 4 through 8 inclusive. Sorted by month ascending, then product_id
-- descending.
SELECT
    month,
    product_id,
    COUNT(*) AS total_count
FROM clickstream
WHERE month >= 4
    AND month <= 8
GROUP BY
    month,
    product_id
ORDER BY
    month ASC,
    product_id DESC;

In [None]:
-- How do product preferences change from April to August? (wide format)
-- One row per product with one column per month (4 = April ... 8 = August).
-- Each column adds 1 for every clickstream row of that month and 0 otherwise,
-- so it holds the product's row count for the month.
-- There is no month filter: products seen only in other months still appear,
-- with zeros in every column.
SELECT
    product_id,
    SUM(CASE month WHEN 4 THEN 1 ELSE 0 END) AS "April_views",
    SUM(CASE month WHEN 5 THEN 1 ELSE 0 END) AS "May_views",
    SUM(CASE month WHEN 6 THEN 1 ELSE 0 END) AS "June_views",
    SUM(CASE month WHEN 7 THEN 1 ELSE 0 END) AS "July_views",
    SUM(CASE month WHEN 8 THEN 1 ELSE 0 END) AS "August_views"
FROM
    clickstream
GROUP BY
    product_id
ORDER BY
    product_id

In [None]:
-- How do product preferences change daily?
-- product_views: clickstream rows per (date, category, product).
WITH product_views AS (
    SELECT
        date_id,
        category_id,
        product_id,
        COUNT(*) AS total_views
    FROM clickstream
    GROUP BY date_id, category_id, product_id
)
-- LEFT JOIN keeps rows whose category_id has no category_dim entry.
-- The ORDER BY lives on the outer query because ordering inside a CTE is not
-- guaranteed to survive the join; the sort keys are the ones originally
-- requested (date, category, then product_id descending).
SELECT
    p.date_id,
    c.category,
    p.product_id,
    p.total_views
FROM product_views AS p
LEFT JOIN category_dim AS c
    ON p.category_id = c.category_id
ORDER BY p.date_id, p.category_id, p.product_id DESC;