Part 2: SQL Querying

In [7]:
# Connection to the SQLite sales data warehouse; the query cells below reuse `engine`.
engine = create_engine("sqlite:///sales_dw.db", echo=False)


# 1. Find the total sales for each product.
# Sums fact_sales.TotalSales per product (joined to dim_product for the name)
# and lists the products from highest to lowest total. ProductID is a secondary
# sort key so products with equal totals always appear in the same order.
query = """
SELECT 
    p.ProductID,
    p.ProductName,
    SUM(f.TotalSales) AS TotalSales
FROM fact_sales f
JOIN dim_product p ON f.ProductID = p.ProductID
GROUP BY p.ProductID, p.ProductName
ORDER BY TotalSales DESC, p.ProductID;"""

result_df = pd.read_sql_query(query, con=engine)
display(result_df)



Unnamed: 0,ProductID,ProductName,TotalSales
0,9,Gadget I,810.0
1,8,Widget H,432.0
2,10,Gizmo J,396.0
3,5,Widget E,204.0
4,3,Widget C,136.0
5,4,Gizmo D,130.0
6,6,Gadget F,126.0
7,2,Gadget B,90.0
8,1,Widget A,65.0
9,7,Gizmo G,48.0


In [8]:
# 2. List the top 5 selling categories.
# Sums fact_sales.TotalSales per product category and keeps the five largest.
# Category is a secondary sort key so that a tie at the LIMIT boundary always
# resolves to the same rows.
query = """
SELECT 
    p.Category,
    SUM(f.TotalSales) AS CategorySales
FROM fact_sales f
JOIN dim_product p ON f.ProductID = p.ProductID
GROUP BY p.Category
ORDER BY CategorySales DESC, p.Category
LIMIT 5;
"""

result_df = pd.read_sql_query(query, con=engine)
display(result_df)

Unnamed: 0,Category,CategorySales
0,Gadgets,1091.0
1,Widgets,772.0
2,Gizmos,574.0


In [None]:
# 3. Find the product with the highest sale (volume and value) in each category
# for the last quarter.
#
# Assumption: the "last quarter" is Q1 2023, hard-coded in the WHERE clause below.
#
# The inner query totals sales per (category, product) for that quarter and numbers
# the products within each category from highest to lowest total; the outer query
# keeps only the first-ranked product per category. ProductName is a secondary sort
# key inside the window so that a tie on TotalSales always picks the same product.
#
# NOTE(review): the task asks for the top product by volume as well as by value,
# but this query ranks by sales value only. Ranking by volume would presumably need
# a quantity column on fact_sales - confirm against the schema.

query = """

SELECT *  FROM (
    SELECT 
        p.Category,
        p.ProductName,
        SUM(f.TotalSales) AS TotalSales,
        ROW_NUMBER() OVER (
            PARTITION BY p.Category
            ORDER BY SUM(f.TotalSales) DESC, p.ProductName
        ) AS rn
    FROM fact_sales f
    JOIN dim_product p ON f.ProductID = p.ProductID
    JOIN dim_date d ON f.DateID = d.DateID
    WHERE d.Year = 2023 
    AND d.Quarter = 1
    GROUP BY p.Category, p.ProductName
    ) WHERE rn = 1
"""

result_df = pd.read_sql_query(query, con=engine)
display(result_df)

Unnamed: 0,Category,ProductName,TotalSales,rn
0,Gadgets,Gadget I,810.0,1
1,Gizmos,Gizmo J,396.0,1
2,Widgets,Widget H,432.0,1


In [10]:
# 4. Find the total sales made by each customer and list the top 3 customers
# based on TotalSales.
# Sums fact_sales.TotalSales per customer (joined to dim_customer for the name)
# and keeps the three largest. CustomerID is a secondary sort key so that a tie
# at the LIMIT boundary always resolves to the same customers.

query = """
SELECT 
    c.CustomerID,
    c.CustomerName,
    SUM(f.TotalSales) AS TotalSales
FROM fact_sales f
JOIN dim_customer c ON f.CustomerID = c.CustomerID
GROUP BY c.CustomerID, c.CustomerName
ORDER BY TotalSales DESC, c.CustomerID
LIMIT 3;
"""

result_df = pd.read_sql_query(query, con=engine)
display(result_df)

Unnamed: 0,CustomerID,CustomerName,TotalSales
0,102,Jane Smith,335.0
1,101,John Doe,284.0
2,103,Michael Johnson,234.0
