


### Part-to-Whole Analysis

Objective:
- Analyze and contrast key metrics between different segments and timeframes
- Identify variations among distinct groupings
- Useful for A/B testing or regional comparisons.

Key SQL Functions:
- Aggregate functions (SUM(), AVG()) to consolidate data points
- Partitioning with window functions like SUM() OVER() to calculate totals

-- What is the sales distribution across product categories?


In [1]:
# Import required libraries
import pandas as pd
from sqlalchemy import create_engine
%load_ext sql
from IPython.display import Image, display

# Configure pandas display format
pd.options.display.float_format = '{:.2f}'.format

# Connect to PostgreSQL database with password
%sql postgresql://postgres:legacy@localhost:5432/contoso_100k

# Enable automatic conversion of SQL results to pandas DataFrames
%config SqlMagic.autopandas = True

# Disable named parameters for SQL magic
%config SqlMagic.named_parameters = "disabled"

# Test the connection with a simple query
%sql SELECT version();

Unnamed: 0,version
0,"PostgreSQL 17.4 on x86_64-windows, compiled by..."


In [2]:
%%sql
WITH category_sales AS (SELECT
categoryname, 
sum (quantity * netprice * exchangerate) as sales
from sales s
left join product p on p.productkey = s.productkey
group by categoryname)

SELECT 
categoryname,
sales,
sum (sales) over () as total_sales,
(sales / sum (sales) over ()) *100 as perc_of_total
from category_sales
order by perc_of_total DESC


Unnamed: 0,categoryname,sales,total_sales,perc_of_total
0,Computers,90619022.05,206407538.58,43.9
1,Cell phones,32624265.72,206407538.58,15.81
2,Home Appliances,26607245.54,206407538.58,12.89
3,TV and Video,20466861.38,206407538.58,9.92
4,Cameras and camcorders,18520360.66,206407538.58,8.97
5,"Music, Movies and Audio Books",10588311.0,206407538.58,5.13
6,Audio,5312898.1,206407538.58,2.57
7,Games and Toys,1668574.13,206407538.58,0.81


In [4]:
import plotly.express as px

# Create horizontal bar chart
fig = px.bar(
    _,  # Using the result from SQL query
    x='sales',
    y='categoryname',
    orientation='h',  # Makes the bars horizontal
    title='Sales Distribution by Product Category',
    text='perc_of_total',  # Show percentage on the bars
    color='sales',  # Color bars based on sales value
    color_continuous_scale='Viridis'  # Choose a color scale
)

# Update layout
fig.update_layout(
    title_x=0.5,  # Center the title
    title_font_size=20,
    xaxis_title="Sales ($)",
    yaxis_title="Category",
    yaxis=dict(
        categoryorder='total ascending'  # Sort bars by value
    )
)

# Format the percentage text on bars
fig.update_traces(
    texttemplate='%{text:.1f}%',  # Show percentage with 1 decimal place
    textposition='auto'
)

fig.show()

### Pivoting the data

In [5]:
%%sql

SELECT
categoryname,
    count (distinct CASE WHEN  cost < 100  then productkey  END) AS below_100,
    count (distinct CASE WHEN  cost between  100 and 500 then  productkey END) AS btw_100_and_500,
    count (distinct CASE WHEN  cost > 1000   then productkey  END) AS above_1000
FROM product
GROUP BY
categoryname 
limit 10


Unnamed: 0,categoryname,below_100,btw_100_and_500,above_1000
0,Audio,110,5,0
1,Cameras and camcorders,176,181,0
2,Cell phones,144,141,0
3,Computers,375,213,0
4,Games and Toys,165,1,0
5,Home Appliances,292,273,14
6,"Music, Movies and Audio Books",90,0,0
7,TV and Video,36,174,0
