

#### Data Segmentation Analysis

Purpose:
- To group data into meaningful categories for targeted insights.
- For customer segmentation, product categorization, or regional analysis.

SQL Functions Used:
- CASE: Defines custom segmentation logic.
- GROUP BY: Groups data into segments.


Segment products into cost ranges and
count how many products fall into each segment.

In [None]:
# Import required libraries
import pandas as pd
from sqlalchemy import create_engine
import os
from dotenv import load_dotenv
%load_ext sql
from IPython.display import Image, display

# Load environment variables
load_dotenv()

# Configure pandas display format
pd.options.display.float_format = '{:.2f}'.format

# Get database credentials from environment variables
DB_PASSWORD = os.getenv('DB_PASSWORD')

# Set the DATABASE_URL environment variable explicitly
os.environ['DATABASE_URL'] = f"postgresql://postgres:{DB_PASSWORD}@localhost:5432/contoso_100k"

# Connect using the environment variable
%sql ${DATABASE_URL}

# Enable automatic conversion of SQL results to pandas DataFrames
%config SqlMagic.autopandas = True

# Disable named parameters for SQL magic
%config SqlMagic.named_parameters = "disabled"

# Test the connection with a simple query
%sql SELECT version();

The sql extension is already loaded. To reload it, use:
  %reload_ext sql


Unnamed: 0,version
0,"PostgreSQL 17.4 on x86_64-windows, compiled by..."


In [3]:
%%sql
WITH product_segment AS (
    -- Label every product with the cost bucket it falls into.
    -- Branches are evaluated top to bottom, so each upper bound is inclusive
    -- and a cost of exactly 500 belongs to the 100 - 500 bucket
    -- (exactly 1000 belongs to the 500 - 1000 bucket).
    SELECT
        productkey,
        CASE
            -- Without this branch a missing cost would fall through to ELSE
            -- and be counted as an expensive product.
            WHEN cost IS NULL THEN 'Unknown'
            WHEN cost < 100 THEN 'Below 100'
            WHEN cost <= 500 THEN '100 - 500'
            WHEN cost <= 1000 THEN '500 - 1000'
            ELSE 'Above 1000'
        END AS cost_range
    FROM product
)
-- Count the products in each bucket, largest bucket first.
SELECT
    cost_range,
    COUNT(productkey) AS total_products
FROM product_segment
GROUP BY cost_range
ORDER BY total_products DESC


Unnamed: 0,cost_range,total_products
0,Below 100,1388
1,100 - 500,988
2,500 - 1000,127
3,Above 1000,14


In [4]:
import plotly.express as px

# Donut chart of how many products fall into each cost range.
#
# `_` is IPython's "last displayed output". Capture it on the very first line
# so the rest of the cell works with a stable, named object, and fail with a
# clear message if the preceding cell was not the cost-range query.
cost_range_counts = _

required_columns = {'cost_range', 'total_products'}
missing_columns = required_columns - set(getattr(cost_range_counts, 'columns', ()))
if missing_columns:
    raise ValueError(
        "Expected the previous cell's output to be the cost-range query result; "
        f"missing column(s): {sorted(missing_columns)}. "
        "Re-run the SQL cell directly before this one."
    )

# Create pie chart
fig = px.pie(
    cost_range_counts,
    values='total_products',
    names='cost_range',
    title='Product Distribution by Cost Range',
    hole=0.3,  # Makes it a donut chart, remove this line for a regular pie chart
    color_discrete_sequence=px.colors.qualitative.Set3  # Qualitative palette, one color per range
)

# Update layout
fig.update_layout(
    title_x=0.5,  # Center the title
    title_font_size=20,
    showlegend=True,
    # Horizontal legend anchored at its bottom-right corner, just above the plot area
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1
    )
)

# Show the percentage and the range label inside each slice
fig.update_traces(
    textposition='inside',
    textinfo='percent+label'
)

fig.show()