<a href="https://colab.research.google.com/github/dareoyeleke/sql_queries/blob/main/ADVANCED_SEGMENTATION.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<a target="_blank" href="https://colab.research.google.com/github/lukebarousse/Int_SQL_Data_Analytics_Course/blob/main/Resources/Blank_SQL_Notebook.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

# Advanced Segmentation (SQL)

#### Import Libraries & Database

In [None]:
# Notebook setup: import libraries, provision a local PostgreSQL copy of the
# contoso_100k database when running on Google Colab, then connect the SQL
# magic to it and configure result display.
import sys
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# If running in Google Colab, install PostgreSQL and restore the database.
# Every shell command below redirects stdout and stderr to /dev/null, so a
# failing step is silent; drop the redirection when troubleshooting.
if 'google.colab' in sys.modules:
    # Refresh the apt package index
    !sudo apt-get update -qq > /dev/null 2>&1

    # Install PostgreSQL
    !sudo apt-get install postgresql -qq > /dev/null 2>&1

    # Start PostgreSQL service (suppress output)
    !sudo service postgresql start > /dev/null 2>&1

    # Set password for the 'postgres' user to avoid authentication errors (suppress output).
    # The password set here must match the one in the connection URL further down.
    !sudo -u postgres psql -c "ALTER USER postgres WITH PASSWORD 'password';" > /dev/null 2>&1

    # Create the 'contoso_100k' database (suppress output)
    !sudo -u postgres psql -c "CREATE DATABASE contoso_100k;" > /dev/null 2>&1

    # Download the PostgreSQL .sql dump
    !wget -q -O contoso_100k.sql https://github.com/lukebarousse/Int_SQL_Data_Analytics_Course/releases/download/v.0.0.0/contoso_100k.sql

    # Restore the dump file into the 'contoso_100k' database (suppress output)
    !sudo -u postgres psql contoso_100k < contoso_100k.sql > /dev/null 2>&1

    # Shift libraries from ipython-sql to jupysql: remove the former, install the latter
    !pip uninstall -y ipython-sql > /dev/null 2>&1
    !pip install jupysql > /dev/null 2>&1

# Load the sql extension for SQL magic
%load_ext sql

# Connect to the PostgreSQL database (user 'postgres', local server on port 5432).
# Outside Colab nothing above provisions this database, so it must already exist
# locally with the same credentials.
%sql postgresql://postgres:password@localhost:5432/contoso_100k

# Enable automatic conversion of SQL results to pandas DataFrames
%config SqlMagic.autopandas = True

# Disable named parameters for SQL magic
# NOTE(review): presumably so ':name'-style text inside queries is not treated
# as a parameter placeholder -- confirm against the JupySQL documentation.
%config SqlMagic.named_parameters = "disabled"

# Display pandas floats with two decimal places
pd.options.display.float_format = '{:.2f}'.format

In [None]:
%%sql
-- Label each sale: 'High Value Order' requires BOTH quantity >= 2 and
-- netprice >= 50; every other row is a 'Standard Order'.
-- There is no ORDER BY, so which 10 rows come back is up to the database.
SELECT
    s.orderdate,
    s.quantity,
    s.netprice,
    CASE
        WHEN s.quantity >= 2 AND s.netprice >= 50
            THEN 'High Value Order'
        ELSE 'Standard Order'
    END AS order_type
FROM sales AS s
LIMIT 10


Unnamed: 0,orderdate,quantity,netprice,order_type
0,2015-01-01,1,98.97,Standard Order
1,2015-01-01,1,659.78,Standard Order
2,2015-01-01,2,54.38,High Value Order
3,2015-01-01,4,286.69,High Value Order
4,2015-01-01,7,135.75,High Value Order
5,2015-01-01,3,434.3,High Value Order
6,2015-01-01,1,58.73,Standard Order
7,2015-01-01,3,74.99,High Value Order
8,2015-01-01,2,113.57,High Value Order
9,2015-01-01,1,499.45,Standard Order


In [None]:
%%sql
-- Four-way order label. CASE returns the first matching WHEN, so the
-- combined condition must stay ahead of the two single-condition branches.
-- There is no ORDER BY, so which 10 rows come back is up to the database.
SELECT
    s.orderdate,
    s.quantity,
    s.netprice,
    CASE
        WHEN s.quantity >= 2 AND s.netprice >= 100
            THEN 'Multiple High Value Order'
        WHEN s.netprice >= 100
            THEN 'Single High Value Order'
        WHEN s.quantity >= 2
            THEN 'Multiple Standard Items Order'
        ELSE 'Standard Order'
    END AS order_type
FROM sales AS s
LIMIT 10

Unnamed: 0,orderdate,quantity,netprice,order_type
0,2015-01-01,1,98.97,Standard Order
1,2015-01-01,1,659.78,Single High Value Order
2,2015-01-01,2,54.38,Multiple Standard Items Order
3,2015-01-01,4,286.69,Multiple High Value Order
4,2015-01-01,7,135.75,Multiple High Value Order
5,2015-01-01,3,434.3,Multiple High Value Order
6,2015-01-01,1,58.73,Standard Order
7,2015-01-01,3,74.99,Multiple Standard Items Order
8,2015-01-01,2,113.57,Multiple High Value Order
9,2015-01-01,1,499.45,Single High Value Order


In [None]:
%%sql
-- Median (50th percentile, interpolated) of per-row net revenue
-- (quantity * netprice * exchangerate) for sales dated 2022-01-01 through
-- 2023-12-31, both bounds inclusive.
SELECT
    PERCENTILE_CONT(0.5) WITHIN GROUP (
        ORDER BY (s.quantity * s.netprice * s.exchangerate)
    ) AS median
FROM sales AS s
WHERE s.orderdate >= '2022-01-01'
  AND s.orderdate <= '2023-12-31'

Unnamed: 0,median
0,398.0


In [None]:
%%sql
-- Per product category, split 2022 and 2023 net revenue into rows below the
-- median ("low") and rows at or above it ("high"). The median is taken over
-- all 2022-2023 sales combined, so both years share one threshold.
WITH median_value AS (
    -- Single-row CTE holding the 2022-2023 median of per-row net revenue.
    SELECT
        PERCENTILE_CONT(0.5) WITHIN GROUP (
            ORDER BY (s.quantity * s.netprice * s.exchangerate)
        ) AS median
    FROM sales AS s
    WHERE s.orderdate BETWEEN '2022-01-01' AND '2023-12-31'
),
sales_revenue AS (
    -- Compute per-row net revenue once instead of repeating the expression.
    SELECT
        s.productkey,
        s.orderdate,
        s.quantity * s.netprice * s.exchangerate AS net_revenue
    FROM sales AS s
)
SELECT
    p.categoryname AS category,
    SUM(
        CASE
            WHEN sr.net_revenue < mv.median
             AND sr.orderdate BETWEEN '2022-01-01' AND '2022-12-31'
                THEN sr.net_revenue
        END
    ) AS low_net_revenue_2022,
    SUM(
        CASE
            WHEN sr.net_revenue >= mv.median
             AND sr.orderdate BETWEEN '2022-01-01' AND '2022-12-31'
                THEN sr.net_revenue
        END
    ) AS high_net_revenue_2022,
    SUM(
        CASE
            WHEN sr.net_revenue < mv.median
             AND sr.orderdate BETWEEN '2023-01-01' AND '2023-12-31'
                THEN sr.net_revenue
        END
    ) AS low_net_revenue_2023,
    SUM(
        CASE
            WHEN sr.net_revenue >= mv.median
             AND sr.orderdate BETWEEN '2023-01-01' AND '2023-12-31'
                THEN sr.net_revenue
        END
    ) AS high_net_revenue_2023
FROM sales_revenue AS sr
LEFT JOIN product AS p
    ON sr.productkey = p.productkey
-- median_value has exactly one row; the cross join attaches it to every sale.
CROSS JOIN median_value AS mv
GROUP BY
    p.categoryname
ORDER BY
    p.categoryname

Unnamed: 0,category,low_net_revenue_2022,high_net_revenue_2022,low_net_revenue_2023,high_net_revenue_2023
0,Audio,222337.83,544600.39,180251.13,508439.06
1,Cameras and camcorders,133004.54,2249528.02,104869.46,1878676.83
2,Cell phones,814449.53,7305215.55,729699.39,5272448.24
3,Computers,624340.42,17237873.07,590790.31,11060076.9
4,Games and Toys,231979.63,84147.67,206103.36,64271.6
5,Home Appliances,219797.07,6392649.61,176261.35,5743731.52
6,"Music, Movies and Audio Books",685808.49,2303488.8,574958.76,1605809.37
7,TV and Video,272338.29,5542998.32,164275.35,4247902.87


In [None]:
%%sql
-- Total net revenue per product category and revenue tier. A row is '1-LOW'
-- at or below the 25th percentile of per-row net revenue, '3-HIGH' at or
-- above the 75th, and '2-MEDIUM' otherwise. The numeric prefixes make the
-- tiers sort in order.
-- NOTE(review): the percentile thresholds are computed from 2022-2023 sales
-- only, but the outer query has no date filter, so total_revenue covers every
-- row in sales. Confirm whether the outer query should use the same window.
WITH percentiles AS (
    -- Single-row CTE with the 25th and 75th percentile thresholds.
    SELECT
        PERCENTILE_CONT(0.25) WITHIN GROUP (
            ORDER BY (s.quantity * s.netprice * s.exchangerate)
        ) AS revenue_25th_percentile,
        PERCENTILE_CONT(0.75) WITHIN GROUP (
            ORDER BY (s.quantity * s.netprice * s.exchangerate)
        ) AS revenue_75th_percentile
    FROM sales AS s
    WHERE s.orderdate BETWEEN '2022-01-01' AND '2023-12-31'
),
sales_revenue AS (
    -- Compute per-row net revenue once instead of repeating the expression.
    SELECT
        s.productkey,
        s.quantity * s.netprice * s.exchangerate AS net_revenue
    FROM sales AS s
)
SELECT
    p.categoryname AS category,
    SUM(sr.net_revenue) AS total_revenue,
    CASE
        WHEN sr.net_revenue <= pctl.revenue_25th_percentile THEN '1-LOW'
        WHEN sr.net_revenue >= pctl.revenue_75th_percentile THEN '3-HIGH'
        ELSE '2-MEDIUM'
    END AS revenue_tier
FROM sales_revenue AS sr
LEFT JOIN product AS p
    ON sr.productkey = p.productkey
-- percentiles has exactly one row; the cross join attaches it to every sale.
CROSS JOIN percentiles AS pctl
GROUP BY
    p.categoryname,
    revenue_tier
ORDER BY
    p.categoryname,
    revenue_tier



Unnamed: 0,category,total_revenue,revenue_tier
0,Audio,267217.01,1-LOW
1,Audio,3832415.38,2-MEDIUM
2,Audio,1213265.71,3-HIGH
3,Cameras and camcorders,81032.92,1-LOW
4,Cameras and camcorders,3388546.1,2-MEDIUM
5,Cameras and camcorders,15050781.63,3-HIGH
6,Cell phones,410309.35,1-LOW
7,Cell phones,10338963.22,2-MEDIUM
8,Cell phones,21874993.15,3-HIGH
9,Computers,203207.06,1-LOW
