ENVIRONMENT SETUP

In [11]:
import pandas as pd
import sqlite3
import warnings
warnings.filterwarnings('ignore')

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)

print("=" * 70)
print("ADVANCED SQL QUERIES")
print("=" * 70)

conn = sqlite3.connect('../data/processed/ecommerce.db')

def run_query(query, title):
    """Execute Query and Display Result"""
    print(f"\n{'=' * 70}")
    print(title)
    print('=' * 70)
    result = pd.read_sql_query(query, conn)
    print(result.to_string(index=False))
    return result

print("\nDatabase connection established")

ADVANCED SQL QUERIES

Database connection established


QUERY 11: RUNNING TOTAL REVENUE BY MONTH

In [12]:
# Running Total
query11 = """
SELECT
    order_year,
    order_month,
    ROUND(SUM(p.total_payment_value), 2) as monthly_revenue,
    ROUND(SUM(SUM(p.total_payment_value)) OVER (
        ORDER BY order_year, order_month
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ), 2) as_returning_total_revenue
FROM orders o
JOIN order_payments p ON o.order_id = p.order_id
GROUP BY order_year, order_month
ORDER BY order_year, order_month
"""

result11 = run_query(query11, "QUERY 11: Running Total Revenue by Month")


QUERY 11: Running Total Revenue by Month
 order_year  order_month  monthly_revenue  as_returning_total_revenue
       2016           10         46566.71                    46566.71
       2016           12            19.62                    46586.33
       2017            1        127545.67                   174132.00
       2017            2        271298.65                   445430.65
       2017            3        414369.39                   859800.04
       2017            4        390952.18                  1250752.22
       2017            5        567066.73                  1817818.95
       2017            6        490225.60                  2308044.55
       2017            7        566403.93                  2874448.48
       2017            8        646000.61                  3520449.09
       2017            9        701169.99                  4221619.08
       2017           10        751140.27                  4972759.35
       2017           11       1153528.05       


QUERY 12: CUSTOMER LIFETIME VALUE SEGMENTATION (CTE)

In [14]:
query12 = """
WITH customer_ltv AS (
    SELECT
        c.customer_unique_id,
        c.customer_state,
        COUNT(DISTINCT o.order_id) as total_orders,
        ROUND(SUM(p.total_payment_value), 2) as lifetime_value,
        ROUND(AVG(p.total_payment_value), 2) as avg_order_value
    FROM customers c
    JOIN orders o ON c.customer_id = o.customer_id
    JOIN order_payments p ON o.order_id = p.order_id
    GROUP BY c.customer_unique_id, c.customer_state
),
ltv_segments AS(
    SELECT
        customer_unique_id,
        customer_state,
        total_orders,
        lifetime_value,
        avg_order_value,
        CASE
            WHEN lifetime_value >= 1000 THEN 'High Value'
            WHEN lifetime_value >= 500 THEN 'Medium Value'
            ELSE 'Low Value'
        END as value_segment
    FROM customer_ltv
)
SELECT
    value_segment,
    COUNT(*) as customer_count,
    ROUND(AVG(lifetime_value), 2) as avg_ltv,
    ROUND(AVG(total_orders), 2) as avg_orders,
    ROUND(SUM(lifetime_value), 2) as total_segment_revenue
FROM ltv_segments
GROUP BY value_segment
ORDER BY avg_ltv DESC
"""

result12 = run_query(query12, "QUERY 12: Customer Value Segmentation")


QUERY 12: Customer Value Segmentation
value_segment  customer_count  avg_ltv  avg_orders  total_segment_revenue
   High Value            1148  1584.78        1.11             1819322.15
 Medium Value            3116   679.49        1.13             2117295.97
    Low Value           89131   128.86        1.03            11485843.65


QUERY 13: MONTHLY CUSTOMER ACQUISITION COHORTS

In [18]:
query13 = """
WITH first_purchase AS (
    SELECT
        c.customer_unique_id,
        MIN(o.order_purchase_timestamp) as first_order_date,
        strftime('%Y=%m', MIN(o.order_purchase_timestamp)) as cohort_month
    FROM customers c
    JOIN orders o ON c.customer_id = o.customer_id
    GROUP BY c.customer_unique_id
)
SELECT
    cohort_month,
    COUNT(DISTINCT customer_unique_id) as customers_acquired,
    SUM(COUNT(DISTINCT customer_unique_id)) OVER (
        ORDER BY cohort_month
    ) as cumulative_customers
FROM first_purchase
GROUP BY cohort_month
ORDER BY cohort_month
"""

result13 = run_query(query13, "QUERY 13: Monthly Customer Acquisition Cohorts")


QUERY 13: Monthly Customer Acquisition Cohorts


cohort_month  customers_acquired  cumulative_customers
     2016=09                   1                     1
     2016=10                 262                   263
     2016=12                   1                   264
     2017=01                 717                   981
     2017=02                1628                  2609
     2017=03                2503                  5112
     2017=04                2256                  7368
     2017=05                3451                 10819
     2017=06                3037                 13856
     2017=07                3752                 17608
     2017=08                4057                 21665
     2017=09                4004                 25669
     2017=10                4328                 29997
     2017=11                7060                 37057
     2017=12                5338                 42395
     2018=01                6842                 49237
     2018=02                6288                 55525
     2018=

QUERY 14: ORDER BASKET SIZE ANALYSIS

In [20]:
query14 = """
WITH order_size AS (
    SELECT
        order_id,
        COUNT(DISTINCT product_id) as items_count,
        ROUND(SUM(price), 2) as order_value
    FROM order_items
    GROUP BY order_id
)
SELECT
    CASE
        WHEN items_count = 1 THEN '1 item'
        WHEN items_count BETWEEN 2 AND 3 THEN '2-3 items'
        WHEN items_count BETWEEN 4 ANd 5 THEN '4-5 items'
        ELSE '6+ items'
    END as basket_size,
    COUNT(*) as order_count,
    ROUND(AVG(order_value), 2) as avg_order_value,
    ROUND(SUM(order_value), 2) as total_revenue,
    ROUND(COUNT(*) * 100.0 / SUM(COUNT(*)) OVER (), 2) as pct_of_orders
FROM order_size
GROUP BY basket_size
ORDER BY
    CASE basket_size
        WHEN '1 item' THEN 1
        WHEN'2-3 items' THEN 2
        WHEN '4-5 items' THEN 3
        ELSE 4
    END
"""

result14 = run_query(query14, "QUERY 14: Order Basket Size Analysis")


QUERY 14: Order Basket Size Analysis
basket_size  order_count  avg_order_value  total_revenue  pct_of_orders
     1 item        95430           135.65    12944973.77          96.72
  2-3 items         3144           195.15      613551.12           3.19
  4-5 items           78           344.04       26835.13           0.08
   6+ items           14           448.83        6283.68           0.01


QUERY 15: STATE PERFORMANCE RANKINGS

In [23]:
query15 = """
SELECT
    c.customer_state,
    COUNT(DISTINCT o.order_id) as total_orders,
    ROUND(SUM(p.total_payment_value), 2) as total_revenue,
    ROUND(AVG(o.delivery_time_days), 1) as avg_delivery_days,
    RANK() OVER (ORDER BY SUM(p.total_payment_value) DESC) as revenue_rank,
    RANK() OVER (ORDER BY AVG(o.delivery_time_days) ASC) as delivery_rank
FROM orders o
JOIN customers c ON o.customer_id = c.customer_id
JOIN order_payments p ON o.order_id = p.order_id
WHERE o.delivery_time_days IS NOT NULL
GROUP BY c.customer_state
ORDER BY revenue_rank
"""

result15 = run_query(query15, "QUERY 15: State Performance Ranking")


QUERY 15: State Performance Ranking


customer_state  total_orders  total_revenue  avg_delivery_days  revenue_rank  delivery_rank
            SP         40493     5769081.27                8.3             1              1
            RJ         12350     2055690.45               14.8             2              7
            MG         11354     1819277.61               11.5             3              3
            RS          5344      861608.40               14.8             4              6
            PR          4923      781919.55               11.5             5              2
            SC          3546      595208.40               14.5             6              5
            BA          3256      591270.60               18.9             7             15
            DF          2080      346146.17               12.5             8              4
            GO          1957      334294.22               15.2             9              8
            ES          1995      317682.65               15.3            10    