In [34]:
import os
import urllib.parse
from dotenv import load_dotenv
from sqlalchemy import create_engine

# Load environment variables
load_dotenv()

db_user = os.getenv("DB_USER")
db_password = urllib.parse.quote_plus(os.getenv("DB_PASSWORD"))
db_host = os.getenv("DB_HOST")
db_port = os.getenv("DB_PORT")
db_name = os.getenv("DB_NAME")

connection_string = f"postgresql://{db_user}:{db_password}@{db_host}:{db_port}/{db_name}"
engine = create_engine(connection_string)

%reload_ext sql
%sql engine
%config SqlMagic.displaylimit = 50

### Customer Segmentation (Based on LTV)
How does total customer value compare against customer volume across segments?

[Visualization of Customer Segmentation](assets/1_Customer_LTV_Classes.png)

This notebook analyzes customer lifetime value (LTV) and segments customers into Low Value (below the 25th percentile), Medium Value (25th to 75th percentile) and High Value (above the 75th percentile) groups, based on each customer's total revenue.

- Low-value customers (12,372) have an average LTV of only about *\$351*
- Medium-value customers (24,743) have an average LTV of about *\$2,693*
- High-value customers (12,372) have an average LTV of about *\$10,946*

In [40]:
%%sql
-- Segment customers by lifetime value (LTV) using the 25th and 75th
-- percentiles of total revenue, then summarize each segment.
WITH customer_ltv AS (
    -- One row per (customerkey, customer_name) with revenue summed over all
    -- of that customer's rows in cohort_analysis. No ORDER BY here because row
    -- order inside a CTE does not affect the aggregates computed below.
    SELECT
        customerkey,
        customer_name,
        SUM(revenue) AS total_revenue
    FROM cohort_analysis
    GROUP BY customerkey, customer_name
),
customer_segments AS (
    -- Single-row CTE holding the two percentile cut-offs
    SELECT
        PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY total_revenue) AS ltv_25,
        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY total_revenue) AS ltv_75
    FROM customer_ltv
),
segment_values AS (
    -- Attach the cut-offs to every customer and label the segment.
    -- A total_revenue exactly equal to a cut-off (or NULL) falls through to
    -- the ELSE branch and is labelled Medium Value.
    SELECT
        c.*,
        CASE
            WHEN c.total_revenue < cs.ltv_25 THEN '1 - Low Value'
            WHEN c.total_revenue > cs.ltv_75 THEN '3 - High Value'
            ELSE '2 - Medium Value'
        END AS customer_segment
    FROM customer_ltv AS c
    -- customer_segments has exactly one row, so this join adds columns
    -- without multiplying customer rows
    CROSS JOIN customer_segments AS cs
)

-- Per-segment totals, customer counts and average LTV, highest segment first
SELECT
    customer_segment,
    ROUND(SUM(total_revenue)) AS total_ltv,
    COUNT(customerkey) AS customer_count,
    ROUND((SUM(total_revenue) / COUNT(customerkey))) AS avg_ltv
FROM segment_values
GROUP BY customer_segment
ORDER BY customer_segment DESC;


customer_segment,total_ltv,customer_count,avg_ltv
3 - High Value,135429277.0,12372,10946.0
2 - Medium Value,66636452.0,24743,2693.0
1 - Low Value,4341810.0,12372,351.0
