In [24]:
import os
import urllib.parse
from dotenv import load_dotenv
from sqlalchemy import create_engine

# Load environment variables
load_dotenv()

db_user = os.getenv("DB_USER")
db_password = urllib.parse.quote_plus(os.getenv("DB_PASSWORD"))
db_host = os.getenv("DB_HOST")
db_port = os.getenv("DB_PORT")
db_name = os.getenv("DB_NAME")

connection_string = f"postgresql://{db_user}:{db_password}@{db_host}:{db_port}/{db_name}"
engine = create_engine(connection_string)

%reload_ext sql
%sql engine
%config SqlMagic.displaylimit = 50

## Customer Churn Analysis 

### Retained vs Churned Customers Data 
This analysis assumes the customers who churned within 6 months of the last purchase date. 

[Visualization of Customer Churn Analysis](assets/2_Customer_Retention_pie.png)

In [21]:
%%sql
WITH customer_last_purchase AS ( 
SELECT 
    customerkey,
    customer_name,
    orderdate,
    cohort_year,
    ROW_NUMBER() OVER (PARTITION BY customerkey ORDER BY orderdate DESC) as row_num,
    first_purchase_date
FROM cohort_analysis 
) ,
churned_customers AS (
    SELECT 
        customerkey,
        customer_name,
        cohort_year,
        orderdate AS last_purchase_date,
        CASE 
            WHEN orderdate < (SELECT(MAX(orderdate)) FROM sales) - INTERVAL '6' MONTH THEN 'Churned'
            ELSE 'Active'
        END AS retention_status
    FROM customer_last_purchase
    WHERE row_num = 1
        AND first_purchase_date < (SELECT(MAX(orderdate)) FROM sales) - INTERVAL '6' MONTH
)

SELECT 
    retention_status,
    COUNT(*) AS customer_count,
    SUM(COUNT(customerkey)) OVER () AS total_customers,
   ROUND( COUNT(*) / SUM(COUNT(customerkey))  OVER() , 2) AS retention_rate
FROM churned_customers
GROUP BY retention_status;

retention_status,customer_count,total_customers,retention_rate
Active,4441,46913,0.09
Churned,42472,46913,0.91


### Overall Customer Churn Analysis 
This analysis gives us a percentage of the total customers who churned in the cohort years.

[Visualization of Customer Churn Analysis](assets/3_Customer_Churn_Cohorts.png)

In [35]:
%%sql
WITH customer_last_purchase AS ( 
SELECT 
    customerkey,
    customer_name,
    orderdate,
    cohort_year,
    ROW_NUMBER() OVER (PARTITION BY customerkey ORDER BY orderdate DESC) as row_num,
    first_purchase_date
FROM cohort_analysis 
) ,
churned_customers AS (
    SELECT 
        customerkey,
        customer_name,
        cohort_year,
        orderdate AS last_purchase_date,
        CASE 
            WHEN orderdate < (SELECT(MAX(orderdate)) FROM sales) - INTERVAL '6' MONTH THEN 'Churned'
            ELSE 'Active'
        END AS retention_status
    FROM customer_last_purchase
    WHERE row_num = 1
        AND first_purchase_date < (SELECT(MAX(orderdate)) FROM sales) - INTERVAL '6' MONTH
)
SELECT 
    retention_status,
    cohort_year,
    COUNT(*) AS customer_count,
    SUM(COUNT(customerkey)) OVER (PARTITION BY cohort_year) AS total_customers,
    100*ROUND( COUNT(*) / SUM(COUNT(customerkey))  OVER () , 4) AS retention_rate
FROM churned_customers
GROUP BY cohort_year,retention_status;

retention_status,cohort_year,customer_count,total_customers,retention_rate
Active,2015,237,2825,0.51
Churned,2015,2588,2825,5.52
Active,2016,311,3397,0.66
Churned,2016,3086,3397,6.58
Active,2017,385,4068,0.82
Churned,2017,3683,4068,7.85
Active,2018,704,7446,1.5
Churned,2018,6742,7446,14.37
Active,2019,687,7755,1.46
Churned,2019,7068,7755,15.07


## Customer Retention Analysis 

### Year-over-Year (YoY) Customer Retention Rates 
This analysis shows the percentage of customers in each cohort year and how much percentage they represent of the total customers Churned or Active


In [27]:
%%sql
WITH customer_last_purchase AS ( 
SELECT 
    customerkey,
    customer_name,
    orderdate,
    cohort_year,
    ROW_NUMBER() OVER (PARTITION BY customerkey ORDER BY orderdate DESC) as row_num,
    first_purchase_date
FROM cohort_analysis 
) ,
churned_customers AS (
    SELECT 
        customerkey,
        customer_name,
        cohort_year,
        orderdate AS last_purchase_date,
        CASE 
            WHEN orderdate < (SELECT(MAX(orderdate)) FROM sales) - INTERVAL '6' MONTH THEN 'Churned'
            ELSE 'Active'
        END AS retention_status
    FROM customer_last_purchase
    WHERE row_num = 1
        AND first_purchase_date < (SELECT(MAX(orderdate)) FROM sales) - INTERVAL '6' MONTH
)

SELECT 
    retention_status,
    cohort_year,
    COUNT(*) AS customer_count,
    SUM(COUNT(customerkey)) OVER (PARTITION BY cohort_year) AS total_customers,
    100*ROUND( COUNT(*) / SUM(COUNT(customerkey))  OVER (PARTITION BY cohort_year) , 2) AS retention_rate
FROM churned_customers
GROUP BY cohort_year,retention_status;

retention_status,cohort_year,customer_count,total_customers,retention_rate
Active,2015,237,2825,8.0
Churned,2015,2588,2825,92.0
Active,2016,311,3397,9.0
Churned,2016,3086,3397,91.0
Active,2017,385,4068,9.0
Churned,2017,3683,4068,91.0
Active,2018,704,7446,9.0
Churned,2018,6742,7446,91.0
Active,2019,687,7755,9.0
Churned,2019,7068,7755,91.0
