### Cohort analysis

Overview:
- Analyzes customer behavior and retention over time
- Groups customers into cohorts based on first purchase/signup date
- Tracks how cohorts engage with the business across time periods


Key Functions:
- Date Functions: DATEADD(), DATEDIFF(), DATE_TRUNC()
- Window Functions: LAG(), FIRST_VALUE()
- Aggregation: COUNT(), SUM(), AVG()
- Essential Clauses: GROUP BY, PARTITION BY, ORDER BY


In [None]:
# Import required libraries
import pandas as pd
from sqlalchemy import create_engine
import os
from dotenv import load_dotenv
%load_ext sql
from IPython.display import Image, display

# Load environment variables
load_dotenv()

# Configure pandas display format
pd.options.display.float_format = '{:.2f}'.format

# Get database credentials from environment variables
DB_PASSWORD = os.getenv('DB_PASSWORD')

# Set the DATABASE_URL environment variable explicitly
os.environ['DATABASE_URL'] = f"postgresql://postgres:{DB_PASSWORD}@localhost:5432/contoso_100k"

# Connect using the environment variable
%sql ${DATABASE_URL}

# Enable automatic conversion of SQL results to pandas DataFrames
%config SqlMagic.autopandas = True

# Disable named parameters for SQL magic
%config SqlMagic.named_parameters = "disabled"

# Test the connection with a simple query
%sql SELECT version();

Unnamed: 0,version
0,"PostgreSQL 17.4 on x86_64-windows, compiled by..."


#### This analysis groups customers into yearly cohorts by the year of their first purchase and shows the net revenue of each cohort's transactions by purchase year

In [3]:
%%sql

WITH yearly_cohort AS (
    -- One row per customer with the calendar year of the first order
    SELECT
        customerkey,
        EXTRACT(YEAR FROM MIN(orderdate)) AS cohort_year
    FROM sales
    GROUP BY customerkey
)
SELECT
    y.cohort_year,
    EXTRACT(YEAR FROM s.orderdate) AS purchase_year,
    -- Net revenue is quantity times net price, converted with the exchange rate
    SUM(s.quantity * s.netprice * s.exchangerate) AS net_revenue
FROM sales s
LEFT JOIN yearly_cohort y ON s.customerkey = y.customerkey
GROUP BY
    y.cohort_year,
    purchase_year
-- LIMIT without ORDER BY returns an arbitrary subset, so sort before limiting
ORDER BY
    y.cohort_year,
    purchase_year
LIMIT 10

Unnamed: 0,cohort_year,purchase_year,net_revenue
0,2015,2015,7370979.48
1,2015,2016,392623.48
2,2015,2017,479841.31
3,2015,2018,1069850.87
4,2015,2019,1235991.48
5,2015,2020,386489.6
6,2015,2021,872845.99
7,2015,2022,1569787.72
8,2015,2023,1157633.91
9,2015,2024,356186.62
