# Retention Curves

In [None]:
# Execute the setup notebook in this kernel before running any of the queries below.
# NOTE(review): presumably this loads the %sql magics and the `orders` table used below — confirm in SetupRetail.ipynb.
%run 'SetupRetail.ipynb'

In [None]:
%%sql

with orders_per_customer as (
    -- one row per customer with the number of distinct orders they placed
    select customer_id
    ,count(distinct order_id) as num_orders
    from orders
    group by customer_id
)

-- histogram: how many customers placed exactly num_orders orders
select num_orders
,count(*) as cnt
from orders_per_customer
group by num_orders
order by num_orders;

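How many customers go on to buy again in later periods? For each number of months N since a customer's first order, the query below counts the customers whose last order falls at least N months after their first.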

In [None]:
%%sql --save retention_months --no-execute

with nums as (
    -- candidate retention horizons in months: 0..500 (roughly 41 years);
    -- the inner join at the bottom only keeps horizons that some customer reaches
    select generate_series as num
    from generate_series(0, 500)
),

customer_span as (
    -- exactly one row per customer, so every customer is counted once
    -- regardless of how many rows they have in orders
    select customer_id
    ,min(order_date) as first_order
    ,max(order_date) as last_order
    from orders
    group by customer_id
),

customer_periods as (
    -- number of months between a customer's first and last order
    -- NOTE(review): DuckDB's date_diff counts month boundaries crossed, not
    -- complete months elapsed (Jan 31 -> Feb 1 gives 1); use date_sub if whole
    -- elapsed months are wanted
    select customer_id
    ,first_order
    ,last_order
    ,date_diff('month', first_order, last_order) as periods
    from customer_span
),

period_counts as (
    -- how many customers have a first-to-last span of exactly `periods` months
    select periods
    ,count(*) as cnt
    from customer_periods
    group by periods
)

-- a customer is retained at month `num` when their span is at least `num`
-- months, so sum the counts of every span >= num
select n.num
,sum(pc.cnt) as retained
from period_counts as pc
inner join nums as n
    on n.num <= pc.periods
group by n.num
order by n.num;

In [None]:
# Run the snippet saved as `retention_months` and keep its rows in `result`.
# NOTE(review): `.set_index` below assumes the %sql magic returns a pandas DataFrame (e.g. autopandas enabled during setup) — confirm.
result = %sql select * from retention_months;
# Plot the `retained` counts with `num` (months since first order) on the x-axis.
result.set_index("num").plot()