In [1]:
import pandas as pd
import numpy as np
import psycopg2
from sqlalchemy import create_engine, text as sql_text

In [2]:
conn_string = 'postgresql://postgres:postgres@localhost:5432/postgres'
postgres_engine = create_engine(conn_string)

In [3]:
query = '''
with
temp_01 as (
select o.order_id ,o.customer_id ,o.order_date 
	,lag(o.order_date) over (partition by o.customer_id order by o.order_date) as prev_order_date
from nw.orders o 
),
temp_02 as (
select t1.order_id ,t1.customer_id ,t1.order_date, t1.prev_order_date 
	,(t1.order_date - t1.prev_order_date)  as days_since_prev_order
from temp_01 t1
where t1.prev_order_date is not null
)
select * from temp_02
'''

df = pd.read_sql_query(sql=sql_text(query), con=postgres_engine.connect())
df.head(10)

Unnamed: 0,order_id,customer_id,order_date,prev_order_date,days_since_prev_order
0,10692,ALFKI,1997-10-03,1997-08-25,39
1,10702,ALFKI,1997-10-13,1997-10-03,10
2,10835,ALFKI,1998-01-15,1997-10-13,94
3,10952,ALFKI,1998-03-16,1998-01-15,60
4,11011,ALFKI,1998-04-09,1998-03-16,24
5,10625,ANATR,1997-08-08,1996-09-18,324
6,10759,ANATR,1997-11-28,1997-08-08,112
7,10926,ANATR,1998-03-04,1997-11-28,96
8,10507,ANTON,1997-04-15,1996-11-27,139
9,10535,ANTON,1997-05-13,1997-04-15,28


In [4]:
import plotly.express as px

fig = px.histogram(df, x='days_since_prev_order', nbins=100)
fig.show()

In [5]:
query = '''
with
temp_01 as (
select o.user_id , max(date_trunc('day', o.order_time))::date as max_ord_date
	, to_date('20161101', 'yyyymmdd') -  max(date_trunc('day', o.order_time))::date as recency
	, count(distinct o.order_id) as frequency
	, sum(oi.prod_revenue) as monetary
from ga.orders o 
	join ga.order_items oi on o.order_id = oi.order_id 
group by o.user_id 
),
temp_02 as (
select 'A' as grade, 1 as from_recency, 14 as to_recency, 5 as from_frequency, 9999 as to_frequency, 300.0 as from_monetary, 999999.0 as to_monetary
union all
select 'B', 15, 50, 3, 4, 50.0, 299.999
union all
select 'C', 51, 99999, 1, 2, 0.0, 49.999
),
temp_03 as (
select t1.*
	, t2a.grade as recency_grade, t2b.grade as frequency_grade, t2c.grade as monetary_grade
from temp_01 t1
	left join temp_02 t2a on t1.recency between t2a.from_recency and t2a.to_recency
	left join temp_02 t2b on t1.frequency between t2b.from_frequency and t2b.to_frequency
	left join temp_02 t2c on t1.monetary between t2c.from_monetary and t2c.to_monetary
),
temp_04 as (
select *
	, case when recency_grade = 'A' and frequency_grade in ('A', 'B') and monetary_grade = 'A' then 'A'
	       when recency_grade = 'B' and frequency_grade = 'A' and monetary_grade = 'A' then 'A'
	       when recency_grade = 'B' and frequency_grade in ('A', 'B', 'C') and monetary_grade = 'B' then 'B'
	       when recency_grade = 'C' and frequency_grade in ('A', 'B') and monetary_grade = 'B' then 'B'
	       when recency_grade = 'C' and frequency_grade = 'C' and monetary_grade = 'A' then 'B'
	       when recency_grade = 'C' and frequency_grade = 'C' and monetary_grade in ('B', 'C') then 'C'
	       when recency_grade in ('B', 'C') and monetary_grade = 'C' then 'C'
	       else 'C' end as total_grade
from temp_03
)
select total_grade ,'rfm_grade_' || recency_grade || frequency_grade || monetary_grade as rfm_classification
	, count(*) as grade_cnt
from temp_04
group by total_grade ,'rfm_grade_' || recency_grade || frequency_grade || monetary_grade
order by 1
'''

df = pd.read_sql_query(query, postgres_engine)
df.head(10)

Unnamed: 0,total_grade,rfm_classification,grade_cnt
0,A,rfm_grade_AAA,7
1,A,rfm_grade_ABA,8
2,A,rfm_grade_BAA,7
3,B,rfm_grade_CBB,11
4,B,rfm_grade_CCA,87
5,B,rfm_grade_BBB,13
6,B,rfm_grade_BCB,382
7,C,rfm_grade_ABB,1
8,C,rfm_grade_CBA,13
9,C,rfm_grade_BCC,425


In [6]:
import plotly.express as px

fig = px.treemap(
  df, 
  path=[
    px.Constant('total'), 
    'total_grade', 
    'rfm_classification'], 
    values='grade_cnt', 
    color='grade_cnt'
)
fig.show()