In [1]:
import pandas as pd
import psycopg2
from sqlalchemy import create_engine

In [2]:
conn_string = 'postgresql://postgres:postgres@localhost:5432/postgres'
postgres_engine = create_engine(conn_string)

In [3]:
query = '''
with 
temp_01 as (
select to_char(date_trunc('day', gs.visit_stime), 'yyyy-mm-dd') as day_date
	, count(distinct sess_id) as daily_sess_cnt
	, count(distinct gs.user_id) as daily_user_cnt	
from ga.ga_sess gs 
group by to_char(date_trunc('day', gs.visit_stime), 'yyyy-mm-dd') 
)
select *
	, 1.0 * daily_sess_cnt / daily_user_cnt as avg_daily_sess_by_users
from temp_01
'''

df = pd.read_sql_query(query, con=postgres_engine)
df.head(10)

Unnamed: 0,day_date,daily_sess_cnt,daily_user_cnt,avg_daily_sess_by_users
0,2016-08-01,1711,1569,1.090504
1,2016-08-02,2140,1961,1.09128
2,2016-08-03,2890,2657,1.087693
3,2016-08-04,3161,2947,1.072616
4,2016-08-05,2702,2492,1.08427
5,2016-08-06,1663,1572,1.057888
6,2016-08-07,1622,1520,1.067105
7,2016-08-08,2815,2559,1.100039
8,2016-08-09,2851,2625,1.086095
9,2016-08-10,2757,2546,1.082875


In [11]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

fig = make_subplots(specs=[[{'secondary_y': True}]])

fig.add_trace(
  go.Scatter(
    x=df['day_date'],
    y=df['daily_user_cnt'],
    name='daily user count'
  ),
  secondary_y=False
)

# fig.add_trace(
#   go.Scatter(
#     x=df['day_date'],
#     y=df['daily_sess_cnt'],
#     name='daily session count'
#   ),
#   secondary_y=False
# )

fig.add_trace(
  go.Scatter(
    x=df['day_date'],
    y=df['avg_daily_sess_by_users'],
    name='daily average session per users '
  ),
  secondary_y=True
)

fig.update_yaxes(range=(1.0, 1.2), secondary_y=True)
fig.update_xaxes(type='category')
fig.show()

In [8]:
query = '''
select *
	, round(100.0 * da.dau / da.mau, 2) as stickiness
	, round(avg(100.0 * da.dau / da.mau) over (), 2) as avg_stickiness
from ga.daily_acquisitions da
where curr_date between to_date('2016-10-25', 'yyyy-mm-dd') and to_date('2016-10-31', 'yyyy-mm-dd')
'''

df = pd.read_sql_query(query, con=postgres_engine)
df.head(10)

Unnamed: 0,curr_date,dau,wau,mau,stickiness,avg_stickiness
0,2016-10-25,3843,22368,73687,5.22,4.58
1,2016-10-26,3662,22787,75715,4.84,4.58
2,2016-10-27,4153,23557,77364,5.37,4.58
3,2016-10-28,3921,23870,78758,4.98,4.58
4,2016-10-29,3598,24224,80082,4.49,4.58
5,2016-10-30,2867,24160,80693,3.55,4.58
6,2016-10-31,2960,24246,81587,3.63,4.58


In [9]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

fig = go.Figure()

fig.add_trace(
    go.Scatter(
      x=df.curr_date,
      y= df.stickiness,
      name='daily stickiness'
    )
)
fig.add_trace(
    go.Scatter(
      x=df.curr_date,
      y= df.avg_stickiness,
      name='average stickiness'
    )
)
fig.show()

In [12]:
query = '''
with 
temp_01 as (
select gu.user_id , date_trunc('month', gs.visit_stime)::date as month_date , count(*)  as monthly_cnt
from ga.ga_sess gs  
	join ga.ga_users gu on gs.user_id = gu.user_id 
where gu.create_time <= (date_trunc('month', gu.create_time) + interval '1 month' - interval '1 day')::date - 2
group by gu.user_id , date_trunc('month', gs.visit_stime)::date
),
temp_02 as (
select t1.month_date
	, case when monthly_cnt = 1 then '0_only_first_session'
		   when monthly_cnt between 2 and 3 then '2_between_3'
		   when monthly_cnt between 4 and 8 then '4_between_8'
		   when monthly_cnt between 9 and 14 then '9_between_14'
		   when monthly_cnt between 15 and 25 then '15_between_25'
		   when monthly_cnt >= 26 then 'over_26' end as classification
	, count(*) as cnt
from temp_01 t1
group by t1.month_date
	, case when monthly_cnt = 1 then '0_only_first_session'
		   when monthly_cnt between 2 and 3 then '2_between_3'
		   when monthly_cnt between 4 and 8 then '4_between_8'
		   when monthly_cnt between 9 and 14 then '9_between_14'
		   when monthly_cnt between 15 and 25 then '15_between_25'
		   when monthly_cnt >= 26 then 'over_26' end
)
select t2.month_date
	, sum(case when t2.classification = '0_only_first_session' then t2.cnt else 0 end) as "0_only_first_session"
	, sum(case when t2.classification = '2_between_3' then t2.cnt else 0 end) as "2_between_3"
	, sum(case when t2.classification = '4_between_8' then t2.cnt else 0 end) as "4_between_8"
	, sum(case when t2.classification = '9_between_14' then t2.cnt else 0 end) as "9_between_14"
	, sum(case when t2.classification = '15_between_25' then t2.cnt else 0 end) as "15_between_25"
	, sum(case when t2.classification = 'over_26' then t2.cnt else 0 end) as "over_26"
from temp_02 t2
group by t2.month_date
'''

df = pd.read_sql_query(query, con=postgres_engine)
df.head()

Unnamed: 0,month_date,0_only_first_session,2_between_3,4_between_8,9_between_14,15_between_25,over_26
0,2016-10-01,70504.0,5977.0,892.0,70.0,14.0,7.0
1,2016-09-01,46714.0,5072.0,830.0,74.0,18.0,11.0
2,2016-08-01,50429.0,5714.0,981.0,94.0,25.0,16.0


In [13]:
import plotly.express as px

fig = px.bar(
    df, 
    x='month_date',
    y=['0_only_first_session', '2_between_3', '4_between_8', '9_between_14', '15_between_25', 'over_26'],
    title='monthly user session count distribution'
)
fig.show()

In [18]:
query = '''
with 
temp_00 as (
select gs.sess_id , gs.user_id , gs.channel_grouping , o.order_id , o.order_time , oi.product_id , oi.prod_revenue 
from ga.ga_sess gs 
	left join ga.orders o on gs.sess_id = o.sess_id 
	left join ga.order_items oi on o.order_id = oi.order_id 
where gs.visit_stime >= (to_date('20161101', 'yyyymmdd') - interval '30 days') and gs.visit_stime < to_date('20161101', 'yyyymmdd')
)
select channel_grouping 
	, sum(t0.prod_revenue) as ch_revenue
	, count(distinct t0.user_id) as ch_user_cnt
	, count(distinct case when t0.order_id is not null then t0.user_id end) as ch_ord_cnt
	, sum(t0.prod_revenue) / count(distinct t0.user_id) as ch_revenue_per_user
	, sum(t0.prod_revenue) / count(distinct case when t0.order_id is not null then t0.user_id end) as ch_ord_revenue_per_user
from temp_00 t0
group by t0.channel_grouping
order by ch_user_cnt desc
'''

df = pd.read_sql_query(query, con=postgres_engine)
df.head(10)

Unnamed: 0,channel_grouping,ch_revenue,ch_user_cnt,ch_ord_cnt,ch_revenue_per_user,ch_ord_revenue_per_user
0,Social,94.9,40001,3,0.002372,31.633333
1,Organic Search,32243.06,28181,283,1.144142,113.933074
2,Direct,44531.2,7278,142,6.118604,313.6
3,Referral,38889.38,6712,347,5.794008,112.073141
4,Paid Search,2811.7,1442,33,1.949861,85.20303
5,Affiliates,9.99,961,1,0.010395,9.99
6,Display,693.83,435,7,1.595011,99.118571


In [19]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [29]:
fig = make_subplots(specs=[[{'secondary_y': True}]])
fig.add_trace(
    go.Bar(
      x=df['channel_grouping'],
      y=df['ch_user_cnt'],
      name='user count per channel'
    ),
    # secondary_y=False
)
fig.add_trace(
    go.Scatter(
      x=df['channel_grouping'],
      y=df['ch_revenue'],
      name='channel revenue'
    ),
    secondary_y=True
)
fig.show()

In [35]:
fig = make_subplots(specs=[[{'secondary_y': True}]])

fig.add_trace(
    go.Bar(
      x=df['channel_grouping'],
      y=df['ch_user_cnt'],
      name='user count per channel',
      marker_color='lightsalmon'
    ),
    secondary_y=False
)
fig.add_trace(
    go.Bar(
      x=df['channel_grouping'],
      y=df['ch_ord_cnt'],
      name='order per channel',
      marker_color='indianred'
    ),
    secondary_y=False
)
fig.add_trace(
    go.Scatter(
      x=df['channel_grouping'],
      y=df['ch_revenue_per_user'],
      name='channel revenue per user'
    ),
    secondary_y=True
)
fig.update_layout(barmode='stack', xaxis_tickangle=-45)
fig.update_xaxes(type='category')
fig.show()

In [31]:
fig = make_subplots()
fig.add_trace(
    go.Scatter(
      x=df['channel_grouping'],
      y=df['ch_revenue_per_user'],
      name='channel revenue per user'
    )
)
fig.add_trace(
    go.Scatter(
      x=df['channel_grouping'],
      y=df['ch_ord_revenue_per_user'],
      name='channel order revenue per user'
    )
)
fig.show()

In [33]:
fig = make_subplots(specs=[[{'secondary_y': True}]])
fig.add_trace(
    go.Bar(
      x=df['channel_grouping'],
      y=df['ch_ord_cnt'],
      name='channel order count'
    )
)
fig.add_trace(
    go.Scatter(
      x=df['channel_grouping'],
      y=df['ch_revenue'],
      name='channel revenue'
    ),
    secondary_y=True
)
fig.show()