## **유저 로그데이터를 활용한 퍼널 분석**
by **김동윤**
***

In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
# Load the behavior log CSV (relative path written with a backslash separator).
csv_path = r'실습4_ 사용자 행동 로그 데이터를 활용한 퍼널 분석\ecommerce_behavior.csv'
df = pd.read_csv(csv_path, encoding='utf8')
df

Unnamed: 0.1,Unnamed: 0,event_time,event_type,product_id,category_id,category_code,brand,price,user_id,user_session
0,0,2020-01-01 00:00:00 UTC,view,5809910,1602943681873052386,,grattol,5.24,595414620,4adb70bb-edbd-4981-b60f-a05bfd32683a
1,1,2020-01-01 00:00:09 UTC,view,5812943,1487580012121948301,,kinetics,3.97,595414640,c8c5205d-be43-4f1d-aa56-4828b8151c8a
2,2,2020-01-01 00:00:19 UTC,view,5798924,1783999068867920626,,zinger,3.97,595412617,46a5010f-bd69-4fbe-a00d-bb17aa7b46f3
3,3,2020-01-01 00:00:24 UTC,view,5793052,1487580005754995573,,,4.92,420652863,546f6af3-a517-4752-a98b-80c4c5860711
4,4,2020-01-01 00:00:25 UTC,view,5899926,2115334439910245200,,,3.92,484071203,cff70ddf-529e-4b0c-a4fc-f43a749c0acb
...,...,...,...,...,...,...,...,...,...,...
3851288,4264743,2020-01-31 23:59:44 UTC,view,5877031,1487580010100293687,,milv,3.49,564814969,fc7063a6-b45e-4863-babb-da4934b83388
3851289,4264744,2020-01-31 23:59:47 UTC,view,5870076,1783999064136745198,,grattol,5.71,583267679,2806ff10-08bc-4811-9ab7-af074fe22a88
3851290,4264745,2020-01-31 23:59:50 UTC,view,5813496,1487580005553668971,,,11.03,583267679,2806ff10-08bc-4811-9ab7-af074fe22a88
3851291,4264746,2020-01-31 23:59:52 UTC,view,5796984,1487580005671109489,,masura,1.73,417102560,dcf90ff3-a246-4b00-b39a-83e6444a0769


In [2]:
# Print the column summary, forcing the non-null count to be shown for every column.
df.info(show_counts=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3851293 entries, 0 to 3851292
Data columns (total 10 columns):
 #   Column         Non-Null Count    Dtype  
---  ------         --------------    -----  
 0   Unnamed: 0     3851293 non-null  int64  
 1   event_time     3851293 non-null  object 
 2   event_type     3851293 non-null  object 
 3   product_id     3851293 non-null  int64  
 4   category_id    3851293 non-null  int64  
 5   category_code  69532 non-null    object 
 6   brand          2249454 non-null  object 
 7   price          3851293 non-null  float64
 8   user_id        3851293 non-null  int64  
 9   user_session   3851293 non-null  object 
dtypes: float64(1), int64(4), object(5)
memory usage: 293.8+ MB


In [3]:
# Drop 'Unnamed: 0' (looks like a saved row index) and 'category_code', which is
# almost entirely null (69,532 non-null out of 3,851,293 rows per df.info()).
# `columns=` already selects the axis, so the redundant `axis=1` is omitted.
df = df.drop(columns=['Unnamed: 0', 'category_code'])
df.head()

Unnamed: 0,event_time,event_type,product_id,category_id,brand,price,user_id,user_session
0,2020-01-01 00:00:00 UTC,view,5809910,1602943681873052386,grattol,5.24,595414620,4adb70bb-edbd-4981-b60f-a05bfd32683a
1,2020-01-01 00:00:09 UTC,view,5812943,1487580012121948301,kinetics,3.97,595414640,c8c5205d-be43-4f1d-aa56-4828b8151c8a
2,2020-01-01 00:00:19 UTC,view,5798924,1783999068867920626,zinger,3.97,595412617,46a5010f-bd69-4fbe-a00d-bb17aa7b46f3
3,2020-01-01 00:00:24 UTC,view,5793052,1487580005754995573,,4.92,420652863,546f6af3-a517-4752-a98b-80c4c5860711
4,2020-01-01 00:00:25 UTC,view,5899926,2115334439910245200,,3.92,484071203,cff70ddf-529e-4b0c-a4fc-f43a749c0acb


### **칼럼분석**
***
- **user_session**: 고객이 앱에 접속한 시점부터 종료할 때까지를 하나의 세션으로 봄. 세션 시간이 너무 길어지면 해당 세션이 자동으로 종료되기도 함
- **user_id**: 고객아이디
- **event_time**: 이벤트 발생 시각
- **event_type**: 이벤트 타입
    - view: 상품조회
    - cart: 장바구니 담기
    - remove_from_cart: 장바구니에서 제거
    - purchase: 상품구매
- **product_id**: 상품아이디
- **category_id**: 카테고리아이디
- **price**: 가격
- **brand**: 브랜드

## **질문만들기**
***
- DAU(일간 활성 사용자수)는 얼마일까?
    - 어느 요일에 가장 많이 방문하는가?
- 사이트 체류시간의 평균은?
    - 이벤트 타입별로 비교하기
- 퍼널분석
    - 어느 단계에서 유저의 이탈이 가장 많은가?

In [4]:
# Parse the raw timestamp strings (e.g. '2020-01-01 00:00:00 UTC') into datetimes.
df['event_time'] = pd.to_datetime(df['event_time'])
# Calendar date of each event as a tz-naive midnight timestamp. Dropping the
# timezone and normalizing stays vectorized, instead of materializing one Python
# `date` object per row via `.dt.date` and then converting them all back.
df['date_ymd'] = df['event_time'].dt.tz_localize(None).dt.normalize()
# Weekday index of the event date: Monday=0 ... Sunday=6.
df['dayofweek'] = df['date_ymd'].dt.dayofweek

df.head()

Unnamed: 0,event_time,event_type,product_id,category_id,brand,price,user_id,user_session,date_ymd,dayofweek
0,2020-01-01 00:00:00+00:00,view,5809910,1602943681873052386,grattol,5.24,595414620,4adb70bb-edbd-4981-b60f-a05bfd32683a,2020-01-01,2
1,2020-01-01 00:00:09+00:00,view,5812943,1487580012121948301,kinetics,3.97,595414640,c8c5205d-be43-4f1d-aa56-4828b8151c8a,2020-01-01,2
2,2020-01-01 00:00:19+00:00,view,5798924,1783999068867920626,zinger,3.97,595412617,46a5010f-bd69-4fbe-a00d-bb17aa7b46f3,2020-01-01,2
3,2020-01-01 00:00:24+00:00,view,5793052,1487580005754995573,,4.92,420652863,546f6af3-a517-4752-a98b-80c4c5860711,2020-01-01,2
4,2020-01-01 00:00:25+00:00,view,5899926,2115334439910245200,,3.92,484071203,cff70ddf-529e-4b0c-a4fc-f43a749c0acb,2020-01-01,2


### 일간 활성사용자수 구하기

In [5]:
# Daily active users: the number of distinct user_id values seen on each date.
dau_january_by_date = (
    df.groupby('date_ymd', as_index=False)[['user_id']]
    .nunique()
    .rename(columns={'date_ymd': '날짜', 'user_id': 'DAU'})
)
dau_january_by_date

Unnamed: 0,날짜,DAU
0,2020-01-01,11765
1,2020-01-02,14039
2,2020-01-03,15396
3,2020-01-04,16044
4,2020-01-05,16511
5,2020-01-06,15707
6,2020-01-07,17099
7,2020-01-08,18580
8,2020-01-09,19879
9,2020-01-10,18878


In [6]:
# Line chart of the per-date DAU series.
fig = px.line(
    dau_january_by_date,
    x='날짜',
    y='DAU',
    title='1월 일별 DAU 추이',
)
fig

  v = v.dt.to_pydatetime()


### **요일별 DAU추이**
***

### **DAU 집계 테이블에 요일 칼럼을 추가하여 요일별 집계가 쉽도록 하기**
***

In [7]:
# Weekday index of each date (Monday=0 ... Sunday=6), stored next to the DAU value.
weekday_index = dau_january_by_date['날짜'].dt.dayofweek
dau_january_by_date['요일'] = weekday_index
dau_january_by_date

Unnamed: 0,날짜,DAU,요일
0,2020-01-01,11765,2
1,2020-01-02,14039,3
2,2020-01-03,15396,4
3,2020-01-04,16044,5
4,2020-01-05,16511,6
5,2020-01-06,15707,0
6,2020-01-07,17099,1
7,2020-01-08,18580,2
8,2020-01-09,19879,3
9,2020-01-10,18878,4


In [8]:
# Average DAU per weekday, rounded to two decimals.
# The weekday labels are applied to the '요일' column only: a frame-wide
# `.replace` would also rewrite any DAU mean that happened to equal 0-6.
weekday_labels = {0: '월요일', 1: '화요일', 2: '수요일', 3: '목요일',
                  4: '금요일', 5: '토요일', 6: '일요일'}
dau_by_day = dau_january_by_date.groupby('요일', as_index=False)[['DAU']].mean().round(2)
dau_by_day['요일'] = dau_by_day['요일'].map(weekday_labels)
dau_by_day

Unnamed: 0,요일,DAU
0,월요일,19284.75
1,화요일,19855.5
2,수요일,18425.2
3,목요일,18477.8
4,금요일,18195.8
5,토요일,17041.0
6,일요일,18146.25


### **요일별 DAU평균의 변화를 시각화하기**
***

In [9]:
# Line chart of the average DAU for each weekday.
fig = px.line(
    dau_by_day,
    x='요일',
    y='DAU',
    title='1월 요일별 DAU평균',
)
fig

### **[2] 사이트 체류시간 평균은?**
***
- **한세션의 마지막시간 - 시작시간을 체류시간으로 정의함**
- 구매한 사람, 장바구니 담은 사람, 조회만 한 사람, 장바구니에서 제외한 사람별로 분석

In [10]:
# Display the event log, now including the derived date_ymd and dayofweek columns.
df

Unnamed: 0,event_time,event_type,product_id,category_id,brand,price,user_id,user_session,date_ymd,dayofweek
0,2020-01-01 00:00:00+00:00,view,5809910,1602943681873052386,grattol,5.24,595414620,4adb70bb-edbd-4981-b60f-a05bfd32683a,2020-01-01,2
1,2020-01-01 00:00:09+00:00,view,5812943,1487580012121948301,kinetics,3.97,595414640,c8c5205d-be43-4f1d-aa56-4828b8151c8a,2020-01-01,2
2,2020-01-01 00:00:19+00:00,view,5798924,1783999068867920626,zinger,3.97,595412617,46a5010f-bd69-4fbe-a00d-bb17aa7b46f3,2020-01-01,2
3,2020-01-01 00:00:24+00:00,view,5793052,1487580005754995573,,4.92,420652863,546f6af3-a517-4752-a98b-80c4c5860711,2020-01-01,2
4,2020-01-01 00:00:25+00:00,view,5899926,2115334439910245200,,3.92,484071203,cff70ddf-529e-4b0c-a4fc-f43a749c0acb,2020-01-01,2
...,...,...,...,...,...,...,...,...,...,...
3851288,2020-01-31 23:59:44+00:00,view,5877031,1487580010100293687,milv,3.49,564814969,fc7063a6-b45e-4863-babb-da4934b83388,2020-01-31,4
3851289,2020-01-31 23:59:47+00:00,view,5870076,1783999064136745198,grattol,5.71,583267679,2806ff10-08bc-4811-9ab7-af074fe22a88,2020-01-31,4
3851290,2020-01-31 23:59:50+00:00,view,5813496,1487580005553668971,,11.03,583267679,2806ff10-08bc-4811-9ab7-af074fe22a88,2020-01-31,4
3851291,2020-01-31 23:59:52+00:00,view,5796984,1487580005671109489,masura,1.73,417102560,dcf90ff3-a246-4b00-b39a-83e6444a0769,2020-01-31,4


In [11]:
# Look at every event recorded for one example session.
sample_session_id = 'dcf90ff3-a246-4b00-b39a-83e6444a0769'
df.loc[df['user_session'] == sample_session_id]

Unnamed: 0,event_time,event_type,product_id,category_id,brand,price,user_id,user_session,date_ymd,dayofweek
3851137,2020-01-31 23:50:32+00:00,view,5809912,1602943681873052386,grattol,5.24,417102560,dcf90ff3-a246-4b00-b39a-83e6444a0769,2020-01-31,4
3851152,2020-01-31 23:50:58+00:00,cart,5809910,1602943681873052386,grattol,5.24,417102560,dcf90ff3-a246-4b00-b39a-83e6444a0769,2020-01-31,4
3851161,2020-01-31 23:51:51+00:00,view,5854812,1602943681873052386,grattol,5.24,417102560,dcf90ff3-a246-4b00-b39a-83e6444a0769,2020-01-31,4
3851169,2020-01-31 23:52:19+00:00,cart,5854812,1602943681873052386,grattol,5.24,417102560,dcf90ff3-a246-4b00-b39a-83e6444a0769,2020-01-31,4
3851200,2020-01-31 23:53:54+00:00,view,5899926,2115334439910245200,,3.92,417102560,dcf90ff3-a246-4b00-b39a-83e6444a0769,2020-01-31,4
3851218,2020-01-31 23:54:50+00:00,view,5789668,1487580005595612013,,2.68,417102560,dcf90ff3-a246-4b00-b39a-83e6444a0769,2020-01-31,4
3851235,2020-01-31 23:55:44+00:00,view,5859421,1487580005671109489,masura,2.37,417102560,dcf90ff3-a246-4b00-b39a-83e6444a0769,2020-01-31,4
3851241,2020-01-31 23:56:19+00:00,cart,5859421,1487580005671109489,masura,2.37,417102560,dcf90ff3-a246-4b00-b39a-83e6444a0769,2020-01-31,4
3851261,2020-01-31 23:58:22+00:00,view,5708979,1487580005671109489,masura,3.16,417102560,dcf90ff3-a246-4b00-b39a-83e6444a0769,2020-01-31,4
3851272,2020-01-31 23:58:43+00:00,cart,5708979,1487580005671109489,masura,3.16,417102560,dcf90ff3-a246-4b00-b39a-83e6444a0769,2020-01-31,4


In [12]:
# First and last event timestamp of every session; their difference is the
# session's dwell time. Named aggregation yields flat 'max'/'min' columns
# directly, so the result no longer needs positional column relabeling, which
# depends on how pandas flattens a list-`agg` result.
event_type_session_duration = df.groupby('user_session', as_index=False).agg(
    max=('event_time', 'max'),
    min=('event_time', 'min'),
)
event_type_session_duration['duration'] = (
    event_type_session_duration['max'] - event_type_session_duration['min']
)
event_type_session_duration

Unnamed: 0,user_session,max,min,duration
0,0000061d-f3e9-484b-8c73-e54f355032a3,2020-01-16 03:30:41+00:00,2020-01-16 03:30:41+00:00,0 days 00:00:00
1,00000ac8-0015-4f12-996a-be2896323738,2020-01-24 22:22:20+00:00,2020-01-24 22:22:20+00:00,0 days 00:00:00
2,00001ca1-f2df-4572-b0b8-e752e2064aae,2020-01-01 19:09:23+00:00,2020-01-01 19:09:23+00:00,0 days 00:00:00
3,00002db7-16b6-4db2-bf8b-7a1cb6bd0e7f,2020-01-22 16:51:50+00:00,2020-01-22 16:51:50+00:00,0 days 00:00:00
4,00002f68-09b8-4db3-a092-aeff45fd13ad,2020-01-25 07:17:58+00:00,2020-01-25 07:17:58+00:00,0 days 00:00:00
...,...,...,...,...
911569,ffff7b96-9751-4eaa-806e-fe979cc00dc8,2020-01-25 11:32:02+00:00,2020-01-24 16:57:30+00:00,0 days 18:34:32
911570,ffff80e2-ad33-4704-9ffe-d6c612e9641f,2020-01-21 18:07:47+00:00,2020-01-21 18:07:47+00:00,0 days 00:00:00
911571,ffff8da3-b79a-48f2-888c-117f2d1a7793,2020-01-26 10:53:09+00:00,2020-01-26 10:53:09+00:00,0 days 00:00:00
911572,ffff9422-39ba-4cdf-afd1-a9d87bb3d79b,2020-01-13 09:55:09+00:00,2020-01-13 09:55:09+00:00,0 days 00:00:00


In [18]:
# Per-session event counts, one column per event_type value. Column labels come
# from the pivot itself rather than a hard-coded list, so a label can never be
# attached to the wrong event type.
session_duration = pd.pivot_table(
    df,
    index='user_session',
    columns='event_type',
    values='event_time',  # scalar `values` keeps the result columns single-level
    aggfunc='count',
)
# Event types a session never fired come out of the pivot as NaN; count them as 0.
session_duration = session_duration.fillna(0).reset_index()
# Drop the leftover 'event_type' label on the column index.
session_duration.columns.name = None
session_duration

Unnamed: 0,user_session,cart,purchase,remove_from_cart,view
0,0000061d-f3e9-484b-8c73-e54f355032a3,0.0,0.0,0.0,1.0
1,00000ac8-0015-4f12-996a-be2896323738,0.0,0.0,0.0,1.0
2,00001ca1-f2df-4572-b0b8-e752e2064aae,0.0,0.0,0.0,1.0
3,00002db7-16b6-4db2-bf8b-7a1cb6bd0e7f,0.0,0.0,0.0,1.0
4,00002f68-09b8-4db3-a092-aeff45fd13ad,0.0,0.0,0.0,1.0
...,...,...,...,...,...
911569,ffff7b96-9751-4eaa-806e-fe979cc00dc8,1.0,0.0,2.0,10.0
911570,ffff80e2-ad33-4704-9ffe-d6c612e9641f,0.0,0.0,0.0,1.0
911571,ffff8da3-b79a-48f2-888c-117f2d1a7793,0.0,0.0,0.0,1.0
911572,ffff9422-39ba-4cdf-afd1-a9d87bb3d79b,0.0,0.0,0.0,1.0


### 구매까지 한 세션 구하기

In [25]:
# Sessions that contain at least one purchase event, and their ids as a list.
has_purchase = session_duration['purchase'] > 0
purchased = session_duration[has_purchase]
purchased_sessions = purchased['user_session'].unique().tolist()
purchased_sessions

['0013b593-e8eb-4bcf-b117-441db7cd9c6d',
 '001955e7-9f9f-4aad-a585-0dc5b988fe2e',
 '001a3579-4bf6-45d7-b48d-1fbb7ecd40da',
 '001cfc10-201f-86fd-73b5-2f8fcfcf541d',
 '001f6463-08b9-4363-a079-21499489a1c1',
 '0028c9dd-e5f3-456c-a8fe-7dcc73c5e486',
 '002a3244-0830-4e47-b6c8-9a70b72baee1',
 '002b11ec-6b6b-4eb4-8609-550e1205491f',
 '002cc9f7-5056-4590-99a2-0441d5c5366e',
 '0033400f-3041-48b3-b6fa-9fdcab5521d0',
 '0037df26-4c23-40d3-98d4-71c4d4f51304',
 '003b0407-9189-4e02-8c33-08a1b5456df2',
 '00429d74-9276-41fe-909e-4e4ee687cf7e',
 '0043c90b-b11a-4fc2-9acf-cb175382822b',
 '00495446-cbd5-488e-9104-8cf9751211b9',
 '004a1efd-5206-45cb-b7eb-71dce7432fe3',
 '0055ecef-7400-ae63-df27-df8a5f959b9a',
 '00566b2d-63cb-413a-a637-178cebaba81a',
 '0056f4e1-f842-4366-8bba-2320f0ca8209',
 '005865c7-4b56-47cb-977b-e807bbab826c',
 '005fbc5b-7ee4-476c-a9b6-66e109d14663',
 '00611e99-349b-a14f-8a08-6c4095929d5d',
 '00612fa2-3bf5-03a0-d179-f2241fa6e8ab',
 '00621687-669d-4d85-b699-1ef86235e8ed',
 '006bc9fb-8f16-

### 구매는 하지 않고 장바구니 담기까지만 한 세션 구하기

In [26]:
# Sessions that added to the cart at least once but never purchased.
no_purchase = session_duration['purchase'] == 0
has_cart = session_duration['cart'] > 0
cart = session_duration[no_purchase & has_cart]
cart_sessions = cart['user_session'].unique().tolist()
cart_sessions

['0000b977-340c-45e1-9588-2bb26fda52eb',
 '00025691-523f-43b2-897d-e01420c36f4f',
 '00034f2c-f94b-416f-9d87-08cc02034112',
 '00035747-f915-42f1-8cef-e480ff0ce583',
 '00040372-9fc7-4759-b96e-050e7a8f5c99',
 '00046a07-02c0-4305-9e5f-da4770fcc2a5',
 '0004eb12-666c-4120-8a98-9130eeb6fba0',
 '0006073a-430f-416a-b461-824eb95b5f0e',
 '00067850-0e71-4d4b-919a-4cfba20a2888',
 '00075975-e6f6-417f-b7b7-257a56a5b941',
 '0007ce03-a85d-443f-81a4-75d49f63d6d1',
 '0007f7e4-e3d0-4916-8e6d-90c747e09309',
 '00087b1b-d032-4038-a18f-1ec97de40d9f',
 '00094ac0-2ed9-4b37-8caf-8d6ca92a5e3d',
 '000a74c8-9e30-46d6-9d86-0dbdfcd6e21b',
 '000af394-f357-4e73-8ba8-90c621a3cf78',
 '000b9597-9dbc-4dfd-89c2-59ac3be42340',
 '000c718f-f6e7-4248-8fe1-856a89db995b',
 '000d7ff7-842c-48d4-8da8-5e03aae04b6e',
 '000d96c6-3bcc-4d9d-9e45-b434e9ad695f',
 '000dcb52-dcee-662b-011c-a1e7db939bc1',
 '000e343a-b9a9-4572-a6f6-bf6259c7a284',
 '000f874d-5e09-4b10-ad77-1bdd6e16e6a4',
 '000f908b-cba7-4281-89a3-24fa19b0b0d6',
 '00104ce3-efd8-

### 조회만 한 세션 구하기

In [27]:
# Sessions with at least one view and neither a cart nor a purchase event.
never_purchased = session_duration['purchase'] == 0
never_carted = session_duration['cart'] == 0
has_view = session_duration['view'] > 0
cond = never_purchased & never_carted & has_view
view = session_duration[cond]
view_sessions = view['user_session'].unique().tolist()
view_sessions

['0000061d-f3e9-484b-8c73-e54f355032a3',
 '00000ac8-0015-4f12-996a-be2896323738',
 '00001ca1-f2df-4572-b0b8-e752e2064aae',
 '00002db7-16b6-4db2-bf8b-7a1cb6bd0e7f',
 '00002f68-09b8-4db3-a092-aeff45fd13ad',
 '0000601b-26a6-4d63-92da-6b6aebce0e98',
 '0000a8bd-7cb7-45ed-913a-e2aa0cd0f397',
 '0000b3cb-5422-4bf2-b8fe-5c1831d0dc1b',
 '0000d083-4f1b-4f1f-a64b-c237b93ace8c',
 '0000f916-a117-4e12-9f31-c46a0df12ee4',
 '0001186b-2bfc-46aa-a588-c4e229c54ba4',
 '00015af3-7d75-4b9a-a7da-7fd07601f13b',
 '00015c2b-4f4f-4adc-a107-f80d862d46c1',
 '00016afd-2d67-4e6e-8918-1243249acc34',
 '00017b33-a65a-41b5-bd28-a2bfdd71ca3e',
 '00017f26-9070-49d7-a809-cdfe0be5ef1d',
 '0001924c-a853-4087-990c-14e11887a4cf',
 '00019bed-3c6e-4d29-86de-75bafbdf978c',
 '0001b553-d53e-445a-bdb1-09da33d88004',
 '0001bf0b-23fe-4ff2-b151-f83aeb5e5d46',
 '0001d159-0c58-4b60-8e7a-d108f8aa64d2',
 '0001d5ff-c169-4bc4-b0e2-d45586a2f6d9',
 '0001de3e-3a52-44dd-879b-d67e0a45489a',
 '0002082a-f567-40a6-a75e-b298c990daf9',
 '00020d47-a6f5-

In [23]:
# Sanity check: the three segments together should account for every session.
segmented_total = sum(len(segment) for segment in (purchased, view, cart))
print(segmented_total)
print(len(session_duration))

911574
911574


In [55]:
# Mean dwell time over the sessions that include a purchase, split into
# day / hour / minute components for the summary sentence.
cond = event_type_session_duration['user_session'].isin(purchased_sessions)
purchased_mean_duration = event_type_session_duration.loc[cond, 'duration'].mean()
purchased_parts = purchased_mean_duration.components
purchased_mean_duration_days = purchased_parts.days
purchased_mean_duration_hours = purchased_parts.hours
purchased_mean_duration_minutes = purchased_parts.minutes
purchased_durations = f'구매까지 한 세션의 평균 체류시간: {purchased_mean_duration_days}일 {purchased_mean_duration_hours}시간 {purchased_mean_duration_minutes}분'
purchased_durations

'구매까지 한 세션의 평균 체류시간: 0일 6시간 42분'

In [54]:
# Mean dwell time over the cart-without-purchase sessions, split into
# day / hour / minute components for the summary sentence.
cond = event_type_session_duration['user_session'].isin(cart_sessions)
cart_mean_duration = event_type_session_duration.loc[cond, 'duration'].mean()
cart_parts = cart_mean_duration.components
cart_mean_duration_days = cart_parts.days
cart_mean_duration_hours = cart_parts.hours
cart_mean_duration_minutes = cart_parts.minutes
cart_durations = f'구매는 안하고 장바구니 담기까지 한 세션의 평균 체류시간: {cart_mean_duration_days}일 {cart_mean_duration_hours}시간 {cart_mean_duration_minutes}분'
cart_durations

'구매는 안하고 장바구니 담기까지 한 세션의 평균 체류시간: 0일 1시간 57분'

In [53]:
# Mean dwell time over the view-only sessions, split into
# day / hour / minute components for the summary sentence.
cond = event_type_session_duration['user_session'].isin(view_sessions)
view_mean_duration = event_type_session_duration.loc[cond, 'duration'].mean()
view_parts = view_mean_duration.components
view_mean_duration_days = view_parts.days
view_mean_duration_hours = view_parts.hours
view_mean_duration_minutes = view_parts.minutes
view_durations = f'조회만 한 세션의 평균 체류시간: {view_mean_duration_days}일 {view_mean_duration_hours}시간 {view_mean_duration_minutes}분'
view_durations

'조회만 한 세션의 평균 체류시간: 0일 0시간 38분'

In [62]:
# Collect the three summary sentences into a single-column table.
durations = pd.DataFrame(
    {'체류시간': [purchased_durations, cart_durations, view_durations]}
)
durations

Unnamed: 0,체류시간
0,구매까지 한 세션의 평균 체류시간: 0일 6시간 42분
1,구매는 안하고 장바구니 담기까지 한 세션의 평균 체류시간: 0일 1시간 57분
2,조회만 한 세션의 평균 체류시간: 0일 0시간 38분


## **[3] 퍼널분석**
***
- view, cart, purchase의 각 단계에서의 이탈률 시각화 및 분석

In [64]:
# Display the per-session event-count table that the funnel is built from.
session_duration

Unnamed: 0,user_session,cart,purchase,remove_from_cart,view
0,0000061d-f3e9-484b-8c73-e54f355032a3,0.0,0.0,0.0,1.0
1,00000ac8-0015-4f12-996a-be2896323738,0.0,0.0,0.0,1.0
2,00001ca1-f2df-4572-b0b8-e752e2064aae,0.0,0.0,0.0,1.0
3,00002db7-16b6-4db2-bf8b-7a1cb6bd0e7f,0.0,0.0,0.0,1.0
4,00002f68-09b8-4db3-a092-aeff45fd13ad,0.0,0.0,0.0,1.0
...,...,...,...,...,...
911569,ffff7b96-9751-4eaa-806e-fe979cc00dc8,1.0,0.0,2.0,10.0
911570,ffff80e2-ad33-4704-9ffe-d6c612e9641f,0.0,0.0,0.0,1.0
911571,ffff8da3-b79a-48f2-888c-117f2d1a7793,0.0,0.0,0.0,1.0
911572,ffff9422-39ba-4cdf-afd1-a9d87bb3d79b,0.0,0.0,0.0,1.0


In [77]:
# Number of sessions with at least one event of each stage. Each count is a
# one-element Series indexed by 'user_session', which is what lets the dict
# below be turned into a DataFrame.
viewed_sessions = session_duration.loc[session_duration['view'] > 0, ['user_session']].count()
carted_sessions = session_duration.loc[session_duration['cart'] > 0, ['user_session']].count()
purchased_sessions_funnel = session_duration.loc[session_duration['purchase'] > 0, ['user_session']].count()

stage_counts = pd.DataFrame({
    '조회': viewed_sessions,
    '장바구니 담기': carted_sessions,
    '구매': purchased_sessions_funnel,
})
# Transpose so each stage becomes a row, then give the two columns readable names.
funnel_df = (
    stage_counts.T
    .reset_index(drop=False)
    .rename(columns={'index': '이벤트타입', 'user_session': '횟수'})
)
funnel_df


Unnamed: 0,이벤트타입,횟수
0,조회,911574
1,장바구니 담기,155442
2,구매,22868


In [88]:
# Funnel chart of the per-stage session counts.
fig = px.funnel(
    funnel_df,
    x='이벤트타입',
    y='횟수',
    title='이벤트 타입별 고유 session 갯수 카운트',
)
fig

### **Retention Rate(단계별 전환율) 분석**
***

In [96]:
# Step-to-step ratios: cart sessions / view sessions and purchase sessions /
# cart sessions. The view stage is the baseline and is fixed at 1.0.
view_retention = 1.0
cart_retention = carted_sessions / viewed_sessions
purchase_retention = purchased_sessions_funnel / carted_sessions

stage_rates = pd.DataFrame({
    'view_retention': view_retention,
    'cart_retention': cart_retention,
    'purchase_retention': purchase_retention,
})
# Transpose so each stage becomes a row, then give the two columns readable names.
retention_rate = (
    stage_rates.T
    .reset_index(drop=False)
    .rename(columns={'index': '이벤트타입', 'user_session': 'retention_rate'})
)
retention_rate

Unnamed: 0,이벤트타입,retention_rate
0,view_retention,1.0
1,cart_retention,0.17052
2,purchase_retention,0.147116


In [109]:
# Funnel chart of the per-stage ratios.
fig = px.funnel(
    retention_rate,
    x='이벤트타입',
    y='retention_rate',
    title='이벤트 타입별 리텐션 비율',
)
# Render each bar's label as a percentage with two decimals.
fig.update_traces(texttemplate='%{value:,.2%}')