# [1] 퍼널 분석

In [10]:
# Mount Google Drive into the Colab runtime at /content/drive/ so that files
# under it can be read with ordinary file paths.
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [12]:
import pandas as pd
import plotly.graph_objects as go

In [21]:
# Base directory on the mounted Drive. File names are appended with `+`,
# so the trailing slash is required.
path = '/content/drive/MyDrive/새싹2기/python_DA/data/'

In [22]:
# Load the events CSV. No dtype or date-parsing options are passed, so pandas
# infers the column types on its own.
events = pd.read_csv(path + 'rocket_sample_events.csv')

## (1) 데이터 확인하기

In [25]:
# (rows, columns) of the events frame.
events.shape

(20983, 6)

In [26]:
# Per-column dtype and non-null count, plus memory usage.
events.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20983 entries, 0 to 20982
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   user_id     20983 non-null  object 
 1   event_name  20983 non-null  object 
 2   event_date  20983 non-null  object 
 3   timestamp   20983 non-null  object 
 4   platform    20983 non-null  object 
 5   amount      556 non-null    float64
dtypes: float64(1), object(5)
memory usage: 983.7+ KB


In [27]:
# Count of missing values in each column.
events.isna().sum()

Unnamed: 0,0
user_id,0
event_name,0
event_date,0
timestamp,0
platform,0
amount,20427


In [29]:
# Dtype of each column.
events.dtypes

Unnamed: 0,0
user_id,object
event_name,object
event_date,object
timestamp,object
platform,object
amount,float64


In [28]:
# Preview the first rows of the events frame.
events.head()

Unnamed: 0,user_id,event_name,event_date,timestamp,platform,amount
0,rk_0001,visit,2024-01-30,2024-01-30 17:52:00.000000000,Android,
1,rk_0001,view_item,2024-01-30,2024-01-30 17:52:00.000000000,Android,
2,rk_0001,add_to_cart,2024-01-30,2024-01-30 17:51:00.000000000,Android,
3,rk_0001,visit,2024-01-10,2024-01-10 16:24:00.000000000,Android,
4,rk_0001,view_item,2024-01-10,2024-01-10 16:20:00.000000000,Android,


In [35]:
# Number of distinct users (unique user_id values).
events['user_id'].nunique()

2000

In [37]:
# Number of events (row count of the events frame).
len(events)

20983

In [38]:
# Number of rows for each event type, most frequent first.
events['event_name'].value_counts()

Unnamed: 0_level_0,count
event_name,Unnamed: 1_level_1
visit,8998
view_item,6497
add_to_cart,3767
begin_checkout,1165
purchase,556


## (2) 퍼널 정의하기



In [39]:
# Event names in funnel order, from the first stage to the last.
FUNNEL_EVENTS = ['visit', 'view_item', 'add_to_cart', 'begin_checkout', 'purchase']

In [44]:
# Distinct users per event name, ordered by funnel stage.
# A user is counted in a stage if they fired that event at least once; the
# order of their events and whether they passed earlier stages is not checked.
# fill_value=0 keeps a stage that has no events as an integer 0 instead of NaN
# (which would silently turn the whole column into floats).
aggregated = (
    events.groupby('event_name')['user_id']
    .nunique()
    .reindex(FUNNEL_EVENTS, fill_value=0)
)

# Convert to a DataFrame with one row per stage: stage name and user count.
funnel_df = aggregated.rename_axis('stage').reset_index(name='user_count')

In [45]:
# Show the funnel table (stage and user count).
funnel_df

Unnamed: 0,stage,user_count
0,visit,2000
1,view_item,1902
2,add_to_cart,1660
3,begin_checkout,863
4,purchase,478


> 단계별 전환율
수식 = (전환수 / 기회수) * 100

- 기회수 : 이전 단계의 user count

In [46]:
# Step conversion rate: each stage's users as a percentage of the previous
# stage's users. The first stage has no predecessor, so its value is NaN.
previous_stage_users = funnel_df['user_count'].shift(1)
funnel_df['conversion_rate'] = (
    funnel_df['user_count'].div(previous_stage_users).mul(100).round(1)
)

> 전체 전환율
수식 = (전환수/기회수) * 100

- 기회수 : 가장 첫번째 단계의 user count

In [48]:
# Overall conversion rate: each stage's users as a percentage of the users
# in the first stage.
first_stage_users = funnel_df['user_count'].iloc[0]
funnel_df['overall_rate'] = (funnel_df['user_count'] / first_stage_users * 100).round(1)

In [49]:
# Drop-off rate: the complement of the step conversion rate, in percent.
funnel_df['dropoff_rate'] = funnel_df['conversion_rate'].rsub(100).round(1)

In [50]:
# Show the funnel table including the conversion, overall and drop-off rates.
funnel_df

Unnamed: 0,stage,user_count,conversion_rate,overall_rate,dropoff_rate
0,visit,2000,,100.0,
1,view_item,1902,95.1,95.1,4.9
2,add_to_cart,1660,87.3,83.0,12.7
3,begin_checkout,863,52.0,43.2,48.0
4,purchase,478,55.4,23.9,44.6


## (3) 퍼널 시각화

In [57]:
# Bar colours, listed in the same order as the funnel stages.
stage_colors = ["#4E79A7", "#F28E2B", "#E15759", "#76B7B2", "#59A14F"]

# Funnel trace: stage names on the y axis, user counts on the x axis.
# Each bar shows its value, its share of the first stage and its share of
# the previous stage.
funnel_trace = go.Funnel(
    y=funnel_df['stage'],
    x=funnel_df['user_count'],
    textposition="inside",
    textinfo="value+percent initial+percent previous",
    opacity=0.85,
    marker=dict(
        color=stage_colors,
        line=dict(width=1, color="#333333"),
    ),
    connector=dict(
        line=dict(color="gray", dash="dot", width=2),
    ),
)

fig = go.Figure(funnel_trace)

# Centre the title near the top of the figure and set the base font.
fig.update_layout(
    title=dict(
        text="사용자 구매 전환 퍼널 분석",
        y=0.9,
        x=0.5,
        xanchor='center',
        yanchor='top',
    ),
    font=dict(family="Arial, sans-serif", size=12, color="black"),
)

fig.show()

## (4) 병목 지점 식별

In [58]:
# Find the stage with the highest drop-off rate.
# dropoff_rate is 100 - conversion_rate, so the selected row is the stage that
# the largest share of the previous stage's users did not reach.
bottleneck_idx = funnel_df['dropoff_rate'].idxmax()
bottleneck = funnel_df.loc[bottleneck_idx]

print(
    f"병목 단계: {bottleneck['stage']}",
    f"이탈률: {bottleneck['dropoff_rate']}%",
    sep="\n",
)

병목 단계: begin_checkout
이탈률: 48.0%


## (5) 세그먼트별 퍼널 비교

In [59]:
# Display the events frame again (pandas truncates the middle rows).
events

Unnamed: 0,user_id,event_name,event_date,timestamp,platform,amount
0,rk_0001,visit,2024-01-30,2024-01-30 17:52:00.000000000,Android,
1,rk_0001,view_item,2024-01-30,2024-01-30 17:52:00.000000000,Android,
2,rk_0001,add_to_cart,2024-01-30,2024-01-30 17:51:00.000000000,Android,
3,rk_0001,visit,2024-01-10,2024-01-10 16:24:00.000000000,Android,
4,rk_0001,view_item,2024-01-10,2024-01-10 16:20:00.000000000,Android,
...,...,...,...,...,...,...
20978,rk_2000,visit,2024-05-27,2024-05-27 08:43:00.000000000,Web,
20979,rk_2000,view_item,2024-05-27,2024-05-27 08:44:00.000000000,Web,
20980,rk_2000,visit,2024-06-21,2024-06-21 14:50:00.000000000,Web,
20981,rk_2000,view_item,2024-06-21,2024-06-21 14:56:00.000000000,Web,


In [61]:
# Example: compare the funnel across platforms.
for platform in ['iOS', 'Android', 'Web']:
    platform_df = events[events['platform'] == platform]
    # Distinct users per funnel stage on this platform. fill_value=0 keeps a
    # stage with no events as an integer 0 instead of NaN.
    counts = (
        platform_df.groupby('event_name')['user_id']
        .nunique()
        .reindex(FUNNEL_EVENTS, fill_value=0)
    )
    print(f"\n[{platform}] 퍼널:")
    print(counts)
    # Final conversion rate = purchasers / visitors, in percent.
    # A platform with no visitors reports 0.0 rather than dividing by zero.
    visitors = counts['visit']
    cvr = round(100 * counts['purchase'] / visitors, 2) if visitors else 0.0
    print(f"최종 전환율 : {cvr}%")


[iOS] 퍼널:
event_name
visit             830
view_item         785
add_to_cart       694
begin_checkout    405
purchase          237
Name: user_id, dtype: int64
최종 전환율 : 28.55%

[Android] 퍼널:
event_name
visit             704
view_item         676
add_to_cart       580
begin_checkout    280
purchase          152
Name: user_id, dtype: int64
최종 전환율 : 21.59%

[Web] 퍼널:
event_name
visit             466
view_item         441
add_to_cart       386
begin_checkout    178
purchase           89
Name: user_id, dtype: int64
최종 전환율 : 19.1%
