In [1]:
import pandas as pd
import numpy as np
from itertools import permutations

# 데이터 셋 생성

In [2]:
def arrayToDf(phase_name: str, array, col_name: str) -> pd.DataFrame: 
    
    """
    array를 받아서 원하는 범주에 할당하여 데이터프레임을 생성합니다.
    """
    
    dfFromArray = pd.DataFrame(data=array, index=[phase_name]*len(np_array), columns=[col_name]).reset_index()
    dfFromArray.columns = ['category', col_name]
    
    return dfFromArray
    

In [3]:
user_a = np.arange(100)
user_b = np.arange(70)
user_c = np.arange(50)
user_d = np.arange(30)

In [4]:
user_df = pd.DataFrame()

for np_array, category in zip([user_a, user_b, user_c, user_d], ['phase1', 'phase2', 'phase3', 'phase4']):
    user_df = pd.concat([user_df, arrayToDf(category, np_array, 'userId')])

In [5]:
user_df.head()

Unnamed: 0,category,userId
0,phase1,0
1,phase1,1
2,phase1,2
3,phase1,3
4,phase1,4


# 퍼널 1(순서대로 진행 되는 경우)

In [72]:
def make_funnel(df: pd.DataFrame, category: str, user_list: str, phase_list: list) -> pd.DataFrame:
    
    """
    
    """
    
    empty_list = []
    
    intersectArray = np.array(df.loc[(df[category] == phase_list[0]), user_list]) #array 초기화
    initialCnt = len(intersectArray)
    phase_name = phase_list[0] #시작 퍼널
    
    for phase in phase_list:
        
        phaseArray = np.array(df.loc[(df[category] == phase), user_list])
        intersectArray = np.intersect1d(intersectArray, phaseArray) #퍼널 통과 유저 array 
        intersectCnt = len(intersectArray)
        
        if phase == phase_list[0]:
            phase_name = phase_name
            
        else:
            phase_name += '->' + phase
        
        empty_list.append(["->".join(phase_list), phase_name, intersectCnt, intersectCnt/initialCnt])
            
    df = pd.DataFrame(data=empty_list, columns = ['퍼널이름', '퍼널단계', '유저수', '전환율'])
    
    return df
        

In [73]:
testDf = make_funnel(user_df, 'category', 'userId', ['phase1', 'phase2', 'phase3', 'phase4'])

In [6]:
np.array(user_df.loc[(user_df.category == 'phase4'), 'userId'])

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29])

In [74]:
#한 번에 여러 스타일을 적용해야함.
testDf.style.format({'전환율':'{:,.1%}'.format})\
        .set_properties(**{'text-align': 'left'})\
        .bar(subset=['전환율'], width=100, align='left', vmin=0, vmax=1)\
        .set_table_styles(
                [{"selector": "", "props": [("border", "1px solid grey")]},
                 {"selector": "tbody td", "props": [("border", "1px solid grey")]},
                 {"selector": "th", "props": [("border", "1px solid grey")]}])

Unnamed: 0,퍼널이름,퍼널단계,유저수,전환율
0,phase1->phase2->phase3->phase4,phase1,100,100.0%
1,phase1->phase2->phase3->phase4,phase1->phase2,70,70.0%
2,phase1->phase2->phase3->phase4,phase1->phase2->phase3,50,50.0%
3,phase1->phase2->phase3->phase4,phase1->phase2->phase3->phase4,30,30.0%


# 퍼널 2(건너뛸 수 있는 경우)

In [149]:
def funnelNameList(cntPhase: int, check: list = []) -> list:

    """
    
    """
    
    empty_list = []
    
    array = [i for i in range(1, cntPhase)]
    
    for i in range(1, len(array)+1):
        empty_list.extend(permutations(array, i))
        
    sorted_list = list(set(["".join(sorted([str(k) for k in element])) for element in empty_list]))
    final_list = [[0] + [int(k) for k in list_a] for list_a in sorted_list if set(check).issubset([int(k) for k in list_a]) == True]
    
    return final_list

In [150]:
user_a = np.arange(100)
user_b = np.random.choice(range(10, 90), size=70, replace=False)
user_c = np.random.choice(range(10, 90), size=60, replace=False)
user_d = np.random.choice(range(10, 90), size=50, replace=False)

In [151]:
user_df = pd.DataFrame()

for np_array, category in zip([user_a, user_b, user_c, user_d], ['phase1', 'phase2', 'phase3', 'phase4']):
    user_df = pd.concat([user_df, arrayToDf(category, np_array, 'userId')])

In [152]:
funnelNameList(4, [1])

[[0, 1], [0, 1, 3], [0, 1, 2, 3], [0, 1, 2]]

In [153]:
phase_list = ['phase1', 'phase2', 'phase3', 'phase4']
testDF2 = pd.DataFrame()

for funnel in funnelNameList(4, [1]):
    
    df = make_funnel(user_df, 'category', 'userId', [phase_list[i] for i in funnel])
    
    testDF2 = pd.concat([testDF2, df])


In [154]:
testDF2['idx1'] = testDF2['퍼널이름'].map(lambda x : len(x))

In [158]:
testDF2 = testDF2.sort_values(by=['idx1'], ascending=[False]).iloc[:, [0, 1, 2, 3]]

In [159]:
testDF2.reset_index(drop=True).style.format({'전환율':'{:,.1%}'.format})\
        .set_properties(**{'text-align': 'left'})\
        .bar(subset=['전환율'], width=100, align='left', vmin=0, vmax=1)\
        .set_table_styles(
                [{"selector": "", "props": [("border", "1px solid grey")]},
                 {"selector": "tbody td", "props": [("border", "1px solid grey")]},
                 {"selector": "th", "props": [("border", "1px solid grey")]}])

Unnamed: 0,퍼널이름,퍼널단계,유저수,전환율
0,phase1->phase2->phase3->phase4,phase1,100,100.0%
1,phase1->phase2->phase3->phase4,phase1->phase2,70,70.0%
2,phase1->phase2->phase3->phase4,phase1->phase2->phase3,53,53.0%
3,phase1->phase2->phase3->phase4,phase1->phase2->phase3->phase4,35,35.0%
4,phase1->phase2->phase4,phase1,100,100.0%
5,phase1->phase2->phase4,phase1->phase2,70,70.0%
6,phase1->phase2->phase4,phase1->phase2->phase4,46,46.0%
7,phase1->phase2->phase3,phase1,100,100.0%
8,phase1->phase2->phase3,phase1->phase2,70,70.0%
9,phase1->phase2->phase3,phase1->phase2->phase3,53,53.0%
