In [1]:
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

In [27]:
pd.options.display.max_rows= 50

In [2]:
FILES_DIR = './files/'
total = pd.read_csv(FILES_DIR + 'total.csv')

In [3]:
raw_data = total.copy()

In [4]:
raw_data.drop('Unnamed: 0', axis=1, inplace=True)

In [5]:
# 나이(age)가 17세 이하인 사용자를 찾아서 age(Group) 컬럼에 "00 ~ 17"이라는 값을 넣어줍니다
raw_data.loc[raw_data['age'] <= 17, 'age(Group)'] = "00 ~ 17"

# 위 방식에 따라 나이 설정 값으로 지정해줍니다
raw_data.loc[(raw_data['age'] >= 18) &\
               (raw_data['age'] <= 24), 'age(Group)'] = "18 ~ 24"

raw_data.loc[(raw_data['age'] >= 25) &\
               (raw_data['age'] <= 35), 'age(Group)'] = "25 ~ 35"

raw_data.loc[(raw_data['age'] >= 36) &\
               (raw_data['age'] <= 44), 'age(Group)'] = "36 ~ 44"

raw_data.loc[(raw_data['age'] >= 45) &\
               (raw_data['age'] <= 54), 'age(Group)'] = "45 ~ 54"

raw_data.loc[raw_data['age'] >= 55, 'age(Group)'] = "55 ~ 99"

# 데이터를 정리 후 아래 2개 컬럼 내용을 확인합니다
raw_data[['age','age(Group)']]

Unnamed: 0,age,age(Group)
0,54,45 ~ 54
1,54,45 ~ 54
2,54,45 ~ 54
3,54,45 ~ 54
4,54,45 ~ 54
...,...,...
5544525,38,36 ~ 44
5544526,32,25 ~ 35
5544527,28,25 ~ 35
5544528,34,25 ~ 35


In [21]:
raw_data.event_name.unique()

array(['click_item', 'add_to_cart', 'purchase_success', 'like_item'],
      dtype=object)

In [8]:
raw_data['category1_name'].unique()

array(['의류', '잡화', '가구/인테리어', '디지털/가전', '생활/건강', '식품', '성년의날', '스포츠/레저',
       '키즈', '컬처', '화장품/미용', '반려동물', '홈_OLD', '패션잡화_OLD', '여성패션_OLD',
       '라이프스타일_OLD', '뷰티_OLD'], dtype=object)

In [22]:
suc_df = raw_data[raw_data['event_name'] == 'purchase_success']

In [28]:
col_list = ['의류', '잡화', '가구/인테리어', '디지털/가전', '생활/건강', '식품', '성년의날', '스포츠/레저',
       '키즈', '컬처', '화장품/미용', '반려동물']
for i in col_list:
    avg_std = suc_df[suc_df['category1_name'] == i]
    print(f"{i} : {avg_std['price'].mean()}")
    print(f"{i} : {avg_std['price'].std()}")
    print('-' * 10)


의류 : 68901.74518211052
의류 : 47736.38380882577
----------
잡화 : 66989.297224891
잡화 : 71552.95251065829
----------
가구/인테리어 : 48420.62858750703
가구/인테리어 : 92756.37712311231
----------
디지털/가전 : 67902.64635887672
디지털/가전 : 147050.24607664515
----------
생활/건강 : 21833.75621272366
생활/건강 : 28441.860153150923
----------
식품 : 24389.168536671783
식품 : 23225.66346350335
----------
성년의날 : 60574.91821155943
성년의날 : 13682.678907831954
----------
스포츠/레저 : 46826.033891850726
스포츠/레저 : 63937.405363566584
----------
키즈 : 54079.76470588235
키즈 : 25918.091783304546
----------
컬처 : 15995.54243261263
컬처 : 17622.820213657444
----------
화장품/미용 : 22758.647168895972
화장품/미용 : 21667.97309165789
----------
반려동물 : 36115.82733812949
반려동물 : 35360.73354975525
----------


In [33]:
# pivot_table을 사용하여 index(세로)에는 brand_name 넣어주고,
# Columns(가로)에는 event_name 넣어줍니다
# 여기서 fill_value에 0을 넣어줘야, 데이터가 없을 경우 NaN이 아닌 0이 나옵니다
# 그리고 aggfunc에 len 를 넣어주면 click_item, add_to_cart, like_item, purchase_success 누적 개수가 나옵니다
table_by_brand = pd.pivot_table(suc_df,
                                index='category1_name',
                                columns='event_name',
                                values='price',
                                fill_value=0,
                                aggfunc='sum')

# add_to_cart, click_item, like_item, purchase_success 총 인원 수를 더해서 total이라는 새로운 컬럼을 추가합니다
# table_by_brand['total'] = table_by_brand['add_to_cart'] + table_by_brand['click_item'] + table_by_brand['like_item'] + table_by_brand['purchase_success']

# 위에서 추가한 total이라는 컬럼으로 completed(결제 완료) 컬럼을 나누면 결제 확률, 전환율(Conversion)이 나옵니다
table_by_brand['conversion'] = table_by_brand['purchase_success'] / table_by_brand['purchase_success'].sum()

# 결과를 확인합니다
table_by_brand.sort_values(by='conversion', ascending=False)

event_name,purchase_success,conversion
category1_name,Unnamed: 1_level_1,Unnamed: 2_level_1
의류,15105674105,0.577359
잡화,7251457446,0.27716
디지털/가전,1141307680,0.043622
가구/인테리어,860434570,0.032887
생활/건강,702872280,0.026865
화장품/미용,328384520,0.012551
스포츠/레저,245930330,0.0094
식품,206503090,0.007893
컬처,167937200,0.006419
성년의날,55547200,0.002123


In [54]:
actions = col_list  # Actions
mean = {'의류': 68901, '잡화': 66989, '가구/인테리어': 48420, '디지털/가전': 67902, '생활/건강': 21833, '식품': 24389, '성년의날': 60574,
        '스포츠/레저': 46826, '키즈': 54079, '컬처': 15995, '화장품/미용': 22758, '반려동물': 36115} # Mean rewards (actual values)
sd = {'의류': 47736, '잡화': 71552, '가구/인테리어': 92756, '디지털/가전': 147050, '생활/건강': 28441, '식품': 23225, '성년의날': 13682,
        '스포츠/레저': 63937, '키즈': 25918, '컬처': 17622, '화장품/미용': 21667, '반려동물': 35360}  # Standard deviation of rewards

first_rewards = {'의류': 0.577359, '잡화': 0.277160, '가구/인테리어': 0.032887, '디지털/가전': 0.043622, '생활/건강': 0.026865, '식품': 0.007893, '성년의날': 0.002123,
        '스포츠/레저': 0.009400, '키즈': 0.001757, '컬처': 0.006419, '화장품/미용': 0.012551, '반려동물': 0.001919}  # Rewards = R_1
first_counts = {'의류': 0, '잡화': 1, '가구/인테리어': 0, '디지털/가전': 0, '생활/건강': 0, '식품': 0, '성년의날': 0,
        '스포츠/레저': 0, '키즈': 0, '컬처': 0, '화장품/미용': 0, '반려동물': 0}  # Counts = 1
first_values = {'의류': 0.577359, '잡화': 0.277160, '가구/인테리어': 0.032887, '디지털/가전': 0.043622, '생활/건강': 0.026865, '식품': 0.007893, '성년의날': 0.002123,
        '스포츠/레저': 0.009400, '키즈': 0.001757, '컬처': 0.006419, '화장품/미용': 0.012551, '반려동물': 0.001919}  # Values = Q_1 (a)
first = {"first_rewards": first_rewards, "first_counts": first_counts, "first_values": first_values}

first


{'first_rewards': {'의류': 0.577359,
  '잡화': 0.27716,
  '가구/인테리어': 0.032887,
  '디지털/가전': 0.043622,
  '생활/건강': 0.026865,
  '식품': 0.007893,
  '성년의날': 0.002123,
  '스포츠/레저': 0.0094,
  '키즈': 0.001757,
  '컬처': 0.006419,
  '화장품/미용': 0.012551,
  '반려동물': 0.001919},
 'first_counts': {'의류': 0,
  '잡화': 1,
  '가구/인테리어': 0,
  '디지털/가전': 0,
  '생활/건강': 0,
  '식품': 0,
  '성년의날': 0,
  '스포츠/레저': 0,
  '키즈': 0,
  '컬처': 0,
  '화장품/미용': 0,
  '반려동물': 0},
 'first_values': {'의류': 0.577359,
  '잡화': 0.27716,
  '가구/인테리어': 0.032887,
  '디지털/가전': 0.043622,
  '생활/건강': 0.026865,
  '식품': 0.007893,
  '성년의날': 0.002123,
  '스포츠/레저': 0.0094,
  '키즈': 0.001757,
  '컬처': 0.006419,
  '화장품/미용': 0.012551,
  '반려동물': 0.001919}}

In [46]:
def greedy(n_iter, actions, first, seed):
    np.random.seed(seed)
    rewards = first['first_rewards']
    counts = first['first_counts']
    values = first['first_values']
    
    for t in range(2, n_iter + 1):
        best = actions[np.argmax(values)]  # 행동 = A_t
        R = np.random.normal(loc=mean[best], scale=sd[best])  # R_t
        rewards[best] += R
        counts[best] += 1
        values[best] = rewards[best] / counts[best]  # 가치 계산
        
        if t % 10 == 0:
            print(f"{t}번째")
            print(counts)
    
    return {'counts': counts, 'rewards': rewards, 'values': values}

greedy(100, actions, first, seed=5)

10번째
{'의류': 9, '잡화': 1, '가구/인테리어': 0, '디지털/가전': 0, '생활/건강': 0, '식품': 0, '성년의날': 0, '스포츠/레저': 0, '키즈': 0, '컬쳐': 0, '화장품/미용': 0, '반려동물': 0}
20번째
{'의류': 19, '잡화': 1, '가구/인테리어': 0, '디지털/가전': 0, '생활/건강': 0, '식품': 0, '성년의날': 0, '스포츠/레저': 0, '키즈': 0, '컬쳐': 0, '화장품/미용': 0, '반려동물': 0}
30번째
{'의류': 29, '잡화': 1, '가구/인테리어': 0, '디지털/가전': 0, '생활/건강': 0, '식품': 0, '성년의날': 0, '스포츠/레저': 0, '키즈': 0, '컬쳐': 0, '화장품/미용': 0, '반려동물': 0}
40번째
{'의류': 39, '잡화': 1, '가구/인테리어': 0, '디지털/가전': 0, '생활/건강': 0, '식품': 0, '성년의날': 0, '스포츠/레저': 0, '키즈': 0, '컬쳐': 0, '화장품/미용': 0, '반려동물': 0}
50번째
{'의류': 49, '잡화': 1, '가구/인테리어': 0, '디지털/가전': 0, '생활/건강': 0, '식품': 0, '성년의날': 0, '스포츠/레저': 0, '키즈': 0, '컬쳐': 0, '화장품/미용': 0, '반려동물': 0}
60번째
{'의류': 59, '잡화': 1, '가구/인테리어': 0, '디지털/가전': 0, '생활/건강': 0, '식품': 0, '성년의날': 0, '스포츠/레저': 0, '키즈': 0, '컬쳐': 0, '화장품/미용': 0, '반려동물': 0}
70번째
{'의류': 69, '잡화': 1, '가구/인테리어': 0, '디지털/가전': 0, '생활/건강': 0, '식품': 0, '성년의날': 0, '스포츠/레저': 0, '키즈': 0, '컬쳐': 0, '화장품/미용': 0, '반려동물': 0}
80번째
{'의류': 79, '잡화': 1, '가구

{'counts': {'의류': 99,
  '잡화': 1,
  '가구/인테리어': 0,
  '디지털/가전': 0,
  '생활/건강': 0,
  '식품': 0,
  '성년의날': 0,
  '스포츠/레저': 0,
  '키즈': 0,
  '컬쳐': 0,
  '화장품/미용': 0,
  '반려동물': 0},
 'rewards': {'의류': 7234315.373460824,
  '잡화': 0.27716,
  '가구/인테리어': 0.032887,
  '디지털/가전': 0.043622,
  '생활/건강': 0.026865,
  '식품': 0.007893,
  '성년의날': 0.002123,
  '스포츠/레저': 0.0094,
  '키즈': 0.001757,
  '컬쳐': 0.006419,
  '화장품/미용': 0.012551,
  '반려동물': 0.001919},
 'values': {'의류': 73073.89266122044,
  '잡화': 0.27716,
  '가구/인테리어': 0.032887,
  '디지털/가전': 0.043622,
  '생활/건강': 0.026865,
  '식품': 0.007893,
  '성년의날': 0.002123,
  '스포츠/레저': 0.0094,
  '키즈': 0.001757,
  '컬쳐': 0.006419,
  '화장품/미용': 0.012551,
  '반려동물': 0.001919}}

In [49]:
def eGreedy(n_iter, actions, first, eps=0.3, seed=None):
    np.random.seed(seed)
    rewards = first['first_rewards']
    counts = first['first_counts']
    values = first['first_values']

    for t in range(2, n_iter + 1):
        if eps < np.random.uniform():
            best = actions[np.argmax(values)]
        else:
            best = np.random.choice(actions, 1)
            
        R = np.random.normal(loc=mean[best], scale=sd[best])
        rewards[best] += R
        counts[best] += 1
        values[best] = rewards[best] / counts[best]
        
        if t % 10 == 0:
            print(f"{t}번째")
            print(counts)
    
    return {'counts': counts, 'rewards': rewards, 'values': values}

eGreedy(100, actions, first, seed=5)

TypeError: unhashable type: 'numpy.ndarray'

In [55]:
def eGreedy(n_iter, actions, first, eps=0.3, seed=None):
    np.random.seed(seed)
    rewards = first['first_rewards']
    counts = first['first_counts']
    values = first['first_values']

    for t in range(2, n_iter + 1):
        if eps < np.random.uniform():
            best = actions[np.argmax(values)]
        else:
            best = np.random.choice(actions, 1)[0]
            
        R = np.random.normal(loc=mean[best], scale=sd[best])
        rewards[best] += R
        counts[best] += 1
        values[best] = rewards[best] / counts[best]
        
        if t % 10 == 0:
            print(f"{t}번째")
            print(counts)
    
    return {'counts': counts, 'rewards': rewards, 'values': values}


eGreedy(100, actions, first, seed=5)


10번째
{'의류': 6, '잡화': 1, '가구/인테리어': 0, '디지털/가전': 0, '생활/건강': 0, '식품': 0, '성년의날': 2, '스포츠/레저': 0, '키즈': 0, '컬처': 0, '화장품/미용': 0, '반려동물': 1}
20번째
{'의류': 13, '잡화': 1, '가구/인테리어': 0, '디지털/가전': 1, '생활/건강': 0, '식품': 1, '성년의날': 3, '스포츠/레저': 0, '키즈': 0, '컬처': 0, '화장품/미용': 0, '반려동물': 1}
30번째
{'의류': 20, '잡화': 1, '가구/인테리어': 1, '디지털/가전': 1, '생활/건강': 0, '식품': 1, '성년의날': 3, '스포츠/레저': 1, '키즈': 1, '컬처': 0, '화장품/미용': 0, '반려동물': 1}
40번째
{'의류': 28, '잡화': 1, '가구/인테리어': 1, '디지털/가전': 1, '생활/건강': 0, '식품': 1, '성년의날': 3, '스포츠/레저': 1, '키즈': 1, '컬처': 2, '화장품/미용': 0, '반려동물': 1}
50번째
{'의류': 35, '잡화': 2, '가구/인테리어': 1, '디지털/가전': 1, '생활/건강': 0, '식품': 1, '성년의날': 3, '스포츠/레저': 1, '키즈': 3, '컬처': 2, '화장품/미용': 0, '반려동물': 1}
60번째
{'의류': 42, '잡화': 2, '가구/인테리어': 3, '디지털/가전': 1, '생활/건강': 0, '식품': 2, '성년의날': 3, '스포츠/레저': 1, '키즈': 3, '컬처': 2, '화장품/미용': 0, '반려동물': 1}
70번째
{'의류': 51, '잡화': 2, '가구/인테리어': 3, '디지털/가전': 1, '생활/건강': 0, '식품': 2, '성년의날': 3, '스포츠/레저': 1, '키즈': 3, '컬처': 2, '화장품/미용': 1, '반려동물': 1}
80번째
{'의류': 59, '잡화': 2, '가구

{'counts': {'의류': 73,
  '잡화': 3,
  '가구/인테리어': 4,
  '디지털/가전': 1,
  '생활/건강': 2,
  '식품': 2,
  '성년의날': 3,
  '스포츠/레저': 1,
  '키즈': 5,
  '컬처': 3,
  '화장품/미용': 2,
  '반려동물': 1},
 'rewards': {'의류': 5284528.100590315,
  '잡화': 312845.89228966116,
  '가구/인테리어': 251319.1601529097,
  '디지털/가전': 196972.64416646864,
  '생활/건강': 39942.82285813012,
  '식품': 82061.41757219384,
  '성년의날': 168033.72564409178,
  '스포츠/레저': 44113.842877096075,
  '키즈': 286483.94288043294,
  '컬처': 62396.366678748,
  '화장품/미용': 90607.86924709514,
  '반려동물': 35031.703560589965},
 'values': {'의류': 72390.79589849747,
  '잡화': 104281.96409655373,
  '가구/인테리어': 62829.790038227424,
  '디지털/가전': 196972.64416646864,
  '생활/건강': 19971.41142906506,
  '식품': 41030.70878609692,
  '성년의날': 56011.24188136393,
  '스포츠/레저': 44113.842877096075,
  '키즈': 57296.78857608659,
  '컬처': 20798.788892916,
  '화장품/미용': 45303.93462354757,
  '반려동물': 35031.703560589965}}

In [56]:
def UCB(n_iter, actions, first, control=2, seed=None):
    np.random.seed(seed)
    
    rewards = first['first_rewards']
    counts = first['first_counts']
    values = first['first_values']
    
    for t in range(2, n_iter + 1):
        # UCB algorithm for action selection
        best = max(actions, key=lambda a: values[a] + control * np.sqrt(np.log(t) / counts[a]))
        
        # Reward update
        R = np.random.normal(loc=mean[best], scale=sd[best])
        rewards[best] += R
        
        counts[best] += 1
        values[best] = rewards[best] / counts[best]
        
        if t % 10 == 0:
            print(f"{t}번째")
            print(counts)
        
    return {'counts': counts, 'rewards': rewards, 'values': values}

UCB(100, actions, first, seed=5)


10번째
{'의류': 73, '잡화': 3, '가구/인테리어': 4, '디지털/가전': 10, '생활/건강': 2, '식품': 2, '성년의날': 3, '스포츠/레저': 1, '키즈': 5, '컬처': 3, '화장품/미용': 2, '반려동물': 1}
20번째
{'의류': 73, '잡화': 6, '가구/인테리어': 4, '디지털/가전': 17, '생활/건강': 2, '식품': 2, '성년의날': 3, '스포츠/레저': 1, '키즈': 5, '컬처': 3, '화장품/미용': 2, '반려동물': 1}
30번째
{'의류': 79, '잡화': 6, '가구/인테리어': 4, '디지털/가전': 21, '생활/건강': 2, '식품': 2, '성년의날': 3, '스포츠/레저': 1, '키즈': 5, '컬처': 3, '화장품/미용': 2, '반려동물': 1}
40번째
{'의류': 89, '잡화': 6, '가구/인테리어': 4, '디지털/가전': 21, '생활/건강': 2, '식품': 2, '성년의날': 3, '스포츠/레저': 1, '키즈': 5, '컬처': 3, '화장품/미용': 2, '반려동물': 1}
50번째
{'의류': 99, '잡화': 6, '가구/인테리어': 4, '디지털/가전': 21, '생활/건강': 2, '식품': 2, '성년의날': 3, '스포츠/레저': 1, '키즈': 5, '컬처': 3, '화장품/미용': 2, '반려동물': 1}
60번째
{'의류': 109, '잡화': 6, '가구/인테리어': 4, '디지털/가전': 21, '생활/건강': 2, '식품': 2, '성년의날': 3, '스포츠/레저': 1, '키즈': 5, '컬처': 3, '화장품/미용': 2, '반려동물': 1}
70번째
{'의류': 119, '잡화': 6, '가구/인테리어': 4, '디지털/가전': 21, '생활/건강': 2, '식품': 2, '성년의날': 3, '스포츠/레저': 1, '키즈': 5, '컬처': 3, '화장품/미용': 2, '반려동물': 1}
80번째
{'의류': 129, '

{'counts': {'의류': 149,
  '잡화': 6,
  '가구/인테리어': 4,
  '디지털/가전': 21,
  '생활/건강': 2,
  '식품': 2,
  '성년의날': 3,
  '스포츠/레저': 1,
  '키즈': 5,
  '컬처': 3,
  '화장품/미용': 2,
  '반려동물': 1},
 'rewards': {'의류': 11017511.095676152,
  '잡화': 423214.2194532541,
  '가구/인테리어': 251319.1601529097,
  '디지털/가전': 1484321.1073728981,
  '생활/건강': 39942.82285813012,
  '식품': 82061.41757219384,
  '성년의날': 168033.72564409178,
  '스포츠/레저': 44113.842877096075,
  '키즈': 286483.94288043294,
  '컬처': 62396.366678748,
  '화장품/미용': 90607.86924709514,
  '반려동물': 35031.703560589965},
 'values': {'의류': 73943.0274877594,
  '잡화': 70535.70324220901,
  '가구/인테리어': 62829.790038227424,
  '디지털/가전': 70681.95749394753,
  '생활/건강': 19971.41142906506,
  '식품': 41030.70878609692,
  '성년의날': 56011.24188136393,
  '스포츠/레저': 44113.842877096075,
  '키즈': 57296.78857608659,
  '컬처': 20798.788892916,
  '화장품/미용': 45303.93462354757,
  '반려동물': 35031.703560589965}}

In [60]:
def Thompson(n_iter, actions, first, probs, seed=None):
    np.random.seed(seed)
    
    n_actions = len(actions)
    rewards = np.where(first['first_rewards'] > 0, 1, 0)
    counts = first['first_counts']
    values = first['first_values']
    
    for t in range(2, n_iter + 1):
        B = np.random.beta(rewards + 1, counts - rewards + 1)
        P = np.bincount(np.argmax(B, axis=1), minlength=n_actions) / B.shape[0]
        tmp = np.round(P * 1, 0)
        
        rewards += np.random.binomial(n_actions, probs)
        counts += tmp
    
    return dict(zip(actions, counts))

np.random.seed(5)
probs = np.array([
    np.sum(np.random.normal(mean['의류'], sd['의류'], size=1000) > 0) / 1000,
    np.sum(np.random.normal(mean['잡화'], sd['잡화'], size=1000) > 0) / 1000,
    np.sum(np.random.normal(mean['가구/인테리어'], sd['가구/인테리어'], size=1000) > 0) / 1000,
    np.sum(np.random.normal(mean['디지털/가전'], sd['디지털/가전'], size=1000) > 0) / 1000,
    np.sum(np.random.normal(mean['생활/건강'], sd['생활/건강'], size=1000) > 0) / 1000,
    np.sum(np.random.normal(mean['식품'], sd['식품'], size=1000) > 0) / 1000,
    np.sum(np.random.normal(mean['성년의날'], sd['성년의날'], size=1000) > 0) / 1000,
    np.sum(np.random.normal(mean['스포츠/레저'], sd['스포츠/레저'], size=1000) > 0) / 1000,
    np.sum(np.random.normal(mean['키즈'], sd['키즈'], size=1000) > 0) / 1000,
    np.sum(np.random.normal(mean['컬처'], sd['컬처'], size=1000) > 0) / 1000,
    np.sum(np.random.normal(mean['화장품/미용'], sd['화장품/미용'], size=1000) > 0) / 1000,
    np.sum(np.random.normal(mean['반려동물'], sd['반려동물'], size=1000) > 0) / 1000
])

probs = dict(zip(actions, probs))
probs
Thompson(100, actions, first, probs, seed=5)


TypeError: '>' not supported between instances of 'dict' and 'int'

In [None]:
['의류', '잡화', '가구/인테리어', '디지털/가전', '생활/건강', '식품', '성년의날', '스포츠/레저',
       '키즈', '컬처', '화장품/미용', '반려동물']