# Thompson Sampling for Slot Machines

In [98]:
import numpy as np
import os
# import torch

[seed](https://dacon.io/codeshare/2363)

In [99]:
class config:
    seed = 1021

In [100]:
def seed_everything(seed: int = 1021):
    random.seed(seed)
    np.random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)

In [101]:
seed_everything(config.seed)

In [102]:
conversionRates = [0.15, 0.04, 0.13, 0.11, 0.05] # 5개 슬롯머신의 승률
N = 2000 # 샘플 수
d = len(conversionRates) 

In [103]:
seed_everything(config.seed)
X = np.zeros((N, d)) # 2000 X 5

for i in range(N):
    for j in range(d):
        # X의 한 행
        if np.random.rand() < conversionRates[j]:
            # conversionRates[0] = 0.15이면 100번 중 0.15보다 작을 샘플은 15개(2000번이면 300개)
            X[i][j] = 1 # win
            
print(X)
print(np.sum([X[i][0] for i in range(N)])) # similar to 300

[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 ...
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 1. 0.]]
288.0


In [104]:
# 승패를 셀 배열
nPosReward = np.zeros(d)
nNegReward = np.zeros(d)

In [105]:
# beta distribution으로 best slot machine 선택 & 승패 update
for i in range(N):
    selected = 0 # 어떤 slot machine 선택했는지
    maxRandom = 0 # 모든 슬롯머신에서 가장 놓은 beta distribution approximation을 얻기 위해

In [109]:
# 톰슨 샘플링의 핵심: beta distribution에서 난수 받아서 전체 slot 머신에서 가장 높은 값 찾기
seed_everything(config.seed)
for j in range(d):
    randomBeta = np.random.beta(nPosReward[j] +1, nNegReward[j] +1)
    if randomBeta > maxRandom:
        maxRandom = randomBeta
        selected = j

In [110]:
maxRandom

0.8330824185165057

In [111]:
randomBeta

0.8330824185165057

In [112]:
# win or lose?
if X[i][selected] == 1:
    nPosReward[selected] += 1
else:
    nNegReward[selected] += 1

In [113]:
# flag the best slot machine
nSelected = nPosReward + nNegReward
for i in range(d):
    print("Machine number "+str(i+1)+" was selected "+str(nSelected[i]) + " times")
print("Conclusion: Best machine is machine number " + str(np.argmax(nSelected) +1))

Machine number 1 was selected 0.0 times
Machine number 2 was selected 0.0 times
Machine number 3 was selected 0.0 times
Machine number 4 was selected 0.0 times
Machine number 5 was selected 1.0 times
Conclusion: Best machine is machine number 5


In [119]:
# total code
seed_everything(config.seed)

conversionRates = [0.15, 0.04, 0.13, 0.11, 0.05]
N = 2000 
d = len(conversionRates) 

nPosReward = np.zeros(d)
nNegReward = np.zeros(d)

for i in range(N):
    selected = 0 
    maxRandom = 0 
    for j in range(d):
        randomBeta = np.random.beta(nPosReward[j] +1, nNegReward[j] +1)
        if randomBeta > maxRandom:
            maxRandom = randomBeta
            selected = j
    
    if X[i][selected] == 1:
        nPosReward[selected] += 1
    else:
        nNegReward[selected] += 1
        
nSelected = nPosReward + nNegReward

for i in range(d):
    print("Machine number "+str(i+1)+" was selected "+str(nSelected[i]) + " times")
print("Conclusion: Best machine is machine number " + str(np.argmax(nSelected) +1))

Machine number 1 was selected 524.0 times
Machine number 2 was selected 54.0 times
Machine number 3 was selected 1062.0 times
Machine number 4 was selected 281.0 times
Machine number 5 was selected 79.0 times
Conclusion: Best machine is machine number 3


In [120]:
# Models comparison

import numpy as np
import pandas as pd

N = [200, 1000, 5000] # number of samples
D = 20 # slot machine number
convRanges = [(0., 0.1), (0., 0.3), (0., 0.5)] # conversionRate range

results = list()
for n in N: # variations of sample numbers
    for ranges in convRanges: # variations of conversionRate range
        results.append([])
        for d  in range(3, D + 1): # variations of slot machine number
            p1 = 0
            p2 = 0

            for rounds in range(1000): # 1000 tests
                
                conversionRates = list() # initialize conversionRates
                for i in range(d):
                    conversionRates.append(np.random.uniform(low = ranges[0], high = ranges[1]))
                    
                X = np.zeros((n,d))
                for i in range(n):
                    for j in range(d):
                        if np.random.rand() < conversionRates[j]:
                            X[i][j] = 1
                
                nPosReward = np.zeros(d)
                nNegReward = np.zeros(d)
                
                for i in range(n):
                    selected = 0
                    maxRandom = 0
                    
                    for j in range(d):
                        randomBeta = np.random.beta(nPosReward[j] + 1, nNegReward[j] + 1)
                        if randomBeta > maxRandom:
                            maxRandom = randomBeta
                            selected = j
                        
                    if X[i][selected] == 1:
                        nPosReward[selected] += 1
                    else:
                        nNegReward[selected] += 1
                
                nSelected = nPosReward + nNegReward
                
                left = n - max(nSelected)
                
                countStandard = np.zeros(d)
                
                x = int(left / d)
                for i in range(x):
                    for j in range(d):
                        if X[i][j] == 1:
                            countStandard[j] += 1
                
                bestStandard = np.argmax(countStandard)
                bestReal = np.argmax(conversionRates)
                bestTS = np.argmax(nSelected)

                if bestTS == bestReal:
                    p1 += 1
                if bestStandard == bestReal:
                    p2 += 1
                
            print('N = ' + str(n) + ' d = ' + str(d) + ' range = ' + str(ranges) + ' | result Thompson Sampling = ' + str(p1) + ' result Standard solution = ' + str(p2))
            results.append([n, ranges, d, p1, p2])

N = 200 d = 3 range = (0.0, 0.1) | result Thompson Sampling = 648 result Standard solution = 572
N = 200 d = 4 range = (0.0, 0.1) | result Thompson Sampling = 534 result Standard solution = 487
N = 200 d = 5 range = (0.0, 0.1) | result Thompson Sampling = 470 result Standard solution = 425
N = 200 d = 6 range = (0.0, 0.1) | result Thompson Sampling = 411 result Standard solution = 367
N = 200 d = 7 range = (0.0, 0.1) | result Thompson Sampling = 394 result Standard solution = 347
N = 200 d = 8 range = (0.0, 0.1) | result Thompson Sampling = 311 result Standard solution = 315
N = 200 d = 9 range = (0.0, 0.1) | result Thompson Sampling = 283 result Standard solution = 265
N = 200 d = 10 range = (0.0, 0.1) | result Thompson Sampling = 258 result Standard solution = 248
N = 200 d = 11 range = (0.0, 0.1) | result Thompson Sampling = 273 result Standard solution = 231
N = 200 d = 12 range = (0.0, 0.1) | result Thompson Sampling = 250 result Standard solution = 180
N = 200 d = 13 range = (0.0

In [122]:
df = pd.DataFrame(results)
df.to_excel('ch5_results.xlsx', sheet_name = 'Result', index = False)

In [123]:
df.head(3)

Unnamed: 0,0,1,2,3,4
0,,,,,
1,200.0,"(0.0, 0.1)",3.0,648.0,572.0
2,200.0,"(0.0, 0.1)",4.0,534.0,487.0


In [125]:
df.tail(3)

Unnamed: 0,0,1,2,3,4
168,5000.0,"(0.0, 0.5)",18.0,749.0,501.0
169,5000.0,"(0.0, 0.5)",19.0,739.0,468.0
170,5000.0,"(0.0, 0.5)",20.0,703.0,447.0


In [124]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 171 entries, 0 to 170
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   0       162 non-null    float64
 1   1       162 non-null    object 
 2   2       162 non-null    float64
 3   3       162 non-null    float64
 4   4       162 non-null    float64
dtypes: float64(4), object(1)
memory usage: 6.8+ KB


In [None]:
# visualization with matplotlib
import matplotlib.pyplot as plt

