## Chapter 5

### predictor.py

In [1]:
# Thompson Sampling and UCB1 for Slot Machines

# Importing the libraries
import numpy as np

# Experiment configuration: true conversion rate of each slot machine,
# the number of simulated rounds, and the number of machines
conversionRates = [0.15, 0.04, 0.13, 0.11, 0.05]
N = 10000
d = len(conversionRates)

# Simulated outcomes: X[i][j] is 1 when machine j would pay out in round i
# and 0 otherwise. One uniform draw per cell is compared with that machine's
# conversion rate; the (N, d) array is filled row by row, i.e. in the same
# round-then-machine order as drawing one value per cell in nested loops.
X = (np.random.rand(N, d) < conversionRates).astype(float)

# Per-machine tallies of observed wins (reward 1) and losses (reward 0)
nPosReward = np.zeros(d)
nNegReward = np.zeros(d)

# Thompson Sampling: in every round draw one sample per machine from its
# Beta(wins + 1, losses + 1) distribution, play the machine whose sample is
# the largest, then record whether that machine paid out in this round.
for i in range(N):
    # A single vectorized call draws the sample of every machine at once.
    # np.argmax returns the first maximum, which is the same choice a
    # left-to-right scan with a strict '>' comparison would make.
    selected = np.argmax(np.random.beta(nPosReward + 1, nNegReward + 1))
    if X[i][selected] == 1:
        nPosReward[selected] += 1
    else:
        nNegReward[selected] += 1

# Report how often Thompson Sampling played each machine and what it earned.
# A machine's play count is its wins plus its losses.
nSelected = nPosReward + nNegReward
for machineIndex in range(d):
    machineNumber = machineIndex + 1
    timesPlayed = nSelected[machineIndex]
    rewardsWon = nPosReward[machineIndex]
    print(f'Machine number {machineNumber!s} was selected {timesPlayed!s} times and got rewards {rewardsWon!s}')

# The most-played machine is reported as the best one
bestMachineNumber = np.argmax(nSelected) + 1
totalRewards = np.sum(nPosReward)
print(f'Conclusion: Best machine is machine number {bestMachineNumber!s} and total rewards is {totalRewards!s}')

# Visual separator before the next algorithm's report
print('-------------------------------')

# UCB1 on the same simulated outcomes, for comparison with Thompson Sampling.
# The per-machine tallies are sized from d so they always match the number of
# machines (a fixed size breaks as soon as d grows beyond it).
sum_rewards_ucb = [0] * d
numbers_of_selected_ucb = [0] * d

for n in range(N):
    # Numerator of the exploration bonus, 2 * ln(n); max(n, 1) avoids log(0)
    # in the very first round.
    exploration = 2 * np.log(max(n, 1))

    # Upper confidence bound of every machine: observed mean reward plus the
    # exploration bonus. max(count, 1) avoids dividing by zero for machines
    # that have not been played yet.
    ucb_values = []
    for i in range(d):
        plays = max(numbers_of_selected_ucb[i], 1)
        ucb_values.append(sum_rewards_ucb[i] / plays + np.sqrt(exploration / plays))

    # np.argmax returns the first maximum, so ties go to the lowest-numbered
    # machine.
    strategy_us = int(np.argmax(ucb_values))

    # Play the chosen machine and record its outcome for this round
    sum_rewards_ucb[strategy_us] += X[n, strategy_us]
    numbers_of_selected_ucb[strategy_us] += 1

# Report how often UCB1 played each machine and what it earned
for machineIndex in range(d):
    machineNumber = machineIndex + 1
    timesPlayed = numbers_of_selected_ucb[machineIndex]
    rewardsWon = sum_rewards_ucb[machineIndex]
    print(f'Machine number {machineNumber!s} was selected {timesPlayed!s} times and got rewards {rewardsWon!s}')

# The most-played machine is reported as the best one
bestMachineNumber = np.argmax(numbers_of_selected_ucb) + 1
totalRewards = np.sum(sum_rewards_ucb)
print(f'Conclusion: Best machine is machine number {bestMachineNumber!s} and total rewards is {totalRewards!s}')


Machine number 1 was selected 8608.0 times and got rewards 1293.0
Machine number 2 was selected 64.0 times and got rewards 1.0
Machine number 3 was selected 1075.0 times and got rewards 125.0
Machine number 4 was selected 144.0 times and got rewards 9.0
Machine number 5 was selected 109.0 times and got rewards 5.0
Conclusion: Best machine is machine number 1 and total rewards is 1433.0
-------------------------------
Machine number 1 was selected 4138 times and got rewards 604.0
Machine number 2 was selected 652 times and got rewards 28.0
Machine number 3 was selected 2689 times and got rewards 344.0
Machine number 4 was selected 1917 times and got rewards 216.0
Machine number 5 was selected 604 times and got rewards 22.0
Conclusion: Best machine is machine number 1 and total rewards is 1214.0


### comparison.py

In [1]:
# Models comparison

import numpy as np
import pandas as pd

# Experiment grid: sample sizes, maximum number of machines, and the ranges
# from which the true conversion rates are drawn
N = [200, 1000, 5000]
D = 20
convRanges = [(0., 0.1), (0., 0.3), (0., 0.5)]


def _thompson_selection_counts(X):
    """Run Thompson Sampling over the outcome matrix X.

    X has one row per round and one column per machine; X[i][j] == 1 means
    machine j pays out in round i. Returns an array holding how many times
    each machine was played.
    """
    n, d = X.shape
    nPosReward = np.zeros(d)
    nNegReward = np.zeros(d)

    for i in range(n):
        # One Beta(wins + 1, losses + 1) sample per machine; np.argmax picks
        # the first maximum, like a left-to-right strict '>' scan.
        selected = np.argmax(np.random.beta(nPosReward + 1, nNegReward + 1))
        if X[i][selected] == 1:
            nPosReward[selected] += 1
        else:
            nNegReward[selected] += 1

    return nPosReward + nNegReward


def _simulate_round(n, d, ranges):
    """Simulate one experiment and report which strategies found the best machine.

    Draws d conversion rates uniformly from ranges, simulates n rounds of
    outcomes, and returns a pair of booleans
    (Thompson Sampling was right, standard solution was right).
    """
    # Random draws happen in the order: rates, outcome matrix, Beta samples
    conversionRates = np.random.uniform(low=ranges[0], high=ranges[1], size=d)
    X = (np.random.rand(n, d) < conversionRates).astype(float)

    nSelected = _thompson_selection_counts(X)

    # The standard solution gets the rounds Thompson Sampling did not spend
    # on its favourite machine, split evenly between the d machines, and
    # counts the wins of every machine over that many leading rounds.
    left = n - max(nSelected)
    x = int(left / d)
    countStandard = X[:x].sum(axis=0)

    bestReal = np.argmax(conversionRates)
    bestTS = np.argmax(nSelected)
    bestStandard = np.argmax(countStandard)
    return bestTS == bestReal, bestStandard == bestReal


def run_comparison(sampleSizes, maxMachines, rateRanges, rounds=1000):
    """Compare Thompson Sampling with the standard solution over a parameter grid.

    For every sample size, every conversion-rate range and every machine count
    from 3 to maxMachines, runs `rounds` simulated experiments and counts how
    often each strategy identified the machine with the highest true rate.
    Prints one summary line per combination.

    Returns a list of rows [n, ranges, d, thompson_hits, standard_hits]; an
    empty row precedes each (sample size, range) group.
    """
    results = list()
    for n in sampleSizes:
        for ranges in rateRanges:
            # Empty row before every group; presumably intended as a blank
            # separator line in the exported sheet.
            results.append([])
            for d in range(3, maxMachines + 1):
                p1 = 0
                p2 = 0

                for _ in range(rounds):
                    tsCorrect, standardCorrect = _simulate_round(n, d, ranges)
                    if tsCorrect:
                        p1 += 1
                    if standardCorrect:
                        p2 += 1

                print(f'N = {n!s} d = {d!s} range = {ranges!s} | result Thompson Sampling = {p1!s} result Standard solution = {p2!s}')
                results.append([n, ranges, d, p1, p2])
    return results


# Only run the (long) experiment when executed as a script or notebook cell
if __name__ == '__main__':
    results = run_comparison(N, D, convRanges)
    df = pd.DataFrame(results)
    df.to_excel('results.xlsx', sheet_name = 'Result', index = False)

N = 200 d = 3 range = (0.0, 0.1) | result Thompson Sampling = 650 result Standard solution = 581
N = 200 d = 4 range = (0.0, 0.1) | result Thompson Sampling = 506 result Standard solution = 463
N = 200 d = 5 range = (0.0, 0.1) | result Thompson Sampling = 482 result Standard solution = 429
N = 200 d = 6 range = (0.0, 0.1) | result Thompson Sampling = 405 result Standard solution = 393
N = 200 d = 7 range = (0.0, 0.1) | result Thompson Sampling = 361 result Standard solution = 315
N = 200 d = 8 range = (0.0, 0.1) | result Thompson Sampling = 340 result Standard solution = 294
N = 200 d = 9 range = (0.0, 0.1) | result Thompson Sampling = 300 result Standard solution = 274
N = 200 d = 10 range = (0.0, 0.1) | result Thompson Sampling = 280 result Standard solution = 232
N = 200 d = 11 range = (0.0, 0.1) | result Thompson Sampling = 258 result Standard solution = 224
N = 200 d = 12 range = (0.0, 0.1) | result Thompson Sampling = 223 result Standard solution = 231
N = 200 d = 13 range = (0.0

N = 1000 d = 15 range = (0.0, 0.3) | result Thompson Sampling = 479 result Standard solution = 352
N = 1000 d = 16 range = (0.0, 0.3) | result Thompson Sampling = 459 result Standard solution = 324
N = 1000 d = 17 range = (0.0, 0.3) | result Thompson Sampling = 431 result Standard solution = 319
N = 1000 d = 18 range = (0.0, 0.3) | result Thompson Sampling = 404 result Standard solution = 299
N = 1000 d = 19 range = (0.0, 0.3) | result Thompson Sampling = 423 result Standard solution = 284
N = 1000 d = 20 range = (0.0, 0.3) | result Thompson Sampling = 402 result Standard solution = 285
N = 1000 d = 3 range = (0.0, 0.5) | result Thompson Sampling = 903 result Standard solution = 792
N = 1000 d = 4 range = (0.0, 0.5) | result Thompson Sampling = 873 result Standard solution = 724
N = 1000 d = 5 range = (0.0, 0.5) | result Thompson Sampling = 818 result Standard solution = 688
N = 1000 d = 6 range = (0.0, 0.5) | result Thompson Sampling = 806 result Standard solution = 648
N = 1000 d = 7