In [1]:
import pandas as pd
import random
import numpy as np

In [2]:
df = pd.read_csv("Ads_Clicks.csv")

In [3]:
df.head()

Unnamed: 0,Ad 1,Ad 2,Ad 3,Ad 4,Ad 5,Ad 6,Ad 7,Ad 8,Ad 9,Ad 10
0,1,0,0,0,1,0,0,0,1,0
1,0,0,0,0,0,0,0,0,1,0
2,0,0,0,0,0,0,0,0,0,0
3,0,1,0,0,0,0,0,1,0,0
4,0,0,0,0,0,0,0,0,0,0


## Epsilon Greedy

In [26]:
class EpsilonGreedy:
    def __init__(self, epsilon: float) -> None:
        self.values = [0 for _ in range(10)]
        self.iterations = 0
        self.epsilon = epsilon

    def update(self, xs: [int]):
        is_greedy = random.choices([False, True], [self.epsilon, 1 - self.epsilon])[0]

        arm = self.best_arm() if is_greedy else self.random_arm()
        reward = xs[arm]
        
        self.iterations += 1

        self.values[arm] += reward
        
    def best_arm(self) -> int:
        return np.argmax(self.values)

    def random_arm(self) -> int:
        return random.randint(0, 9)

    def get_result(self) -> [int]:
        return self.values

In [27]:
eg = EpsilonGreedy(0.01)

counter = 0
for xs in df.values:
    counter +=1 

    eg.update(xs)
    if counter > 2000:
        break

print(eg.get_result())

[339, 1, 0, 0, 1, 0, 0, 1, 0, 0]


In [28]:
eg = EpsilonGreedy(0.3)

counter = 0
for xs in df.values:
    counter +=1 

    eg.update(xs)
    if counter > 2000:
        break

print(eg.get_result())

[236, 6, 5, 5, 11, 1, 2, 8, 6, 4]


## UCB

In [38]:
class UCB:
    def __init__(self, c: float) -> None:
        self.values = np.zeros(10)
        self.iterations = 0
        self.actions = np.zeros(10)
        self.c = c

    def update(self, xs: [int]):
        arm = self.get_arm()
        reward = xs[arm]

        self.actions[arm] += 1
        self.iterations += 1

        self.values[arm] += reward
        
    def get_arm(self) -> int:
        avg = self.values / self.actions
        arm = np.argmax(self.values + self.c * np.sqrt(np.log(self.iterations + 1) / self.actions))
        return arm

    def get_result(self) -> [int]:
        return self.values

In [42]:
ucb = UCB(1.5)

counter = 0
for xs in df.values:
    counter +=1 

    ucb.update(xs)
    if counter > 2000:
        break

print(ucb.get_result())

[  1.   0.   0.   0. 540.   0.   0.   0.   0.   1.]


  avg = self.values / self.actions
  arm = np.argmax(self.values + self.c * np.sqrt(np.log(self.iterations + 1) / self.actions))
  arm = np.argmax(self.values + self.c * np.sqrt(np.log(self.iterations + 1) / self.actions))
