In [298]:
from math import sqrt, log
import pandas as pd

In [299]:
# Pull history of the bandit experiment: one row per pull, in play order.
data = pd.DataFrame(
    {
        # Pull number (1 through 50)
        "Pull": list(range(1, 51)),
        # Which machine was played
        "Machine": [
            "A", "B", "C", "A", "B", "C", "B", "B", "A", "C",  # pulls 1-10
            "B", "A", "C", "B", "A", "A", "C", "A", "B", "C",  # pulls 11-20
            "A", "B", "C", "A", "B", "C", "A", "A", "A", "A",  # pulls 21-30
            "A", "A", "A", "A", "A", "A", "A", "A", "B", "A",  # pulls 31-40
            "C", "C", "C", "C", "B", "C", "A", "B", "A", "C",  # pulls 41-50
        ],
        # Reward received from that machine
        "Reward": [
            0, 0, 0, 0, 1, 0, 0, 0, 0, 0,  # pulls 1-10
            0, 0, 0, 0, 1, 0, 0, 0, 0, 0,  # pulls 11-20
            0, 0, 0, 0, 0, 0, 4, 0, 1, 0,  # pulls 21-30
            1, 1, 0, 0, 0, 0, 0, 0, 0, 0,  # pulls 31-40
            2, 0, 0, 0, 0, 0, 0, 0, 0, 0,  # pulls 41-50
        ],
    }
)

In [300]:
def calculate_ucb(df, machines=None):
    """Build a UCB1 score table for the machines of a multi-armed bandit.

    For every machine ``j`` the score is::

        avg_reward_j + sqrt(2 * ln(total_pulls) / n_j)

    where ``total_pulls`` is the number of rows in ``df`` and ``n_j`` is the
    number of rows played on machine ``j``.

    Parameters
    ----------
    df : pandas.DataFrame
        Pull history, one row per pull. Must contain the columns
        ``"Machine"`` and ``"Reward"``.
    machines : iterable, optional
        Additional machine labels to rank even if they never appear in
        ``df``. A machine with zero pulls gets an average reward of 0.0 and
        an infinite exploration bonus, so it is ranked ahead of every
        machine that has already been played. Defaults to ``None``, meaning
        only the machines present in ``df`` are ranked.

    Returns
    -------
    pandas.DataFrame
        One row per machine with the columns ``Machine``, ``Pulls``,
        ``Avg Reward`` (rounded to 2 decimals), ``Exploration Bonus`` and
        ``UCB Score`` (both rounded to 4 decimals), ordered by descending
        UCB score. Index labels are each machine's position in the sorted
        list of machine labels. When there is nothing to rank, an empty
        frame with these columns is returned.
    """
    columns = ["Machine", "Pulls", "Avg Reward", "Exploration Bonus", "UCB Score"]
    total_pulls = len(df)

    arms = set(df["Machine"].unique())
    if machines is not None:
        arms.update(machines)
    if not arms:
        # No history and no declared machines: return an empty, well-formed
        # table instead of failing when sorting a column that does not exist.
        return pd.DataFrame(columns=columns)

    # ln(total_pulls) does not change inside the loop. It is only used for
    # machines with at least one pull, which implies total_pulls >= 1; the
    # 0.0 fallback merely avoids log(0) when the history is empty.
    log_total = log(total_pulls) if total_pulls > 0 else 0.0

    results = []
    for machine in sorted(arms):
        rewards = df.loc[df["Machine"] == machine, "Reward"]
        n_j = len(rewards)
        if n_j > 0:
            avg_reward = rewards.mean()
            exploration_bonus = sqrt((2 * log_total) / n_j)
        else:
            # Never-played machine: the mean of no rewards is undefined, so
            # use 0.0 and let the infinite bonus force it to the top.
            avg_reward = 0.0
            exploration_bonus = float("inf")
        ucb_score = avg_reward + exploration_bonus
        results.append({
            "Machine": machine,
            "Pulls": n_j,
            "Avg Reward": round(avg_reward, 2),
            "Exploration Bonus": round(exploration_bonus, 4),
            "UCB Score": round(ucb_score, 4),
            # Unrounded score, used only for ranking and dropped below.
            "_raw_ucb": ucb_score,
        })

    # Rank on the unrounded score so rounding cannot create artificial ties,
    # and use a stable sort so genuine ties keep their sorted-label order.
    table = pd.DataFrame(results)
    ranked = table.sort_values(by="_raw_ucb", ascending=False, kind="mergesort")
    return ranked.drop(columns="_raw_ucb")

In [301]:
# Score every machine in the pull history, then print the heading and the
# resulting table on consecutive lines.
ucb_table = calculate_ucb(data)
print("📊 UCB Results Table", ucb_table, sep="\n")

📊 UCB Results Table
  Machine  Pulls  Avg Reward  Exploration Bonus  UCB Score
0       A     24        0.33             0.5710     0.9043
1       B     12        0.08             0.8075     0.8908
2       C     14        0.14             0.7476     0.8904


In [302]:
# The recommendation is the machine in the first row of the UCB table.
best_machine = ucb_table["Machine"].iloc[0]
print(f"\n🎯 Recommended next machine to play: {best_machine}")


🎯 Recommended next machine to play: A


## REFLECTION


After 50 spins, the machine with the highest number of pulls and the highest average reward is Machine A, the candy store machine, which has 24 pulls and an average reward of 0.33. In comparison, Machine B, the party bonus machine, has only 12 pulls and an average reward of 0.08, while Machine C, the fruit smoothie machine, has 14 pulls and an average reward of 0.14.