<a href="https://colab.research.google.com/github/anaysingh/RL_Lab_21CSU011/blob/main/thompson_sampling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import random

# Thompson Sampling on a simulated Bernoulli multi-armed bandit.
#
# Each arm keeps a Beta(successes + 1, failures + 1) posterior over its
# success probability. Every round one value is drawn from each posterior,
# the arm with the largest draw is pulled, and the observed 0/1 reward is
# folded back into that arm's counts.

# Number of arms (slot machines)
num_arms = 5

# True reward probabilities for each arm (unknown in practice; used here
# only to simulate rewards and to report which arm is actually best)
true_probs = [0.9, 0.8, 0.7, 0.6, 0.5]

# Per-arm statistics: how often each arm was pulled and how often it paid out
num_pulls = [0] * num_arms
num_successes = [0] * num_arms

# Number of rounds or time steps
num_rounds = 1000


def estimate_probs(successes, pulls):
    """Return the empirical success rate of each arm.

    Args:
        successes: per-arm count of rewarded pulls.
        pulls: per-arm count of total pulls, aligned with ``successes``.

    Returns:
        A list with ``successes[i] / pulls[i]`` for every arm that has been
        pulled at least once, and ``0`` for arms that were never pulled
        (avoids a division by zero).
    """
    return [s / n if n > 0 else 0 for s, n in zip(successes, pulls)]


# The true best arm depends only on true_probs, so compute it once instead
# of once per round.
best_arm = np.argmax(true_probs)

# Run Thompson Sampling
for t in range(num_rounds):
    # Draw one sample per arm from its Beta(successes + 1, failures + 1)
    # posterior; the "+ 1" terms correspond to a uniform Beta(1, 1) prior.
    sampled_means = [
        random.betavariate(s + 1, n - s + 1)
        for s, n in zip(num_successes, num_pulls)
    ]

    # Choose the arm with the highest sampled mean
    chosen_arm = np.argmax(sampled_means)

    # Simulate pulling the chosen arm and observing the reward
    # (1 for success, 0 for failure)
    reward = int(np.random.rand() < true_probs[chosen_arm])

    # Update statistics
    num_pulls[chosen_arm] += 1
    num_successes[chosen_arm] += reward

    # Report the chosen arm, the true best arm, and the current empirical
    # estimate for every arm (arm numbers are printed 1-based).
    estimated_probs = estimate_probs(num_successes, num_pulls)
    print(f"Round {t + 1}: Chose Arm {chosen_arm + 1}, True Best Arm: {best_arm + 1}, Estimated Probabilities: {estimated_probs}")

# Print the final estimated success probabilities for each arm
estimated_probs = estimate_probs(num_successes, num_pulls)
print("Final Estimated Probabilities:")
for i, prob in enumerate(estimated_probs):
    print(f"Arm {i + 1}: {prob}")


Round 1: Chose Arm 1, True Best Arm: 1, Estimated Probabilities: [0.0, 0, 0, 0, 0]
Round 2: Chose Arm 3, True Best Arm: 1, Estimated Probabilities: [0.0, 0, 1.0, 0, 0]
Round 3: Chose Arm 3, True Best Arm: 1, Estimated Probabilities: [0.0, 0, 1.0, 0, 0]
Round 4: Chose Arm 4, True Best Arm: 1, Estimated Probabilities: [0.0, 0, 1.0, 1.0, 0]
Round 5: Chose Arm 4, True Best Arm: 1, Estimated Probabilities: [0.0, 0, 1.0, 0.5, 0]
Round 6: Chose Arm 3, True Best Arm: 1, Estimated Probabilities: [0.0, 0, 1.0, 0.5, 0]
Round 7: Chose Arm 3, True Best Arm: 1, Estimated Probabilities: [0.0, 0, 0.75, 0.5, 0]
Round 8: Chose Arm 5, True Best Arm: 1, Estimated Probabilities: [0.0, 0, 0.75, 0.5, 0.0]
Round 9: Chose Arm 4, True Best Arm: 1, Estimated Probabilities: [0.0, 0, 0.75, 0.6666666666666666, 0.0]
Round 10: Chose Arm 3, True Best Arm: 1, Estimated Probabilities: [0.0, 0, 0.8, 0.6666666666666666, 0.0]
Round 11: Chose Arm 3, True Best Arm: 1, Estimated Probabilities: [0.0, 0, 0.6666666666666666, 0.6