# Multi-Armed Bandit Model in Talyn

This notebook demonstrates how to model and simulate multi-armed bandit environments in Talyn, including agent strategies and regret tracking.


## 1. Define Reward Distributions for Each Arm


In [None]:
import numpy as np
# Success probability of each Bernoulli arm; list index i is arm i.
true_means = [0.2, 0.5, 0.8]
# Derived from true_means so the arm count cannot drift out of sync with it.
n_arms = len(true_means)


def pull(arm):
    """Draw a single Bernoulli reward from one arm.

    Args:
        arm: Index into ``true_means`` selecting the arm to pull.

    Returns:
        1 with probability ``true_means[arm]``, otherwise 0.
    """
    return np.random.binomial(1, true_means[arm])


# Pull every arm once as a quick sanity check of the environment.
print('Sample rewards:', [pull(a) for a in range(n_arms)])


## 2. Simulate Agent Strategies (ε-Greedy, Thompson Sampling)


In [None]:
def epsilon_greedy(eps=0.1, steps=200):
    """Play the bandit with an ε-greedy policy.

    On each step the agent explores (picks an arm via
    ``np.random.choice(n_arms)``) with probability ``eps``; otherwise it
    exploits the arm whose running mean reward estimate is currently largest.

    Args:
        eps: Probability of exploring on any given step.
        steps: Number of pulls to simulate.

    Returns:
        A tuple ``(rewards, values)``: the list of rewards in the order they
        were received, and the array of final per-arm reward estimates.
    """
    pull_counts = np.zeros(n_arms)
    estimates = np.zeros(n_arms)
    history = []
    for _ in range(steps):
        # The exploration coin is flipped first, then the arm is chosen.
        explore = np.random.rand() < eps
        arm = np.random.choice(n_arms) if explore else np.argmax(estimates)
        outcome = pull(arm)
        pull_counts[arm] += 1
        # Incremental mean: shift the estimate toward the new reward by 1/n.
        estimates[arm] += (outcome - estimates[arm]) / pull_counts[arm]
        history.append(outcome)
    return history, estimates


# Run once with the defaults; `rewards` is reused by the regret plot below.
rewards, values = epsilon_greedy()
print('Final estimated values:', values)


In [None]:
def thompson_sampling(steps=200):
    """Play the bandit with Beta-Bernoulli Thompson sampling.

    Every arm starts with Beta parameters (1, 1). On each step one value is
    sampled per arm from its current Beta distribution, the arm with the
    largest sample is pulled, and that arm's parameters are updated with the
    observed reward.

    Args:
        steps: Number of pulls to simulate.

    Returns:
        A tuple ``(rewards, alpha, beta)``: the list of rewards in order, and
        the final per-arm Beta parameter arrays.
    """
    successes = np.ones(n_arms)  # Beta "alpha" parameter per arm
    failures = np.ones(n_arms)   # Beta "beta" parameter per arm
    history = []
    for _ in range(steps):
        # One posterior draw per arm; play the arm with the best draw.
        arm = np.argmax(np.random.beta(successes, failures))
        outcome = pull(arm)
        successes[arm] += outcome
        failures[arm] += 1 - outcome
        history.append(outcome)
    return history, successes, failures


# Run once with the defaults; the results feed the regret and posterior plots.
rewards_ts, alpha, beta = thompson_sampling()
print('Final alpha:', alpha)
print('Final beta:', beta)


## 3. Track Regret Over Time


In [None]:
import matplotlib.pyplot as plt

# Regret is measured against the best arm's true mean using the realized
# rewards, so a single step contributes a negative amount when the reward is 1.
optimal = max(true_means)
regret = np.cumsum(optimal - np.asarray(rewards))
regret_ts = np.cumsum(optimal - np.asarray(rewards_ts))

# Plot order is kept (ε-greedy first) so line colours and legend order match.
plt.plot(regret, label='ε-Greedy')
plt.plot(regret_ts, label='Thompson Sampling')
plt.title('Cumulative Regret', fontsize=14)
plt.xlabel('Step', fontsize=12)
plt.ylabel('Regret', fontsize=12)
plt.legend()
plt.show()


## 4. Visualize Posterior Belief Over Arms (Thompson Sampling)


In [None]:
from math import lgamma

# Plot the Beta(alpha, beta) posterior density of each arm's mean reward.
# Plotting raw random draws against x would pair unrelated values and give a
# jagged noise line, not a density curve.
x = np.linspace(0, 1, 100)
# Evaluate just inside (0, 1) so the logarithms stay finite at the endpoints.
x_safe = np.clip(x, 1e-12, 1 - 1e-12)
for i in range(n_arms):
    a, b = alpha[i], beta[i]
    # Log of the Beta function B(a, b); log space avoids underflow when the
    # parameters are large.
    log_norm = lgamma(a) + lgamma(b) - lgamma(a + b)
    log_pdf = (a - 1) * np.log(x_safe) + (b - 1) * np.log1p(-x_safe) - log_norm
    plt.plot(x, np.exp(log_pdf), label=f'Arm {i}')
plt.title('Posterior Density for Each Arm', fontsize=14)
plt.xlabel('Mean Reward', fontsize=12)
plt.ylabel('Density', fontsize=12)
plt.legend()
plt.show()


---

This notebook demonstrated bandit models in Talyn. Next, we'll explore causal inference.
