In [None]:
#| echo: false
import sys
# Diagnostic output: show which interpreter is running this cell and the
# module search path it uses.
print(sys.executable)
print(sys.path)

#| context: setup

import numpy as np
import json

class Arm:
    """A single bandit arm with a hidden mean payout.

    The true mean ``q`` is drawn once from a standard normal distribution
    when the arm is created. Each pull returns a noisy sample around that
    mean (unit standard deviation) and is recorded in the arm's running
    totals.
    """

    def __init__(self, id):
        """Create an arm labelled *id* with zeroed statistics and a fresh mean."""
        self.id = id
        self.pulls = 0
        self.total_reward = 0
        # Hidden true mean of this arm's reward distribution.
        self.q = np.random.normal(loc=0, scale=1)

    def pull(self):
        """Draw one reward from N(q, 1), record it, and return it."""
        payout = np.random.normal(loc=self.q, scale=1)
        self.total_reward += payout
        self.pulls += 1
        return payout

    @property
    def average_reward(self):
        """Mean observed reward so far, or 0 if the arm was never pulled."""
        if self.pulls == 0:
            return 0
        return self.total_reward / self.pulls

class Bandit:
    """A multi-armed bandit with a limited overall budget of pulls.

    Arms are addressed by 1-based id (``1 .. num_arms``). Once ``max_pulls``
    pulls have been made across all arms, further pull requests perform no
    pulls until :meth:`reset` is called.

    Args:
        num_arms: Number of arms to create.
        max_pulls: Total number of pulls allowed across all arms.
            Defaults to 1000.
    """

    def __init__(self, num_arms, max_pulls=1000):
        self.arms = [Arm(i + 1) for i in range(num_arms)]
        self.max_pulls = max_pulls
        self.total_pulls = 0
        self.total_reward = 0

    def pull_arm(self, arm_id, times=1):
        """Pull one arm up to *times* times, limited by the remaining budget.

        Args:
            arm_id: 1-based id of the arm to pull.
            times: Requested number of pulls; silently truncated so that the
                overall total never exceeds ``max_pulls``.

        Returns:
            The sum of the rewards obtained by this call (0 if no pull was
            performed).

        Raises:
            IndexError: If *arm_id* is not in ``1 .. len(self.arms)``.
        """
        # Without this check an id of 0 (or a negative id) would wrap around
        # through Python's negative indexing and silently pull the wrong arm.
        if not 1 <= arm_id <= len(self.arms):
            raise IndexError(
                f"arm_id must be between 1 and {len(self.arms)}, got {arm_id!r}"
            )
        arm = self.arms[arm_id - 1]
        total_reward = 0
        remaining = self.max_pulls - self.total_pulls
        # A non-positive count yields an empty range, i.e. no pulls at all.
        actual_pulls = min(times, remaining)
        for _ in range(actual_pulls):
            reward = arm.pull()
            total_reward += reward
            self.total_pulls += 1
            self.total_reward += reward
        return total_reward

    def reset(self):
        """Re-draw every arm's hidden mean and clear all statistics."""
        for arm in self.arms:
            arm.q = np.random.normal(0, 1)
            arm.pulls = 0
            arm.total_reward = 0
        self.total_pulls = 0
        self.total_reward = 0

    def get_state(self):
        """Return a plain-dict snapshot of the observable state.

        The snapshot holds per-arm id, average reward and pull count plus the
        overall totals; the hidden means ``q`` are deliberately not included.
        """
        return {
            'arms': [
                {'id': arm.id, 'average_reward': arm.average_reward, 'pulls': arm.pulls}
                for arm in self.arms
            ],
            'total_pulls': self.total_pulls,
            'total_reward': self.total_reward,
        }

# Module-level bandit instance (10 arms) that the pull_arm() and
# reset_bandit() wrapper functions below operate on.
bandit = Bandit(10)

def pull_arm(arm_id, times):
    """Pull an arm of the shared bandit and report the outcome as JSON.

    Returns a JSON string with two keys: ``reward`` (sum of rewards from this
    call) and ``state`` (the bandit's state snapshot after the pulls).
    """
    batch_reward = bandit.pull_arm(arm_id, times)
    payload = {'reward': batch_reward, 'state': bandit.get_state()}
    return json.dumps(payload)

def reset_bandit():
    """Reset the shared bandit and return its fresh state as a JSON string."""
    bandit.reset()
    snapshot = bandit.get_state()
    return json.dumps(snapshot)

<!-- Bandit UI: a table of arms, a statistics panel and a reset button.
     The script below looks these elements up by id. -->
<div id="bandit-container">
  <table id="bandit-table">
    <thead>
      <tr>
        <th>Arm</th>
        <th>Average Reward</th>
        <th>Pulls</th>
        <th>Actions</th>
      </tr>
    </thead>
    <!-- Filled by the script with one row per arm. -->
    <tbody id="arms-container">
    </tbody>
  </table>
  <!-- Filled by the script with the overall totals. -->
  <div id="stats-container"></div>
  <button id="reset-button">Reset</button>
</div>
<!-- On-page log that the script's debug() helper appends to. -->
<div id="debug-output"></div>

<script>
document.addEventListener('DOMContentLoaded', (event) => {
  // Append one message line to the on-page debug log (#debug-output).
  // The message is added as a text node followed by a <br>, so the existing
  // log is not re-serialised and re-parsed on every call (as `innerHTML +=`
  // does) and the message text is never interpreted as HTML markup.
  const debug = (message) => {
    const log = document.getElementById('debug-output');
    log.appendChild(document.createTextNode(message));
    log.appendChild(document.createElement('br'));
  };

  // Log that the DOMContentLoaded handler has begun running.
  debug('Script started');

  // Elements the functions below update or attach handlers to.
  const armsContainer = document.getElementById('arms-container');
  const statsContainer = document.getElementById('stats-container');
  const resetButton = document.getElementById('reset-button');

  // Render the overall statistics panel from a bandit state object
  // (reads state.total_pulls and state.total_reward).
  function updateStats(state) {
    const totalPulls = state.total_pulls;
    const totalReward = state.total_reward;
    // 0 / 0 is NaN, which is falsy, so the average falls back to 0 when
    // nothing has been pulled yet.
    const averagePerPull = totalReward / totalPulls || 0;
    statsContainer.innerHTML = `
      <p>Total Pulls: ${totalPulls}</p>
      <p>Total Reward: ${totalReward.toFixed(2)}</p>
      <p>Average Reward per Pull: ${averagePerPull.toFixed(2)}</p>
    `;
  }

  // Rebuild the arms table body from a bandit state object: one row per arm
  // with its label, average reward, pull count and a set of pull buttons.
  function createArmRows(state) {
    // Button label paired with the number of pulls it requests.
    const pullOptions = [
      { label: '1', count: 1 },
      { label: '10', count: 10 },
      { label: '100', count: 100 },
      { label: 'Max', count: 1000 },
    ];

    // Create a <td> with the given text and, when provided, an element id.
    const makeCell = (text, id) => {
      const cell = document.createElement('td');
      if (id !== undefined) {
        cell.id = id;
      }
      cell.textContent = text;
      return cell;
    };

    armsContainer.innerHTML = '';
    for (const arm of state.arms) {
      const labelCell = makeCell(`Arm ${arm.id}`);
      const averageCell = makeCell(arm.average_reward.toFixed(2), `arm-avg-${arm.id}`);
      const countCell = makeCell(arm.pulls, `arm-pulls-${arm.id}`);

      const actionsCell = document.createElement('td');
      for (const { label, count } of pullOptions) {
        const button = document.createElement('button');
        button.textContent = label;
        // On click: request the pulls, then redraw everything from the
        // state carried in the response.
        button.onclick = () => {
          pywebvis.pull_arm(arm.id, count).then(result => {
            const data = JSON.parse(result);
            updateStats(data.state);
            createArmRows(data.state);
            debug(`Pulled arm ${arm.id} ${count} times, total reward: ${data.reward.toFixed(2)}`);
          });
        };
        actionsCell.appendChild(button);
      }

      const row = document.createElement('tr');
      [labelCell, averageCell, countCell, actionsCell].forEach(cell => {
        row.appendChild(cell);
      });
      armsContainer.appendChild(row);
    }
    debug('Arm rows updated');
  }

  // Reset button: call pywebvis.reset_bandit() and redraw the statistics
  // panel and the arm rows from the JSON state it resolves with.
  resetButton.onclick = () => {
    const redrawAfterReset = (result) => {
      const state = JSON.parse(result);
      updateStats(state);
      createArmRows(state);
      debug('Bandit reset');
    };
    pywebvis.reset_bandit().then(redrawAfterReset);
  };

  // Initial render: reset the bandit once on load and draw the table and
  // statistics from the state it resolves with.
  const renderInitialState = (result) => {
    const state = JSON.parse(result);
    createArmRows(state);
    updateStats(state);
    debug('Initial setup complete');
  };
  pywebvis.reset_bandit().then(renderInitialState);
});
</script>

<style>
/* Outer wrapper of the bandit UI: full width with inner padding. */
#bandit-container {
  font-family: Arial, sans-serif;
  width: 100%;
  padding: 20px;
  box-sizing: border-box;
}

/* Arms table: full width with collapsed (shared) cell borders. */
#bandit-table {
  width: 100%;
  border-collapse: collapse;
  margin-bottom: 20px;
}

/* Header and body cells share the same border, padding and alignment. */
#bandit-table th, #bandit-table td {
  border: 1px solid #ddd;
  padding: 8px;
  text-align: left;
}

/* Light grey background for the header row. */
#bandit-table th {
  background-color: #f2f2f2;
}

/* Space between the buttons inside the table. */
#bandit-table button {
  margin-right: 5px;
}

/* Statistics panel: grey rounded box. */
#stats-container {
  background-color: #f0f0f0;
  padding: 15px;
  border-radius: 5px;
  margin-bottom: 20px;
}

/* Reset button: block-level and horizontally centred via auto margins. */
#reset-button {
  display: block;
  margin: 0 auto;
  padding: 10px 20px;
  font-size: 18px;
}

/* Debug log area: bordered grey box. */
#debug-output {
  margin-top: 20px;
  padding: 10px;
  background-color: #f0f0f0;
  border: 1px solid #ccc;
}
</style>