In [1]:
# --- Offline simulation: probability of each action across rounds, per position ---

from typing import Any, Dict, Iterable, Optional, Tuple

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from obp.dataset import OpenBanditDataset
from plotly.subplots import make_subplots

# Reproducibility: a single fixed seed feeding a legacy NumPy RandomState generator.
SEED = 7
rng = np.random.RandomState(seed=SEED)

# Raise pandas' display limits (rows, columns, and output width).
for _option, _value in (
    ("display.max_rows", 500),
    ("display.max_columns", 500),
    ("display.width", 1000),
):
    pd.set_option(_option, _value)



### Load the data

In [2]:

# --- Load the Open Bandit Dataset (behavior policy "random", campaign "all") ---
ds = OpenBanditDataset(behavior_policy="random", campaign="all")
bf = ds.obtain_batch_bandit_feedback()

print("bandit_feedback keys:", bf.keys())
print(f"Rounds: {bf['n_rounds']:,} | n_actions: {bf['n_actions']} | len_list: {ds.len_list}")

# Gather the per-round logged fields into one DataFrame for convenience
df = pd.DataFrame(
    {field: bf[field] for field in ("action", "position", "reward", "pscore")}
)

# Preview the first rows (last expression of the cell, so it is displayed)
df.head()

INFO:obp.dataset.real:When `data_path` is not given, this class downloads the small-sized version of Open Bandit Dataset.


bandit_feedback keys: dict_keys(['n_rounds', 'n_actions', 'action', 'position', 'reward', 'pscore', 'context', 'action_context'])
Rounds: 10,000 | n_actions: 80 | len_list: 3


Unnamed: 0,action,position,reward,pscore
0,14,2,0,0.0125
1,14,2,0,0.0125
2,27,2,0,0.0125
3,48,1,0,0.0125
4,36,1,0,0.0125


In [3]:
import pandas as pd
from thompson import TSMultiBanditSimulator
from data_processing import initialize_probability_distribution
from plotting import compare_pmf_across_rounds


In [4]:

# Problem size is read from the loaded dataset instead of being repeated as literals,
# so the simulator cannot drift from the data if the campaign/policy above changes
# (the currently loaded feedback reports n_actions=80 and len_list=3).
n_actions = int(bf["n_actions"])
n_positions = int(ds.len_list)

sim = TSMultiBanditSimulator(
    df,
    n_actions=n_actions,
    n_positions=n_positions,
    seed=2025,
    pmf_interval=1,      # store snapshots every round (P2 knob)
    pmf_mc_draws=10000,  # Monte Carlo draws per snapshot
)

# Optional: explicit uniform PMF at start (already stored by History.log_uniform_init())
init_pmf_df = initialize_probability_distribution(n_actions, n_positions, "uniform")

# Run the simulation for 100 rounds
sim.run(n_rounds=100)

# Sanity check: the PMF after round 1 must differ from the uniform start
assert sim.after_first_round_is_non_uniform(), "PMF after round 1 is still uniform"

# Access tidy histories
pmf = sim.history.pmf_df()
actions = sim.history.actions_df()
rewards = sim.history.rewards_df()


In [5]:
# Rounds whose PMF snapshots are compared: 0 (init), 1, and 90.
# Defined once so the three per-position calls below cannot drift apart.
snapshot_rounds = (0, 1, 90)

# Same comparison for each position (0, 1, 2); each call gets its own fresh list.
compare_pmf_across_rounds(pmf, position=0, rounds=list(snapshot_rounds), phase="post")

compare_pmf_across_rounds(pmf, position=1, rounds=list(snapshot_rounds), phase="post")

# Kept as the final expression of the cell, as in the original.
compare_pmf_across_rounds(pmf, position=2, rounds=list(snapshot_rounds), phase="post")