In [16]:
import pandas as pd
from obp.dataset import OpenBanditDataset
import numpy as np
# Load the uniform-random behaviour-policy logs for the "all" campaign and
# pull them out as a dict of arrays (the batch bandit feedback).
ds = OpenBanditDataset(
    behavior_policy="random",
    campaign="all",
)
bf = ds.obtain_batch_bandit_feedback()

# Expected keys: n_rounds, n_actions, action, position, reward, pscore,
# context, action_context
print(bf.keys())

INFO:obp.dataset.real:When `data_path` is not given, this class downloads the small-sized version of Open Bandit Dataset.


dict_keys(['n_rounds', 'n_actions', 'action', 'position', 'reward', 'pscore', 'context', 'action_context'])


# Basic shape sanity checks

In [None]:

# Summarise the logged feedback, then fail fast if any basic invariant that the
# rest of the notebook relies on does not hold.
n = bf["n_rounds"]
print("n_rounds:", n, "| n_actions:", bf["n_actions"], "| len_list:", ds.len_list)

# Fields that carry exactly one entry per logged round.
per_round_keys = ["action", "position", "reward", "pscore"]

for k in per_round_keys:
    arr = bf[k]
    print(f"{k}: shape={getattr(arr, 'shape', None)}, dtype={getattr(arr, 'dtype', type(arr))}")

# --- Assertions (fail fast if something's off) ---
# Accept NumPy integer scalars as well as plain ints for the round count.
assert isinstance(n, (int, np.integer)) and n > 0, "n_rounds must be a positive integer"
for k in per_round_keys:
    assert len(bf[k]) == n, f"{k} length != n_rounds"

# Propensities are probabilities: strictly positive (they are divided by in
# importance weighting) and never above 1. NaNs fail both comparisons.
assert bf["pscore"].min() > 0, "Found zero/negative propensities"
assert bf["pscore"].max() <= 1, "Found propensities greater than 1"

# Logged action ids must index into the action set.
assert bf["action"].min() >= 0, "Found negative action ids"
assert bf["action"].max() < bf["n_actions"], "Found action ids >= n_actions"

# Rewards are treated as click indicators when CTRs are computed via .mean().
assert set(np.unique(bf["reward"])) <= {0, 1}, "Rewards must be binary {0,1}"

assert set(np.unique(bf["position"])) <= {0,1,2}, "Positions must be 0-based {0,1,2}"
assert ds.len_list == 3, "OBD should have 3 slots"


n_rounds: 10000 | n_actions: 80 | len_list: 3
action: shape=(10000,), dtype=int64
position: shape=(10000,), dtype=int64
reward: shape=(10000,), dtype=int64
pscore: shape=(10000,), dtype=float64


In [22]:
# Some versions may not populate the context arrays, so look them up
# defensively and report the shape only when an array is actually present.
ctx = bf.get("context")
actx = bf.get("action_context")
print("context:", ctx.shape if ctx is not None else None)
print("action_context:", actx.shape if actx is not None else None)

context: (10000, 20)
action_context: (80, 4)


In [12]:
# Collect the per-round fields into one DataFrame (one row per logged round),
# keeping the column order action, position, reward, pscore.
df = pd.DataFrame(
    {col: bf[col] for col in ("action", "position", "reward", "pscore")}
)
df.head()

Unnamed: 0,action,position,reward,pscore
0,14,2,0,0.0125
1,14,2,0,0.0125
2,27,2,0,0.0125
3,48,1,0,0.0125
4,36,1,0,0.0125


In [20]:
# Click-through rate is the mean of the reward column, overall and per slot.
ctr_overall = df["reward"].mean()
ctr_by_position = df.groupby("position")["reward"].mean()

print("\nHead:\n", df.head())
print("DF shape:", df.shape)
print("CTR (overall):", ctr_overall)
print("CTR by slot (0-based):\n", ctr_by_position)


Head:
    action  position  reward  pscore
0      14         2       0  0.0125
1      14         2       0  0.0125
2      27         2       0  0.0125
3      48         1       0  0.0125
4      36         1       0  0.0125
DF shape: (10000, 4)
CTR (overall): 0.0038
CTR by slot (0-based):
 position
0    0.003913
1    0.004103
2    0.003368
Name: reward, dtype: float64


In [13]:
# Display the (rows, columns) shape of the per-round DataFrame.
df.shape

(10000, 4)

In [14]:
# Display the keys available in the batch bandit feedback dict.
bf.keys()

dict_keys(['n_rounds', 'n_actions', 'action', 'position', 'reward', 'pscore', 'context', 'action_context'])

### Verifying that per-slot CTRs match between the raw CSV and the `OpenBanditDataset` loader

In [15]:
# Cross-check: per-slot CTR computed straight from the raw CSV must equal the
# per-slot CTR computed from the OpenBanditDataset loader's feedback dict.

# CSV (Random/all)
# `pos0` is the CSV position shifted down by one so that it lines up with the
# 0-based positions asserted for the loader output.
csv = (
    pd.read_csv("../zr-obp/obd/random/all/all.csv", index_col=0)
    .assign(pos0=lambda frame: frame["position"] - 1)
)
ctr_csv = csv.groupby("pos0")["click"].mean().rename("ctr_csv")

# OBP loader (Random/all)
# Loaded afresh here so this comparison does not depend on earlier cell state.
ds = OpenBanditDataset(behavior_policy="random", campaign="all")
bf = ds.obtain_batch_bandit_feedback()
ctr_bf = (pd.DataFrame({"pos0": bf["position"], "click": bf["reward"]})
          .groupby("pos0")["click"].mean().rename("ctr_bf"))

# Print first so the table is still visible if an assertion below fails.
ctr_compare = pd.concat([ctr_csv, ctr_bf], axis=1)
print(ctr_compare)

# Enforce the match instead of relying on eyeballing the printed table.
# A NaN here means a slot exists in one source but not in the other.
assert not ctr_compare.isna().any().any(), "Slot sets differ between CSV and loader"
np.testing.assert_allclose(
    ctr_compare["ctr_csv"].to_numpy(),
    ctr_compare["ctr_bf"].to_numpy(),
    err_msg="Per-slot CTR from the CSV does not match the OpenBanditDataset loader",
)

INFO:obp.dataset.real:When `data_path` is not given, this class downloads the small-sized version of Open Bandit Dataset.


       ctr_csv    ctr_bf
pos0                    
0     0.003913  0.003913
1     0.004103  0.004103
2     0.003368  0.003368
