In [52]:
# notebooks/01_quickstart.ipynb
from obp.dataset import OpenBanditDataset
from obp.policy import BernoulliTS
from obp.ope import OffPolicyEvaluation, InverseProbabilityWeighting as IPW
import pandas as pd
import matplotlib.pyplot as plt

# Raise pandas' display limits so the wide per-action tables below are not truncated.
for _option, _value in (
    ('display.max_rows', 500),
    ('display.max_columns', 500),
    ('display.width', 1000),
):
    pd.set_option(_option, _value)

### Load the logs from ZOZOTOWN
For each visitor, the site showed 3 items chosen uniformly at random from the 80 candidate items (note that the behavior policy selected here is `random`, so every logged propensity score is 1/80 = 0.0125).


In [53]:
# Load the logged bandit feedback for the "random" behavior policy, "all" campaign.
ds = OpenBanditDataset(behavior_policy="random", campaign="all")
bf = ds.obtain_batch_bandit_feedback()

# Keep only the per-impression fields used by the analysis cells below.
df = pd.DataFrame({field: bf[field] for field in ("action", "position", "reward", "pscore")})
df

INFO:obp.dataset.real:When `data_path` is not given, this class downloads the small-sized version of Open Bandit Dataset.


Unnamed: 0,action,position,reward,pscore
0,14,2,0,0.0125
1,14,2,0,0.0125
2,27,2,0,0.0125
3,48,1,0,0.0125
4,36,1,0,0.0125
...,...,...,...,...
9995,2,1,0,0.0125
9996,45,1,0,0.0125
9997,32,2,0,0.0125
9998,13,1,0,0.0125


In [54]:
# # Calculate average reward per action per position and pivot to wide format
# avg_reward_per_action_position = df.groupby(['position', 'action'])['reward'].mean().reset_index()

# # Pivot to have one row per action with separate columns for each position
# avg_reward_pivot = avg_reward_per_action_position.pivot(index='action', columns='position', values='reward')

# # Rename columns to have descriptive names
# avg_reward_pivot.columns = [f'avg_reward_position_{int(pos)}' for pos in avg_reward_pivot.columns]

# # Reset index to make action a regular column
# avg_reward_pivot = avg_reward_pivot.reset_index()

# # Fill NaN values with 0 or keep as NaN (depending on your preference)
# # avg_reward_pivot = avg_reward_pivot.fillna(0)  # Uncomment if you want to fill NaN with 0

# avg_reward_pivot['source'] = 'dataloader/random/all'

# avg_reward_pivot.to_csv('empirical_ctr_dataloader_random_all.csv')

# display(avg_reward_pivot.style.bar(align='mid', color=['red', 'lightgreen']))

In [62]:
def calculate_empirical_ctr_by_position(
    behavior_policy="random",
    campaign="all",
    fill_nan=False,
    save_csv=True,
    display_result=True,
    include_counts=True,
):
    """
    Build a per-action table of empirical CTR, split by slot position.

    The logged feedback for the requested behavior policy / campaign is loaded
    through OpenBanditDataset, aggregated per (position, action) pair and
    pivoted so that each action is one row and each position gets its own
    `avg_reward_position_<p>` column (plus `sum_reward_position_<p>` and
    `count_position_<p>` when counts are requested).

    Parameters:
    - behavior_policy: str, behavior policy to load (e.g., "random", "bts")
    - campaign: str, campaign to load (e.g., "all", "men", "women")
    - fill_nan: bool, replace the remaining NaN cells with 0 before returning
    - save_csv: bool, write the table to
      `empirical_ctr_dataloader_<behavior_policy>_<campaign>.csv`
    - display_result: bool, render the table with bars on the CTR columns
    - include_counts: bool, add the click-sum (numerator) and impression-count
      (denominator) columns; missing pairs get 0 in those columns

    Returns:
    - pandas.DataFrame: one row per action, with a trailing `source` column
    """
    feedback = OpenBanditDataset(
        behavior_policy=behavior_policy, campaign=campaign
    ).obtain_batch_bandit_feedback()

    logs = pd.DataFrame(
        {field: feedback[field] for field in ("action", "position", "reward", "pscore")}
    )

    print(f"Loaded {behavior_policy}/{campaign} dataset:")
    print(f"  - Shape: {logs.shape}")
    print(f"  - Unique actions: {logs['action'].nunique()}")
    print(f"  - Overall CTR: {logs['reward'].mean():.4f}")

    # Long table: mean (CTR), sum (clicks) and count (impressions) per pair.
    stats_long = (
        logs.groupby(['position', 'action'])['reward']
        .agg(['mean', 'sum', 'count'])
        .reset_index()
    )
    stats_long.columns = ['position', 'action', 'avg_reward', 'sum_reward', 'count_reward']

    def to_wide(value_column, column_prefix):
        """Pivot one statistic to one row per action and one column per position."""
        wide = stats_long.pivot(index='action', columns='position', values=value_column)
        wide.columns = [f'{column_prefix}{int(pos)}' for pos in wide.columns]
        return wide.reset_index()

    result_df = to_wide('avg_reward', 'avg_reward_position_')

    if include_counts:
        count_prefixes = ('sum_reward_position_', 'count_position_')
        for value_column, column_prefix in zip(('sum_reward', 'count_reward'), count_prefixes):
            result_df = result_df.merge(
                to_wide(value_column, column_prefix), on='action', how='outer'
            )

        # Pairs that never occurred come out of the pivot as NaN; as counts they are 0.
        for column in [c for c in result_df.columns if c.startswith(count_prefixes)]:
            result_df[column] = result_df[column].fillna(0).astype(int)

    if fill_nan:
        result_df = result_df.fillna(0)

    # Tag the rows with where they came from.
    result_df['source'] = f'dataloader/{behavior_policy}/{campaign}'

    if save_csv:
        filename = f'empirical_ctr_dataloader_{behavior_policy}_{campaign}.csv'
        result_df.to_csv(filename, index=False)
        print(f"Saved results to: {filename}")

    if display_result:
        print(f"\nEmpirical CTR by Action and Position ({behavior_policy}/{campaign}):")
        # Bars are drawn on the CTR columns only; without any, show the plain frame.
        avg_cols = [c for c in result_df.columns if c.startswith('avg_reward_position_')]
        rendered = (
            result_df.style.bar(subset=avg_cols, align='mid', color=['red', 'lightgreen'])
            if avg_cols
            else result_df
        )
        display(rendered)

    return result_df

# Empirical CTR table for the "random" behavior policy. Counts are included because
# compute_ctr_lift and add_confidence_intervals read the sum/count columns.
# Note: save_csv=True writes empirical_ctr_dataloader_random_all.csv to the working directory.
random_ctr = calculate_empirical_ctr_by_position(
    behavior_policy="random", 
    campaign="all",
    fill_nan=False,
    save_csv=True,
    display_result=False,
    include_counts=True  # adds sum_reward_position_* (numerator) and count_position_* (denominator)
)


INFO:obp.dataset.real:When `data_path` is not given, this class downloads the small-sized version of Open Bandit Dataset.


Loaded random/all dataset:
  - Shape: (10000, 4)
  - Unique actions: 80
  - Overall CTR: 0.0038
Saved results to: empirical_ctr_dataloader_random_all.csv


In [65]:
# Same table for the "bts" behavior policy, so it can be compared against `random_ctr`.
# Note: save_csv=True writes empirical_ctr_dataloader_bts_all.csv to the working directory.
bts_ctr = calculate_empirical_ctr_by_position(
    behavior_policy="bts", 
    campaign="all",
    fill_nan=False,
    save_csv=True,
    display_result=False,
    include_counts=True  # adds sum_reward_position_* (numerator) and count_position_* (denominator)
)

INFO:obp.dataset.real:When `data_path` is not given, this class downloads the small-sized version of Open Bandit Dataset.


Loaded bts/all dataset:
  - Shape: (10000, 4)
  - Unique actions: 80
  - Overall CTR: 0.0042
Saved results to: empirical_ctr_dataloader_bts_all.csv


In [70]:
def compute_ctr_lift(baseline_ctr_df, treatment_ctr_df, baseline_name="Baseline", treatment_name="Treatment"):
    """
    Compute CTR lift between two policies.

    Clicks (`sum_reward_position_<p>`) and impressions (`count_position_<p>`)
    are pooled over every position <p> for which BOTH frames provide both
    columns, so the two CTRs are always computed over the same positions.

    Parameters:
    - baseline_ctr_df: DataFrame with CTR data for baseline policy (e.g., random)
    - treatment_ctr_df: DataFrame with CTR data for treatment policy (e.g., BTS)
    - baseline_name: Name of baseline policy for display
    - treatment_name: Name of treatment policy for display

    Returns:
    - dict: Dictionary with lift metrics. Keys: baseline_name, treatment_name,
      baseline_ctr, treatment_ctr, baseline_clicks, baseline_impressions,
      treatment_clicks, treatment_impressions, absolute_lift,
      relative_lift_percent, available_positions.
      A CTR with zero impressions is reported as 0. `relative_lift_percent`
      is 0.0 when both CTRs are zero (no measurable difference) and
      float('inf') when only the baseline CTR is zero.
    """

    def _usable_positions(ctr_df):
        """Positions that have both a click-sum and an impression-count column."""
        positions = set()
        for col in ctr_df.columns:
            name = str(col)
            if not name.startswith('sum_reward_position_'):
                continue
            pos = int(name.split('_')[-1])
            if f'count_position_{pos}' in ctr_df.columns:
                positions.add(pos)
        return positions

    def _pooled_totals(ctr_df, positions):
        """Total clicks and impressions of one frame over the given positions."""
        clicks = sum(ctr_df[f'sum_reward_position_{pos}'].sum() for pos in positions)
        impressions = sum(ctr_df[f'count_position_{pos}'].sum() for pos in positions)
        return clicks, impressions

    # Only positions usable in both frames are compared; a position lacking its
    # count column on one side would otherwise skew that side's totals.
    available_positions = sorted(
        _usable_positions(baseline_ctr_df) & _usable_positions(treatment_ctr_df)
    )

    baseline_total_clicks, baseline_total_impressions = _pooled_totals(
        baseline_ctr_df, available_positions
    )
    treatment_total_clicks, treatment_total_impressions = _pooled_totals(
        treatment_ctr_df, available_positions
    )

    # Calculate overall CTRs (0 when there are no impressions at all)
    baseline_overall_ctr = baseline_total_clicks / baseline_total_impressions if baseline_total_impressions > 0 else 0
    treatment_overall_ctr = treatment_total_clicks / treatment_total_impressions if treatment_total_impressions > 0 else 0

    # Calculate lift metrics
    absolute_lift = treatment_overall_ctr - baseline_overall_ctr
    if baseline_overall_ctr > 0:
        relative_lift = (treatment_overall_ctr / baseline_overall_ctr - 1) * 100
    elif treatment_overall_ctr > 0:
        # Any positive CTR is an unbounded improvement over a zero baseline.
        relative_lift = float('inf')
    else:
        # Both CTRs are zero: report "no lift" instead of a misleading +inf.
        relative_lift = 0.0

    results = {
        'baseline_name': baseline_name,
        'treatment_name': treatment_name,
        'baseline_ctr': baseline_overall_ctr,
        'treatment_ctr': treatment_overall_ctr,
        'baseline_clicks': baseline_total_clicks,
        'baseline_impressions': baseline_total_impressions,
        'treatment_clicks': treatment_total_clicks,
        'treatment_impressions': treatment_total_impressions,
        'absolute_lift': absolute_lift,
        'relative_lift_percent': relative_lift,
        'available_positions': available_positions
    }

    # Print results
    print(f"\n{'='*60}")
    print(f"CTR LIFT ANALYSIS: {treatment_name} vs {baseline_name}")
    print(f"{'='*60}")
    print(f"Positions analyzed: {available_positions}")
    print(f"{baseline_name:>12} CTR: {baseline_overall_ctr:.4f} ({baseline_total_clicks:,} clicks / {baseline_total_impressions:,} impressions)")
    print(f"{treatment_name:>12} CTR: {treatment_overall_ctr:.4f} ({treatment_total_clicks:,} clicks / {treatment_total_impressions:,} impressions)")
    print(f"{'Absolute Lift':>12}: {absolute_lift:+.4f}")
    print(f"{'Relative Lift':>12}: {relative_lift:+.2f}%")

    if relative_lift > 0:
        print(f"\n✅ {treatment_name} performs {relative_lift:.2f}% better than {baseline_name}")
    elif relative_lift < 0:
        print(f"\n❌ {treatment_name} performs {abs(relative_lift):.2f}% worse than {baseline_name}")
    else:
        print(f"\n➖ {treatment_name} and {baseline_name} have identical performance")

    return results

# Pooled CTR lift of BTS (treatment) over Random (baseline), computed from the
# sum/count columns of the two tables; the function also prints a summary.
lift_results = compute_ctr_lift(
    baseline_ctr_df=random_ctr,
    treatment_ctr_df=bts_ctr,
    baseline_name="Random",
    treatment_name="BTS"
)


CTR LIFT ANALYSIS: BTS vs Random
Positions analyzed: [0, 1, 2]
      Random CTR: 0.0038 (38 clicks / 10,000 impressions)
         BTS CTR: 0.0042 (42 clicks / 10,000 impressions)
Absolute Lift: +0.0004
Relative Lift: +10.53%

✅ BTS performs 10.53% better than Random


In [None]:
def add_confidence_intervals(ctr_df, confidence_level=0.95):
    """
    Add confidence intervals to CTR dataframe using Wilson score interval.

    For every position <p> that has `avg_reward_position_<p>`,
    `count_position_<p>` and `sum_reward_position_<p>` columns, three columns
    are added: `ci_lower_position_<p>`, `ci_upper_position_<p>` and
    `ci_width_position_<p>`. Rows with no impressions (count <= 0) or a missing
    CTR get NaN in all three. Bounds are clipped to [0, 1].

    Parameters:
    - ctr_df: DataFrame returned by calculate_empirical_ctr_by_position with include_counts=True
    - confidence_level: float strictly between 0 and 1 (e.g., 0.95 for 95% CI)

    Returns:
    - pandas.DataFrame: Copy of the input with added confidence interval columns

    Raises:
    - ValueError: if confidence_level is not strictly between 0 and 1
    """
    import numpy as np
    from scipy import stats

    if not 0 < confidence_level < 1:
        raise ValueError(
            f"confidence_level must be strictly between 0 and 1, got {confidence_level!r}"
        )

    result_df = ctr_df.copy()

    # Two-sided critical value of the standard normal for the requested level.
    alpha = 1 - confidence_level
    z = stats.norm.ppf(1 - alpha / 2)
    z_sq = z ** 2

    # Position suffixes are taken from the CTR column names.
    positions = [
        col.split('_')[-1]
        for col in ctr_df.columns
        if isinstance(col, str) and col.startswith('avg_reward_position_')
    ]

    for pos in positions:
        avg_col = f'avg_reward_position_{pos}'
        count_col = f'count_position_{pos}'
        sum_col = f'sum_reward_position_{pos}'

        # Positions without the count columns are skipped (no CI columns added).
        if not all(col in ctr_df.columns for col in (avg_col, count_col, sum_col)):
            continue

        p = ctr_df[avg_col]    # proportion (CTR)
        n = ctr_df[count_col]  # sample size

        # Blank out rows without impressions or without a CTR up front, so the
        # arithmetic below never divides by zero and yields NaN for them.
        valid = (n > 0) & p.notna()
        p_valid = p.where(valid)
        n_valid = n.where(valid)

        # Wilson score interval formula
        denominator = 1 + z_sq / n_valid
        center = (p_valid + z_sq / (2 * n_valid)) / denominator
        margin = (z / denominator) * np.sqrt(
            p_valid * (1 - p_valid) / n_valid + z_sq / (4 * n_valid ** 2)
        )

        # At p == 0 or p == 1 the exact bound is 0 or 1, but round-off can push
        # it slightly outside; clip so the interval stays a valid probability range.
        ci_lower = (center - margin).clip(lower=0.0)
        ci_upper = (center + margin).clip(upper=1.0)

        result_df[f'ci_lower_position_{pos}'] = ci_lower
        result_df[f'ci_upper_position_{pos}'] = ci_upper
        result_df[f'ci_width_position_{pos}'] = ci_upper - ci_lower

    return result_df

# Example usage function
def analyze_ctr_with_confidence_intervals(behavior_policy="random", campaign="all", confidence_level=0.95):
    """
    Load the empirical CTR table for one policy/campaign and attach confidence intervals.

    Calls calculate_empirical_ctr_by_position (counts on, display off) and
    passes the result to add_confidence_intervals, then prints, for each
    position, the unweighted mean over actions of the CTR, the impression
    count and the interval width.

    Parameters:
    - behavior_policy: str, forwarded to calculate_empirical_ctr_by_position
    - campaign: str, forwarded to calculate_empirical_ctr_by_position
    - confidence_level: float, forwarded to add_confidence_intervals

    Returns:
    - pandas.DataFrame: the frame returned by add_confidence_intervals
    """
    ctr_data = calculate_empirical_ctr_by_position(
        behavior_policy=behavior_policy,
        campaign=campaign,
        include_counts=True,
        display_result=False,
    )
    ctr_with_ci = add_confidence_intervals(ctr_data, confidence_level=confidence_level)

    print(f"\nCTR Analysis with {confidence_level*100:.0f}% Confidence Intervals:")
    print(f"Policy: {behavior_policy}, Campaign: {campaign}")

    # One summary line per position that has at least one action with a CTR.
    for column in ctr_data.columns:
        if not column.startswith('avg_reward_position_'):
            continue
        pos = column.split('_')[-1]
        avg_col = f'avg_reward_position_{pos}'

        observed = ctr_with_ci[ctr_with_ci[avg_col].notna()]
        if observed.empty:
            continue

        avg_ctr = observed[avg_col].mean()
        avg_count = observed[f'count_position_{pos}'].mean()
        avg_ci_width = observed[f'ci_width_position_{pos}'].mean()
        print(f"  Position {pos}: Avg CTR = {avg_ctr:.4f}, Avg Count = {avg_count:.1f}, Avg CI Width = {avg_ci_width:.4f}")

    return ctr_with_ci

In [None]:
# The same helper works for any behavior policy.

# Bernoulli Thompson Sampling, with counts so confidence intervals can be added.
# This repeats the earlier "bts" call but with display_result=True, so the table is
# rendered; it rebinds `bts_ctr` and rewrites empirical_ctr_dataloader_bts_all.csv.
bts_ctr = calculate_empirical_ctr_by_position(
    behavior_policy="bts", 
    campaign="all",
    fill_nan=False,
    save_csv=True,
    display_result=True,
    include_counts=True
)

In [None]:
# Example: load the "random"/"all" CTR table and attach 95% confidence intervals in one call
random_ctr_with_ci = analyze_ctr_with_confidence_intervals("random", "all", confidence_level=0.95)

# Display a few rows to see the structure
print("\nSample of CTR data with confidence intervals:")
display(random_ctr_with_ci.head())

# Confidence intervals can also be added to an existing table. The presence of
# 'count_position_1' is used as a sign that the table was built with include_counts=True.
if 'count_position_1' in bts_ctr.columns:
    bts_ctr_with_ci = add_confidence_intervals(bts_ctr, confidence_level=0.95)
    print(f"\nBTS CTR data now has confidence intervals. Shape: {bts_ctr_with_ci.shape}")
else:
    print("\nNote: Make sure to set include_counts=True to get confidence intervals")

In [None]:
def compare_ctr_across_policies(policies=["random", "bts"], campaign="all"):
    """
    Compare CTR across different behavior policies.

    Each policy's table is built with calculate_empirical_ctr_by_position
    (display off, CSV on). A policy whose table cannot be built is reported
    and stored as None instead of aborting the comparison.

    The summary line per policy shows:
    - "Overall avg CTR": the unweighted mean, over actions, of each action's
      mean CTR across positions (NOT an impression-weighted CTR)
    - "Actions with data": number of actions with a CTR in at least one position

    Parameters:
    - policies: list of behavior policies to compare (only read, never mutated)
    - campaign: campaign to analyze

    Returns:
    - dict: Dictionary with CTR dataframes for each policy (None on failure)
    """

    results = {}

    print(f"Comparing CTR across policies: {', '.join(policies)}")
    print("="*60)

    for policy in policies:
        print(f"\n--- {policy.upper()} POLICY ---")
        try:
            ctr_data = calculate_empirical_ctr_by_position(
                behavior_policy=policy,
                campaign=campaign,
                display_result=False,
                save_csv=True
            )
            results[policy] = ctr_data
        except Exception as e:
            # Best effort: keep comparing the remaining policies.
            print(f"Error with {policy}: {e}")
            results[policy] = None

    # Summary comparison
    print(f"\n{'='*60}")
    print("SUMMARY COMPARISON")
    print(f"{'='*60}")

    for policy, data in results.items():
        if data is None:
            continue

        # Take the CTR columns from the table itself: positions are numbered
        # from whatever the dataset uses (0, 1, 2 here), so hard-coded
        # names such as avg_reward_position_3 raise KeyError.
        avg_cols = [col for col in data.columns if str(col).startswith('avg_reward_position_')]
        if not avg_cols:
            print(f"{policy:>10}: no avg_reward_position_* columns found")
            continue

        overall_avg = data[avg_cols].mean(axis=1).mean()
        actions_with_data = data.dropna(subset=avg_cols, how='all').shape[0]
        print(f"{policy:>10}: Overall avg CTR = {overall_avg:.4f}, Actions with data = {actions_with_data}")

    return results

# Example usage: build the CTR table for each policy and print the summary comparison.
# The bare name on the last line displays the returned {policy: DataFrame} dictionary.
policy_comparison = compare_ctr_across_policies(["random", "bts"])
policy_comparison

In [None]:
import itertools

# Alternative: cover every action id (0 .. ds.n_actions - 1) at every observed
# position, leaving NaN where an (action, position) pair was never logged.
avg_reward_per_action_position_complete = (
    df.groupby(['position', 'action'])['reward']
    .mean()
    .reset_index()
)

# Full grid of (position, action) pairs
all_actions = range(ds.n_actions)  # Assuming ds.n_actions = 80
all_positions = df['position'].unique()
all_combinations = pd.DataFrame(
    list(itertools.product(all_positions, all_actions)),
    columns=['position', 'action'],
)

# Left-join the observed means onto the grid; unobserved pairs keep NaN
complete_data = all_combinations.merge(
    avg_reward_per_action_position_complete,
    how='left',
    on=['position', 'action'],
)

# One row per action, one avg_reward_position_<p> column per position
avg_reward_pivot_complete = complete_data.pivot(
    index='action', columns='position', values='reward'
)
avg_reward_pivot_complete.columns = [
    f'avg_reward_position_{int(pos)}' for pos in avg_reward_pivot_complete.columns
]
avg_reward_pivot_complete = avg_reward_pivot_complete.reset_index()

print(f"Shape of complete pivot table: {avg_reward_pivot_complete.shape}")
print(f"Number of unique actions: {avg_reward_pivot_complete['action'].nunique()}")
display(avg_reward_pivot_complete.head(10))

In [None]:
# Mean reward and number of logged impressions per action
avg_reward_per_action = (
    df.groupby(['action'])['reward']
    .agg(['mean', 'count'])
    .reset_index()
)
avg_reward_per_action.columns = ['action', 'avg_reward', 'num_observations']

# One row per possible action id (0 .. ds.n_actions - 1)
all_actions = pd.DataFrame({'action': range(ds.n_actions)})

# Left-join so actions that never appear in the data are kept, then:
#   - has_data records which actions were observed (computed before any filling)
#   - the missing statistics are filled with 0; use has_data to tell
#     "no data" apart from a genuinely zero CTR
avg_reward_per_action_complete = (
    all_actions
    .merge(avg_reward_per_action, on='action', how='left')
    .assign(
        has_data=lambda frame: frame['num_observations'].notna(),
        avg_reward=lambda frame: frame['avg_reward'].fillna(0),
        num_observations=lambda frame: frame['num_observations'].fillna(0).astype(int),
    )
)

avg_reward_per_action_complete

In [None]:
import plotly.express as px


def _data_status(row):
    """Label a row 'No Data', 'Zero CTR' or 'Has CTR' from has_data / avg_reward."""
    if not row['has_data']:
        return 'No Data'
    if row['avg_reward'] == 0:
        return 'Zero CTR'
    return 'Has CTR'


# Sort by action to ensure proper ordering
avg_reward_per_action_complete = (
    avg_reward_per_action_complete
    .sort_values('action')
    .reset_index(drop=True)
)

# Label column that separates "zero CTR" from "no data" (both have avg_reward == 0)
avg_reward_per_action_complete['data_status'] = avg_reward_per_action_complete.apply(
    _data_status, axis=1
)

# Bar chart of average reward per action, colored by data status
bar_options = dict(
    x='action',
    y='avg_reward',
    color='data_status',
    title='Average Reward per Action (All Actions Shown)',
    labels={'action': 'Action ID', 'avg_reward': 'Average Reward', 'data_status': 'Data Status'},
    height=500,
    color_discrete_map={'Has CTR': '#636EFA', 'Zero CTR': '#EF553B', 'No Data': '#CCCCCC'},
    hover_data=['num_observations'],
)
fig = px.bar(avg_reward_per_action_complete, **bar_options)
fig.update_layout(showlegend=True)
fig.update_xaxes(type='linear', dtick=1)  # Ensure actions are shown in numeric order
fig.show()

In [None]:
# Preview the first rows of the logged-feedback frame (action, position, reward, pscore).
df.head()

In [None]:
# Number of distinct positions each action was shown in; the last line lists
# the actions seen in fewer than 3 positions.
counts_by_action_position = (
    df.groupby('action')['position']
    .nunique()
    .rename('num_unique_positions')
    .reset_index()
)
counts_by_action_position.loc[counts_by_action_position['num_unique_positions'] < 3]

In [None]:
# Headline statistics of the logged feedback
print(f"Average CTR: {bf['reward'].mean()}")
print(f"Unique items recommended: {len(set(bf['action']))}")
print(f"Position counts: {pd.Series(bf['position']).value_counts()}")

In [None]:

# Mean click rate per slot position, shown as a bar chart
pos_ctr = (
    pd.DataFrame({"pos": bf["position"], "click": bf["reward"]})
    .groupby("pos")
    .mean()
)
pos_ctr.plot.bar(legend=False, title="Click-through rate by slot")
plt.show()

In [None]:
# Show the per-position mean click rate as a table (same values as the bar chart).
pos_ctr

In [None]:

# Evaluation policy: BernoulliTS sized to match the loaded dataset
pi_e = BernoulliTS(
    n_actions=ds.n_actions,
    len_list=ds.len_list,
    is_zozotown_prior=True,
    campaign="all",
    random_state=123,
)

# evaluation policy distribution over actions per round (factorized by position)
action_dist = pi_e.compute_batch_action_dist(n_sim=100_000, n_rounds=bf["n_rounds"])

# Estimate the evaluation policy's value from the logged feedback with IPW
ope = OffPolicyEvaluation(bandit_feedback=bf, ope_estimators=[IPW()])
est = ope.estimate_policy_values(action_dist=action_dist)

ipw_estimate = est["ipw"]
logged_avg_reward = bf["reward"].mean()
print("IPW estimate:", ipw_estimate)
print("Logged avg reward:", logged_avg_reward)
print("Relative (IPW/logged):", ipw_estimate / logged_avg_reward)

### Verifying that the CTRs match: raw CSV vs. the `OpenBanditDataset` loader

In [None]:
# CSV (Random/all): shift the CSV positions down by one so they can be compared
# with the positions reported by the loader
csv = pd.read_csv("zr-obp/obd/random/all/all.csv", index_col=0).assign(
    pos0=lambda raw: raw["position"] - 1
)
ctr_csv = csv.groupby("pos0")["click"].mean().rename("ctr_csv")

# OBP loader (Random/all)
ds = OpenBanditDataset(behavior_policy="random", campaign="all")
bf = ds.obtain_batch_bandit_feedback()
loader_clicks = pd.DataFrame({"pos0": bf["position"], "click": bf["reward"]})
ctr_bf = loader_clicks.groupby("pos0")["click"].mean().rename("ctr_bf")

# Side-by-side per-position CTR from both sources
print(pd.concat([ctr_csv, ctr_bf], axis=1))