In [1]:
# Import required libraries
import os
import pandas as pd
import numpy as np
from obp.dataset import OpenBanditDataset
import plotly.express as px
import plotly.graph_objects as go

print("✅ All libraries imported successfully!")

✅ All libraries imported successfully!


# Open Bandit Dataset - CTR Analysis

Comprehensive analysis of Click-Through Rates (CTR) for the Random and Bernoulli Thompson Sampling (BTS) behavior policies, using both the sample and the full datasets.

## Core Functions

In [2]:
def load_data(behavior_policy="random", campaign="all", dataset_type="sample",
              base_path="zr-obp/full_dataset"):
    """
    Unified data loader that handles both sample (dataloader) and full (CSV) datasets.
    
    Args:
        behavior_policy: 'random' or 'bts'
        campaign: 'all', 'men', or 'women'
        dataset_type: 'sample' (uses OpenBanditDataset) or 'full' (uses CSV files)
        base_path: Root directory of the full dataset. Only read when
            dataset_type == 'full'; the CSV is expected at
            <base_path>/<behavior_policy>/<campaign>/<campaign>.csv
    
    Returns:
        pd.DataFrame.
        - 'sample': columns action, position, reward, plus pscore when the
          bandit feedback contains it.
        - 'full': every column of the CSV, with item_id / click /
          propensity_score renamed to action / reward / pscore.
    
    Raises:
        FileNotFoundError: dataset_type is 'full' and the CSV does not exist.
        ValueError: dataset_type is neither 'sample' nor 'full', or the CSV
            lacks one of action / position / reward after renaming.
    
    Examples:
        >>> # Load sample dataset (10k) from OBP dataloader
        >>> df_sample = load_data('random', 'all', 'sample')
        
        >>> # Load full dataset from CSV files
        >>> df_full = load_data('bts', 'all', 'full')
        
        >>> # Load full dataset stored somewhere else
        >>> df_full = load_data('bts', 'all', 'full', base_path='data/obd')
    """
    if dataset_type == "sample":
        # Use OpenBanditDataset for sample data
        ds = OpenBanditDataset(behavior_policy=behavior_policy, campaign=campaign)
        bf = ds.obtain_batch_bandit_feedback()
        df = pd.DataFrame({
            "action": bf["action"],
            "position": bf["position"],
            "reward": bf["reward"]
        })
        if "pscore" in bf:
            df["pscore"] = bf["pscore"]
            
    elif dataset_type == "full":
        # Load from CSV files
        csv_path = os.path.join(base_path, behavior_policy, campaign, f"{campaign}.csv")
        
        if not os.path.exists(csv_path):
            raise FileNotFoundError(
                f"Full dataset not found at: {csv_path}\n"
                f"Expected structure: {base_path}/{behavior_policy}/{campaign}/{campaign}.csv"
            )
        
        # The first CSV column is used as the row index
        df = pd.read_csv(csv_path, index_col=0)
        
        # Standardize column names so both sources expose action / reward / pscore
        df = df.rename(columns={
            'item_id': 'action',
            'click': 'reward',
            'propensity_score': 'pscore'
        })
        
        # Ensure required columns exist
        required_cols = ['action', 'position', 'reward']
        missing_cols = [col for col in required_cols if col not in df.columns]
        if missing_cols:
            raise ValueError(f"CSV missing required columns: {missing_cols}")
    
    else:
        raise ValueError(f"dataset_type must be 'sample' or 'full', got: {dataset_type}")
    
    print(f"✅ Loaded {len(df):,} records from {dataset_type} dataset")
    print(f"   Policy: {behavior_policy.upper()}, Campaign: {campaign.upper()}")
    
    return df

In [3]:
# Walk through load_data for several dataset type / policy / campaign combinations
print("="*70, "LOAD_DATA FUNCTION EXAMPLES", "="*70, sep="\n")

# Example 1: sample dataset, random policy (also list the returned columns)
print("\n1️⃣  Sample + Random Policy:")
df_sample_random = load_data("random", "all", "sample")
print(
    f"   Shape: {df_sample_random.shape}",
    f"   Columns: {list(df_sample_random.columns)}",
    sep="\n",
)

# Example 2: sample dataset, BTS policy
print("\n2️⃣  Sample + BTS Policy:")
df_sample_bts = load_data("bts", "all", "sample")
print(f"   Shape: {df_sample_bts.shape}")

# Example 3: full dataset; a missing CSV is reported instead of stopping the cell
print("\n3️⃣  Full + Random Policy:")
try:
    df_full_random = load_data("random", "all", "full")
except FileNotFoundError as missing:
    print(f"   ⚠️  {missing}")
else:
    print(f"   Shape: {df_full_random.shape}")

# Example 4: sample dataset restricted to one campaign
print("\n4️⃣  Sample + Men Campaign:")
df_men = load_data("random", "men", "sample")
print(f"   Shape: {df_men.shape}")

print("\n" + "="*70, "✅ All examples completed!", "="*70, sep="\n")

INFO:obp.dataset.real:When `data_path` is not given, this class downloads the small-sized version of Open Bandit Dataset.
INFO:obp.dataset.real:When `data_path` is not given, this class downloads the small-sized version of Open Bandit Dataset.


LOAD_DATA FUNCTION EXAMPLES

1️⃣  Sample + Random Policy:
✅ Loaded 10,000 records from sample dataset
   Policy: RANDOM, Campaign: ALL
   Shape: (10000, 4)
   Columns: ['action', 'position', 'reward', 'pscore']

2️⃣  Sample + BTS Policy:
✅ Loaded 10,000 records from sample dataset
   Policy: BTS, Campaign: ALL
   Shape: (10000, 4)

3️⃣  Full + Random Policy:


INFO:obp.dataset.real:When `data_path` is not given, this class downloads the small-sized version of Open Bandit Dataset.


✅ Loaded 1,374,327 records from full dataset
   Policy: RANDOM, Campaign: ALL
   Shape: (1374327, 89)

4️⃣  Sample + Men Campaign:
✅ Loaded 10,000 records from sample dataset
   Policy: RANDOM, Campaign: MEN
   Shape: (10000, 4)

✅ All examples completed!


In [4]:
def compute_ctr(df):
    """Compute CTR from a dataframe with reward column.

    Args:
        df: DataFrame containing a 'reward' column.

    Returns:
        The mean of the 'reward' column.
    """
    return df['reward'].mean()


def _validate_one_dataset(df, policy, campaign, expected, ctr_tolerance):
    """Compare one loaded dataset with its expected stats, print a short report, return the result row."""
    # Test 1: Data volume
    actual_records = len(df)
    expected_records = expected['n_data']
    volume_match = actual_records == expected_records
    volume_diff = actual_records - expected_records
    volume_pct_diff = (volume_diff / expected_records) * 100 if expected_records > 0 else 0

    # Test 2: CTR (absolute difference must stay strictly below the tolerance)
    actual_ctr = compute_ctr(df)
    expected_ctr = expected['ctr']
    ctr_diff = actual_ctr - expected_ctr
    ctr_match = abs(ctr_diff) < ctr_tolerance

    # Test 3: Check required columns
    required_cols = ['action', 'position', 'reward']
    has_required_cols = all(col in df.columns for col in required_cols)

    result = {
        'Policy': policy.upper(),
        'Campaign': campaign.upper(),
        'Expected #Data': f"{expected_records:,}",
        'Actual #Data': f"{actual_records:,}",
        'Volume Match': '✅' if volume_match else f'⚠️ ({volume_diff:+,}, {volume_pct_diff:+.2f}%)',
        'Expected CTR': f"{expected_ctr:.4f} ({expected_ctr*100:.2f}%)",
        'Actual CTR': f"{actual_ctr:.4f} ({actual_ctr*100:.2f}%)",
        'CTR Match': '✅' if ctr_match else f'⚠️ ({ctr_diff:+.4f})',
        'Columns': '✅' if has_required_cols else '❌',
        'Status': '✅ PASS' if (volume_match and ctr_match and has_required_cols) else '⚠️ PARTIAL'
    }

    # Print summary
    print(f"   Records: {actual_records:,} (Expected: {expected_records:,}) {result['Volume Match']}")
    print(f"   CTR: {actual_ctr:.4f} (Expected: {expected_ctr:.4f}) {result['CTR Match']}")
    print(f"   Status: {result['Status']}")

    return result


def run_validation_tests(expected_stats=None, ctr_tolerance=0.0001, loader=None):
    """
    Run comprehensive validation tests against paper statistics (Table 1).
    Tests both data volumes and CTR values for all policy x campaign combinations.

    Args:
        expected_stats: Optional dict mapping (policy, campaign) to a dict with
            at least 'n_data' and 'ctr'. Defaults to the Table 1 values below.
        ctr_tolerance: Maximum absolute CTR difference (exclusive) that still
            counts as a match. The default 0.0001 is ±0.01 percentage points.
        loader: Optional callable invoked as
            loader(behavior_policy=..., campaign=..., dataset_type='full') and
            returning a DataFrame. Defaults to the notebook's load_data.

    Returns:
        pd.DataFrame with one row per (policy, campaign). Rows for datasets that
        could not be loaded or checked only carry Policy, Campaign and Status.
    """
    if expected_stats is None:
        # Expected statistics from Table 1 (paper).
        # 'relative_ctr' is kept for reference only; no check below reads it.
        expected_stats = {
            ('random', 'all'): {'n_data': 1_374_327, 'ctr': 0.0035, 'relative_ctr': 1.00},
            ('bts', 'all'): {'n_data': 12_168_084, 'ctr': 0.0050, 'relative_ctr': 1.43},
            ('random', 'men'): {'n_data': 452_949, 'ctr': 0.0051, 'relative_ctr': 1.48},
            ('bts', 'men'): {'n_data': 4_077_727, 'ctr': 0.0067, 'relative_ctr': 1.94},
            ('random', 'women'): {'n_data': 864_585, 'ctr': 0.0048, 'relative_ctr': 1.39},
            ('bts', 'women'): {'n_data': 7_765_497, 'ctr': 0.0064, 'relative_ctr': 1.84},
        }

    if loader is None:
        # Looked up only when needed so a stub loader can be injected
        loader = load_data

    # `display` exists without an import inside IPython/Jupyter; fall back to
    # print so the function also runs in a plain Python interpreter.
    try:
        show = display
    except NameError:
        show = print

    results = []
    print("="*90)
    print("VALIDATION TESTS: Full Dataset vs. Paper Statistics (Table 1)")
    print("="*90)

    for (policy, campaign), expected in expected_stats.items():
        test_name = f"{policy.upper()} - {campaign.upper()}"
        print(f"\n🧪 Testing: {test_name}")

        try:
            df = loader(behavior_policy=policy, campaign=campaign, dataset_type='full')
            result = _validate_one_dataset(df, policy, campaign, expected, ctr_tolerance)
        except FileNotFoundError:
            # A missing file is reported as a skip, not as an error
            print("   ❌ SKIP: Dataset not found")
            result = {
                'Policy': policy.upper(),
                'Campaign': campaign.upper(),
                'Status': '❌ SKIP (File not found)'
            }
        except Exception as e:
            # Deliberately broad: one broken dataset must not abort the remaining checks
            print(f"   ❌ ERROR: {str(e)}")
            result = {
                'Policy': policy.upper(),
                'Campaign': campaign.upper(),
                'Status': f'❌ ERROR: {str(e)[:50]}'
            }
        results.append(result)

    # Summary table
    print("\n" + "="*90)
    print("VALIDATION SUMMARY")
    print("="*90)
    df_results = pd.DataFrame(results)
    show(df_results)

    # Final verdict: only rows whose Status starts with the check mark count as passed
    passed = sum(1 for r in results if r.get('Status', '').startswith('✅'))
    total = len(results)

    print(f"\n{'='*90}")
    print(f"FINAL RESULT: {passed}/{total} tests passed")
    print(f"{'='*90}")

    if passed == total:
        print("🎉 ALL TESTS PASSED! Dataset perfectly matches paper statistics.")
    elif passed > 0:
        print("⚠️  PARTIAL SUCCESS: Some tests passed, review details above.")
    else:
        print("❌ ALL TESTS FAILED: Check data files and paths.")

    return df_results

# Run the validation tests and keep the returned summary DataFrame in
# `test_results` for the summary cell below.
# NOTE: with default arguments this loads every full-dataset CSV, so it can be slow.
test_results = run_validation_tests()

VALIDATION TESTS: Full Dataset vs. Paper Statistics (Table 1)

🧪 Testing: RANDOM - ALL
✅ Loaded 1,374,327 records from full dataset
   Policy: RANDOM, Campaign: ALL
   Records: 1,374,327 (Expected: 1,374,327) ✅
   CTR: 0.0035 (Expected: 0.0035) ✅
   Status: ✅ PASS

🧪 Testing: BTS - ALL
✅ Loaded 1,374,327 records from full dataset
   Policy: RANDOM, Campaign: ALL
   Records: 1,374,327 (Expected: 1,374,327) ✅
   CTR: 0.0035 (Expected: 0.0035) ✅
   Status: ✅ PASS

🧪 Testing: BTS - ALL
✅ Loaded 12,357,200 records from full dataset
   Policy: BTS, Campaign: ALL
   Records: 12,357,200 (Expected: 12,168,084) ⚠️ (+189,116, +1.55%)
   CTR: 0.0050 (Expected: 0.0050) ✅
   Status: ⚠️ PARTIAL

🧪 Testing: RANDOM - MEN
✅ Loaded 12,357,200 records from full dataset
   Policy: BTS, Campaign: ALL
   Records: 12,357,200 (Expected: 12,168,084) ⚠️ (+189,116, +1.55%)
   CTR: 0.0050 (Expected: 0.0050) ✅
   Status: ⚠️ PARTIAL

🧪 Testing: RANDOM - MEN
✅ Loaded 452,949 records from full dataset
   Policy: RANDO

Unnamed: 0,Policy,Campaign,Expected #Data,Actual #Data,Volume Match,Expected CTR,Actual CTR,CTR Match,Columns,Status
0,RANDOM,ALL,1374327,1374327,✅,0.0035 (0.35%),0.0035 (0.35%),✅,✅,✅ PASS
1,BTS,ALL,12168084,12357200,"⚠️ (+189,116, +1.55%)",0.0050 (0.50%),0.0050 (0.50%),✅,✅,⚠️ PARTIAL
2,RANDOM,MEN,452949,452949,✅,0.0051 (0.51%),0.0051 (0.51%),✅,✅,✅ PASS
3,BTS,MEN,4077727,4077727,✅,0.0067 (0.67%),0.0067 (0.67%),✅,✅,✅ PASS
4,RANDOM,WOMEN,864585,864585,✅,0.0048 (0.48%),0.0048 (0.48%),✅,✅,✅ PASS
5,BTS,WOMEN,7765497,7765497,✅,0.0064 (0.64%),0.0064 (0.64%),✅,✅,✅ PASS



FINAL RESULT: 5/6 tests passed
⚠️  PARTIAL SUCCESS: Some tests passed, review details above.


In [5]:
# Compact recap of the validation run stored in `test_results`
print("="*90, "TEST RESULTS SUMMARY", "="*90, sep="\n")

if 'test_results' not in locals():
    print("⚠️  No test results available. Run the validation tests first.")
else:
    # Prefer the key columns; fall back to the whole frame when any is absent
    summary_cols = ['Policy', 'Campaign', 'Expected #Data', 'Actual #Data', 
                    'Volume Match', 'Expected CTR', 'Actual CTR', 'CTR Match', 'Status']
    has_all_summary_cols = set(summary_cols).issubset(test_results.columns)
    display(test_results[summary_cols] if has_all_summary_cols else test_results)
    
    # Classify every row once from its Status text (missing Status counts as neither)
    status_text = test_results['Status'].str
    is_pass = status_text.startswith('✅', na=False)
    is_skip = status_text.contains('SKIP', na=False)
    
    passed = int(is_pass.sum())
    skipped = int(is_skip.sum())
    failed = len(test_results) - passed - skipped  # everything that neither passed nor was skipped
    
    print(f"\n📊 Results: {passed} Passed | {skipped} Skipped | {failed} Failed")
    
    # List every combination that did not pass
    issues = test_results[~is_pass]
    if len(issues) > 0:
        issue_labels = issues['Policy'] + ' - ' + issues['Campaign']
        print(f"\n⚠️  Issues found in: {', '.join(issue_labels)}")
    
print("="*90)

TEST RESULTS SUMMARY


Unnamed: 0,Policy,Campaign,Expected #Data,Actual #Data,Volume Match,Expected CTR,Actual CTR,CTR Match,Status
0,RANDOM,ALL,1374327,1374327,✅,0.0035 (0.35%),0.0035 (0.35%),✅,✅ PASS
1,BTS,ALL,12168084,12357200,"⚠️ (+189,116, +1.55%)",0.0050 (0.50%),0.0050 (0.50%),✅,⚠️ PARTIAL
2,RANDOM,MEN,452949,452949,✅,0.0051 (0.51%),0.0051 (0.51%),✅,✅ PASS
3,BTS,MEN,4077727,4077727,✅,0.0067 (0.67%),0.0067 (0.67%),✅,✅ PASS
4,RANDOM,WOMEN,864585,864585,✅,0.0048 (0.48%),0.0048 (0.48%),✅,✅ PASS
5,BTS,WOMEN,7765497,7765497,✅,0.0064 (0.64%),0.0064 (0.64%),✅,✅ PASS



📊 Results: 5 Passed | 0 Skipped | 1 Failed

⚠️  Issues found in: BTS - ALL


In [6]:
# Investigate the BTS - ALL issue: reload the full BTS/ALL dataset and compare its
# record count and CTR with the expected values, then run basic quality checks.
print("="*90)
print("DIAGNOSTIC: Investigating BTS - ALL Dataset Issue")
print("="*90)

try:
    # Try to load BTS ALL dataset
    df_bts_all = load_data(behavior_policy='bts', campaign='all', dataset_type='full')
    
    print(f"\n✅ Dataset loaded successfully!")
    print(f"   Records: {len(df_bts_all):,}")
    print(f"   Columns: {list(df_bts_all.columns)}")
    
    # Compute the CTR once and reuse it below instead of re-scanning the
    # reward column for every printed value.
    actual_ctr = compute_ctr(df_bts_all)
    print(f"   CTR: {actual_ctr:.6f} ({actual_ctr*100:.3f}%)")
    
    # Compare with expected
    expected = 12_168_084
    actual = len(df_bts_all)
    diff = actual - expected
    diff_pct = (diff / expected) * 100
    
    print(f"\n📊 Volume Comparison:")
    print(f"   Expected: {expected:,}")
    print(f"   Actual:   {actual:,}")
    print(f"   Diff:     {diff:+,} ({diff_pct:+.2f}%)")
    
    expected_ctr = 0.0050
    ctr_diff = actual_ctr - expected_ctr
    
    print(f"\n📊 CTR Comparison:")
    print(f"   Expected: {expected_ctr:.4f} ({expected_ctr*100:.2f}%)")
    print(f"   Actual:   {actual_ctr:.4f} ({actual_ctr*100:.2f}%)")
    print(f"   Diff:     {ctr_diff:+.6f} ({(ctr_diff/expected_ctr)*100:+.2f}%)")
    
    # Check data quality (each of these scans the whole frame)
    print(f"\n🔍 Data Quality Checks:")
    print(f"   Missing values: {df_bts_all.isnull().sum().sum()}")
    print(f"   Duplicate rows: {df_bts_all.duplicated().sum()}")
    print(f"   Reward distribution: {df_bts_all['reward'].value_counts().to_dict()}")
    
    # Sample data
    print(f"\n📋 Sample Data (first 5 rows):")
    display(df_bts_all.head())
    
except FileNotFoundError as e:
    print(f"❌ File not found: {e}")
except Exception as e:
    # Diagnostic cell: report any other failure with its traceback instead of stopping
    print(f"❌ Error: {e}")
    import traceback
    traceback.print_exc()

print("="*90)

DIAGNOSTIC: Investigating BTS - ALL Dataset Issue
✅ Loaded 12,357,200 records from full dataset
   Policy: BTS, Campaign: ALL

✅ Dataset loaded successfully!
   Records: 12,357,200
   Columns: ['timestamp', 'action', 'position', 'reward', 'pscore', 'user_feature_0', 'user_feature_1', 'user_feature_2', 'user_feature_3', 'user-item_affinity_0', 'user-item_affinity_1', 'user-item_affinity_2', 'user-item_affinity_3', 'user-item_affinity_4', 'user-item_affinity_5', 'user-item_affinity_6', 'user-item_affinity_7', 'user-item_affinity_8', 'user-item_affinity_9', 'user-item_affinity_10', 'user-item_affinity_11', 'user-item_affinity_12', 'user-item_affinity_13', 'user-item_affinity_14', 'user-item_affinity_15', 'user-item_affinity_16', 'user-item_affinity_17', 'user-item_affinity_18', 'user-item_affinity_19', 'user-item_affinity_20', 'user-item_affinity_21', 'user-item_affinity_22', 'user-item_affinity_23', 'user-item_affinity_24', 'user-item_affinity_25', 'user-item_affinity_26', 'user-item_aff

Unnamed: 0,timestamp,action,position,reward,pscore,user_feature_0,user_feature_1,user_feature_2,user_feature_3,user-item_affinity_0,...,user-item_affinity_70,user-item_affinity_71,user-item_affinity_72,user-item_affinity_73,user-item_affinity_74,user-item_affinity_75,user-item_affinity_76,user-item_affinity_77,user-item_affinity_78,user-item_affinity_79
0,2019-11-24 00:00:00.047846+00:00,18,2,0,0.0613,8ca3f3c62ebdd4b5e311d475c6d5ca2f,e07fc4436e7c7833115dbfc8ff770109,1e58eb4fb1a017cf89826db60ffb7d67,fe64471cf5b19278c62ce48447f9264d,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2019-11-24 00:00:00.049952+00:00,61,1,0,0.104925,8ca3f3c62ebdd4b5e311d475c6d5ca2f,e07fc4436e7c7833115dbfc8ff770109,1e58eb4fb1a017cf89826db60ffb7d67,fe64471cf5b19278c62ce48447f9264d,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2019-11-24 00:00:00.052473+00:00,51,3,0,0.04862,8ca3f3c62ebdd4b5e311d475c6d5ca2f,e07fc4436e7c7833115dbfc8ff770109,1e58eb4fb1a017cf89826db60ffb7d67,fe64471cf5b19278c62ce48447f9264d,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2019-11-24 00:00:00.503685+00:00,52,1,0,0.04298,8ca3f3c62ebdd4b5e311d475c6d5ca2f,e07fc4436e7c7833115dbfc8ff770109,1e58eb4fb1a017cf89826db60ffb7d67,fe64471cf5b19278c62ce48447f9264d,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2019-11-24 00:00:00.505885+00:00,35,2,0,0.00589,8ca3f3c62ebdd4b5e311d475c6d5ca2f,e07fc4436e7c7833115dbfc8ff770109,1e58eb4fb1a017cf89826db60ffb7d67,fe64471cf5b19278c62ce48447f9264d,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0




## Unit Tests - Validate Against Paper Statistics

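Validate that the full datasets match the statistics from Table 1 of the Open Bandit Dataset paper. In the recorded run, five of the six policy × campaign combinations match exactly; the BTS / ALL file contains 12,357,200 rows versus the 12,168,084 expected (+1.55%), while its CTR still matches.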

In [8]:
# Example: Use the module to load data
# The module functions are imported here so this cell also works on a fresh
# kernel (Restart & Run All), independent of where the import cell sits.
# load_data / compute_ctr are aliased so they do not replace the notebook-level
# functions of the same name.
from data_loader import (
    load_data as load_data_module,
    compute_ctr as compute_ctr_module,
    get_dataset_stats,
    load_all_campaigns,
    load_all_policies,
)

print("="*80)
print("DATA LOADER MODULE - QUICK DEMO")
print("="*80)

# Example 1: Load sample data
print("\n1️⃣  Load sample data using module:")
df_sample = load_data_module('random', 'all', 'sample')
ctr_sample = compute_ctr_module(df_sample)
print(f"   CTR: {ctr_sample:.4f} ({ctr_sample*100:.2f}%)")

# Example 2: Get comprehensive stats
print("\n2️⃣  Get comprehensive statistics:")
stats = get_dataset_stats(df_sample)
print(f"   Records: {stats['n_records']:,}")
print(f"   Clicks: {stats['n_clicks']:,}")
print(f"   Unique Actions: {stats['n_unique_actions']}")
print(f"   Unique Positions: {stats['n_positions']}")

# Example 3: Load all campaigns
print("\n3️⃣  Load all campaigns for BTS policy:")
campaigns = load_all_campaigns('bts', 'sample')
for name, df in campaigns.items():
    # Entries that are None are skipped
    if df is not None:
        ctr = compute_ctr_module(df)
        print(f"   {name.upper():6s}: {len(df):,} records, CTR = {ctr:.4f}")

# Example 4: Load all policies
print("\n4️⃣  Load both policies for ALL campaign:")
policies = load_all_policies('all', 'sample')
for name, df in policies.items():
    # Entries that are None are skipped
    if df is not None:
        ctr = compute_ctr_module(df)
        print(f"   {name.upper():6s}: {len(df):,} records, CTR = {ctr:.4f}")

print("\n" + "="*80)
print("✅ Module demo completed!")
print("="*80)

INFO:obp.dataset.real:When `data_path` is not given, this class downloads the small-sized version of Open Bandit Dataset.
INFO:obp.dataset.real:When `data_path` is not given, this class downloads the small-sized version of Open Bandit Dataset.
INFO:obp.dataset.real:When `data_path` is not given, this class downloads the small-sized version of Open Bandit Dataset.
INFO:obp.dataset.real:When `data_path` is not given, this class downloads the small-sized version of Open Bandit Dataset.


DATA LOADER MODULE - QUICK DEMO

1️⃣  Load sample data using module:
✅ Loaded 10,000 records from sample dataset
   Policy: RANDOM, Campaign: ALL
   CTR: 0.0038 (0.38%)

2️⃣  Get comprehensive statistics:
   Records: 10,000
   Clicks: 38
   Unique Actions: 80
   Unique Positions: 3

3️⃣  Load all campaigns for BTS policy:
✅ Loaded 10,000 records from sample dataset
   Policy: BTS, Campaign: ALL
✅ Loaded 10,000 records from sample dataset
   Policy: BTS, Campaign: MEN
✅ Loaded 10,000 records from sample dataset
   Policy: BTS, Campaign: WOMEN
   ALL   : 10,000 records, CTR = 0.0042
   MEN   : 10,000 records, CTR = 0.0069
   WOMEN : 10,000 records, CTR = 0.0046

4️⃣  Load both policies for ALL campaign:


INFO:obp.dataset.real:When `data_path` is not given, this class downloads the small-sized version of Open Bandit Dataset.
INFO:obp.dataset.real:When `data_path` is not given, this class downloads the small-sized version of Open Bandit Dataset.


✅ Loaded 10,000 records from sample dataset
   Policy: RANDOM, Campaign: ALL
✅ Loaded 10,000 records from sample dataset
   Policy: BTS, Campaign: ALL
   RANDOM: 10,000 records, CTR = 0.0038
   BTS   : 10,000 records, CTR = 0.0042

✅ Module demo completed!


## 📦 Data Loader Module Summary

The `data_loader.py` module provides a clean, production-ready interface for loading the Open Bandit Dataset.

### ✅ Key Features:
- **Unified API** - One function for sample and full datasets
- **All Combinations** - 2 policies × 3 campaigns × 2 dataset types = 12 configs
- **Helper Functions** - `compute_ctr()`, `get_dataset_stats()`, and the bulk loaders `load_all_campaigns()` and `load_all_policies()`
- **Parameter Validation** - Clear error messages
- **Auto Column Mapping** - Consistent column names across sources
- **Well Documented** - Comprehensive docstrings and examples

### 📊 Supported Combinations:
- **Policies**: `random`, `bts`
- **Campaigns**: `all`, `men`, `women`
- **Datasets**: `sample` (10k), `full` (CSV)

### 🚀 Usage:
```python
from data_loader import load_data, compute_ctr

# Load any combination
df = load_data('bts', 'men', 'full')
ctr = compute_ctr(df)
```

### 📁 Files Created:
- `data_loader.py` - Main module
- `data_loader_README.md` - Complete documentation
- `data_loader_examples.py` - 9 usage examples
- `DATA_LOADER_SUMMARY.md` - This summary

In [7]:
# Import from the data_loader module
# load_data and compute_ctr are imported under *_module aliases so they do not
# overwrite the notebook-level load_data / compute_ctr defined in earlier cells.
from data_loader import (
    load_data as load_data_module,
    compute_ctr as compute_ctr_module,
    get_dataset_stats,
    load_all_campaigns,
    load_all_policies
)

# The list below shows the module's own function names, not the aliases used above
print("✅ Successfully imported data_loader module!")
print("\nAvailable functions:")
print("  - load_data(behavior_policy, campaign, dataset_type)")
print("  - compute_ctr(df)")
print("  - get_dataset_stats(df)")
print("  - load_all_campaigns(behavior_policy, dataset_type)")
print("  - load_all_policies(campaign, dataset_type)")

✅ Successfully imported data_loader module!

Available functions:
  - load_data(behavior_policy, campaign, dataset_type)
  - compute_ctr(df)
  - get_dataset_stats(df)
  - load_all_campaigns(behavior_policy, dataset_type)
  - load_all_policies(campaign, dataset_type)


## Using the Data Loader Module

The `data_loader.py` module provides a clean, reusable interface for loading the Open Bandit Dataset.