In [2]:
import pandas as pd
import numpy as np

In [3]:
def create_sample_data(n_records=1000, seed=42):
    """
    Create sample auction log data for demonstration.

    Values are drawn from a private ``numpy.random.RandomState`` seeded with
    ``seed``, so repeated calls with the same arguments return identical
    frames and the global NumPy random state is left untouched.

    Parameters:
    n_records (int): Number of auction rows to generate (default 1000).
    seed (int): Seed for the private random generator (default 42).

    Returns:
    pandas.DataFrame: Sample auction logs with columns auction_id,
        advertiser_id, bid_amount, won_auction, category and time_of_day.
    """
    # A local RandomState produces the same stream as np.random.seed(seed)
    # followed by the module-level np.random functions, but does not reseed
    # the process-wide generator that other cells may depend on.
    rng = np.random.RandomState(seed)

    # The draws below must stay in this order: each one advances the same
    # generator, so reordering would change every generated column.
    sample_data = {
        'auction_id': range(n_records),
        # randint's upper bound is exclusive, so ids fall in 1..50
        'advertiser_id': rng.randint(1, 51, n_records),
        # gamma(shape=2, scale=3) scaled by 10 gives right-skewed positive bids
        'bid_amount': rng.gamma(2, 3, n_records) * 10,
        # each row is flagged as won with probability 0.3
        'won_auction': rng.choice([True, False], n_records, p=[0.3, 0.7]),
        'category': rng.choice(['Electronics', 'Fashion', 'Food', 'Travel'], n_records),
        'time_of_day': rng.choice(['Morning', 'Afternoon', 'Evening', 'Night'], n_records)
    }

    return pd.DataFrame(sample_data)

In [4]:
# Generate the demo auction logs once; the following cells reuse this frame.
sample_logs = create_sample_data()

In [5]:
# Show the generated logs as this cell's output.
sample_logs

Unnamed: 0,auction_id,advertiser_id,bid_amount,won_auction,category,time_of_day
0,0,39,29.705951,True,Electronics,Evening
1,1,29,15.375183,False,Travel,Night
2,2,15,57.044861,False,Food,Morning
3,3,43,94.048075,False,Food,Afternoon
4,4,8,49.660315,False,Food,Night
...,...,...,...,...,...,...
995,995,26,69.464953,False,Fashion,Evening
996,996,34,35.502099,True,Fashion,Morning
997,997,45,96.090825,False,Travel,Afternoon
998,998,6,37.051624,False,Travel,Morning


In [15]:
def create_auction_cpt(logs_df):
    """
    Creates a Conditional Probability Table matrix for auction outcomes with row totals.

    Bids are split into three equal-frequency ranges (terciles) labelled
    Low / Medium / High, and the share of won auctions is computed for every
    (category, bid range) pair. The input frame is not modified.

    Parameters:
    logs_df (pandas.DataFrame): DataFrame containing auction logs with columns:
        - bid_amount: amount bid by advertiser
        - category: ad category
        - won_auction: boolean indicating if advertiser won auction

    Returns:
    pandas.DataFrame: CPT matrix with one row per category and columns
        Low, Medium, High holding P(Win|Category, Bid_Range). Combinations
        with no observations are reported as 0. The extra 'Total' column is
        the plain sum of the three values in the row; it is NOT itself a
        probability (it is not the marginal P(Win|Category)).

    Raises:
    ValueError: propagated from pandas.qcut when the bid amounts do not
        produce three distinct tercile edges (e.g. many identical bids).
    """
    # Bucket bids on a separate Series so the caller's frame does not
    # silently gain a 'bid_range' column. The rename gives the crosstab
    # index level the name that unstack() refers to below.
    bid_range = pd.qcut(
        logs_df['bid_amount'], q=3, labels=['Low', 'Medium', 'High']
    ).rename('bid_range')

    # Row-normalised counts: each (category, bid_range) row sums to 1
    # across the won_auction outcomes.
    outcome_shares = pd.crosstab(
        [logs_df['category'], bid_range],
        logs_df['won_auction'],
        normalize='index'
    )

    if logs_df['won_auction'].any():
        # Only keep probability of winning
        win_prob = outcome_shares[True]
    else:
        # With no wins at all the crosstab has no True column; the win
        # probability is 0 for every observed combination.
        win_prob = pd.Series(0.0, index=outcome_shares.index)

    # Reshape into a category x bid_range matrix; combinations that never
    # occurred come out as NaN and are reported as 0.
    cpt_matrix = win_prob.unstack(level='bid_range').fillna(0)

    # Add total column (sum of the row's conditional probabilities)
    cpt_matrix['Total'] = cpt_matrix.sum(axis=1)

    return cpt_matrix

In [16]:
# Build the win-probability table for the sample logs and display it.
create_auction_cpt(sample_logs)

bid_range,Low,Medium,High,Total
category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Electronics,0.282353,0.265823,0.285714,0.83389
Fashion,0.337838,0.3875,0.229167,0.954505
Food,0.365591,0.241379,0.22619,0.833161
Travel,0.256098,0.290698,0.164835,0.71163
