In [None]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from scipy import stats
from scipy.stats import entropy, pearsonr, ttest_rel

import matplotlib.pyplot as plt
import seaborn as sns

import ast
import re

# Data Loading and Preparation

In [None]:
# Load intrinsicality experiment data
# Each model has two CSV exports (file suffix _1 / _2). Paths are relative to
# the notebook's working directory, so this cell must be run from that folder.
gpt_1 = pd.read_csv('../../results/Auction/Intrinsicality/gpt-4o_1.csv')
gpt_2 = pd.read_csv('../../results/Auction/Intrinsicality/gpt-4o_2.csv')

claude_3_5_1 = pd.read_csv('../../results/Auction/Intrinsicality/claude-3-5_1.csv')
claude_3_5_2 = pd.read_csv('../../results/Auction/Intrinsicality/claude-3-5_2.csv')

claude_3_7_1 = pd.read_csv('../../results/Auction/Intrinsicality/claude-3-7_1.csv')
claude_3_7_2 = pd.read_csv('../../results/Auction/Intrinsicality/claude-3-7_2.csv')

In [None]:
# Load instruction experiment data (risk preferences)
# One CSV per model and instruction ("seek" / "averse" in the file name).
# The matching 'risk' label column is attached in a later cell.
claude_risk_seek_3_5 = pd.read_csv('../../results/Auction/Instruction/claude-3-5-seek.csv')
claude_risk_averse_3_5 = pd.read_csv('../../results/Auction/Instruction/claude-3-5-averse.csv')
claude_risk_seek_3_7 = pd.read_csv('../../results/Auction/Instruction/claude-3-7-seek.csv')
claude_risk_averse_3_7 = pd.read_csv('../../results/Auction/Instruction/claude-3-7-averse.csv')
gpt_risk_seek = pd.read_csv('../../results/Auction/Instruction/gpt-4o-seek.csv')
gpt_risk_averse = pd.read_csv('../../results/Auction/Instruction/gpt-4o-averse.csv')

In [None]:
def clean_and_prepare_data(df_list, model_name, rounds_per_group=60):
    """Standardize raw experiment frames in place for analysis.

    For every frame in ``df_list`` this:
      * copies ``profit_llm`` / ``reserve_price_llm`` into ``profit`` /
        ``reserve_price`` when the ``*_llm`` column exists (otherwise the
        existing column is kept as is),
      * adds a 1-based ``round`` counter that restarts every
        ``rounds_per_group`` rows,
      * adds a ``model`` column holding ``model_name``,
      * renames ``"Bidder Group"`` to ``bidder_group``,
      * drops whichever ``*_llm`` source columns are present.

    Args:
        df_list: Iterable of DataFrames; each one is mutated in place.
        model_name: Label written to the ``model`` column.
        rounds_per_group: Number of consecutive rows that make up one block
            of rounds (default 60, the value previously hard-coded).

    Returns:
        None. Callers rely on the in-place mutation.

    Raises:
        KeyError: If a frame has neither ``<name>_llm`` nor ``<name>`` for
            ``profit`` or ``reserve_price``.
    """
    for df in df_list:
        for target in ("profit", "reserve_price"):
            source = f"{target}_llm"
            if source in df.columns:
                # Plain column assignment replaces the column outright;
                # `.loc[:, col] = ...` would write into the existing column.
                df[target] = df[source]
            elif target not in df.columns:
                raise KeyError(f"frame has neither '{source}' nor '{target}'")

        # Number rounds by row position so the result does not depend on the
        # frame's index labels (a filtered or re-indexed frame still counts 1..N).
        df["round"] = np.arange(len(df)) % rounds_per_group + 1
        df["model"] = model_name
        df.rename(columns={"Bidder Group": "bidder_group"}, inplace=True)

        # Drop each source column independently; previously nothing was dropped
        # unless both were present.
        leftover = [col for col in ("profit_llm", "reserve_price_llm") if col in df.columns]
        if leftover:
            df.drop(columns=leftover, inplace=True)

# Clean and prepare all datasets
# clean_and_prepare_data mutates the frames it is given and returns nothing, so
# the loaded variables themselves are updated. Intrinsicality and instruction
# frames of the same model share one model label; the instruction frames are
# told apart by the 'risk' column added in the next cell.
clean_and_prepare_data([gpt_1, gpt_2], "gpt-4o")
clean_and_prepare_data([claude_3_5_1, claude_3_5_2], "claude-3-5")
clean_and_prepare_data([claude_3_7_1, claude_3_7_2], "claude-3-7")
clean_and_prepare_data([gpt_risk_averse, gpt_risk_seek], "gpt-4o")
clean_and_prepare_data([claude_risk_averse_3_5, claude_risk_seek_3_5], "claude-3-5")
clean_and_prepare_data([claude_risk_averse_3_7, claude_risk_seek_3_7], "claude-3-7")


In [None]:
# Add risk preference labels
# Each instruction-experiment frame gets a constant 'risk' column matching the
# seek/averse part of the file it was loaded from. Intrinsicality frames get no
# such column here; a later cell fills the missing values with 'Intrinsicality'.
claude_risk_seek_3_5['risk'] = 'seeking'
claude_risk_averse_3_5['risk'] = 'averse'
claude_risk_seek_3_7['risk'] = 'seeking'
claude_risk_averse_3_7['risk'] = 'averse'
gpt_risk_seek['risk'] = 'seeking'
gpt_risk_averse['risk'] = 'averse'

In [None]:
# Load human experiment data
human = pd.read_csv('../../human_experiment/auction_human_data.csv')

# Align column names with the LLM frames.
human = human.rename(columns={"myProfit": "profit", "rPrice": "reserve_price", "Bidder Group": "bidder_group", "newBids": "bids"})
# 1-based round counter restarting every 60 rows; relies on the default
# 0..N-1 index produced by read_csv.
human["round"] = human.index % 60 + 1
human["model"] = "human"

# Per-(bidder_group, round) bidding context that is later merged onto every model.
# The rename is chained (not in place) so bid_info is an independent frame;
# an in-place rename on a column slice of `human` can raise SettingWithCopyWarning.
bid_info = human[['bidder_group', 'numBidders', 'highBid', 'round', 'bids']].rename(
    columns={'numBidders': 'num_bidder'}
)

# Keep only the columns shared with the LLM result frames.
human = human[["bidder_group","profit", "reserve_price", "round", "model"]]

In [None]:
# Combine intrinsicality datasets
# Only the first export of each model (the *_1 frames) goes into the combined
# frame; the *_2 frames are not included here.
llm_datasets = {
    "gpt-4o": gpt_1,
    "claude-3-5": claude_3_5_1,
    "claude-3-7": claude_3_7_1
}

# Stack the selected frames row-wise with a fresh 0..N-1 index.
combined_df = pd.concat(llm_datasets.values(), ignore_index=True)

In [None]:
# Load and merge bidder profiles
# NOTE(review): this cell reassigns combined_df from itself, so it is not
# idempotent -- re-run the "Combine intrinsicality datasets" cell first if it
# has to be executed again.
profiles = pd.read_excel('../profile_generation/umich_undergraduate_profiles.xlsx')
profiles['stem'] = profiles['stem'].fillna(0)
# Build the join key: drop the first character of `id`, parse the rest as an
# integer and format it as "S.<n>".
profiles["bidder_group"] = profiles["id"].apply(lambda x: f"S.{int(x[1:])}")

# Merge profiles with LLM data
# Default (inner) merge: LLM rows without a matching profile are dropped.
combined_df = combined_df.merge(profiles, on="bidder_group")

# Human rows are appended after the profile merge, so they carry NaN in the
# profile columns.
combined_df = pd.concat([combined_df, human], ignore_index=True)
# Attach the bidding context to every row by (bidder_group, round); this is
# again an inner merge, so rows with no matching pair in bid_info are dropped.
combined_df = combined_df.merge(bid_info, on=["bidder_group", "round"]).sort_values(["model", "bidder_group", "round"])


# Analysis Helper Functions

In [None]:
def parse_bid_prices(bid_prices_str, return_second_only=False):
    """
    Parse a bid-price value and return the highest and second-highest bids.

    Accepted inputs:
      * a list/tuple of numbers (used directly),
      * a string holding a Python literal list/tuple/set of numbers or a single
        number (parsed with ``ast.literal_eval``),
      * any other string, from which every unsigned decimal number is scraped
        with a regex and converted to ``float``.
    Anything else (``None``, NaN, ``pd.NA``, bare numbers, ...) counts as
    "no bids".

    Args:
        bid_prices_str: String or list representation of bid prices.
        return_second_only: If True, return only the second-highest bid.

    Returns:
        ``(highest, second_highest)``, or just ``second_highest`` when
        ``return_second_only`` is True. With a single bid that bid fills both
        slots. With no usable bids the result is ``(0, 0)`` (or ``0``).
    """
    no_bids = 0 if return_second_only else (0, 0)

    if isinstance(bid_prices_str, (list, tuple)):
        # Handle real sequences before any scalar-style check: calling
        # pd.isna() on a list yields an array whose truth value is ambiguous,
        # which previously made every multi-bid list come back as "no bids".
        bids = list(bid_prices_str)
    elif isinstance(bid_prices_str, str):
        if bid_prices_str == '':
            return no_bids
        try:
            parsed = ast.literal_eval(bid_prices_str)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            parsed = None

        if isinstance(parsed, (list, tuple, set, frozenset)):
            bids = list(parsed)
        elif isinstance(parsed, (int, float)) and not isinstance(parsed, bool):
            # A lone number is a single bid.
            bids = [parsed]
        else:
            # Not a literal sequence of bids: fall back to scraping numbers.
            bids = [float(x) for x in re.findall(r'\d+\.?\d*', bid_prices_str)]
    else:
        return no_bids

    if len(bids) == 0:
        return no_bids
    if len(bids) == 1:
        only_bid = bids[0]
        return only_bid if return_second_only else (only_bid, only_bid)

    try:
        ranked = sorted(bids, reverse=True)
    except TypeError:
        # Elements that cannot be ordered (e.g. mixed None/str) are unusable.
        return no_bids
    return ranked[1] if return_second_only else (ranked[0], ranked[1])

def rp_entropy(series):
    """Shannon entropy, in bits, of the integer-cast values in ``series``.

    Values are cast with ``astype(int)`` and tallied with ``np.bincount``
    (which requires non-negative integers); the observed frequencies are then
    passed to ``scipy.stats.entropy`` with ``base=2``.
    """
    histogram = np.bincount(series.astype(int))
    occupied_bins = histogram[histogram > 0]
    frequencies = occupied_bins / histogram.sum()
    return entropy(frequencies, base=2)

def extract_bid_prices(bid_string: str):
    """Return, in order of appearance, the integers that follow each ``d:`` in ``bid_string``."""
    digit_runs = re.findall(r'd:(\d+)', bid_string)
    return list(map(int, digit_runs))

def analyze_rprice_bidder_correlation(data, rprice_col='reserve_price', bidder_col='num_bidder'):
    """
    Classify how reserve price relates to the number of bidders.

    Rows with a missing value in either column are dropped. The overall Pearson
    correlation is computed, then for each distinct bidder count (except the
    largest) the correlation is recomputed on the rows strictly above that
    count to look for a region where the two variables look independent.

    NOTE(review): a function with this same name is defined again in a later
    cell of this notebook; after "Run All" the later definition is the one in
    effect. Keep the two in sync or delete one of them.

    Args:
        data: DataFrame containing ``rprice_col`` and ``bidder_col``.
        rprice_col: Name of the reserve-price column.
        bidder_col: Name of the bidder-count column.

    Returns:
        A label string: ``'Insufficient Data'`` (fewer than 10 complete rows),
        ``'Cannot calculate'`` (correlation undefined, e.g. a constant column
        or non-numeric data), or one of the pattern labels
        ``'Linear+ then indep>T'``, ``'Linear positive (r=..)'``,
        ``'Independent >T bidders'``, ``'Independent (r=..)'``,
        ``'Weak pattern (r=..)'``.
    """
    clean_data = data[[rprice_col, bidder_col]].dropna()

    if len(clean_data) < 10:  # minimum data points
        return 'Insufficient Data'

    rprice = clean_data[rprice_col]
    bidders = clean_data[bidder_col]

    # Pearson's r is undefined when either variable is constant; scipy then
    # returns NaN, which used to fall through to 'Weak pattern (r=nan)'.
    if rprice.nunique() < 2 or bidders.nunique() < 2:
        return 'Cannot calculate'

    try:
        linear_corr, linear_p = pearsonr(rprice, bidders)
    except (ValueError, TypeError):
        return 'Cannot calculate'
    if np.isnan(linear_corr):
        return 'Cannot calculate'

    best_threshold = None
    best_independence_score = 0

    # Try every distinct bidder count but the largest as a cut-off.
    for threshold in sorted(bidders.unique())[:-1]:
        above_threshold = clean_data[clean_data[bidder_col] > threshold]

        if len(above_threshold) < 5:  # need minimum observations above threshold
            continue
        above_rprice = above_threshold[rprice_col]
        above_bidders = above_threshold[bidder_col]
        if above_rprice.nunique() < 2 or above_bidders.nunique() < 2:
            continue  # correlation undefined on this subset

        try:
            above_corr, above_p = pearsonr(above_rprice, above_bidders)
        except (ValueError, TypeError):
            continue
        if np.isnan(above_corr):
            continue

        # Independence score: closer to 0 correlation scores higher, halved
        # when the correlation is significant at the 5% level.
        independence_score = (1 - abs(above_corr)) * (1 if above_p > 0.05 else 0.5)
        if independence_score > best_independence_score:
            best_independence_score = independence_score
            best_threshold = threshold

    independent_tail = best_threshold is not None and best_independence_score > 0.7

    if linear_corr > 0.3 and linear_p < 0.05:
        if independent_tail:
            return f'Linear+ then indep>{best_threshold}'
        return f'Linear positive (r={linear_corr:.2f})'
    if independent_tail:
        return f'Independent >{best_threshold} bidders'
    if abs(linear_corr) < 0.2:
        return f'Independent (r={linear_corr:.2f})'
    return f'Weak pattern (r={linear_corr:.2f})'

In [None]:
def calculate_comprehensive_metrics(df, include_human_ks=True, group_by_risk=False):
    """
    Build a one-row-per-source summary table of reserve-price metrics.

    Each row holds the mean, population standard deviation and entropy of
    ``reserve_price`` plus the reserve-price/bidder-count pattern label. When
    the ``sale`` / ``premium_capture`` columns exist and their mean is not NaN,
    the corresponding rate is added. With ``include_human_ks`` the two-sample
    KS statistic against the rows where ``model == 'human'`` is added
    (``'N/A'`` for the human row itself, NaN when the frame has no human rows).

    Args:
        df: DataFrame with at least ``model`` and ``reserve_price`` columns.
        include_human_ks: Whether to add the 'KS Distance to Human' column.
        group_by_risk: Split by (model, risk) instead of model alone; only
            honoured when ``df`` has a ``risk`` column.

    Returns:
        DataFrame with one row per model (or per model/risk combination).
    """
    split_by_risk = group_by_risk and 'risk' in df.columns

    # Enumerate the sources to summarise.
    if split_by_risk:
        source_keys = df.groupby(['model', 'risk']).size().reset_index()
    else:
        source_keys = pd.DataFrame({'model': df['model'].unique()})

    # Baseline sample for the KS comparison.
    if include_human_ks:
        human_rprice = df[df['model'] == 'human']['reserve_price'].values

    optional_rates = (
        ('sale', 'Sale Through Rate'),
        ('premium_capture', 'Premium Capture Rate'),
    )

    summary_rows = []
    for _, key in source_keys.iterrows():
        model = key['model']
        if split_by_risk:
            risk_type = key['risk']
            model_data = df[(df['model'] == model) & (df['risk'] == risk_type)]
            # Intrinsicality rows are labelled with the bare model name.
            source_label = model if risk_type == 'Intrinsicality' else f"{model} ({risk_type})"
        else:
            model_data = df[df['model'] == model]
            source_label = model

        if len(model_data) == 0:
            continue

        prices = model_data['reserve_price'].values

        # Reserve-price vs bidder-count pattern; either column name is accepted.
        bidder_col = next(
            (name for name in ('num_bidder', 'bidder_num') if name in model_data.columns),
            None,
        )
        if bidder_col is None:
            correlation_pattern = 'No bidder data'
        else:
            correlation_pattern = analyze_rprice_bidder_correlation(model_data, 'reserve_price', bidder_col)

        row = {
            'Source': source_label,
            'Mean': round(prices.mean(), 3),
            'Std': round(prices.std(), 3),
            'Entropy': round(rp_entropy(model_data['reserve_price']), 3),
            'rPrice-Bidder Correlation': correlation_pattern,
        }

        # Rates are reported only when the column exists and the mean is defined.
        for column, label in optional_rates:
            rate = model_data[column].mean() if column in model_data.columns else np.nan
            if not np.isnan(rate):
                row[label] = round(rate, 3)

        if include_human_ks:
            if model == 'human':
                row['KS Distance to Human'] = 'N/A'
            elif len(human_rprice) > 0:
                row['KS Distance to Human'] = round(stats.ks_2samp(prices, human_rprice).statistic, 3)
            else:
                # No human rows in df: the distance is undefined.
                row['KS Distance to Human'] = round(np.nan, 3)

        # Add risk type for risk experiments
        if split_by_risk:
            row['style'] = risk_type

        summary_rows.append(row)

    return pd.DataFrame(summary_rows)

In [None]:
def analyze_rprice_bidder_correlation(data, rprice_col='reserve_price', bidder_col='num_bidder'):
    """
    Classify how reserve price relates to the number of bidders.

    Rows with a missing value in either column are dropped. The overall Pearson
    correlation is computed, then for each distinct bidder count (except the
    largest) the correlation is recomputed on the rows strictly above that
    count to look for a region where the two variables look independent.

    NOTE(review): this cell redefines a function that is already declared in
    the "Analysis Helper Functions" cell earlier in this notebook; after
    "Run All" this later definition is the one in effect. Keep the two in sync
    or delete one of them.

    Args:
        data: DataFrame containing ``rprice_col`` and ``bidder_col``.
        rprice_col: Name of the reserve-price column.
        bidder_col: Name of the bidder-count column.

    Returns:
        A label string: ``'Insufficient Data'`` (fewer than 10 complete rows),
        ``'Cannot calculate'`` (correlation undefined, e.g. a constant column
        or non-numeric data), or one of the pattern labels
        ``'Linear+ then indep>T'``, ``'Linear positive (r=..)'``,
        ``'Independent >T bidders'``, ``'Independent (r=..)'``,
        ``'Weak pattern (r=..)'``.
    """
    clean_data = data[[rprice_col, bidder_col]].dropna()

    if len(clean_data) < 10:  # Need minimum data points
        return 'Insufficient Data'

    rprice = clean_data[rprice_col]
    bidders = clean_data[bidder_col]

    # Pearson's r is undefined when either variable is constant; scipy then
    # returns NaN, which used to fall through to 'Weak pattern (r=nan)'.
    if rprice.nunique() < 2 or bidders.nunique() < 2:
        return 'Cannot calculate'

    try:
        linear_corr, linear_p = pearsonr(rprice, bidders)
    except (ValueError, TypeError):
        return 'Cannot calculate'
    if np.isnan(linear_corr):
        return 'Cannot calculate'

    best_threshold = None
    best_independence_score = 0

    # Try every distinct bidder count but the largest as a cut-off.
    for threshold in sorted(bidders.unique())[:-1]:
        above_threshold = clean_data[clean_data[bidder_col] > threshold]

        if len(above_threshold) < 5:  # Need minimum observations above threshold
            continue
        above_rprice = above_threshold[rprice_col]
        above_bidders = above_threshold[bidder_col]
        if above_rprice.nunique() < 2 or above_bidders.nunique() < 2:
            continue  # correlation undefined on this subset

        try:
            above_corr, above_p = pearsonr(above_rprice, above_bidders)
        except (ValueError, TypeError):
            continue
        if np.isnan(above_corr):
            continue

        # Independence score: closer to 0 correlation scores higher, halved
        # when the correlation is significant at the 5% level.
        independence_score = (1 - abs(above_corr)) * (1 if above_p > 0.05 else 0.5)
        if independence_score > best_independence_score:
            best_independence_score = independence_score
            best_threshold = threshold

    independent_tail = best_threshold is not None and best_independence_score > 0.7

    if linear_corr > 0.3 and linear_p < 0.05:
        if independent_tail:
            return f'Linear+ then indep>{best_threshold}'
        return f'Linear positive (r={linear_corr:.2f})'
    if independent_tail:
        return f'Independent >{best_threshold} bidders'
    if abs(linear_corr) < 0.2:
        return f'Independent (r={linear_corr:.2f})'
    return f'Weak pattern (r={linear_corr:.2f})'

# Data Processing for Analysis

In [None]:
# Process bid information
# bid_price_list: integers that follow each "d:" marker in the raw `bids` string.
combined_df['bid_price_list'] = combined_df['bids'].apply(extract_bid_prices)
# second_highest_bid: second-largest extracted bid, or 0 when fewer than two
# bids were extracted.
combined_df['second_highest_bid'] = combined_df['bid_price_list'].apply(lambda x: sorted(x)[-2] if len(x) > 1 else 0)

In [None]:
# Calculate sale and premium capture metrics
# sale: the reserve price does not exceed the highest bid.
combined_df['sale'] = combined_df['reserve_price'] <= combined_df['highBid']
# premium_capture: a sale where the reserve price is also strictly above the
# second-highest bid.
combined_df['premium_capture'] = (combined_df['reserve_price'] <= combined_df['highBid']) & (combined_df['reserve_price'] > combined_df['second_highest_bid'])

In [None]:
# Calculate rates by bidder group
# Mean of each boolean flag per (model, bidder_group), i.e. the share of rows
# in that group where the flag is True.
sale_rate = combined_df.groupby(['model', 'bidder_group'])['sale'].mean().reset_index()
premium_capture_rate = combined_df.groupby(['model', 'bidder_group'])['premium_capture'].mean().reset_index()

# Reserve Price Analysis

In [None]:
def create_time_segment(round_num):
    """Return the time-segment label for a round number.

    Rounds up to 20 map to '0-20', rounds up to 40 to '21-40', and everything
    else to '41-60'.
    """
    for upper_bound, label in ((20, '0-20'), (40, '21-40')):
        if round_num <= upper_bound:
            return label
    return '41-60'

def linear_regression_analysis(y, x):
    """Regress ``y`` on ``x`` (OLS with intercept) and summarise the slope.

    Args:
        y: Dependent variable (pandas Series or 1-D array).
        x: Explanatory variable of the same length.

    Returns:
        ``(coef, p_value, significance)`` where ``coef`` is the slope on ``x``,
        ``p_value`` its p-value, and ``significance`` is ``'***'``, ``'**'``,
        ``'*'`` or ``''`` for p < 0.001 / 0.01 / 0.05 / otherwise.
        ``(nan, nan, '')`` is returned when either input has 2 or fewer
        observations, either input has zero variance, or the fit fails.
    """
    no_result = (np.nan, np.nan, '')
    try:
        if not (len(x) > 2 and len(y) > 2 and x.var() > 0 and y.var() > 0):
            return no_result

        X = sm.add_constant(x)
        model = sm.OLS(y, X).fit()

        # add_constant puts the intercept first, so position 1 is the slope on x.
        # np.asarray makes this work whether statsmodels returns a Series
        # (pandas inputs) or a plain ndarray (array inputs).
        coef = np.asarray(model.params)[1]
        p_value = np.asarray(model.pvalues)[1]
    except Exception:
        # Best effort: a failed fit yields "no result". Catching Exception
        # (not a bare except) lets KeyboardInterrupt/SystemExit propagate.
        return no_result

    if p_value < 0.001:
        significance = '***'
    elif p_value < 0.01:
        significance = '**'
    elif p_value < 0.05:
        significance = '*'
    else:
        significance = ''

    return coef, p_value, significance

# Add time segments to data
# Label each row '0-20', '21-40' or '41-60' from its round number; the temporal
# analysis below filters on this column.
combined_df['time_segment'] = combined_df['round'].apply(create_time_segment)

# Temporal analysis by model and time segment
def analyze_temporal_patterns(df, models_to_analyze=None):
    """Summarise reserve pricing per model and time segment.

    For every (model, time segment) pair that has rows, reports the mean and
    standard deviation of ``reserve_price`` and the slope, p-value and
    significance stars from regressing ``reserve_price`` on ``round`` and, when
    a bidder-count column (``num_bidder`` or ``bidder_num``) exists, on that
    column.

    Args:
        df: DataFrame with ``model``, ``time_segment``, ``round`` and
            ``reserve_price`` columns.
        models_to_analyze: Models to include; defaults to every distinct value
            of ``df['model']``.

    Returns:
        DataFrame with one row per non-empty (model, time segment) pair.
    """
    if models_to_analyze is None:
        models_to_analyze = df['model'].unique()

    def _round4(value):
        # Keep NaN as NaN; otherwise round to four decimals.
        return np.nan if np.isnan(value) else round(value, 4)

    rows = []
    for agent_type in models_to_analyze:
        for time_seg in ('0-20', '21-40', '41-60'):
            segment_data = df[(df['model'] == agent_type) & (df['time_segment'] == time_seg)]
            if len(segment_data) == 0:
                continue

            reserve = segment_data['reserve_price']

            # Trend of the reserve price over rounds within the segment.
            coef_round, p_round, sig_round = linear_regression_analysis(reserve, segment_data['round'])

            # Sensitivity to the number of bidders, if that column is present.
            bidder_col = next(
                (name for name in ('num_bidder', 'bidder_num') if name in segment_data.columns),
                None,
            )
            if bidder_col is None:
                coef_bidnum, p_bidnum, sig_bidnum = np.nan, np.nan, ''
            else:
                coef_bidnum, p_bidnum, sig_bidnum = linear_regression_analysis(reserve, segment_data[bidder_col])

            rows.append({
                'Agent_Type': agent_type,
                'Time_Segment': time_seg,
                'Avg_Reserve_Price': round(reserve.mean(), 2),
                'Std_Reserve_Price': round(reserve.std(), 2),
                'Coef_rPrice_Round': _round4(coef_round),
                'P_val_Round': _round4(p_round),
                'Sig_Round': sig_round,
                'Coef_rPrice_BidNum': _round4(coef_bidnum),
                'P_val_BidNum': _round4(p_bidnum),
                'Sig_BidNum': sig_bidnum,
                'Count': len(segment_data),
            })

    return pd.DataFrame(rows)

# Run temporal analysis
# One row per (model, time segment) found in combined_df, printed as plain
# text without the index.
temporal_results = analyze_temporal_patterns(combined_df)
print("Temporal Analysis Results:")
print(temporal_results.to_string(index=False))

# Bidder Response Analysis

In [None]:
def plot_literature_benchmarks():
    """Draw the hard-coded literature benchmark curves.

    Plots reserve price against number of bidders (1, 4, 7, 10) for the three
    benchmark series defined below, one line per series.

    Returns:
        ``(fig, ax)`` of the created matplotlib figure.
    """
    bidder_counts = [1, 4, 7, 10]

    # Benchmark reserve prices, one value per entry of bidder_counts.
    benchmark_prices = {
        'Davis et al. (2011)': [18.4, 29.5, 38.7, 48.2],
        'Wisconsin': [19.2, 25.0, 37.8, 45.5],
        'Michigan': [14.8, 24.3, 32.9, 39.5],
    }

    # Long-format frame: one row per (series, bidder count).
    literature_df = pd.DataFrame({
        'num_bidder': bidder_counts * len(benchmark_prices),
        'reserve_price': [price for prices in benchmark_prices.values() for price in prices],
        'model': [name for name in benchmark_prices for _ in bidder_counts],
    })

    # Per-series marker and RGB colour.
    series_markers = {
        'Davis et al. (2011)': 's',
        'Wisconsin': '^',
        'Michigan': 'X',
    }
    series_colors = {
        'Davis et al. (2011)': (57/255, 81/255, 162/255),
        'Wisconsin': (253/255, 185/255, 107/255),
        'Michigan': (236/255, 93/255, 59/255),
    }

    fig, ax = plt.subplots(figsize=(5, 4))
    sns.lineplot(
        data=literature_df,
        x='num_bidder',
        y='reserve_price',
        hue='model',
        style='model',
        markers=series_markers,
        dashes=False,
        palette=series_colors,
        ax=ax,
        linewidth=2.5,
    )

    ax.set_xticks([1, 4, 7, 10])
    ax.set_xlabel("Number of Bidders")
    ax.set_ylabel("Reserve Price")
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)
    ax.legend(title="", frameon=False, fontsize=9)
    fig.tight_layout()
    return fig, ax

# Create literature benchmark plot
# The figure and axes are kept in variables so later cells can reuse them.
lit_fig, lit_ax = plot_literature_benchmarks()


In [None]:
def plot_llm_vs_human_bidder_response(df):
    """Plot reserve price against number of bidders, one line per model.

    Args:
        df: DataFrame with ``model`` and ``reserve_price`` columns and a
            bidder-count column named ``num_bidder`` or ``bidder_num``.

    Returns:
        ``(fig, ax)`` of the created figure, or ``(None, None)`` (after
        printing a message) when no bidder-count column is present.
    """
    # Accept either spelling of the bidder-count column.
    bidder_col = next(
        (name for name in ('num_bidder', 'bidder_num') if name in df.columns),
        None,
    )
    if bidder_col is None:
        print("No bidder data available for plotting")
        return None, None

    # RGB colour per model key.
    model_colors = {
        "claude-3-5": (57/255, 81/255, 162/255),
        "claude-3-7": (114/255, 170/255, 207/255),
        "gpt-4o": (253/255, 185/255, 107/255),
        "human": (236/255, 93/255, 59/255)
    }

    # Display names used in the legend.
    display_names = {
        'gpt-4o': 'GPT-4o',
        'claude-3-5': 'Claude-3.5',
        'claude-3-7': 'Claude-3.7',
        'human': 'Human'
    }

    fig, ax = plt.subplots(figsize=(6, 4))

    # Restrict the palette to models that occur in the data and take as many
    # markers as there are models.
    models_present = df['model'].unique()
    palette = {model: rgb for model, rgb in model_colors.items() if model in models_present}
    marker_shapes = ['o', 's', '^', 'X'][:len(models_present)]

    sns.lineplot(
        data=df,
        x=bidder_col,
        y='reserve_price',
        hue='model',
        style='model',
        markers=marker_shapes,
        dashes=False,
        palette=palette,
        ax=ax,
        linewidth=2.5,
    )

    ax.set_xlabel("Number of Bidders")
    ax.set_ylabel("Reserve Price")
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)

    # Rebuild the legend with display names; unknown labels are kept as is.
    handles, raw_labels = ax.get_legend_handles_labels()
    pretty_labels = [display_names.get(text, text) for text in raw_labels]
    ax.legend(
        handles=handles,
        labels=pretty_labels,
        title="",
        frameon=True,
        fontsize=13,
        framealpha=0.8,
        facecolor='white',
    )

    fig.tight_layout()
    return fig, ax

# Create LLM vs Human comparison plot
# Both names are None if combined_df has no bidder-count column.
comparison_fig, comparison_ax = plot_llm_vs_human_bidder_response(combined_df)

# Risk Preference Analysis

In [None]:
# Combine risk preference datasets
# Stack the three models' frames for each instruction, then attach the bidder
# profiles. The merge is the default inner join on bidder_group, so rows
# without a matching profile are dropped.
combined_df_risk_seek = pd.concat([claude_risk_seek_3_5, claude_risk_seek_3_7, gpt_risk_seek], ignore_index=True)
combined_df_risk_seek = combined_df_risk_seek.merge(profiles, on="bidder_group")

combined_df_risk_averse = pd.concat([claude_risk_averse_3_5, claude_risk_averse_3_7, gpt_risk_averse], ignore_index=True)
combined_df_risk_averse = combined_df_risk_averse.merge(profiles, on="bidder_group")

In [None]:
# Combine all risk and intrinsicality data
# combined_df (intrinsicality LLM rows plus human rows) has no 'risk' value, so
# after the concat those rows are NaN in 'risk' and get the label
# 'Intrinsicality'. Columns that exist only in combined_df are NaN for the
# instruction-experiment rows.
risk_df = pd.concat([combined_df_risk_seek, combined_df_risk_averse, combined_df], ignore_index=True)
risk_df['risk'] = risk_df['risk'].fillna('Intrinsicality')

In [None]:
# Add time segments to risk data
# Recomputed for every row of risk_df from its round number.
risk_df['time_segment'] = risk_df['round'].apply(create_time_segment)

def analyze_risk_temporal_patterns(df):
    """Summarise reserve prices by time segment and risk preference.

    Args:
        df: DataFrame with ``model``, ``time_segment``, ``risk`` and
            ``reserve_price`` columns.

    Returns:
        ``(time_risk_summary, detailed_summary)``: mean, standard deviation and
        count of ``reserve_price`` (rounded to 2 decimals) grouped by
        (time_segment, risk) and by (model, time_segment, risk) respectively.
    """
    summary_columns = ['Average Reserve Price', 'Standard Deviation', 'Count']

    def _reserve_price_summary(group_keys):
        # mean / std / count of reserve_price for each combination of group_keys.
        table = df.groupby(group_keys)['reserve_price'].agg(['mean', 'std', 'count']).round(2)
        table.columns = summary_columns
        return table

    by_segment_and_risk = _reserve_price_summary(['time_segment', 'risk'])
    by_model_segment_and_risk = _reserve_price_summary(['model', 'time_segment', 'risk'])
    return by_segment_and_risk, by_model_segment_and_risk

# Run risk temporal analysis
# risk_df also contains the 'Intrinsicality' rows, so both tables include that
# level of 'risk' alongside 'averse' and 'seeking'.
risk_temporal_summary, risk_detailed_summary = analyze_risk_temporal_patterns(risk_df)


# Model Consistency Analysis

In [None]:
class ModelConsistencyAnalyzer:
    """Analyze consistency across multiple runs of the same model.

    Each run is a DataFrame. Metrics are summed per group (``bidder_group``,
    or ``Profile ID`` when the first run has no ``bidder_group`` column) and
    the per-group totals are compared between runs.
    """

    def __init__(self, model_name, df_list):
        """Store the runs and derive one label per run (``<model_name>-<i>``, 1-based)."""
        self.model_name = model_name
        self.df_list = df_list
        self.num_models = len(df_list)
        self.model_labels = [f"{self.model_name}-{i+1}" for i in range(self.num_models)]

    def _group_column(self):
        """Name of the grouping column, decided from the first run."""
        return "bidder_group" if "bidder_group" in self.df_list[0].columns else "Profile ID"

    def _per_run_totals(self, metric):
        """Return (totals, is_grouped) lists with one entry per run.

        ``totals[i]`` is the per-group sum of ``metric`` when run ``i`` has the
        grouping column, otherwise the raw ``metric`` column.
        """
        group_col = self._group_column()
        totals, is_grouped = [], []
        for df in self.df_list:
            has_groups = group_col in df.columns
            totals.append(df.groupby(group_col)[metric].sum() if has_groups else df[metric])
            is_grouped.append(has_groups)
        return totals, is_grouped

    def analyze_metric_consistency(self, metric):
        """Analyze consistency of a metric across model runs.

        Returns:
            dict with ``'data'`` (one column per run, aligned on the group
            index), ``'descriptive_stats'`` (``describe()`` of that frame) and
            ``'correlations'`` (its pairwise correlation matrix).
        """
        totals, _ = self._per_run_totals(metric)
        # `keys` names the columns after the run labels in one step.
        metric_df = pd.concat(totals, axis=1, keys=self.model_labels)

        return {
            'data': metric_df,
            'descriptive_stats': metric_df.describe(),
            'correlations': metric_df.corr()
        }

    def perform_consistency_tests(self, metric):
        """Run a paired t-test on ``metric`` for every pair of runs.

        When both runs are grouped, observations are paired by group key and
        only groups present in both runs are used; pairing by position would
        silently compare different groups whenever the runs cover different
        group sets. Ungrouped runs are paired by position and skipped if their
        lengths differ. Pairs with fewer than two observations are skipped
        because the test is undefined there.

        Returns:
            DataFrame with columns ``Comparison``, ``t-statistic``, ``p-value``
            (empty when no pair could be tested).
        """
        totals, is_grouped = self._per_run_totals(metric)

        ttest_results = []
        for i in range(self.num_models):
            for j in range(i + 1, self.num_models):
                if is_grouped[i] and is_grouped[j]:
                    paired = pd.concat([totals[i], totals[j]], axis=1, join="inner", keys=["left", "right"])
                    left, right = paired["left"].values, paired["right"].values
                else:
                    left, right = totals[i].values, totals[j].values
                    if len(left) != len(right):
                        continue

                if len(left) < 2:
                    continue

                t_stat, p_value = ttest_rel(left, right)
                ttest_results.append({
                    "Comparison": f"{self.model_labels[i]} vs {self.model_labels[j]}",
                    "t-statistic": round(t_stat, 4),
                    "p-value": round(p_value, 4)
                })

        return pd.DataFrame(ttest_results)

    def _analyze_and_test(self, metric):
        """Bundle the descriptive analysis and the pairwise tests for one metric."""
        return {
            "analysis": self.analyze_metric_consistency(metric),
            "tests": self.perform_consistency_tests(metric)
        }

    def run_full_analysis(self):
        """Run complete consistency analysis.

        Returns:
            dict keyed by metric name; always contains ``"reserve_price"`` and
            also ``"profit"`` when the first run has a ``profit`` column. Each
            value is ``{"analysis": ..., "tests": ...}``.
        """
        results = {"reserve_price": self._analyze_and_test("reserve_price")}

        # Analyze profit consistency if available
        if "profit" in self.df_list[0].columns:
            results["profit"] = self._analyze_and_test("profit")

        return results

In [None]:
# Analyze GPT model consistency
# Compares the two gpt-4o intrinsicality exports; the runs are labelled
# "GPT-1" and "GPT-2" in the results.
gpt_analyzer = ModelConsistencyAnalyzer("GPT", [gpt_1, gpt_2])
gpt_consistency_results = gpt_analyzer.run_full_analysis()

# Only the reserve-price t-tests are printed; any profit results stay in
# gpt_consistency_results.
print("GPT Model Consistency Analysis:")
print("Reserve Price T-tests:")
print(gpt_consistency_results["reserve_price"]["tests"])

In [None]:
# Analyze Claude model consistency
# NOTE(review): only the two claude-3-7 exports are compared here, under the
# generic label "Claude"; the claude-3-5 exports are not analysed in this cell.
claude_analyzer = ModelConsistencyAnalyzer("Claude", [claude_3_7_1, claude_3_7_2])
claude_consistency_results = claude_analyzer.run_full_analysis()

# Only the reserve-price t-tests are printed.
print("Claude Model Consistency Analysis:")
print("Reserve Price T-tests:")
print(claude_consistency_results["reserve_price"]["tests"])

In [None]:
# Risk preference summary analysis
def summarize_risk_preferences(df):
    """Summarise reserve prices for the 'averse' and 'seeking' rows of ``df``.

    Args:
        df: DataFrame with ``model``, ``risk`` and ``reserve_price`` columns.

    Returns:
        ``None`` when ``df`` has no row whose ``risk`` is 'averse' or
        'seeking'; otherwise ``(risk_summary, detailed_summary)`` holding the
        mean, standard deviation and count of ``reserve_price`` (rounded to 2
        decimals) per risk level and per (model, risk) pair.
    """
    instructed = df[df['risk'].isin(['averse', 'seeking'])]
    if len(instructed) == 0:
        return None

    column_titles = ['Average Reserve Price', 'Standard Deviation', 'Count']

    def _price_stats(group_keys):
        # mean / std / count of reserve_price per combination of group_keys.
        table = instructed.groupby(group_keys)['reserve_price'].agg(['mean', 'std', 'count']).round(2)
        table.columns = column_titles
        return table

    return _price_stats('risk'), _price_stats(['model', 'risk'])

# Generate risk preference summary
risk_summaries = summarize_risk_preferences(risk_df)
# summarize_risk_preferences returns None when there are no averse/seeking rows.
if risk_summaries is not None:
    risk_summary, detailed_risk_summary = risk_summaries
    # Jupyter only renders a bare expression when it is the last top-level
    # statement of a cell; inside an `if` body it shows nothing, so print it.
    print(risk_summary)


In [None]:
# Run comprehensive temporal regression analysis
# NOTE(review): this calls analyze_temporal_patterns on combined_df just like
# the earlier "Run temporal analysis" step -- confirm whether both are needed.
comprehensive_temporal_results = analyze_temporal_patterns(combined_df)

# Add agent type categorization for LLM vs Human comparison
# The column is added after the analysis call above, so it does not affect
# comprehensive_temporal_results.
combined_df['agent_type'] = combined_df['model'].apply(lambda x: 'Human' if x == 'human' else 'LLM')

print("Temporal Analysis Results:")
print(comprehensive_temporal_results.to_string(index=False))

# Summary Analysis

In [None]:
# Generate comprehensive metrics for intrinsicality experiments
# combined_df contains the human rows, so they serve as the KS baseline and
# also get their own summary row ('N/A' in the KS column).
intrinsic_metrics = calculate_comprehensive_metrics(
    combined_df, 
    include_human_ks=True, 
    group_by_risk=False
)

print(intrinsic_metrics.to_string(index=False))

In [None]:
# Generate comprehensive metrics for risk preference experiments
risk_only_df = risk_df[risk_df['risk'].isin(['averse', 'seeking'])]

if len(risk_only_df) > 0:
    # calculate_comprehensive_metrics takes its human baseline from the rows
    # with model == 'human' in the frame it receives. The risk filter above
    # removes those rows (they are labelled 'Intrinsicality'), which left every
    # "KS Distance to Human" value as NaN. Pass the human rows along as well;
    # they show up as one extra 'human' row with 'N/A' in the KS column.
    risk_with_human_df = risk_df[
        risk_df['risk'].isin(['averse', 'seeking']) | (risk_df['model'] == 'human')
    ]
    risk_metrics = calculate_comprehensive_metrics(
        risk_with_human_df,
        include_human_ks=True,  # Include human baseline for comparison
        group_by_risk=True
    )

    print("Risk Preference Experiment Comprehensive Metrics:")
    print(risk_metrics.to_string(index=False))
else:
    print("No risk preference data available for comprehensive analysis")
