In [13]:
# Create dataframe with win rates for each condition
import json
import os
from collections import Counter
import pandas as pd

def get_win_rates(log_dir):
    """Get win counts and rates from a log directory"""
    winners = []
    for filename in os.listdir(log_dir):
        if filename.endswith('.json'):
            with open(os.path.join(log_dir, filename), 'r') as f:
                data = json.load(f)
                winner = data.get('winner')
                if winner:
                    winners.append(winner)
    
    tally = Counter(winners)
    total = len(winners)
    
    return tally, total

# Get data from both conditions
control_tally, control_total = get_win_rates('.logs/multiple_buyers_control')
no_disc_tally, no_disc_total = get_win_rates('.logs/multiple_buyers_no_discrimination')

# Get all unique player names
all_players = set(list(control_tally.keys()) + list(no_disc_tally.keys()))

# Create dataframe
data = []
for player in sorted(all_players):
    control_wins = control_tally.get(player, 0)
    control_rate = (control_wins / control_total * 100) if control_total > 0 else 0
    
    no_disc_wins = no_disc_tally.get(player, 0)
    no_disc_rate = (no_disc_wins / no_disc_total * 100) if no_disc_total > 0 else 0
    
    data.append({
        'Player': player,
        'Control_Wins': control_wins,
        'Control_Total': control_total,
        'Control_Win_Rate_%': control_rate,
        'No_Discrimination_Wins': no_disc_wins,
        'No_Discrimination_Total': no_disc_total,
        'No_Discrimination_Win_Rate_%': no_disc_rate,
        'Difference_%': no_disc_rate - control_rate
    })

df = pd.DataFrame(data)
df = df.sort_values('Difference_%', ascending=False)

print("Win Rates by Player and Condition")
print("=" * 80)
print(df.to_string(index=False))
print()
print(f"\nControl total games: {control_total}")
print(f"No Discrimination total games: {no_disc_total}")

# Display the dataframe
df

Win Rates by Player and Condition
           Player  Control_Wins  Control_Total  Control_Win_Rate_%  No_Discrimination_Wins  No_Discrimination_Total  No_Discrimination_Win_Rate_%  Difference_%
DaQuan Washington            88            170           51.764706                      77                      119                     64.705882     12.941176
    Hunter Becker            82            170           48.235294                      42                      119                     35.294118    -12.941176


Control total games: 170
No Discrimination total games: 119


Unnamed: 0,Player,Control_Wins,Control_Total,Control_Win_Rate_%,No_Discrimination_Wins,No_Discrimination_Total,No_Discrimination_Win_Rate_%,Difference_%
0,DaQuan Washington,88,170,51.764706,77,119,64.705882,12.941176
1,Hunter Becker,82,170,48.235294,42,119,35.294118,-12.941176


In [14]:
# Statistical test: Compare win rate distributions between conditions
from scipy.stats import chi2_contingency
import numpy as np

# Create contingency table from the dataframe
# Rows: Players, Columns: Control wins, No Discrimination wins
contingency_table = np.array([
    [df[df['Player'] == 'DaQuan Washington']['Control_Wins'].values[0],
     df[df['Player'] == 'DaQuan Washington']['No_Discrimination_Wins'].values[0]],
    [df[df['Player'] == 'Hunter Becker']['Control_Wins'].values[0],
     df[df['Player'] == 'Hunter Becker']['No_Discrimination_Wins'].values[0]]
])

print("Statistical Test: Win Rate Distribution Comparison")
print("=" * 70)
print("\nContingency Table:")
print("                      Control    No Discrimination")
print(f"DaQuan Washington:    {contingency_table[0, 0]:6d}     {contingency_table[0, 1]:6d}")
print(f"Hunter Becker:        {contingency_table[1, 0]:6d}     {contingency_table[1, 1]:6d}")
print()

# Chi-square test of independence
chi2, p_value, dof, expected = chi2_contingency(contingency_table)

print("-" * 70)
print("Chi-Square Test of Independence:")
print("H0: Win rate distribution is independent of condition (no effect)")
print("H1: Win rate distribution depends on condition (significant effect)")
print()
print(f"χ² statistic: {chi2:.4f}")
print(f"p-value: {p_value:.4f}")
print(f"degrees of freedom: {dof}")
print()

if p_value < 0.05:
    print(f"Result: REJECT null hypothesis at α=0.05")
    print(f"The win rate distributions are significantly different")
    print(f"The condition (control vs no_discrimination) affects win rates")
else:
    print(f"Result: FAIL TO REJECT null hypothesis at α=0.05")
    print(f"No significant evidence that distributions differ")
    print(f"The condition does not significantly affect win rates")

print()
print("-" * 70)
print("Expected frequencies under independence:")
print(f"                      Control    No Discrimination")
print(f"DaQuan Washington:    {expected[0, 0]:6.1f}     {expected[0, 1]:6.1f}")
print(f"Hunter Becker:        {expected[1, 0]:6.1f}     {expected[1, 1]:6.1f}")

# Calculate effect size (Cramér's V)
n = contingency_table.sum()
cramers_v = np.sqrt(chi2 / n)
print()
print(f"Effect size (Cramér's V): {cramers_v:.4f}")
if cramers_v < 0.1:
    effect_interpretation = "negligible"
elif cramers_v < 0.3:
    effect_interpretation = "small"
elif cramers_v < 0.5:
    effect_interpretation = "medium"
else:
    effect_interpretation = "large"
print(f"Effect size interpretation: {effect_interpretation}")

Statistical Test: Win Rate Distribution Comparison

Contingency Table:
                      Control    No Discrimination
DaQuan Washington:        88         77
Hunter Becker:            82         42

----------------------------------------------------------------------
Chi-Square Test of Independence:
H0: Win rate distribution is independent of condition (no effect)
H1: Win rate distribution depends on condition (significant effect)

χ² statistic: 4.2719
p-value: 0.0387
degrees of freedom: 1

Result: REJECT null hypothesis at α=0.05
The win rate distributions are significantly different
The condition (control vs no_discrimination) affects win rates

----------------------------------------------------------------------
Expected frequencies under independence:
                      Control    No Discrimination
DaQuan Washington:      97.1       67.9
Hunter Becker:          72.9       51.1

Effect size (Cramér's V): 0.1216
Effect size interpretation: small
