In [22]:
# Create dataframe with win rates for each condition
import json
import os
from collections import Counter
import pandas as pd

def get_win_rates(log_dir):
    """Get win counts and rates from a log directory"""
    winners = []
    for filename in os.listdir(log_dir):
        if filename.endswith('.json'):
            with open(os.path.join(log_dir, filename), 'r') as f:
                data = json.load(f)
                winner = data.get('winner')
                if winner:
                    winners.append(winner)
    
    tally = Counter(winners)
    total = len(winners)
    
    return tally, total

# Get data from both conditions
control_tally, control_total = get_win_rates('.logs/multiple_buyers_control')
no_disc_tally, no_disc_total = get_win_rates('.logs/multiple_buyers_no_discrimination')

# Get all unique player names
all_players = set(list(control_tally.keys()) + list(no_disc_tally.keys()))

# Create dataframe
data = []
for player in sorted(all_players):
    control_wins = control_tally.get(player, 0)
    control_rate = (control_wins / control_total * 100) if control_total > 0 else 0
    
    no_disc_wins = no_disc_tally.get(player, 0)
    no_disc_rate = (no_disc_wins / no_disc_total * 100) if no_disc_total > 0 else 0
    
    data.append({
        'Player': player,
        'Control_Wins': control_wins,
        'Control_Total': control_total,
        'Control_Win_Rate_%': control_rate,
        'No_Discrimination_Wins': no_disc_wins,
        'No_Discrimination_Total': no_disc_total,
        'No_Discrimination_Win_Rate_%': no_disc_rate,
        'Difference_%': no_disc_rate - control_rate
    })

df = pd.DataFrame(data)
df = df.sort_values('Difference_%', ascending=False)

print("Win Rates by Player and Condition")
print("=" * 80)
print(df.to_string(index=False))
print()
print(f"\nControl total games: {control_total}")
print(f"No Discrimination total games: {no_disc_total}")

# Display the dataframe
df

Win Rates by Player and Condition
           Player  Control_Wins  Control_Total  Control_Win_Rate_%  No_Discrimination_Wins  No_Discrimination_Total  No_Discrimination_Win_Rate_%  Difference_%
DaQuan Washington           196            387           50.645995                     219                      390                     56.153846      5.507851
           [name]             1            387            0.258398                       0                      390                      0.000000     -0.258398
    Hunter Becker           190            387           49.095607                     171                      390                     43.846154     -5.249453


Control total games: 387
No Discrimination total games: 390


Unnamed: 0,Player,Control_Wins,Control_Total,Control_Win_Rate_%,No_Discrimination_Wins,No_Discrimination_Total,No_Discrimination_Win_Rate_%,Difference_%
0,DaQuan Washington,196,387,50.645995,219,390,56.153846,5.507851
2,[name],1,387,0.258398,0,390,0.0,-0.258398
1,Hunter Becker,190,387,49.095607,171,390,43.846154,-5.249453


In [26]:
df = df.drop(df.index[1])

In [27]:
df

Unnamed: 0,Player,Control_Wins,Control_Total,Control_Win_Rate_%,No_Discrimination_Wins,No_Discrimination_Total,No_Discrimination_Win_Rate_%,Difference_%
0,DaQuan Washington,196,387,50.645995,219,390,56.153846,5.507851
1,Hunter Becker,190,387,49.095607,171,390,43.846154,-5.249453


In [28]:
# Statistical test: Compare win rate distributions between conditions
from scipy.stats import chi2_contingency
import numpy as np

# Create contingency table from the dataframe
# Rows: Players, Columns: Control wins, No Discrimination wins
contingency_table = np.array([
    [df[df['Player'] == 'DaQuan Washington']['Control_Wins'].values[0],
     df[df['Player'] == 'DaQuan Washington']['No_Discrimination_Wins'].values[0]],
    [df[df['Player'] == 'Hunter Becker']['Control_Wins'].values[0],
     df[df['Player'] == 'Hunter Becker']['No_Discrimination_Wins'].values[0]]
])

print("Statistical Test: Win Rate Distribution Comparison")
print("=" * 70)
print("\nContingency Table:")
print("                      Control    No Discrimination")
print(f"DaQuan Washington:    {contingency_table[0, 0]:6d}     {contingency_table[0, 1]:6d}")
print(f"Hunter Becker:        {contingency_table[1, 0]:6d}     {contingency_table[1, 1]:6d}")
print()

# Chi-square test of independence
chi2, p_value, dof, expected = chi2_contingency(contingency_table)

print("-" * 70)
print("Chi-Square Test of Independence:")
print("H0: Win rate distribution is independent of condition (no effect)")
print("H1: Win rate distribution depends on condition (significant effect)")
print()
print(f"χ² statistic: {chi2:.4f}")
print(f"p-value: {p_value:.4f}")
print(f"degrees of freedom: {dof}")
print()

if p_value < 0.05:
    print(f"Result: REJECT null hypothesis at α=0.05")
    print(f"The win rate distributions are significantly different")
    print(f"The condition (control vs no_discrimination) affects win rates")
else:
    print(f"Result: FAIL TO REJECT null hypothesis at α=0.05")
    print(f"No significant evidence that distributions differ")
    print(f"The condition does not significantly affect win rates")

print()
print("-" * 70)
print("Expected frequencies under independence:")
print(f"                      Control    No Discrimination")
print(f"DaQuan Washington:    {expected[0, 0]:6.1f}     {expected[0, 1]:6.1f}")
print(f"Hunter Becker:        {expected[1, 0]:6.1f}     {expected[1, 1]:6.1f}")

# Calculate effect size (Cramér's V)
n = contingency_table.sum()
cramers_v = np.sqrt(chi2 / n)
print()
print(f"Effect size (Cramér's V): {cramers_v:.4f}")
if cramers_v < 0.1:
    effect_interpretation = "negligible"
elif cramers_v < 0.3:
    effect_interpretation = "small"
elif cramers_v < 0.5:
    effect_interpretation = "medium"
else:
    effect_interpretation = "large"
print(f"Effect size interpretation: {effect_interpretation}")

Statistical Test: Win Rate Distribution Comparison

Contingency Table:
                      Control    No Discrimination
DaQuan Washington:       196        219
Hunter Becker:           190        171

----------------------------------------------------------------------
Chi-Square Test of Independence:
H0: Win rate distribution is independent of condition (no effect)
H1: Win rate distribution depends on condition (significant effect)

χ² statistic: 2.0432
p-value: 0.1529
degrees of freedom: 1

Result: FAIL TO REJECT null hypothesis at α=0.05
No significant evidence that distributions differ
The condition does not significantly affect win rates

----------------------------------------------------------------------
Expected frequencies under independence:
                      Control    No Discrimination
DaQuan Washington:     206.4      208.6
Hunter Becker:         179.6      181.4

Effect size (Cramér's V): 0.0513
Effect size interpretation: negligible


In [30]:
# One-sample binomial test: No-discrimination case vs 50%
from scipy import stats

# Get DaQuan Washington's wins in no-discrimination condition
daquan_wins_no_disc = df[df['Player'] == 'DaQuan Washington']['No_Discrimination_Wins'].values[0]
no_disc_total = df[df['Player'] == 'DaQuan Washington']['No_Discrimination_Total'].values[0]

# Binomial test
# H0: p = 0.5 (DaQuan's win rate is 50%)
# H1: p ≠ 0.5 (DaQuan's win rate is not 50%)
binomial_result = stats.binomtest(daquan_wins_no_disc, no_disc_total, 0.5, alternative='two-sided')

print("One-Sample Binomial Test: No-Discrimination Condition")
print("=" * 70)
print(f"Null Hypothesis: DaQuan Washington win rate = 50%")
print(f"Alternative: Win rate ≠ 50%")
print()
print(f"DaQuan Washington wins: {daquan_wins_no_disc}/{no_disc_total}")
print(f"Observed win rate: {daquan_wins_no_disc/no_disc_total:.2%}")
print()
print(f"Binomial Test p-value: {binomial_result.pvalue:.4f}")
print(f"95% Confidence Interval: ({binomial_result.proportion_ci(confidence_level=0.95).low:.2%}, {binomial_result.proportion_ci(confidence_level=0.95).high:.2%})")
print()

if binomial_result.pvalue < 0.05:
    print(f"Result: REJECT null hypothesis at α=0.05")
    print(f"DaQuan's win rate is significantly different from 50%")
else:
    print(f"Result: FAIL TO REJECT null hypothesis at α=0.05")
    print(f"No significant evidence that win rate differs from 50%")

# Also perform one-sample proportion z-test
observed_proportion = daquan_wins_no_disc / no_disc_total
expected_proportion = 0.5
se = np.sqrt(expected_proportion * (1 - expected_proportion) / no_disc_total)
z_score = (observed_proportion - expected_proportion) / se
p_value_z = 2 * (1 - stats.norm.cdf(abs(z_score)))

print()
print("-" * 70)
print("One-sample proportion z-test (for comparison):")
print(f"z-score: {z_score:.4f}")
print(f"p-value: {p_value_z:.4f}")
print(f"Standard error: {se:.4f}")

One-Sample Binomial Test: No-Discrimination Condition
Null Hypothesis: DaQuan Washington win rate = 50%
Alternative: Win rate ≠ 50%

DaQuan Washington wins: 219/390
Observed win rate: 56.15%

Binomial Test p-value: 0.0172
95% Confidence Interval: (51.07%, 61.14%)

Result: REJECT null hypothesis at α=0.05
DaQuan's win rate is significantly different from 50%

----------------------------------------------------------------------
One-sample proportion z-test (for comparison):
z-score: 2.4306
p-value: 0.0151
Standard error: 0.0253


In [31]:
# More appropriate test: Two-sample proportion test
# Compare DaQuan's win rate between control and no-discrimination conditions

from scipy.stats import chi2_contingency
from statsmodels.stats.proportion import proportions_ztest

# Get DaQuan's wins in both conditions
daquan_control = df[df['Player'] == 'DaQuan Washington']['Control_Wins'].values[0]
control_total = df[df['Player'] == 'DaQuan Washington']['Control_Total'].values[0]

daquan_no_disc = df[df['Player'] == 'DaQuan Washington']['No_Discrimination_Wins'].values[0]
no_disc_total = df[df['Player'] == 'DaQuan Washington']['No_Discrimination_Total'].values[0]

print("Two-Sample Proportion Test")
print("=" * 70)
print("Comparing DaQuan Washington's win rate between conditions")
print()
print(f"Control condition:")
print(f"  DaQuan wins: {daquan_control}/{control_total} ({daquan_control/control_total:.2%})")
print()
print(f"No-discrimination condition:")
print(f"  DaQuan wins: {daquan_no_disc}/{no_disc_total} ({daquan_no_disc/no_disc_total:.2%})")
print()
print(f"Difference: {(daquan_no_disc/no_disc_total - daquan_control/control_total)*100:.2f} percentage points")
print()

# Two-sample z-test for proportions
counts = np.array([daquan_control, daquan_no_disc])
nobs = np.array([control_total, no_disc_total])

z_stat, p_value = proportions_ztest(counts, nobs, alternative='two-sided')

print("-" * 70)
print("Two-sample proportion z-test:")
print(f"H0: Win rates are equal in both conditions")
print(f"H1: Win rates differ between conditions")
print()
print(f"z-statistic: {z_stat:.4f}")
print(f"p-value: {p_value:.4f}")
print()

if p_value < 0.05:
    print(f"Result: REJECT null hypothesis at α=0.05")
    print(f"DaQuan's win rate is significantly different between conditions")
else:
    print(f"Result: FAIL TO REJECT null hypothesis at α=0.05")
    print(f"No significant evidence that win rate differs between conditions")

# Manual calculation for verification
p1 = daquan_control / control_total
p2 = daquan_no_disc / no_disc_total
p_pooled = (daquan_control + daquan_no_disc) / (control_total + no_disc_total)
se_pooled = np.sqrt(p_pooled * (1 - p_pooled) * (1/control_total + 1/no_disc_total))
z_manual = (p2 - p1) / se_pooled

print()
print("-" * 70)
print("Manual calculation (verification):")
print(f"Pooled proportion: {p_pooled:.4f}")
print(f"Pooled standard error: {se_pooled:.4f}")
print(f"z-statistic: {z_manual:.4f}")

print()
print("=" * 70)
print("Note: This test is more appropriate than chi-squared because it")
print("directly tests whether DaQuan's win rate changed between conditions,")
print("rather than testing independence of categorical variables.")

Two-Sample Proportion Test
Comparing DaQuan Washington's win rate between conditions

Control condition:
  DaQuan wins: 196/387 (50.65%)

No-discrimination condition:
  DaQuan wins: 219/390 (56.15%)

Difference: 5.51 percentage points

----------------------------------------------------------------------
Two-sample proportion z-test:
H0: Win rates are equal in both conditions
H1: Win rates differ between conditions

z-statistic: -1.5389
p-value: 0.1238

Result: FAIL TO REJECT null hypothesis at α=0.05
No significant evidence that win rate differs between conditions

----------------------------------------------------------------------
Manual calculation (verification):
Pooled proportion: 0.5341
Pooled standard error: 0.0358
z-statistic: 1.5389

Note: This test is more appropriate than chi-squared because it
directly tests whether DaQuan's win rate changed between conditions,
rather than testing independence of categorical variables.
