In [4]:
# Tally winners from all log files
import json
import os
from collections import Counter

log_dir = '.logs/multiple_buyers_no_discrimination'


def load_winners(directory):
    """Collect the 'winner' value from every ``*.json`` log in ``directory``.

    Files are read in sorted filename order so the result (and therefore the
    insertion order of the tally built from it) is the same on every run;
    ``os.listdir`` alone returns entries in arbitrary order.

    Args:
        directory: Path of the folder holding the JSON log files.

    Returns:
        A list with one entry per log that has a truthy 'winner' value.
        Logs whose top-level JSON value is not an object, or whose 'winner'
        is missing/empty, are skipped. A missing directory yields an empty
        list after printing a warning.

    Raises:
        json.JSONDecodeError: If a ``*.json`` file is not valid JSON.
    """
    if not os.path.isdir(directory):
        print(f"Warning: log directory not found: {directory}")
        return []

    found = []
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith('.json'):
            continue
        # Read as UTF-8 explicitly instead of relying on the locale default.
        with open(os.path.join(directory, filename), 'r', encoding='utf-8') as f:
            data = json.load(f)
        # Only JSON objects can carry a 'winner' key; skip anything else.
        winner = data.get('winner') if isinstance(data, dict) else None
        if winner:
            found.append(winner)
    return found


def rank_winners(counts):
    """Return ``(name, count)`` pairs, highest count first, ties by name."""
    return sorted(counts.items(), key=lambda item: (-item[1], str(item[0])))


winners = load_winners(log_dir)

# Tally the winners
tally = Counter(winners)
ranking = rank_winners(tally)  # computed once, reused by both listings below

print("Winner Tally:")
print("-" * 40)
for name, count in ranking:
    print(f"{name}: {count}")
print("-" * 40)
print(f"Total games: {len(winners)}")
print()

# Calculate percentages
# (no division by zero: with no winners the ranking is empty and the loop is skipped)
print("Percentages:")
for name, count in ranking:
    percentage = (count / len(winners)) * 100
    print(f"{name}: {percentage:.1f}%")

Winner Tally:
----------------------------------------
DaQuan Washington: 77
Hunter Becker: 42
----------------------------------------
Total games: 119

Percentages:
DaQuan Washington: 64.7%
Hunter Becker: 35.3%


In [5]:
# Statistical test: H0: DaQuan Washington's win rate = 50%
from scipy import stats
import numpy as np

PLAYER = 'DaQuan Washington'   # player whose win rate is being tested
ALPHA = 0.05                   # significance level for the reject / fail-to-reject decision
expected_proportion = 0.5      # win rate under the null hypothesis

# Get DaQuan Washington's wins
daquan_wins = tally.get(PLAYER, 0)
total_games = len(winners)

print("Statistical Test Results")
print("=" * 50)
print(f"Null Hypothesis: {PLAYER} win rate = {expected_proportion:.0%}")
print(f"Alternative: Win rate ≠ {expected_proportion:.0%}")
print()

if total_games == 0:
    # With no games, binomtest rejects n < 1 and the rate / standard error
    # would divide by zero. Report the situation and leave the statistics
    # defined (as NaN) so later cells can still reference them.
    p_value_binomial = observed_proportion = se = z_score = p_value_z = np.nan
    print("No games recorded; statistical tests skipped.")
else:
    observed_proportion = daquan_wins / total_games

    # Binomial test (more appropriate for proportions)
    # H0: p = expected_proportion
    # H1: p ≠ expected_proportion
    p_value_binomial = stats.binomtest(
        daquan_wins, total_games, expected_proportion, alternative='two-sided'
    ).pvalue

    print(f"{PLAYER} wins: {daquan_wins}/{total_games}")
    print(f"Observed win rate: {observed_proportion:.1%}")
    print()
    print(f"Binomial Test p-value: {p_value_binomial:.4f}")
    print()

    if p_value_binomial < ALPHA:
        print(f"Result: REJECT null hypothesis at α={ALPHA}")
        print(f"The win rate is significantly different from {expected_proportion:.0%}")
    else:
        print(f"Result: FAIL TO REJECT null hypothesis at α={ALPHA}")
        print(f"No significant evidence that win rate differs from {expected_proportion:.0%}")

    # Also perform one-sample proportion z-test for comparison
    # (standard error uses the null proportion, as in the usual score test)
    se = np.sqrt(expected_proportion * (1 - expected_proportion) / total_games)
    z_score = (observed_proportion - expected_proportion) / se
    # Survival function instead of 1 - cdf: avoids cancellation, which
    # collapses the two-tailed p-value to exactly 0 for large |z|.
    p_value_z = 2 * stats.norm.sf(abs(z_score))  # two-tailed

    print()
    print("-" * 50)
    print("One-sample proportion z-test:")
    print(f"z-score: {z_score:.4f}")
    print(f"p-value: {p_value_z:.4f}")

Statistical Test Results
==================================================
Null Hypothesis: DaQuan Washington win rate = 50%
Alternative: Win rate ≠ 50%

DaQuan Washington wins: 77/119
Observed win rate: 64.7%

Binomial Test p-value: 0.0017

Result: REJECT null hypothesis at α=0.05
The win rate is significantly different from 50%

--------------------------------------------------
One-sample proportion z-test:
z-score: 3.2084
p-value: 0.0013
