In [4]:
# Create dataframe with win rates for each condition
import json
import os
from collections import Counter
import pandas as pd
import numpy as np

def get_win_rates_by_last_name(log_dir):
    """Tally game winners by last name across the JSON logs in a directory.

    Every file directly inside ``log_dir`` whose name ends in ``.json`` is
    parsed and its top-level ``'winner'`` value is read. The last
    whitespace-separated token of that value is used as the last name.

    Logs whose ``'winner'`` is missing, empty, whitespace-only, or not a
    string are skipped: they add nothing to the tally and are not counted
    in the total.

    Args:
        log_dir: Path of the directory holding the game logs.

    Returns:
        A ``(tally, total)`` tuple. ``tally`` is a ``Counter`` mapping last
        name to number of wins; ``total`` is the number of logs that yielded
        a last name (i.e. the sum of all counts).

    Raises:
        OSError: If ``log_dir`` cannot be listed or a log cannot be opened.
        json.JSONDecodeError: If a ``.json`` file does not contain valid JSON.
    """
    tally = Counter()
    # Sorted only so the Counter's insertion order is reproducible across
    # platforms; the counts themselves do not depend on the order.
    for filename in sorted(os.listdir(log_dir)):
        if not filename.endswith('.json'):
            continue
        # JSON is UTF-8 by specification; don't rely on the platform default.
        with open(os.path.join(log_dir, filename), 'r', encoding='utf-8') as f:
            data = json.load(f)  # assumes the top-level JSON value is an object

        winner = data.get('winner')
        # A whitespace-only string is truthy but splits to [], so guard on the
        # split result rather than on the raw value before indexing [-1].
        name_parts = winner.split() if isinstance(winner, str) else []
        if name_parts:
            tally[name_parts[-1]] += 1

    total = sum(tally.values())

    return tally, total

# Tally winners separately for each experimental condition.
control_tally, control_total = get_win_rates_by_last_name('.logs/house_control')
no_disc_tally, no_disc_total = get_win_rates_by_last_name('.logs/house_no_discrimination')

# Every last name that won at least once in either condition.
all_last_names = set(control_tally) | set(no_disc_tally)

# One record per last name: wins, games tallied and win rate (in percent) for
# each condition, plus the change in win rate between the two conditions.
data = []
for last_name in sorted(all_last_names):
    # A Counter reports 0 for names that never won in that condition.
    control_wins = control_tally[last_name]
    no_disc_wins = no_disc_tally[last_name]

    # Fall back to a 0 rate when a condition has no tallied games.
    if control_total > 0:
        control_rate = control_wins / control_total * 100
    else:
        control_rate = 0

    if no_disc_total > 0:
        no_disc_rate = no_disc_wins / no_disc_total * 100
    else:
        no_disc_rate = 0

    row = {
        'Last_Name': last_name,
        'Control_Wins': control_wins,
        'Control_Total': control_total,
        'Control_Win_Rate_%': control_rate,
        'No_Discrimination_Wins': no_disc_wins,
        'No_Discrimination_Total': no_disc_total,
        'No_Discrimination_Win_Rate_%': no_disc_rate,
        'Difference_%': no_disc_rate - control_rate,
    }
    data.append(row)

# Largest gain in the no-discrimination condition first.
df = pd.DataFrame(data).sort_values('Difference_%', ascending=False)

print("Win Rates by Last Name and Condition")
print("=" * 80)
print(df.to_string(index=False))
print()
print(f"\nControl total games: {control_total}")
print(f"No Discrimination total games: {no_disc_total}")

# Rich display of the summary table as the cell's output.
df

Win Rates by Last Name and Condition
 Last_Name  Control_Wins  Control_Total  Control_Win_Rate_%  No_Discrimination_Wins  No_Discrimination_Total  No_Discrimination_Win_Rate_%  Difference_%
Washington           368            792           46.464646                     397                      793                     50.063052      3.598405
    Becker           424            792           53.535354                     396                      793                     49.936948     -3.598405


Control total games: 792
No Discrimination total games: 793


Unnamed: 0,Last_Name,Control_Wins,Control_Total,Control_Win_Rate_%,No_Discrimination_Wins,No_Discrimination_Total,No_Discrimination_Win_Rate_%,Difference_%
1,Washington,368,792,46.464646,397,793,50.063052,3.598405
0,Becker,424,792,53.535354,396,793,49.936948,-3.598405


In [5]:
# One-sample tests: does Washington's win rate in the no-discrimination
# condition differ from 50%?
from scipy import stats

# Washington's wins and the number of tallied games in the no-discrimination
# condition, read from the summary table `df` built in an earlier cell.
is_washington = df['Last_Name'] == 'Washington'
washington_wins_no_disc = df.loc[is_washington, 'No_Discrimination_Wins'].values[0]
no_disc_total = df.loc[is_washington, 'No_Discrimination_Total'].values[0]

# Exact binomial test
# H0: p = 0.5 (Washington's win rate is 50%)
# H1: p ≠ 0.5 (Washington's win rate is not 50%)
binomial_result = stats.binomtest(washington_wins_no_disc, no_disc_total, 0.5, alternative='two-sided')
# Compute the interval once and reuse both bounds instead of calling
# proportion_ci() separately for each end.
binomial_ci = binomial_result.proportion_ci(confidence_level=0.95)

print("One-Sample Binomial Test: No-Discrimination Condition")
print("=" * 70)
print(f"Null Hypothesis: Washington win rate = 50%")
print(f"Alternative: Win rate ≠ 50%")
print()
print(f"Washington wins: {washington_wins_no_disc}/{no_disc_total}")
print(f"Observed win rate: {washington_wins_no_disc/no_disc_total:.2%}")
print()
print(f"Binomial Test p-value: {binomial_result.pvalue:.4f}")
print(f"95% Confidence Interval: ({binomial_ci.low:.2%}, {binomial_ci.high:.2%})")
print()

if binomial_result.pvalue < 0.05:
    print(f"Result: REJECT null hypothesis at α=0.05")
    print(f"Washington's win rate is significantly different from 50%")
else:
    print(f"Result: FAIL TO REJECT null hypothesis at α=0.05")
    print(f"No significant evidence that win rate differs from 50%")

# Normal-approximation z-test for comparison. The standard error is taken
# under H0 (p = 0.5), not from the observed proportion.
observed_proportion = washington_wins_no_disc / no_disc_total
expected_proportion = 0.5
se = np.sqrt(expected_proportion * (1 - expected_proportion) / no_disc_total)
z_score = (observed_proportion - expected_proportion) / se
# Use the survival function for the tail probability: `1 - cdf` cancels to
# exactly 0 for large |z|, whereas `sf` keeps the small tail value.
p_value_z = 2 * stats.norm.sf(abs(z_score))

print()
print("-" * 70)
print("One-sample proportion z-test (for comparison):")
print(f"z-score: {z_score:.4f}")
print(f"p-value: {p_value_z:.4f}")
print(f"Standard error: {se:.4f}")

One-Sample Binomial Test: No-Discrimination Condition
Null Hypothesis: Washington win rate = 50%
Alternative: Win rate ≠ 50%

Washington wins: 397/793
Observed win rate: 50.06%

Binomial Test p-value: 1.0000
95% Confidence Interval: (46.53%, 53.60%)

Result: FAIL TO REJECT null hypothesis at α=0.05
No significant evidence that win rate differs from 50%

----------------------------------------------------------------------
One-sample proportion z-test (for comparison):
z-score: 0.0355
p-value: 0.9717
Standard error: 0.0178


In [6]:
# Two-sample proportion test: did Washington's win rate change between the
# control and no-discrimination conditions?

from scipy.stats import chi2_contingency
from statsmodels.stats.proportion import proportions_ztest

# Washington's wins and tallied games in both conditions, read from the
# summary table `df` built in an earlier cell.
is_washington = df['Last_Name'] == 'Washington'
washington_control = df.loc[is_washington, 'Control_Wins'].values[0]
control_total = df.loc[is_washington, 'Control_Total'].values[0]

washington_no_disc = df.loc[is_washington, 'No_Discrimination_Wins'].values[0]
no_disc_total = df.loc[is_washington, 'No_Discrimination_Total'].values[0]

print("Two-Sample Proportion Test")
print("=" * 70)
print("Comparing Washington's win rate between conditions")
print()
print(f"Control condition:")
print(f"  Washington wins: {washington_control}/{control_total} ({washington_control/control_total:.2%})")
print()
print(f"No-discrimination condition:")
print(f"  Washington wins: {washington_no_disc}/{no_disc_total} ({washington_no_disc/no_disc_total:.2%})")
print()
print(f"Difference: {(washington_no_disc/no_disc_total - washington_control/control_total)*100:.2f} percentage points")
print()

# Two-sample z-test for proportions (TWO-TAILED).
# No-discrimination is listed first, so a positive z means Washington's win
# rate is higher in the no-discrimination condition than in control.
counts = np.array([washington_no_disc, washington_control])
nobs = np.array([no_disc_total, control_total])

z_stat, p_value = proportions_ztest(counts, nobs, alternative='two-sided')

print("-" * 70)
print("Two-sample proportion z-test (two-tailed):")
print(f"H0: Win rates are equal in both conditions")
print(f"H1: Win rates differ between conditions")
print()
print(f"z-statistic: {z_stat:.4f}")
print(f"p-value (two-tailed): {p_value:.4f}")
print()

if p_value < 0.05:
    print(f"Result: REJECT null hypothesis at α=0.05")
    print(f"Washington's win rate is significantly different between conditions")
else:
    print(f"Result: FAIL TO REJECT null hypothesis at α=0.05")
    print(f"No significant evidence that win rate differs between conditions")

# Manual calculation for verification: pooled-variance z-statistic, with the
# same sign convention as above (no-discrimination minus control).
p1 = washington_control / control_total
p2 = washington_no_disc / no_disc_total
p_pooled = (washington_control + washington_no_disc) / (control_total + no_disc_total)
se_pooled = np.sqrt(p_pooled * (1 - p_pooled) * (1/control_total + 1/no_disc_total))
z_manual = (p2 - p1) / se_pooled
# Survival function instead of `1 - cdf`, which cancels to 0 for large |z|.
p_value_manual = 2 * stats.norm.sf(abs(z_manual))  # two-tailed

print()
print("-" * 70)
print("Manual calculation (verification):")
print(f"Pooled proportion: {p_pooled:.4f}")
print(f"Pooled standard error: {se_pooled:.4f}")
print(f"z-statistic: {z_manual:.4f}")
print(f"p-value (two-tailed): {p_value_manual:.4f}")

# Make the "verification" an actual check rather than something the reader
# has to eyeball. equal_nan keeps the degenerate 0/0 case from tripping it.
assert np.isclose(z_stat, z_manual, equal_nan=True), \
    "manual z-statistic disagrees with proportions_ztest"

# The previous note claimed this test is "more appropriate than chi-squared".
# For a 2x2 table the two are the same test, so the note now says so.
print()
print("=" * 70)
print("Note: For a 2x2 table this pooled z-test is equivalent to Pearson's")
print("chi-squared test without continuity correction (z^2 = chi2, same p-value);")
print("the z form is reported because it also shows the direction of the change.")

Two-Sample Proportion Test
Comparing Washington's win rate between conditions

Control condition:
  Washington wins: 368/792 (46.46%)

No-discrimination condition:
  Washington wins: 397/793 (50.06%)

Difference: 3.60 percentage points

----------------------------------------------------------------------
Two-sample proportion z-test (two-tailed):
H0: Win rates are equal in both conditions
H1: Win rates differ between conditions

z-statistic: 1.4335
p-value (two-tailed): 0.1517

Result: FAIL TO REJECT null hypothesis at α=0.05
No significant evidence that win rate differs between conditions

----------------------------------------------------------------------
Manual calculation (verification):
Pooled proportion: 0.4826
Pooled standard error: 0.0251
z-statistic: 1.4335
p-value (two-tailed): 0.1517

Note: This test is more appropriate than chi-squared because it
directly tests whether Washington's win rate changed between conditions,
rather than testing independence of categorical var