# 03a: Count Domino Locations

**Goal**: Understand the distribution of count dominoes across deals.

**Key Questions**:
1. Which count dominoes exist, and what are their point values?
2. How are counts distributed across players/teams in each deal?
3. Does the initial count distribution correlate with V (the minimax value of a state)?

**Reference**: docs/analysis-draft.md Section 6

In [None]:
# === CONFIGURATION ===
# Machine-specific absolute paths; edit these before running elsewhere.
# DATA_DIR is the directory handed to loading.find_shard_files() when the
# shard files are discovered later in this notebook.
DATA_DIR = "/mnt/d/shards-standard/"
# PROJECT_ROOT is put on sys.path below (presumably so that the `forge`
# package imported further down can be resolved -- confirm repo layout).
PROJECT_ROOT = "/home/jason/v2/mk5-tailwind"

# === Setup imports ===
import sys
# Guard against inserting the same entry again when the cell is re-run.
if PROJECT_ROOT not in sys.path:
    sys.path.insert(0, PROJECT_ROOT)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

# Project-local helpers; these imports run after the sys.path adjustment above.
from forge.analysis.utils import loading, features, viz
from forge.oracle import schema, tables

# Apply the project's notebook plotting style, then signal that setup finished.
viz.setup_notebook_style()
print("Ready")

## 1. Count Domino Reference

In Texas 42, "count" dominoes are worth extra points when captured:
- **5-count**: (5,0), (4,1), (3,2) = 5 points each
- **10-count**: (5,5), (6,4) = 10 points each

Total count points: 5*3 + 10*2 = 35 points

In [None]:
# List every count domino (id, pips, point value) and the total points on offer.
print("Count Dominoes:")
print("=" * 40)

# Gather (id, pips, points) for each count domino before printing.
count_rows = [
    (d, schema.domino_pips(d), tables.DOMINO_COUNT_POINTS[d])
    for d in features.COUNT_DOMINO_IDS
]

for domino_id, pips, points in count_rows:
    print(f"  ID {domino_id:2d}: {pips[0]}-{pips[1]} = {points:2d} points")

# Sum of the point values of all count dominoes.
total_points = sum(row_points for _, _, row_points in count_rows)
print(f"\nTotal count points available: {total_points}")

## 2. Load Sample Data

In [None]:
# Discover the available shard files and cap how many are analyzed.
# MAX_SHARDS bounds how many shard files this notebook loads.
MAX_SHARDS = 10

shard_files = loading.find_shard_files(DATA_DIR)

# Fail fast with an actionable message. Without this check the notebook
# would report "Analyzing 0 shards" and only break later, when an empty
# list of DataFrames is concatenated.
if len(shard_files) == 0:
    raise FileNotFoundError(
        f"No shard files found under DATA_DIR={DATA_DIR!r}; "
        "check the path set in the configuration cell."
    )

# NOTE: N_SEEDS is the number of shard files selected. The name is kept
# because later cells read it.
N_SEEDS = min(MAX_SHARDS, len(shard_files))
sample_files = shard_files[:N_SEEDS]
print(f"Analyzing {N_SEEDS} shards")

In [None]:
# Read each selected shard, down-sample the large ones, and stack everything
# into one DataFrame tagged with the shard's seed and declaration id.
SAMPLE_PER_SHARD = 20_000  # upper bound on rows kept from any single shard

dfs = []
for shard_path in tqdm(sample_files, desc="Loading shards"):
    shard_df, shard_seed, shard_decl_id = schema.load_file(shard_path)

    # Only shards larger than the cap are sampled; the shard's own seed is
    # used as the random state, so the same rows are drawn on every run.
    exceeds_cap = len(shard_df) > SAMPLE_PER_SHARD
    if exceeds_cap:
        shard_df = shard_df.sample(n=SAMPLE_PER_SHARD, random_state=shard_seed)

    # Record which shard each row came from.
    shard_df['seed'] = shard_seed
    shard_df['decl_id'] = shard_decl_id
    dfs.append(shard_df)

combined_df = pd.concat(dfs, ignore_index=True)
print(f"Loaded {len(combined_df):,} states from {N_SEEDS} shards")

## 3. Count Distribution at Deal Time

For each seed, analyze how count dominoes are distributed at the start.

In [None]:
# For every distinct seed, re-create the deal and tally the count points
# initially held by each player and by each team.
deal_analysis = []

for seed in combined_df['seed'].unique():
    hands = schema.deal_from_seed(int(seed))

    # player_counts[p] = count points sitting in player p's starting hand.
    player_counts = [0] * 4
    for domino_id in features.COUNT_DOMINO_IDS:
        points = tables.DOMINO_COUNT_POINTS[domino_id]
        # Credit the first hand that contains this domino (if any).
        holder = next(
            (p for p, hand in enumerate(hands) if domino_id in hand),
            None,
        )
        if holder is not None:
            player_counts[holder] += points

    # Players 0 and 2 form team 0; players 1 and 3 form team 1.
    team0_counts = sum(player_counts[0::2])
    team1_counts = sum(player_counts[1::2])

    record = {'seed': seed}
    record.update(
        {f'p{p}_counts': held for p, held in enumerate(player_counts)}
    )
    record['team0_counts'] = team0_counts
    record['team1_counts'] = team1_counts
    record['count_balance'] = team0_counts - team1_counts
    deal_analysis.append(record)

deal_df = pd.DataFrame(deal_analysis)
print("Count distribution at deal:")
print(deal_df.describe())

In [None]:
# Three side-by-side histograms of the deal-time count distribution:
# per team, team balance, and per individual player.
fig, axes = plt.subplots(1, 3, figsize=(15, 4))
team_ax, balance_ax, player_ax = axes

# Panel 1: overlay of both teams' count totals.
team_series = (
    ('team0_counts', 'Team 0', 'blue'),
    ('team1_counts', 'Team 1', 'orange'),
)
for column, team_label, team_color in team_series:
    team_ax.hist(deal_df[column], bins=8, alpha=0.7, label=team_label, color=team_color)
team_ax.set(
    xlabel='Count Points at Deal',
    ylabel='Number of Deals',
    title='Count Points by Team',
)
team_ax.legend()

# Panel 2: signed balance, with a marker at the perfectly balanced deal.
balance_ax.hist(deal_df['count_balance'], bins=15, color='green', alpha=0.7)
balance_ax.axvline(x=0, color='red', linestyle='--', label='Balanced')
balance_ax.set(
    xlabel='Count Balance (Team 0 - Team 1)',
    ylabel='Number of Deals',
    title='Count Balance Distribution',
)
balance_ax.legend()

# Panel 3: all four players pooled into a single distribution.
all_player_counts = pd.concat([deal_df[f'p{p}_counts'] for p in range(4)])
player_ax.hist(all_player_counts, bins=8, color='purple', alpha=0.7)
player_ax.set(
    xlabel='Count Points per Player',
    ylabel='Frequency',
    title='Individual Player Count Distribution',
)

plt.tight_layout()
plt.savefig('../../results/figures/03a_count_distribution.png', dpi=150, bbox_inches='tight')
plt.show()

## 4. Count Locations During Play

Track where count dominoes are as the game progresses.

In [None]:
# Pull the raw columns out once, then fill per-state count features one
# seed at a time (the feature helpers take the states plus their deal seed).
states = combined_df['state'].values
V = combined_df['V'].values
depths = features.depth(states)

# Output buffers, one entry per state.
n_states = len(states)
counts_remaining_vals = np.zeros(n_states, dtype=np.int32)
team0_counts_vals = np.zeros(n_states, dtype=np.int32)
team1_counts_vals = np.zeros(n_states, dtype=np.int32)

seed_column = combined_df['seed'].values
for seed in tqdm(combined_df['seed'].unique(), desc="Computing count locations"):
    rows = seed_column == seed          # boolean selector for this seed's states
    deal_seed = int(seed)
    states_for_seed = states[rows]

    counts_remaining_vals[rows] = features.counts_remaining(states_for_seed, deal_seed)
    # counts_by_team yields a (team 0, team 1) pair; write each into its buffer.
    team0_counts_vals[rows], team1_counts_vals[rows] = features.counts_by_team(
        states_for_seed, deal_seed
    )

print("Count tracking complete")

In [None]:
# Summarise how many count points are still unplayed at each depth.
depth_columns = {
    'depth': depths,
    'counts_remaining': counts_remaining_vals,
    'V': V,
}
depth_counts = pd.DataFrame(depth_columns)

# Mean, spread and sample size of counts remaining for every depth value.
remaining_by_depth = depth_counts.groupby('depth')['counts_remaining']
counts_by_depth = remaining_by_depth.agg(['mean', 'std', 'count'])

print("Counts remaining by depth:")
print(counts_by_depth)

In [None]:
# Plot counts remaining vs depth (left) and V vs counts remaining (right).
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Maximum count points, taken from the same points table used elsewhere in
# this notebook rather than hard-coded, so the reference line cannot drift.
max_count_points = int(
    sum(tables.DOMINO_COUNT_POINTS[d] for d in features.COUNT_DOMINO_IDS)
)

# Left panel: mean counts remaining per depth with a +/- 1 std band.
mean_remaining = counts_by_depth['mean']
std_remaining = counts_by_depth['std']
axes[0].plot(counts_by_depth.index, mean_remaining, 'o-', markersize=6)
axes[0].fill_between(
    counts_by_depth.index,
    mean_remaining - std_remaining,
    mean_remaining + std_remaining,
    alpha=0.3
)
axes[0].set_xlabel('Depth (dominoes remaining)')
axes[0].set_ylabel('Count points remaining')
axes[0].set_title('Count Points Remaining vs Depth')
axes[0].axhline(
    y=max_count_points, color='red', linestyle='--', alpha=0.5,
    label=f'Max ({max_count_points})',
)
axes[0].legend()

# Right panel: scatter of a subsample (at most 10,000 points).
# A seeded Generator makes the subsample, and therefore the saved figure,
# identical on every run; the global np.random state is left untouched.
rng = np.random.default_rng(0)
n_scatter = min(10000, len(states))
sample_idx = rng.choice(len(states), size=n_scatter, replace=False)

# The correlation in the title uses ALL states, not just the plotted subsample.
r_counts_v = np.corrcoef(counts_remaining_vals, V)[0, 1]

axes[1].scatter(counts_remaining_vals[sample_idx], V[sample_idx], alpha=0.1, s=1)
axes[1].set_xlabel('Count points remaining')
axes[1].set_ylabel('V (minimax value)')
axes[1].set_title(f'V vs Counts Remaining (r={r_counts_v:.3f})')

plt.tight_layout()
plt.savefig('../../results/figures/03a_counts_vs_depth.png', dpi=150, bbox_inches='tight')
plt.show()

## 5. Count Advantage vs V

Does having more count dominoes correlate with a better position?

In [None]:
# Count advantage = count points attributed to team 0 minus those of team 1.
count_advantage = team0_counts_vals - team1_counts_vals

# Pearson correlation between the advantage and V (off-diagonal entry of
# the 2x2 correlation matrix).
corr_matrix = np.corrcoef(count_advantage, V)
corr = corr_matrix[0, 1]
print(f"Correlation(count_advantage, V) = {corr:.4f}")

# Mean / std / sample size of V for every distinct advantage value.
adv_df = pd.DataFrame({'count_adv': count_advantage, 'V': V})
v_grouped = adv_df.groupby('count_adv')['V']
v_by_adv = v_grouped.agg(['mean', 'std', 'count'])
print("\nV by count advantage:")
print(v_by_adv)

In [None]:
# Mean V for each count-advantage value, with standard-error bars.
fig, ax = plt.subplots(figsize=(10, 6))

# Keep only advantage values backed by at least 100 states.
has_enough_samples = v_by_adv['count'] >= 100
significant = v_by_adv[has_enough_samples]

# Standard error of the mean: std / sqrt(n).
standard_error = significant['std'] / np.sqrt(significant['count'])

ax.errorbar(
    significant.index,
    significant['mean'],
    yerr=standard_error,
    fmt='o-', markersize=8, capsize=4
)

# Reference lines through V = 0 and advantage = 0.
ax.axhline(y=0, color='red', linestyle='--', alpha=0.5)
ax.axvline(x=0, color='gray', linestyle='--', alpha=0.5)

ax.set(
    xlabel='Count Advantage (Team 0 - Team 1)',
    ylabel='Mean V',
    title='Mean V by Count Advantage in Hand',
)

plt.tight_layout()
plt.savefig('../../results/figures/03a_v_by_count_adv.png', dpi=150, bbox_inches='tight')
plt.show()

## 6. Individual Count Domino Analysis

Analyze each count domino separately.

In [None]:
# Accumulate, per count domino, the holder reported for every sampled state.
count_holder_data = {}

seed_values = combined_df['seed'].values
for seed in tqdm(combined_df['seed'].unique(), desc="Tracking individual counts"):
    rows = seed_values == seed
    locations = features.count_locations(states[rows], int(seed))

    # locations maps domino id -> holders for this seed's states; append
    # them to whatever has been collected for that domino so far.
    for domino_id, holders in locations.items():
        count_holder_data.setdefault(domino_id, []).extend(holders)

# Convert the accumulated lists to arrays
count_holders = {
    domino_id: np.array(collected)
    for domino_id, collected in count_holder_data.items()
}

In [None]:
# Per count domino: how often it has already been played, and which team
# holds it when it has not.
count_stats = []
for domino_id in features.COUNT_DOMINO_IDS:
    pips = schema.domino_pips(domino_id)
    points = tables.DOMINO_COUNT_POINTS[domino_id]
    holders = count_holders[domino_id]

    # A holder of -1 marks the domino as already played.
    already_played = holders == -1
    played_frac = already_played.mean()

    # Among states where it is still held, the share held by team 0
    # (players 0 and 2). Falls back to 0.5 when there are no such states.
    unplayed = holders[holders >= 0]
    held_by_team0 = (unplayed == 0) | (unplayed == 2)
    team0_frac = held_by_team0.mean() if len(unplayed) > 0 else 0.5

    row = {
        'domino': f"{pips[0]}-{pips[1]}",
        'points': points,
        'played_pct': 100 * played_frac,
        'team0_pct': 100 * team0_frac,
    }
    count_stats.append(row)

count_stats_df = pd.DataFrame(count_stats)
print("Count domino statistics:")
print(count_stats_df.to_string(index=False))

## Summary

In [None]:
# Headline numbers for this notebook, rendered as a summary table.

# Derive the total from the points table instead of hard-coding 35, so the
# summary always agrees with the per-domino values used in the analysis.
total_count_points = int(
    sum(tables.DOMINO_COUNT_POINTS[d] for d in features.COUNT_DOMINO_IDS)
)

summary = {
    'Total states': f"{len(states):,}",
    'Seeds analyzed': len(combined_df['seed'].unique()),
    'Count dominoes': len(features.COUNT_DOMINO_IDS),
    'Total count points': total_count_points,
    'Corr(count_advantage, V)': f"{corr:.4f}",
    'Mean counts remaining': f"{counts_remaining_vals.mean():.1f}",
}

print(viz.create_summary_table(summary, "Count Location Analysis Summary"))

In [None]:
# Persist the per-domino statistics table as CSV (row index omitted).
output_csv = '../../results/tables/03a_count_stats.csv'
count_stats_df.to_csv(output_csv, index=False)
print("Results saved to results/tables/03a_count_stats.csv")