# 01b: Q-Value Structure

**Goal**: Characterize the decision landscape across game states.

**Key Questions**:
1. How often is there a unique optimal move vs ties?
2. What does the distribution of q_spread look like? Fat tails indicate volatile decisions; a tight distribution indicates forced ones.
3. How does q_gap correlate with depth (do decisions get easier or harder as the game progresses)?

**Reference**: docs/analysis-draft.md Section 1.2

In [None]:
# === CONFIGURATION ===
# Directory passed to loading.find_shard_files() to locate the shard files.
DATA_DIR = "/mnt/d/shards-standard/"
# Path prepended to sys.path below — presumably the repository root that
# contains the `forge` package (TODO confirm).
PROJECT_ROOT = "/home/jason/v2/mk5-tailwind"
# NOTE(review): both values are machine-specific absolute paths; adjust them
# before running this notebook on another machine.

# === Setup imports ===
import sys
# The membership check keeps re-running this cell from inserting duplicate
# sys.path entries. This runs before the `forge` imports further down.
if PROJECT_ROOT not in sys.path:
    sys.path.insert(0, PROJECT_ROOT)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.notebook import tqdm

from forge.analysis.utils import loading, features, viz
from forge.oracle import schema

# Apply the plotting style provided by the project's viz helpers.
viz.setup_notebook_style()
print("✓ Ready")

## 1. Load Sample Data

In [None]:
# Load sample shards
# Upper bound on how many shard files this notebook analyzes.
MAX_SHARDS = 10

shard_files = loading.find_shard_files(DATA_DIR)
if len(shard_files) == 0:
    # Fail here with an actionable message instead of later, when pd.concat()
    # would be handed an empty list of per-shard frames.
    raise FileNotFoundError(f"No shard files found under {DATA_DIR!r}")

# Analyze the first N_SEEDS shards (all of them if fewer than MAX_SHARDS exist).
N_SEEDS = min(MAX_SHARDS, len(shard_files))
sample_files = shard_files[:N_SEEDS]
print(f"Analyzing {N_SEEDS} shards")

In [None]:
# Assemble q_df: Q-value statistics for a sample of states from each shard.
# Every shard is down-sampled first because the full data set (274M states) is too large for memory.
# Columns: n_legal, q_spread, q_gap, n_optimal, best_q, depth, team, seed, decl_id

SAMPLE_PER_SHARD = 100_000  # Per-shard cap on the number of sampled states
print(f"Building Q-value statistics from {N_SEEDS} shards ({SAMPLE_PER_SHARD:,} samples each)...")

# Q columns (q0 through q6); identical for every shard, so built once up front
q_cols = [f'q{i}' for i in range(7)]

per_shard_stats = []
for shard_path in tqdm(sample_files, desc="Loading shards"):
    shard_df, seed, decl_id = schema.load_file(shard_path)

    # Down-sample oversized shards; random_state=seed makes the draw repeatable
    if len(shard_df) > SAMPLE_PER_SHARD:
        shard_df = shard_df.sample(n=SAMPLE_PER_SHARD, random_state=seed)

    # Q statistics computed from the raw Q-value matrix
    shard_stats = features.q_stats(shard_df[q_cols].values)

    # Attach depth and team (both derived from the state column) plus shard identifiers
    state_values = shard_df['state'].values
    shard_stats['depth'] = features.depth(state_values)
    shard_stats['team'] = features.team(state_values).astype(int)
    shard_stats['seed'] = seed
    shard_stats['decl_id'] = decl_id

    per_shard_stats.append(shard_stats)

q_df = pd.concat(per_shard_stats, ignore_index=True)
print(f"✓ Built q_df: {len(q_df):,} sampled states from {N_SEEDS} shards")

## 2. Overall Q-Value Statistics

In [None]:
# Descriptive statistics for the core Q-value columns
summary_cols = ['n_legal', 'q_spread', 'q_gap', 'n_optimal', 'best_q']
print("Q-value statistics summary:")
print(q_df[summary_cols].describe())

In [None]:
# Overview figure of the Q-value distributions, written to the results folder
structure_fig_path = '../../results/figures/01b_q_structure.png'

viz.plot_q_structure(q_df, title="Overall Q-Value Structure")
plt.savefig(structure_fig_path, bbox_inches='tight', dpi=150)
plt.show()

## 3. Optimal Move Uniqueness

How often is there a single best move vs ties?

In [None]:
# Distribution of n_optimal (ties for best move)
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Histogram: number of states for each n_optimal value
n_opt_counts = q_df['n_optimal'].value_counts().sort_index()
axes[0].bar(n_opt_counts.index, n_opt_counts.values, color='steelblue', alpha=0.7)
axes[0].set_xlabel('Number of optimal moves (ties)')
axes[0].set_ylabel('Count')
axes[0].set_title('Distribution of Tied Optimal Moves')

# Percentages: the same counts as a share of all sampled states (0-100 scale)
n_opt_pct = 100 * n_opt_counts / len(q_df)
axes[1].bar(n_opt_pct.index, n_opt_pct.values, color='coral', alpha=0.7)
axes[1].set_xlabel('Number of optimal moves')
axes[1].set_ylabel('Percentage')
axes[1].set_title('Percentage of States by Optimal Count')

# Label each bar with its percentage, slightly above the bar top
for idx, pct in n_opt_pct.items():
    axes[1].text(idx, pct + 0.5, f'{pct:.1f}%', ha='center', fontsize=9)

plt.tight_layout()
plt.show()

# n_opt_pct is already on a 0-100 scale, so it must not be multiplied by 100 again.
unique_pct = n_opt_pct.get(1, 0)
# Sum the shares of states with two or more optimal moves directly, so the
# figure matches its label even if some n_optimal values are not >= 1.
tied_pct = n_opt_pct[n_opt_pct.index >= 2].sum()
print(f"States with unique optimal: {unique_pct:.1f}%")
print(f"States with 2+ ties: {tied_pct:.1f}%")

## 4. Q-Spread Analysis

A fat-tailed q_spread distribution indicates volatile decisions; a tight distribution indicates forced ones.

In [None]:
# Shared inputs for both histogram panels
spread = q_df['q_spread']
spread_median = spread.median()
hist_style = dict(bins=50, color='steelblue', alpha=0.7, edgecolor='black')

fig, axes = plt.subplots(1, 2, figsize=(14, 5))
linear_ax, log_ax = axes

# Left panel: linear-scale histogram with the median marked
linear_ax.hist(spread, **hist_style)
linear_ax.set_xlabel('Q-spread (best - worst among legal)')
linear_ax.set_ylabel('Count')
linear_ax.set_title('Q-Spread Distribution')
linear_ax.axvline(x=spread_median, color='red', linestyle='--', label=f"Median: {spread_median:.1f}")
linear_ax.legend()

# Right panel: same histogram with a log-scaled count axis to expose the tail
log_ax.hist(spread, **hist_style)
log_ax.set_xlabel('Q-spread')
log_ax.set_ylabel('Count (log)')
log_ax.set_title('Q-Spread Distribution (Log Scale)')
log_ax.set_yscale('log')

plt.tight_layout()
plt.show()

# Tail summary: upper percentiles of q_spread
percentiles = [50, 75, 90, 95, 99]
print("Q-spread percentiles:")
for p, value in zip(percentiles, np.percentile(spread, percentiles)):
    print(f"  {p}th: {value:.1f}")

## 5. Q-Gap Analysis

The gap between the best and second-best move — critical for gauging how costly a mistake is.

In [None]:
gap = q_df['q_gap']

fig, axes = plt.subplots(1, 2, figsize=(14, 5))
hist_ax, cdf_ax = axes

# Left panel: histogram of q_gap
hist_ax.hist(gap, bins=50, color='coral', alpha=0.7, edgecolor='black')
hist_ax.set_xlabel('Q-gap (best - 2nd best)')
hist_ax.set_ylabel('Count')
hist_ax.set_title('Q-Gap Distribution')

# Right panel: empirical CDF - what fraction of states have a gap of at most X?
q_gap_sorted = np.sort(gap.values)
n_states = len(q_gap_sorted)
cdf = np.arange(1, n_states + 1) / n_states
cdf_ax.plot(q_gap_sorted, cdf)
cdf_ax.set_xlabel('Q-gap')
cdf_ax.set_ylabel('CDF')
cdf_ax.set_title('Cumulative Distribution of Q-Gap')
# Reference lines at the 90% and 50% levels
for level, line_color in [(0.9, 'red'), (0.5, 'orange')]:
    cdf_ax.axhline(y=level, color=line_color, linestyle='--', alpha=0.5)

plt.tight_layout()
plt.show()

# Share of states at the two thresholds of interest
pct_zero_gap = 100 * gap.eq(0).mean()
pct_small_gap = 100 * gap.le(5).mean()
print(f"States with q_gap = 0 (tied optimal): {pct_zero_gap:.1f}%")
print(f"States with q_gap <= 5 (low-stakes): {pct_small_gap:.1f}%")

## 6. Q-Gap vs Depth

Do decisions get easier or harder as game progresses?

In [None]:
# Per-depth aggregates of q_gap, q_spread and n_optimal (one row per depth)
depth_q = (
    q_df.groupby('depth')
    .agg(
        gap_mean=('q_gap', 'mean'),
        gap_median=('q_gap', 'median'),
        gap_std=('q_gap', 'std'),
        spread_mean=('q_spread', 'mean'),
        spread_median=('q_spread', 'median'),
        optimal_mean=('n_optimal', 'mean'),
    )
    .reset_index()
)

fig, axes = plt.subplots(1, 3, figsize=(16, 5))

# First two panels share a layout: mean and median of a metric against depth
curve_panels = [
    (axes[0], 'gap', 'Q-gap', 'Q-Gap vs Depth'),
    (axes[1], 'spread', 'Q-spread', 'Q-Spread vs Depth'),
]
for ax, prefix, y_label, panel_title in curve_panels:
    ax.plot(depth_q['depth'], depth_q[f'{prefix}_mean'], 'o-', label='Mean')
    ax.plot(depth_q['depth'], depth_q[f'{prefix}_median'], 's--', label='Median')
    ax.set_xlabel('Depth')
    ax.set_ylabel(y_label)
    ax.set_title(panel_title)
    ax.legend()

# Third panel: average number of tied optimal moves against depth
ties_ax = axes[2]
ties_ax.plot(depth_q['depth'], depth_q['optimal_mean'], 'o-', color='green')
ties_ax.set_xlabel('Depth')
ties_ax.set_ylabel('Avg # optimal moves')
ties_ax.set_title('Optimal Move Ties vs Depth')

plt.tight_layout()
plt.savefig('../../results/figures/01b_q_by_depth.png', dpi=150, bbox_inches='tight')
plt.show()

# Pearson correlation of depth with q_gap and q_spread over all sampled states
depth_values = q_df['depth']
corr_gap = np.corrcoef(depth_values, q_df['q_gap'])[0, 1]
corr_spread = np.corrcoef(depth_values, q_df['q_spread'])[0, 1]
print(f"Correlation(depth, q_gap): {corr_gap:.4f}")
print(f"Correlation(depth, q_spread): {corr_spread:.4f}")

## 7. Team Comparison

Is decision difficulty symmetric between teams?

In [None]:
# Per-team aggregates of q_gap, q_spread and n_optimal (one row per team)
team_stats = (
    q_df.groupby('team')
    .agg(
        gap_mean=('q_gap', 'mean'),
        gap_median=('q_gap', 'median'),
        spread_mean=('q_spread', 'mean'),
        spread_median=('q_spread', 'median'),
        optimal_mean=('n_optimal', 'mean'),
    )
    .reset_index()
)

print("Q-statistics by team:")
print(team_stats)

In [None]:
# One boxplot panel per metric, split by team
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

box_panels = [('q_gap', 'Q-Gap'), ('q_spread', 'Q-Spread'), ('n_optimal', '# Optimal')]
for ax, (col, title) in zip(axes, box_panels):
    sns.boxplot(data=q_df, x='team', y=col, ax=ax)
    ax.set_xlabel('Team (0=maximizing, 1=minimizing)')
    ax.set_title(f'{title} by Team')

plt.tight_layout()
plt.show()

## 8. Critical Decisions

States where mistakes are very costly (high q_gap).

In [None]:
# Define "critical" as q_gap > 10
CRITICAL_THRESHOLD = 10
critical_mask = q_df['q_gap'] > CRITICAL_THRESHOLD
n_critical = critical_mask.sum()
pct_critical = 100 * critical_mask.mean()

print(f"Critical decisions (q_gap > {CRITICAL_THRESHOLD}): {n_critical:,} ({pct_critical:.2f}%)")

# Characteristics of critical decisions
critical_rows = q_df.loc[critical_mask]
# The mean of the 0/1 team column would be the share of team 1; compare
# against 0 explicitly so the printed value matches its "Team 0" label.
team0_fraction = (critical_rows['team'] == 0).mean()

print("\nCritical decision characteristics:")
print(f"  Avg depth: {critical_rows['depth'].mean():.1f}")
print(f"  Avg n_legal: {critical_rows['n_legal'].mean():.1f}")
print(f"  Team 0 fraction: {team0_fraction:.2f}")

In [None]:
# Critical decisions by depth: percentage of states at each depth whose q_gap
# exceeds CRITICAL_THRESHOLD. Averaging the boolean mask per depth group gives
# the same values as groupby().apply() with a lambda, but stays vectorized and
# avoids the pandas warning about apply() operating on the grouping column.
critical_by_depth = 100 * (q_df['q_gap'] > CRITICAL_THRESHOLD).groupby(q_df['depth']).mean()

fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(critical_by_depth.index, critical_by_depth.values, color='crimson', alpha=0.7)
ax.set_xlabel('Depth')
ax.set_ylabel(f'% with q_gap > {CRITICAL_THRESHOLD}')
ax.set_title('Critical Decisions by Depth')
plt.show()

## Summary

In [None]:
# Headline numbers; pct_critical and corr_gap come from the earlier analysis cells
total_states = len(q_df)
unique_optimal_pct = 100 * (q_df['n_optimal'] == 1).mean()
mean_n_legal = q_df['n_legal'].mean()
median_gap = q_df['q_gap'].median()
median_spread = q_df['q_spread'].median()

summary = {
    'Total states': f"{total_states:,}",
    'Unique optimal %': f"{unique_optimal_pct:.1f}%",
    'Avg n_legal': f"{mean_n_legal:.2f}",
    'Median q_gap': f"{median_gap:.1f}",
    'Median q_spread': f"{median_spread:.1f}",
    f'Critical (gap>{CRITICAL_THRESHOLD}) %': f"{pct_critical:.2f}%",
    'Corr(depth, gap)': f"{corr_gap:.4f}",
}

print(viz.create_summary_table(summary, "Q-Value Structure Summary"))

In [None]:
# Persist the per-depth aggregate table (depth_q) as CSV, without the index column
depth_table_path = '../../results/tables/01b_q_by_depth.csv'
depth_q.to_csv(depth_table_path, index=False)
print("Results saved to results/tables/01b_q_by_depth.csv")