# Phase 8: Final Report

Aggregate all analysis results and generate the final HTML report.

**Inputs:**
- All previous phase outputs

**Outputs:**
- `report.html` - Final HTML report
- `report_summary.json` - Machine-readable summary

In [None]:
# Parameters (injected by Papermill)
# The values below are placeholders for an interactive run.
# - username: passed to validate_parameters() in the setup cell and used to
#   locate every phase output loaded by this notebook.
# - report_title: an empty string makes the setup cell fall back to
#   "Fairness Analysis Report: <username>".
# - report_options: copied unchanged into report_data['report_options'].
username = "default_user"  # Chess.com username
report_title = ""  # Custom report title (empty string = auto)
report_options = "Default"  # CLI options used during generation

In [None]:
# Setup
import sys
sys.path.insert(0, '..')
from common import (
    setup_notebook, validate_parameters, print_section, print_subsection,
    get_user_data_dir, get_report_dir, save_phase_output, load_phase_output,
    load_dataset_parquet, load_baseline,
    PROJECT_ROOT,
    render_position,  # For SVG board rendering
    detect_game_phase, GamePhase,  # For phase detection
)
import chess
import json
import pandas as pd
import numpy as np
from datetime import datetime
from pathlib import Path
from jinja2 import Environment, FileSystemLoader

# Prepare the notebook environment, then check the injected parameters.
setup_notebook()
validate_parameters(username)

# An empty report_title means "auto": derive the title from the username.
actual_report_title = report_title or f"Fairness Analysis Report: {username}"

In [None]:
# Load all phase outputs
print_section(f"GENERATING REPORT: {username}")

# Container that every later loading step writes its results into.
report_data = dict(
    username=username,
    title=actual_report_title,
    generated_at=datetime.now().isoformat(),
    report_options=report_options,
    phases={},
)

# Fetch player profile for status.
# Best effort: an import failure, a fetch error or an empty profile all end
# up as player_status == 'unknown' instead of aborting the report.
try:
    from chess_analysis import fetch_player_profile
    player_profile = fetch_player_profile(username)
    if not player_profile:
        report_data['player_status'] = 'unknown'
        print("Player status: unknown (could not fetch)")
    else:
        report_data['player_status'] = player_profile.status
        print(f"Player status: {player_profile.status}")
except Exception as e:
    report_data['player_status'] = 'unknown'
    print(f"Player status: unknown (error: {e})")

# Phase 1: Data collection
# The manifest is the only required artefact here; the date range is an
# optional extra derived from games.parquet.
try:
    manifest = load_phase_output(username, "phase1", "manifest.json")
except FileNotFoundError:
    print("Phase 1: Not found")
else:
    report_data['phases']['data_collection'] = manifest
    print(f"Phase 1: {manifest.get('total_games', 0)} games")

    # Extract date range from games.
    # Best effort, matching the metadata lookup that reads the same file:
    # a broken games file or unusable dates must not abort the report, and
    # must not be reported as a missing Phase 1.
    try:
        user_data_dir = get_user_data_dir(username)
        games_path = user_data_dir / "games.parquet"
        if games_path.exists():
            games_data = load_dataset_parquet(games_path)
            # Handle both list and DataFrame returns
            games_df = pd.DataFrame(games_data) if isinstance(games_data, list) else games_data
            if 'date' in games_df.columns:
                # Unparseable values become NaT instead of raising;
                # min()/max() skip NaT.
                dates = pd.to_datetime(games_df['date'], errors='coerce')
                # With no valid date, min()/max() return NaT and strftime
                # would raise, so the range is left unset.
                if dates.notna().any():
                    date_start = dates.min().strftime('%Y-%m-%d')
                    date_end = dates.max().strftime('%Y-%m-%d')
                    report_data['date_range'] = f"{date_start} to {date_end}"
                    print(f"Date range: {report_data['date_range']}")
    except Exception as e:
        print(f"Warning: Could not determine date range: {e}")

# Build game_id -> full metadata lookup from games.parquet
game_player_colors = {}
game_metadata = {}  # game_id -> all metadata for display


def _int_or_zero(value):
    """Return int(value), or 0 when the value is missing (None/NaN)."""
    return int(value) if pd.notna(value) else 0


# Best effort: any failure leaves the lookups (possibly partially) filled
# and prints a warning instead of aborting the report.
try:
    user_data_dir = get_user_data_dir(username)
    games_path = user_data_dir / "games.parquet"
    if games_path.exists():
        games_data = load_dataset_parquet(games_path)
        if isinstance(games_data, list):
            games_df = pd.DataFrame(games_data)
        else:
            games_df = games_data

        # The column set does not change per row, so check it once.
        has_player_color = 'player_color' in games_df.columns

        for _, row in games_df.iterrows():
            game_id = str(row['game_id'])

            # Player colour lookup: True when the player had the white pieces
            if has_player_color:
                player_is_white = row['player_color'] == 'white'
                game_player_colors[game_id] = player_is_white

            # Full metadata lookup; numeric fields fall back to 0
            game_metadata[game_id] = {
                'player_color': row.get('player_color', 'white'),
                'player_elo': _int_or_zero(row.get('player_elo')),
                'opponent_elo': _int_or_zero(row.get('opponent_elo')),
                'result': row.get('result', ''),
                'player_result': row.get('player_result', ''),
                'termination': row.get('termination', ''),
                'date': row.get('date', ''),
                'end_time': _int_or_zero(row.get('end_time')),
            }

        print(f"Loaded metadata for {len(game_metadata)} games")
except Exception as e:
    print(f"Warning: Could not load game metadata: {e}")

# Phase 2: Quick analysis
# The stats are stored in the report unchanged.
try:
    quick_stats = load_phase_output(username, "phase2", "quick_stats.json")
except FileNotFoundError:
    print("Phase 2: Not found")
else:
    report_data['phases']['quick_analysis'] = quick_stats
    print("Phase 2: Loaded quick stats")

# Phase 3: Prioritization
# Only the selection count and the score threshold are kept.
try:
    priority = load_phase_output(username, "phase3", "high_priority_games.json")
except FileNotFoundError:
    print("Phase 3: Not found")
else:
    high_priority_count = priority.get('selected_count', 0)
    report_data['phases']['prioritization'] = {
        'high_priority_count': high_priority_count,
        'min_suspicion_score': priority.get('min_suspicion_score', 0),
    }
    print(f"Phase 3: {high_priority_count} high priority games")


def get_position_complexity(fen: str) -> dict:
    """
    Summarise how demanding a position is.

    Args:
        fen: FEN string of the position to inspect.

    Returns:
        dict with keys:
        - legal_moves: number of legal moves in the position
        - phase: ``.value`` of the phase returned by detect_game_phase
        - is_complex: True when there are more than 25 legal moves or the
          detected phase is anything other than the opening

        If building or inspecting the board fails for any reason, a neutral
        fallback is returned (0 moves, 'unknown' phase, not complex).
    """
    try:
        board = chess.Board(fen)
        move_count = sum(1 for _ in board.legal_moves)
        detected_phase = detect_game_phase(board)

        # Many options (>25 moves) or being past the opening both count as
        # complex.
        complex_position = move_count > 25 or detected_phase != GamePhase.OPENING

        summary = {
            'legal_moves': move_count,
            'phase': detected_phase.value,
            'is_complex': complex_position,
        }
    except Exception:
        return {'legal_moves': 0, 'phase': 'unknown', 'is_complex': False}
    return summary


def normalize_position_data(pos: dict) -> dict:
    """
    Normalize position data to handle backwards compatibility with old data formats.

    Old format had:
    - 'complexity' instead of 'engine_complexity_score'
    - Missing: eval_volatility, gap_cp, branching_factor

    This function maps old field names to new ones and provides defaults.
    It also carries the position assessment fields used by the reorganized
    template.

    Args:
        pos: Raw position dictionary in either the old or the new format.

    Returns:
        A new dictionary with a fixed set of keys. 'fen', 'move', 'ply' and
        any other key not listed below are NOT copied. The input is not
        modified.
    """
    # Map old field names to new ones
    engine_complexity = pos.get('engine_complexity_score')
    if engine_complexity is None:
        # Try old field name
        engine_complexity = pos.get('complexity')
    if engine_complexity is None:
        # Neither field holds a value (missing or explicit null). Use 0 so
        # the threshold comparisons below, and numeric consumers of the
        # score, never see None.
        engine_complexity = 0

    # Derive engine_complexity_category from score if not present
    engine_category = pos.get('engine_complexity_category')
    if engine_category is None or engine_category == 'UNKNOWN':
        if engine_complexity >= 0.7:
            engine_category = 'VERY_HIGH'
        elif engine_complexity >= 0.5:
            engine_category = 'HIGH'
        elif engine_complexity >= 0.3:
            engine_category = 'MEDIUM'
        else:
            engine_category = 'LOW'

    return {
        # Basic move info
        'cpl': pos.get('cpl', 0),
        'eval_before': pos.get('eval_before'),
        'eval_after': pos.get('eval_after'),
        'best_move': pos.get('best_move'),
        'is_best': pos.get('is_best', False),
        'move_class': pos.get('move_class'),

        # Stockfish section (new fields)
        'analysis_depth': pos.get('analysis_depth', pos.get('depth', 20)),
        'move_rank': pos.get('move_rank'),
        'total_legal_moves': pos.get('total_legal_moves', pos.get('legal_moves', 0)),

        # Maia2 section (new fields)
        'num_human_moves': pos.get('num_human_moves'),
        'cp_adjustment': pos.get('cp_adjustment'),
        'maia_total_moves': pos.get('maia_total_moves'),

        # Game phase
        'phase': pos.get('phase', 'unknown'),
        'game_phase': pos.get('game_phase', pos.get('phase', 'unknown')),

        # Traps / Trickiness (new)
        'has_trap': pos.get('has_trap', False),
        'is_tricky': pos.get('is_tricky', False),

        # Pure material (new)
        'pure_material': pos.get('pure_material'),

        # Raw branching factor (new)
        'raw_branching_factor': pos.get('raw_branching_factor'),

        # Graph-based fragility (extended)
        'fragility': pos.get('fragility', 0),
        'is_pre_fragility_peak': pos.get('is_pre_fragility_peak', False),
        'fragility_trend': pos.get('fragility_trend'),
        'distance_to_peak': pos.get('distance_to_peak'),

        # Legal moves
        'legal_moves': pos.get('legal_moves', 0),

        # Engine complexity heuristics (with backwards compatibility)
        'eval_volatility': pos.get('eval_volatility', 0),
        'eval_volatility_normalized': pos.get('eval_volatility_normalized', 0),
        'gap_cp': pos.get('gap_cp', 0),
        'avg_gap_cp': pos.get('avg_gap_cp', 0),
        'convergence_depth': pos.get('convergence_depth'),
        'branching_factor': pos.get('branching_factor', 3.5),
        'node_branching_factor': pos.get('node_branching_factor', pos.get('branching_factor', 3.5)),
        'num_playable_moves': pos.get('num_playable_moves'),
        'engine_complexity_score': engine_complexity,
        'engine_complexity_category': engine_category,
        'total_nodes': pos.get('total_nodes', 0),
        'eval_swing': pos.get('eval_swing', 0),

        # Book status (new)
        'is_book_move': pos.get('is_book_move', False),
        'distance_from_book': pos.get('distance_from_book'),

        # Tablebase status (new)
        'tablebase_status': pos.get('tablebase_status'),
    }


def get_strong_move_context(pos: dict) -> str:
    """
    Generate context explaining why this strong move is notable.

    Numeric fields that are missing or explicitly None (e.g. a JSON null)
    are treated as 0, so a partially analysed position never raises here.

    Args:
        pos: Position dictionary. Keys read: is_best, cpl, eval_swing, phase,
            fragility, is_pre_fragility_peak, engine_complexity_score,
            engine_complexity_category, eval_volatility, gap_cp.

    Returns:
        The reasons joined with "; ". When no specific reason applies, the
        generic text "Strong move in demanding position" is returned.
    """
    def _num(key, default=0):
        # .get() only covers a missing key; a present-but-None value must
        # fall back as well, or the comparisons below raise TypeError.
        value = pos.get(key, default)
        return default if value is None else value

    is_best = pos.get('is_best', False)
    phase = pos.get('phase', 'unknown')
    is_pre_peak = pos.get('is_pre_fragility_peak', False)
    engine_category = pos.get('engine_complexity_category', 'UNKNOWN')

    cpl = _num('cpl')
    eval_swing = abs(_num('eval_swing'))
    fragility = _num('fragility')
    engine_complexity = _num('engine_complexity_score')
    eval_volatility = _num('eval_volatility')
    gap_cp = _num('gap_cp')

    reasons = []

    if is_best and cpl == 0:
        reasons.append("Found the best move")
    elif cpl < 10:
        reasons.append("Found an excellent move")

    # Engine complexity category; the raw score is only a fallback for
    # positions whose category is neither HIGH nor VERY_HIGH.
    if engine_category == 'VERY_HIGH':
        reasons.append(f"Extremely complex engine position ({engine_complexity*100:.0f}%)")
    elif engine_category == 'HIGH':
        reasons.append(f"Highly complex engine position ({engine_complexity*100:.0f}%)")
    elif engine_complexity > 0.4:
        reasons.append(f"Complex position ({engine_complexity*100:.0f}%)")

    # Gap metric context (a gap of 0 means "not available" and is skipped)
    if 0 < gap_cp < 50:
        reasons.append(f"Low Gap between Best move and 2nd best ({gap_cp}cp gap)")
    elif 0 < gap_cp < 100:
        reasons.append(f"Multiple viable moves ({gap_cp}cp gap)")

    # Eval volatility context
    if eval_volatility > 150:
        reasons.append(f"Very unstable position ({eval_volatility:.0f}cp volatility)")
    elif eval_volatility > 80:
        reasons.append(f"Unstable position ({eval_volatility:.0f}cp volatility)")

    # A pre-peak flag takes precedence over the plain fragility value
    if is_pre_peak:
        reasons.append("Decisive moment (pre-fragility peak)")
    elif fragility > 0.3:
        reasons.append(f"Fragile position ({fragility:.2f})")

    if eval_swing > 50:
        reasons.append(f"Critical position (eval swing: {eval_swing}cp)")

    if phase == 'middlegame':
        reasons.append("Middlegame position")
    elif phase == 'endgame':
        reasons.append("Endgame technique")

    if not reasons:
        reasons.append("Strong move in demanding position")

    return "; ".join(reasons)


def get_maia2_context(pos: dict) -> tuple[str, str]:
    """
    Generate context for Maia2 surprising move with complexity-aware discounting.

    A position is "simple" (discountable) if:
    1. Only 1 competitive move (forced)
    2. Almost all moves are winning (any move works)

    A position is "complex" (concerning if surprising) if:
    - There are 3-8 competitive moves - real choice exists

    Numeric fields that are missing or explicitly None (e.g. a JSON null)
    fall back to their defaults instead of raising: 0 for every field except
    cpl, which falls back to 999 so an unknown CPL is never reported as a
    good move.

    Args:
        pos: Position dictionary. Keys read: probability, rank,
            engine_complexity_score, engine_complexity_category,
            num_competitive_moves, legal_moves, cpl, gap_cp, eval_volatility.

    Returns:
        Tuple of (context_string, discount_status)
        discount_status: 'full', 'partial', or 'none'
    """
    def _num(key, default=0):
        # .get() only covers a missing key; a present-but-None value must
        # fall back as well, or the arithmetic below raises TypeError.
        value = pos.get(key, default)
        return default if value is None else value

    probability = _num('probability')
    rank = _num('rank')
    engine_complexity = _num('engine_complexity_score')
    engine_category = pos.get('engine_complexity_category', 'UNKNOWN')
    # NOTE(review): the Phase 6 caller visible in this notebook copies
    # 'num_playable_moves' into pos but not 'num_competitive_moves'. Confirm
    # that key is populated somewhere, otherwise the discount branches below
    # never run.
    num_competitive = _num('num_competitive_moves')
    legal_moves = _num('legal_moves')
    cpl = _num('cpl', 999)
    gap_cp = _num('gap_cp')
    eval_volatility = _num('eval_volatility')

    context_parts = [f"Surprising move: Maia2 probability {probability*100:.1f}% (ranked #{rank})"]
    discount_status = 'none'

    # Discount based on position characteristics.
    # Both counts must be known (> 0); otherwise no discount is applied.
    if legal_moves > 0 and num_competitive > 0:
        competitive_ratio = num_competitive / legal_moves

        # Simple position type 1: almost every move wins
        if competitive_ratio > 0.8 and num_competitive > 10:
            # Almost every move works - just pick any move
            context_parts.append(f"DISCOUNTED: Trivial position ({num_competitive}/{legal_moves} moves are good)")
            discount_status = 'full'
        elif competitive_ratio > 0.6 and num_competitive > 6:
            # Most moves work
            context_parts.append(f"Partially discounted: Easy position ({num_competitive}/{legal_moves} moves work)")
            discount_status = 'partial'
        # Simple position type 2: Only one good move (forced)
        elif num_competitive <= 1:
            context_parts.append(f"DISCOUNTED: Forced position (only {num_competitive} good move)")
            discount_status = 'full'
        elif num_competitive == 2:
            context_parts.append(f"Partially discounted: Near-forced ({num_competitive} good moves)")
            discount_status = 'partial'
        # Complex position: Real choice exists, low probability IS concerning
        elif 3 <= num_competitive <= 8 and engine_complexity > 0.3:
            # This is the concerning case - many choices, player picked an unusual one
            context_parts.append(f"Complex position: {num_competitive} competitive moves")
            discount_status = 'none'

    # Add engine complexity context
    if engine_category in ['HIGH', 'VERY_HIGH']:
        context_parts.append(f"Engine complexity: {engine_category}")

    # Add gap/volatility context if notable
    if 0 < gap_cp < 50:
        context_parts.append(f"Close to 2nd best ({gap_cp}cp gap)")
    if eval_volatility > 100:
        context_parts.append(f"Unstable position ({eval_volatility:.0f}cp volatility)")

    # Note if move was actually good despite low Maia2 probability
    if cpl < 10:
        context_parts.append(f"Move was excellent (CPL {cpl:.0f})")
    elif cpl < 25:
        context_parts.append(f"Move was good (CPL {cpl:.0f})")

    return "; ".join(context_parts), discount_status



# Pre-load Maia2 position-level data for humanness lookup
# (Loaded before engine analysis so we can attach humanness to engine positions)
maia2_position_lookup = {}  # (game_id, fen, move) -> humanness data
try:
    maia2_positions_data = load_phase_output(username, "phase6", "maia2_positions.parquet")
    # The loader may hand back a list of records or a DataFrame.
    if isinstance(maia2_positions_data, list):
        maia2_pos_df = pd.DataFrame(maia2_positions_data)
    else:
        maia2_pos_df = maia2_positions_data

    for _, row in maia2_pos_df.iterrows():
        # game_id is stringified so the key matches the engine-side lookups.
        key = (str(row['game_id']), row['fen'], row['move'])
        maia2_position_lookup[key] = dict(
            probability=row.get('probability', 0),
            rank=row.get('rank', 0),
            top_move=row.get('top_move'),
            top_move_probability=row.get('top_move_probability', 0),
        )

    print(f"Built Maia2 position lookup with {len(maia2_position_lookup)} positions")
except FileNotFoundError:
    # Phase 6 has not produced position data; the lookup stays empty.
    print("Maia2 position data not found (run phase 6 to generate)")
except Exception as e:
    # Any other failure is reported but does not abort the report.
    print(f"Warning: Could not load Maia2 position data: {e}")


# Phase 4a: Engine analysis
# Overview of this step:
#   1. Average the per-game engine metrics into
#      report_data['phases']['engine_analysis'].
#   2. Index every analysed position by (game_id, fen, move) so the Maia2
#      step can attach engine data to its surprising moves.
#   3. Per game, pick non-opening moves with CPL < 10 in demanding positions,
#      rank them by a priority score, render each as an SVG board and store
#      the result in report_data['key_positions'].
# Only FileNotFoundError is handled here; any other error (for example a
# missing 'acpl' column) propagates.
# Build lookup for engine position data: (game_id, fen, move) -> position_data
engine_position_lookup = {}

try:
    engine_data = load_phase_output(username, "phase4a", "engine_analysis.parquet")
    engine_df = pd.DataFrame(engine_data) if isinstance(engine_data, list) else engine_data
    # Mean ACPL over all games; also the fallback for games without 'acpl'.
    avg_acpl = float(engine_df['acpl'].mean())
    
    # Build engine analysis phase data with new heuristics if available
    ea_data = {
        'games_analyzed': len(engine_df),
        'avg_acpl': avg_acpl,
        'avg_accuracy': float(engine_df['accuracy'].mean()),
        'best_move_rate': float(engine_df['best_move_rate'].mean()),
    }
    
    # Add new engine heuristics if available (with backwards compatibility)
    if 'avg_eval_volatility' in engine_df.columns:
        ea_data['avg_eval_volatility'] = float(engine_df['avg_eval_volatility'].mean())
    if 'avg_gap_cp' in engine_df.columns:
        ea_data['avg_gap_cp'] = float(engine_df['avg_gap_cp'].mean())
    
    # Try new field name first, then fall back to old field name
    if 'avg_engine_complexity' in engine_df.columns:
        ea_data['avg_engine_complexity'] = float(engine_df['avg_engine_complexity'].mean())
    elif 'avg_complexity' in engine_df.columns:
        # Backwards compatibility: old field name
        ea_data['avg_engine_complexity'] = float(engine_df['avg_complexity'].mean())
    
    if 'high_complexity_moves' in engine_df.columns:
        ea_data['high_complexity_moves'] = int(engine_df['high_complexity_moves'].sum())
    
    # Add fragility average if available
    if 'avg_fragility' in engine_df.columns:
        ea_data['avg_fragility'] = float(engine_df['avg_fragility'].mean())
    
    report_data['phases']['engine_analysis'] = ea_data
    print(f"Phase 4a: {len(engine_df)} games analyzed")
    
    # Try to load key positions for detailed view with SVG rendering
    # (optional: a missing engine_positions.json is silently skipped below)
    try:
        engine_positions = load_phase_output(username, "phase4a", "engine_positions.json")
        key_positions = {}
        
        # Build lookup for all engine positions (for Maia2 cross-reference)
        for game in engine_positions.get('games', []):
            game_id = str(game.get('game_id'))
            for pos in game.get('positions', []):
                fen = pos.get('fen')
                move = pos.get('move')
                if fen and move:
                    # Key by (game_id, fen, move) - normalize position data for backwards compatibility
                    engine_position_lookup[(game_id, fen, move)] = normalize_position_data(pos)
        
        print(f"Built engine position lookup with {len(engine_position_lookup)} positions")
        
        # Second pass over the same games: select and render key positions.
        for game in engine_positions.get('games', []):  # All games with positions
            # NOTE(review): game_id is kept in its raw form here (and used as
            # the key_positions key), while the lookups use str(game_id).
            game_id = game.get('game_id')
            game_acpl = game.get('acpl', avg_acpl)
            
            if game_id:
                positions = game.get('positions', [])
                
                # Find strong moves in complex positions - using engine heuristics
                strong_moves = []
                for p in positions:
                    # Normalize position data for backwards compatibility
                    p_norm = normalize_position_data(p)
                    # Copy original fields not in normalize_position_data
                    p_norm['fen'] = p.get('fen')
                    p_norm['move'] = p.get('move')
                    p_norm['ply'] = p.get('ply')  # Move number for report links
                    p_norm['peak_fragility'] = p.get('peak_fragility', 0)
                    
                    phase = p_norm.get('phase', 'unknown')
                    
                    # Skip opening positions - not interesting for cheat detection
                    if phase == 'opening':
                        continue
                    
                    # NOTE(review): is_best and fragility are read but not
                    # used by the selection or scoring below.
                    is_best = p_norm.get('is_best', False)
                    # NOTE(review): normalize_position_data always sets 'cpl'
                    # (defaulting to 0), so the 999 fallback cannot trigger;
                    # a position without a CPL is treated as CPL 0 here.
                    # Confirm that is intended.
                    cpl = p_norm.get('cpl', 999)
                    fragility = p_norm.get('fragility', 0)
                    is_pre_peak = p_norm.get('is_pre_fragility_peak', False)
                    eval_swing = abs(p_norm.get('eval_swing', 0))
                    
                    # Engine heuristics (normalized)
                    engine_complexity = p_norm.get('engine_complexity_score', 0)
                    engine_category = p_norm.get('engine_complexity_category', 'UNKNOWN')
                    eval_volatility = p_norm.get('eval_volatility', 0)
                    gap_cp = p_norm.get('gap_cp', 0)
                    
                    # Select if: low CPL + (high engine complexity OR pre-fragility peak OR critical position)
                    is_suspicious = (
                        cpl < 10 and (
                            engine_complexity > 0.5 or
                            engine_category in ['HIGH', 'VERY_HIGH'] or
                            is_pre_peak or
                            eval_swing > 30 or
                            (gap_cp > 0 and gap_cp < 50)  # Close gap = hard choice
                        )
                    )
                    
                    if is_suspicious:
                        p_copy = p_norm.copy()
                        # Priority score: engine complexity + fragility bonus + eval_swing + gap bonus
                        # Contributions: complexity scaled by 100, +50 for a
                        # pre-fragility peak, the absolute eval swing as-is,
                        # +30 for a close gap, and volatility/5 capped at 30.
                        gap_bonus = 30 if (gap_cp > 0 and gap_cp < 50) else 0
                        volatility_bonus = min(30, eval_volatility / 5) if eval_volatility > 0 else 0
                        p_copy['priority_score'] = (
                            engine_complexity * 100 + 
                            (50 if is_pre_peak else 0) + 
                            eval_swing + 
                            gap_bonus +
                            volatility_bonus
                        )
                        strong_moves.append(p_copy)
                
                # Sort by priority score (most suspicious first)
                strong_moves = sorted(
                    strong_moves,
                    key=lambda x: x.get('priority_score', 0),
                    reverse=True
                )  # All high complexity moves (no limit)
                
                # Get player color for board orientation
                # (defaults to white when the game is not in the lookup)
                player_is_white = game_player_colors.get(str(game_id), True)
                
                if strong_moves:
                    # Generate SVG and context for each key position
                    for pos in strong_moves:
                        fen = pos.get('fen')
                        played_move = pos.get('move')
                        best_move = pos.get('best_move')
                        
                        # Add context explaining why this position was selected
                        pos['context'] = get_strong_move_context(pos)
                        pos['error_class'] = 'Strong Move'  # Override error classification
                        
                        # Attach Maia2 humanness data if available
                        maia2_key = (str(game_id), fen, played_move)
                        if maia2_key in maia2_position_lookup:
                            maia2_info = maia2_position_lookup[maia2_key]
                            pos['probability'] = maia2_info.get('probability', 0)
                            pos['rank'] = maia2_info.get('rank', 0)
                            pos['top_move'] = maia2_info.get('top_move')
                            pos['top_move_probability'] = maia2_info.get('top_move_probability', 0)
                        
                        if fen and played_move:
                            try:
                                # Render board with arrows: green for played move
                                # Only show blue arrow if different from played
                                svg = render_position(
                                    fen=fen,
                                    played_move=played_move,
                                    best_move=best_move if best_move != played_move else None,
                                    size=300,
                                    show_coordinates=True,
                                    flipped=not player_is_white,  # Flip when player is black
                                )
                                pos['svg'] = svg
                            except Exception as e:
                                # A render failure only drops the 'svg' key;
                                # the position itself is still reported.
                                print(f"Warning: Could not render position: {e}")
                    
                    key_positions[game_id] = {
                        'positions': strong_moves, 
                        'game_acpl': game_acpl,
                        'player_is_white': player_is_white,
                    }
        
        # 'key_positions' is only added to the report when at least one game
        # produced a strong move.
        if key_positions:
            report_data['key_positions'] = key_positions
            print(f"Loaded key positions from {len(key_positions)} games (strong moves in complex positions)")
    except FileNotFoundError:
        # No per-position file: keep the summary, skip the detailed view.
        pass
except FileNotFoundError:
    print("Phase 4a: Not found")

# Phase 4b: Regan analysis
# Fields copied for each flagged game, with the default used when absent.
regan_display_defaults = (
    ('z_score', 0),
    ('ipr', 0),
    ('official_elo', 0),
    ('elo_difference', 0),
    ('move_match_rate', 0),
    ('suspicion_level', 'unknown'),
    ('acpl', 0),
    ('total_moves', 0),
)

try:
    regan_data = load_phase_output(username, "phase4b", "regan_analysis.parquet")
    if isinstance(regan_data, list):
        regan_df = pd.DataFrame(regan_data)
    else:
        regan_df = regan_data

    # Summary statistics over all analysed games
    report_data['phases']['regan_analysis'] = {
        'games_analyzed': len(regan_df),
        'avg_z_score': float(regan_df['z_score'].mean()),
        'max_z_score': float(regan_df['z_score'].max()),
        'flagged_games': int(regan_df['is_flagged'].sum()),
    }
    print(f"Phase 4b: {report_data['phases']['regan_analysis']['flagged_games']} flagged games")

    # Optional detail file: per-game data for the flagged games
    try:
        suspicious = load_phase_output(username, "phase4b", "suspicious_positions.json")
        flagged_games_list = suspicious.get('flagged_games', [])
        flagged_moves = sum(len(g.get('moves', [])) for g in flagged_games_list)
        report_data['phases']['regan_analysis']['flagged_moves'] = flagged_moves

        # Keep display data for the first 10 flagged games that have an id
        if flagged_games_list:
            regan_flagged = {}
            for game in flagged_games_list[:10]:
                game_id = game.get('game_id')
                if not game_id:
                    continue
                regan_flagged[game_id] = {
                    field: game.get(field, fallback)
                    for field, fallback in regan_display_defaults
                }
            if regan_flagged:
                report_data['regan_flagged_games'] = regan_flagged
                print(f"Loaded {len(regan_flagged)} Regan flagged games for display")
    except FileNotFoundError:
        # No detail file: the summary above is still reported.
        pass
except FileNotFoundError:
    print("Phase 4b: Not found")

# Phase 4c: Tablebase
# The consistency summary is stored in the report unchanged.
try:
    tb = load_phase_output(username, "phase4c", "tablebase_consistency.json")
except FileNotFoundError:
    print("Phase 4c: Not found")
else:
    report_data['phases']['tablebase'] = tb
    print(f"Phase 4c: {tb.get('overall_accuracy', 0):.1%} tablebase accuracy")

# Phase 5: Time analysis
# NOTE(review): the global name `time` would shadow the stdlib module if it
# is ever imported in this notebook; it is kept in case later cells read it.
try:
    time = load_phase_output(username, "phase5", "time_analysis.json")
except FileNotFoundError:
    print("Phase 5: Not found")
else:
    report_data['phases']['time_analysis'] = time
    print(f"Phase 5: {time.get('suspicious_games', 0)} suspicious timing games")

# Phase 6: Maia2
try:
    maia = load_phase_output(username, "phase6", "surprising_moves.json")
    maia_data = {
        'games_analyzed': maia.get('games_analyzed', 0),
        'avg_humanness': maia.get('avg_humanness', 0),
        'maia_available': maia.get('maia_available', False),
    }

    # Count flagged games/moves from surprising moves
    surprising_moves = maia.get('surprising_moves', [])
    if surprising_moves:
        flagged_games = len([g for g in surprising_moves if g.get('moves')])
        flagged_moves = sum(len(g.get('moves', [])) for g in surprising_moves)
        maia_data['flagged_games'] = flagged_games
        maia_data['flagged_moves'] = flagged_moves

    report_data['phases']['maia2'] = maia_data
    print(f"Phase 6: {maia.get('avg_humanness', 0):.2f} avg humanness")

    # Build key positions for Maia2 surprising moves with SVG boards
    maia2_positions = {}
    for game_entry in surprising_moves[:5]:  # Top 5 games with surprising moves
        game_id = game_entry.get('game_id')
        moves = game_entry.get('moves', [])[:3]  # Top 3 surprising moves per game
        # Get player color - first from maia output, fallback to game lookup
        player_is_white = game_entry.get('player_is_white', game_player_colors.get(str(game_id), True))

        if game_id and moves:
            processed_moves = []
            for move_data in moves:
                fen = move_data.get('fen')
                played_move = move_data.get('move')
                probability = move_data.get('probability', 0)
                rank = move_data.get('rank', 0)
                top_move = move_data.get('top_move')
                top_move_probability = move_data.get('top_move_probability', 0)

                if fen and played_move:
                    pos = {
                        'fen': fen,
                        'move': played_move,
                        'probability': probability,
                        'rank': rank,
                        'top_move': top_move,
                        'top_move_probability': top_move_probability,
                    }
                    
                    # Look up engine analysis data for this position (includes engine heuristics)
                    engine_key = (str(game_id), fen, played_move)
                    if engine_key in engine_position_lookup:
                        engine_info = engine_position_lookup[engine_key]
                        pos['cpl'] = engine_info.get('cpl', 0)
                        pos['eval_before'] = engine_info.get('eval_before')
                        pos['eval_after'] = engine_info.get('eval_after')
                        pos['best_move'] = engine_info.get('best_move')
                        pos['is_best'] = engine_info.get('is_best', False)
                        pos['move_class'] = engine_info.get('move_class')
                        # Game phase and fragility
                        pos['phase'] = engine_info.get('phase', 'unknown')
                        pos['game_phase'] = engine_info.get('game_phase', engine_info.get('phase', 'unknown'))
                        pos['fragility'] = engine_info.get('fragility', 0)
                        pos['is_pre_fragility_peak'] = engine_info.get('is_pre_fragility_peak', False)
                        pos['fragility_trend'] = engine_info.get('fragility_trend')
                        pos['distance_to_peak'] = engine_info.get('distance_to_peak')
                        pos['legal_moves'] = engine_info.get('legal_moves', 0)
                        # Engine complexity heuristics (normalized for backwards compatibility)
                        pos['eval_volatility'] = engine_info.get('eval_volatility', 0)
                        pos['gap_cp'] = engine_info.get('gap_cp', 0)
                        pos['num_playable_moves'] = engine_info.get('num_playable_moves')
                        pos['node_branching_factor'] = engine_info.get('node_branching_factor')
                        pos['engine_complexity_score'] = engine_info.get('engine_complexity_score', 0)
                        pos['engine_complexity_category'] = engine_info.get('engine_complexity_category', 'UNKNOWN')
                        pos['branching_factor'] = engine_info.get('branching_factor', 3.5)
                        pos['raw_branching_factor'] = engine_info.get('raw_branching_factor')
                        # New fields
                        pos['analysis_depth'] = engine_info.get('analysis_depth', 20)
                        pos['move_rank'] = engine_info.get('move_rank')
                        pos['total_legal_moves'] = engine_info.get('total_legal_moves', engine_info.get('legal_moves', 0))
                        pos['pure_material'] = engine_info.get('pure_material')
                        pos['is_book_move'] = engine_info.get('is_book_move', False)
                        pos['distance_from_book'] = engine_info.get('distance_from_book')
                        pos['tablebase_status'] = engine_info.get('tablebase_status')
                        pos['has_trap'] = engine_info.get('has_trap', False)
                        pos['is_tricky'] = engine_info.get('is_tricky', False)
                    
                    # Generate context with complexity-aware discounting
                    context, discount_status = get_maia2_context(pos)
                    pos['context'] = context
                    pos['discounted'] = discount_status

                    try:
                        # Render board with arrow for the played move
                        # Flip board when player is black so their pieces are at bottom
                        svg = render_position(
                            fen=fen,
                            played_move=played_move,
                            best_move=pos.get('best_move') if pos.get('best_move') != played_move else None,
                            size=300,
                            show_coordinates=True,
                            flipped=not player_is_white,  # Flip when player is black
                        )
                        pos['svg'] = svg
                    except Exception as e:
                        print(f"Warning: Could not render Maia2 position: {e}")

                    processed_moves.append(pos)

            if processed_moves:
                maia2_positions[game_id] = {
                    'positions': processed_moves,
                    'player_is_white': player_is_white,
                }

    if maia2_positions:
        report_data['maia2_positions'] = maia2_positions
        print(f"Loaded Maia2 positions from {len(maia2_positions)} games (with SVG boards and engine data)")
except FileNotFoundError:
    print("Phase 6: Not found")

# Phase 7: Cheater comparison
# The risk assessment is optional: when its output file is absent the report
# is simply built without a 'risk_assessment' entry under 'phases'.
try:
    risk = load_phase_output(username, "phase7", "risk_assessment.json")
except FileNotFoundError:
    print("Phase 7: Not found")
else:
    report_data['phases']['risk_assessment'] = risk
    phase7_risk_level = risk.get('risk_level', 'UNKNOWN')
    print(f"Phase 7: Risk level = {phase7_risk_level}")

# Build consolidated games view - merges all flagged games/positions from all sources
print_subsection("CONSOLIDATING KEY GAMES")

# game_id -> {
#     'flags': [source names, in the order the sources were merged],
#     'positions': [position dicts, each tagged with a 'source' key],
#     'regan_data': per-game Regan data or None,
#     'player_is_white': bool,
#     ...plus the metadata fields below when the game is in game_metadata
# }
consolidated_games = {}

# Metadata fields copied verbatim from game_metadata onto each consolidated game
_METADATA_FIELDS = (
    'player_color',
    'player_elo',
    'opponent_elo',
    'result',
    'player_result',
    'termination',
    'date',
    'end_time',
)


def _consolidated_entry(game_id):
    """Return the consolidated record for game_id, creating an empty one on first use."""
    return consolidated_games.setdefault(game_id, {
        'flags': [],
        'positions': [],
        'regan_data': None,
        'player_is_white': True,  # default until a source provides the colour
    })


def _merge_position_source(source, games):
    """Flag every game in `games` with `source` and append tagged copies of its positions.

    Args:
        source: Name recorded in the game's 'flags' list and in each
            position's 'source' key (e.g. 'engine', 'maia2').
        games: Mapping of game_id -> dict with optional 'positions' and
            'player_is_white' keys.
    """
    for game_id, pos_data in games.items():
        entry = _consolidated_entry(game_id)
        entry['flags'].append(source)
        entry['player_is_white'] = pos_data.get('player_is_white', True)
        for pos in pos_data.get('positions', []):
            # Copy before tagging so the per-source data in report_data is not mutated
            tagged = pos.copy()
            tagged['source'] = source
            entry['positions'].append(tagged)


# Add engine analysis positions
_merge_position_source('engine', report_data.get('key_positions', {}))

# Add Regan flagged games
for game_id, game_data in report_data.get('regan_flagged_games', {}).items():
    entry = _consolidated_entry(game_id)
    entry['flags'].append('regan')
    entry['regan_data'] = game_data
    # Try to get player color from games lookup
    if game_id in game_player_colors:
        entry['player_is_white'] = game_player_colors[game_id]

# Add Maia2 positions
_merge_position_source('maia2', report_data.get('maia2_positions', {}))

# Add game metadata to each consolidated game
for game_id, game_data in consolidated_games.items():
    if game_id in game_metadata:
        meta = game_metadata[game_id]
        for field in _METADATA_FIELDS:
            game_data[field] = meta[field]

# Sort by date descending (most recent first), using end_time.
# `or 0` (rather than a .get default) also covers games whose metadata stores
# end_time as None, which would otherwise raise TypeError when compared with
# numeric timestamps. NOTE(review): assumes end_time is numeric when set — confirm.
sorted_games = sorted(
    consolidated_games.items(),
    key=lambda item: item[1].get('end_time') or 0,
    reverse=True,  # Most recent first
)
report_data['consolidated_games'] = dict(sorted_games)

# Report consolidation stats
total_consolidated = len(consolidated_games)
multi_flagged = sum(1 for g in consolidated_games.values() if len(g['flags']) > 1)
print(f"Total consolidated games: {total_consolidated}")
print(f"Games flagged by multiple analyses: {multi_flagged}")
for game_id, data in sorted_games[:3]:  # Show top 3
    date_str = data.get('date', 'unknown date')
    print(f"  {game_id} ({date_str}): {data['flags']}")

In [None]:
# Generate summary
print_subsection("REPORT SUMMARY")

# Identity fields come first so they lead both the printed and the saved summary.
summary = dict(
    username=username,
    generated_at=datetime.now().isoformat(),
    title=actual_report_title,
)

# For each phase (in output order): (summary key, field in the phase output, default).
# A phase contributes to the summary only if its output was loaded into report_data.
summary_sources = (
    # Game count
    ('data_collection', (('total_games', 'total_games', 0),)),
    # Key metrics
    ('engine_analysis', (('accuracy', 'avg_accuracy', 0),
                         ('acpl', 'avg_acpl', 0))),
    ('regan_analysis', (('z_score', 'avg_z_score', 0),
                        ('flagged_games', 'flagged_games', 0))),
    # Humanness from Maia2
    ('maia2', (('humanness', 'avg_humanness', 0),)),
    ('risk_assessment', (('risk_score', 'risk_score', 0),
                         ('risk_level', 'risk_level', 'UNKNOWN'))),
)

phase_results = report_data['phases']
for phase_key, field_specs in summary_sources:
    if phase_key not in phase_results:
        continue
    phase_output = phase_results[phase_key]
    for summary_key, phase_field, fallback in field_specs:
        summary[summary_key] = phase_output.get(phase_field, fallback)

# Display summary (floats to three decimals, everything else as-is)
for key, value in summary.items():
    shown = f"{value:.3f}" if isinstance(value, float) else f"{value}"
    print(f"  {key}: {shown}")

In [None]:
# Generate HTML report
import html  # stdlib; escapes values interpolated into the fallback HTML below

print_subsection("GENERATING HTML")


def _esc(value):
    """Return str(value) escaped for use in HTML text or a quoted attribute."""
    return html.escape(str(value))


# Try to use Jinja2 template if available.
# When __file__ is not defined in this namespace (presumably the notebook
# case — confirm), fall back to the templates directory under PROJECT_ROOT.
if '__file__' in dir():
    template_dir = Path(__file__).parent.parent / "templates"
else:
    template_dir = PROJECT_ROOT / "fairness_report" / "templates"
template_file = template_dir / "report.html.jinja2"

if template_file.exists():
    # NOTE(review): the Environment is created without autoescape, so the
    # template itself is presumably responsible for escaping text values
    # (report_data also carries raw SVG markup) — confirm in the template.
    env = Environment(loader=FileSystemLoader(str(template_dir)))
    template = env.get_template("report.html.jinja2")
    html_content = template.render(report=report_data, summary=summary)
else:
    # Generate simple HTML report.
    # Every non-numeric value is escaped before interpolation so that titles,
    # phase values, etc. containing '<', '>', '&' or quotes cannot break or
    # inject markup.
    # Note: accuracy is already 0-100, not 0-1
    accuracy_display = f"{summary.get('accuracy', 0):.1f}%"
    title_html = _esc(actual_report_title)
    # CSS class suffix derived from the risk level, e.g. "Very High" -> "very-high"
    risk_level_class = _esc(summary.get('risk_level', 'unknown').lower().replace(' ', '-'))

    # Collect fragments and join once at the end instead of repeated `+=`
    html_parts = [f"""
<!DOCTYPE html>
<html>
<head>
    <title>{title_html}</title>
    <style>
        body {{ font-family: Arial, sans-serif; max-width: 1200px; margin: 0 auto; padding: 20px; }}
        h1 {{ color: #333; border-bottom: 2px solid #333; padding-bottom: 10px; }}
        h2 {{ color: #666; margin-top: 30px; }}
        .summary {{ background: #f5f5f5; padding: 20px; border-radius: 8px; margin: 20px 0; }}
        .metric {{ display: inline-block; margin: 10px 20px; text-align: center; }}
        .metric-value {{ font-size: 2em; font-weight: bold; color: #333; }}
        .metric-label {{ font-size: 0.9em; color: #666; }}
        .risk-low {{ color: green; }}
        .risk-moderate {{ color: orange; }}
        .risk-high {{ color: red; }}
        .risk-very-high {{ color: darkred; font-weight: bold; }}
        table {{ border-collapse: collapse; width: 100%; margin: 20px 0; }}
        th, td {{ border: 1px solid #ddd; padding: 8px; text-align: left; }}
        th {{ background: #f5f5f5; }}
    </style>
</head>
<body>
    <h1>{title_html}</h1>
    <p>Generated: {_esc(summary.get('generated_at', 'Unknown'))}</p>
    
    <div class="summary">
        <h2>Summary</h2>
        <div class="metric">
            <div class="metric-value">{_esc(summary.get('total_games', 'N/A'))}</div>
            <div class="metric-label">Games Analyzed</div>
        </div>
        <div class="metric">
            <div class="metric-value">{accuracy_display}</div>
            <div class="metric-label">Accuracy</div>
        </div>
        <div class="metric">
            <div class="metric-value">{summary.get('acpl', 0):.1f}</div>
            <div class="metric-label">ACPL</div>
        </div>
        <div class="metric">
            <div class="metric-value">{summary.get('z_score', 0):.2f}</div>
            <div class="metric-label">Avg Z-Score</div>
        </div>
        <div class="metric">
            <div class="metric-value risk-{risk_level_class}">{_esc(summary.get('risk_level', 'N/A'))}</div>
            <div class="metric-label">Risk Level</div>
        </div>
    </div>
    
    <h2>Phase Results</h2>
    <table>
        <tr><th>Phase</th><th>Status</th><th>Key Metrics</th></tr>
"""]

    # One table row per loaded phase, showing at most its first three entries
    for phase_name, phase_data in report_data['phases'].items():
        if isinstance(phase_data, dict):
            metrics = ', '.join(f"{k}: {v}" for k, v in list(phase_data.items())[:3])
        else:
            metrics = str(phase_data)
        html_parts.append(
            f"        <tr><td>{_esc(phase_name)}</td><td>Complete</td><td>{_esc(metrics)}</td></tr>\n"
        )

    html_parts.append("""
    </table>
    
    <h2>Risk Factors</h2>
""")

    if 'risk_assessment' in report_data['phases']:
        risk_factors = report_data['phases']['risk_assessment'].get('risk_factors', [])
        if risk_factors:
            html_parts.append("    <ul>\n")
            for rf in risk_factors:
                html_parts.append(
                    f"        <li>{_esc(rf.get('metric', 'Unknown'))}: z-score = {rf.get('z_score', 0):.2f}</li>\n"
                )
            html_parts.append("    </ul>\n")
        else:
            html_parts.append("    <p>No significant risk factors identified.</p>\n")

    html_parts.append("""
</body>
</html>
""")

    html_content = "".join(html_parts)

print(f"HTML report generated ({len(html_content)} characters)")

In [None]:
# Save outputs
report_dir = get_report_dir(username)

# Save HTML report
html_path = report_dir / "report.html"
# Pin the encoding: without it open() uses the platform's locale encoding,
# which can fail on (or mis-encode) non-ASCII report text where that default
# is not UTF-8.
with open(html_path, 'w', encoding='utf-8') as f:
    f.write(html_content)
print(f"HTML report saved: {html_path}")

# Save JSON summary
save_phase_output(username, "phase8", "report_summary.json", summary)

# Save full report data
save_phase_output(username, "phase8", "report_data.json", report_data)

print("\nPhase 8 complete!")
print(f"\nReport available at: {html_path}")

In [None]:
# Display report in notebook
from IPython.display import HTML, display

print_section("FINAL REPORT PREVIEW")
# Wrap the generated markup in an HTML display object and hand it to display()
report_preview = HTML(html_content)
display(report_preview)