# Phase 7: Cheater Baseline Comparison

Compare player metrics against trusted and cheater baselines.

**Inputs:**
- Outputs of Phases 2, 4a, 4b, 4c, 5 and 6 (each is optional; a missing phase is skipped)
- Baseline data (trusted and cheater)

**Outputs:**
- `cheater_comparison.json` - Comparison with baselines
- `risk_assessment.json` - Overall risk score, risk level, and the contributing risk factors

In [None]:
# Parameters (injected by Papermill)
# The value below is only a placeholder default; every later cell uses
# `username` to locate that player's phase outputs and to label the results.
username = "default_user"  # Chess.com username

In [None]:
# Setup
import sys
sys.path.insert(0, '..')
from common import (
    setup_notebook, validate_parameters, print_section, print_subsection,
    get_user_data_dir, save_phase_output, load_phase_output,
    load_dataset_parquet, load_baseline,
    BASELINE_DIR, CHEATER_DIR,
)
import json
import pandas as pd
import numpy as np

# Shared notebook initialisation from the `common` module; run before any
# phase data is loaded.
# NOTE(review): the exact side effects live in common.py — confirm there.
setup_notebook()
# Check the injected `username` parameter before it is used to load outputs.
# NOTE(review): presumably raises on an invalid value — confirm in common.py.
validate_parameters(username)

In [None]:
# Load all analysis outputs
print_section(f"CHEATER COMPARISON: {username}")

# Load baselines.
# A missing/empty baseline is normalised to an empty dict so that the summary
# lines below (and every later `.get(...)`) work instead of failing on None.
# Later cells already treat a falsy cheater baseline as "not available", and
# an empty dict keeps that check working unchanged.
trusted_baseline = load_baseline("trusted") or {}
cheater_baseline = load_baseline("cheater") or {}

print(f"Trusted baseline: {trusted_baseline.get('num_players', 0)} players, {trusted_baseline.get('total_games', 0)} games")
print(f"Cheater baseline: {cheater_baseline.get('num_players', 0)} players, {cheater_baseline.get('total_games', 0)} games")

# Load player analysis.
# Filled in by the per-phase loaders that follow; a key is only present when
# the corresponding phase output could be loaded.
player_metrics = {}

# Phase 2: Quick stats
# Optional phase: when its output file is absent we just report it and move on.
try:
    quick_stats = load_phase_output(username, "phase2", "quick_stats.json")
except FileNotFoundError:
    print("Phase 2 not available")
else:
    # Missing sections fall back to empty dicts so later lookups stay safe.
    player_metrics.update({
        'elo': quick_stats.get('elo_analysis', {}),
        'results': quick_stats.get('result_patterns', {}),
    })
    print("Loaded Phase 2 quick stats")

# Phase 4a: Engine analysis
# Optional phase: a missing output file is reported and skipped.
try:
    engine_df = load_phase_output(username, "phase4a", "engine_analysis.parquet")
    # pandas reductions return NumPy scalars. Convert them to built-in floats
    # so that the stored metrics, and any comparison flags derived from them,
    # are plain Python values that the JSON outputs of this phase can hold.
    player_metrics['engine'] = {
        'avg_acpl': float(engine_df['acpl'].mean()),
        'avg_accuracy': float(engine_df['accuracy'].mean()),
        'best_move_rate': float(engine_df['best_move_rate'].mean()),
    }
    print(f"Loaded Phase 4a engine analysis ({len(engine_df)} games)")
except FileNotFoundError:
    print("Phase 4a not available")

# Phase 4b: Regan analysis
# Optional phase: a missing output file is reported and skipped.
try:
    regan_df = load_phase_output(username, "phase4b", "regan_analysis.parquet")
    # Reductions yield NumPy scalars; cast every value to a built-in type
    # (as was already done for the flagged-game count) so the metrics are
    # plain Python values in the JSON outputs and in later comparisons.
    player_metrics['regan'] = {
        'avg_z_score': float(regan_df['z_score'].mean()),
        'max_z_score': float(regan_df['z_score'].max()),
        'flagged_games': int(regan_df['is_flagged'].sum()),
        'move_match_rate': float(regan_df['move_match_rate'].mean()),
    }
    print(f"Loaded Phase 4b Regan analysis ({len(regan_df)} games)")
except FileNotFoundError:
    print("Phase 4b not available")

# Phase 4c: Tablebase
# Optional phase: a missing output file is reported and skipped.
try:
    tb_consistency = load_phase_output(username, "phase4c", "tablebase_consistency.json")
except FileNotFoundError:
    print("Phase 4c not available")
else:
    perfect_games = tb_consistency.get('perfect_games', 0)
    # Clamp the denominator to at least 1 so a zero game count cannot divide by zero.
    analyzed_games = max(1, tb_consistency.get('games_analyzed', 1))
    player_metrics['tablebase'] = {
        'accuracy': tb_consistency.get('overall_accuracy', 0),
        'perfect_rate': perfect_games / analyzed_games,
    }
    print("Loaded Phase 4c tablebase analysis")

# Phase 5: Time analysis
# Optional phase: a missing output file is reported and skipped.
try:
    time_analysis = load_phase_output(username, "phase5", "time_analysis.json")
except FileNotFoundError:
    print("Phase 5 not available")
else:
    # Metric name used in this notebook -> key in the Phase 5 JSON.
    time_field_sources = {
        'avg_move_time': 'avg_move_time',
        'instant_rate': 'instant_move_rate',
        'suspicious_games': 'suspicious_games',
    }
    # Any field missing from the JSON defaults to 0.
    player_metrics['time'] = {
        metric_name: time_analysis.get(source_key, 0)
        for metric_name, source_key in time_field_sources.items()
    }
    print("Loaded Phase 5 time analysis")

# Phase 6: Maia2
# Optional phase: a missing output file is reported and skipped.
try:
    maia_df = load_phase_output(username, "phase6", "maia2_analysis.parquet")
    # Reductions yield NumPy scalars; store built-in floats so the metrics
    # are plain Python values in the JSON outputs of this phase.
    player_metrics['maia2'] = {
        'avg_humanness': float(maia_df['humanness_score'].mean()),
        'min_humanness': float(maia_df['humanness_score'].min()),
    }
    print(f"Loaded Phase 6 Maia2 analysis ({len(maia_df)} games)")
except FileNotFoundError:
    print("Phase 6 not available")

In [None]:
# Compare to baselines
# Accumulates one result dict per metric that could be compared.
comparisons = []

print_subsection("BASELINE COMPARISON")

# Helper function
def compare_metric(name, player_val, trusted_mean, trusted_std, cheater_mean=None, higher_is_suspicious=True):
    """Compare one player metric against the trusted (and optional cheater) baseline.

    Args:
        name: Label stored in the result under ``'metric'``.
        player_val: The player's value for the metric; ``None`` or NaN means
            "no data".
        trusted_mean: Mean of the metric in the trusted baseline; ``None``
            means the baseline does not provide it.
        trusted_std: Standard deviation in the trusted baseline. Values below
            0.001 are floored to 0.001 before dividing.
        cheater_mean: Optional mean of the metric in the cheater baseline.
        higher_is_suspicious: If true, a z-score above +2 is suspicious;
            otherwise a z-score below -2 is.

    Returns:
        ``None`` when the player value or the trusted mean is unavailable.
        Otherwise a dict with the keys ``metric``, ``player_value``,
        ``trusted_mean``, ``trusted_std``, ``cheater_mean``, ``z_vs_trusted``
        (built-in float), ``closer_to`` (``'cheater'``, ``'trusted'`` or
        ``'unknown'``) and ``suspicious`` (built-in bool).
    """
    import math  # local import keeps the helper self-contained in its cell

    if player_val is None or trusted_mean is None:
        return None
    # A NaN value (e.g. the mean of an empty column) carries no information:
    # treat it like a missing metric instead of reporting a NaN z-score.
    if math.isnan(player_val):
        return None

    # Z-score relative to the trusted baseline. The spread is floored so a
    # zero or tiny standard deviation cannot divide by zero. Converting to a
    # built-in float makes the comparisons below yield built-in bools even
    # when the input is a NumPy scalar (numpy.bool_ is not JSON serialisable).
    z_vs_trusted = float((player_val - trusted_mean) / max(trusted_std, 0.001))

    # Which baseline mean is the player nearer to? Ties count as 'trusted'.
    if cheater_mean is not None:
        dist_to_trusted = abs(player_val - trusted_mean)
        dist_to_cheater = abs(player_val - cheater_mean)
        closer_to = 'cheater' if dist_to_cheater < dist_to_trusted else 'trusted'
    else:
        closer_to = 'unknown'

    # Flag values more than two standard deviations out in the suspicious direction.
    if higher_is_suspicious:
        suspicious = z_vs_trusted > 2
    else:
        suspicious = z_vs_trusted < -2

    return {
        'metric': name,
        'player_value': player_val,
        'trusted_mean': trusted_mean,
        'trusted_std': trusted_std,
        'cheater_mean': cheater_mean,
        'z_vs_trusted': z_vs_trusted,
        'closer_to': closer_to,
        'suspicious': suspicious,
    }

# Compare key metrics

# Manipulation score: the only metric whose reference statistics come from
# the loaded baselines. Skipped when Phase 2 data or the trusted Elo baseline
# is missing.
if 'elo' in player_metrics and trusted_baseline.get('elo_baseline'):
    trusted_elo = trusted_baseline['elo_baseline']
    cheater_elo = cheater_baseline.get('elo_baseline', {}) if cheater_baseline else {}

    comp = compare_metric(
        'manipulation_score',
        player_metrics['elo'].get('manipulation_score'),
        trusted_elo.get('manipulation_score_mean'),
        trusted_elo.get('manipulation_score_std', 0.1),
        cheater_elo.get('manipulation_score_mean'),
        higher_is_suspicious=True
    )
    if comp is not None:
        comparisons.append(comp)

# The remaining metrics are compared against fixed reference values.
# Columns: player_metrics group, reported metric name, key inside the group,
# typical human mean, human standard deviation, typical cheater/bot mean.
# Note: accuracy is on a 0-100 scale (Lichess-style percentage).
fixed_reference_specs = [
    ('engine', 'accuracy', 'avg_accuracy', 75, 10, 90),
    ('engine', 'best_move_rate', 'best_move_rate', 0.40, 0.10, 0.70),
    ('regan', 'z_score', 'avg_z_score', 0.0, 1.0, 2.5),  # 0.0 = expected z-score for fair play
    ('time', 'instant_rate', 'instant_rate', 0.15, 0.10, 0.40),
]

for group, metric_name, group_key, human_mean, human_std, cheat_mean in fixed_reference_specs:
    if group not in player_metrics:
        continue  # that phase was not available
    comp = compare_metric(
        metric_name,
        player_metrics[group].get(group_key),
        human_mean,
        human_std,
        cheat_mean,
        higher_is_suspicious=True
    )
    if comp is not None:
        comparisons.append(comp)

# Display comparisons
# Print the headline columns of every comparison as a plain-text table.
if not comparisons:
    print("No metrics available for comparison.")
else:
    comp_df = pd.DataFrame(comparisons)
    shown_columns = ['metric', 'player_value', 'trusted_mean', 'z_vs_trusted', 'closer_to', 'suspicious']
    print(comp_df[shown_columns].to_string())

In [None]:
# Calculate overall risk score
print_subsection("RISK ASSESSMENT")

# Every comparison flagged as suspicious becomes a unit-weight risk factor
# carrying its z-score against the trusted baseline.
risk_factors = [
    {
        'metric': flagged_comp['metric'],
        'z_score': flagged_comp['z_vs_trusted'],
        'weight': 1.0,
    }
    for flagged_comp in comparisons
    if flagged_comp['suspicious']
]

# Add specific flags
# NOTE(review): the two entries below store raw game counts in the 'z_score'
# field, so they are averaged together with true z-scores — confirm intended.
if 'regan' in player_metrics:
    regan_flag_count = player_metrics['regan'].get('flagged_games', 0)
    if regan_flag_count > 0:
        risk_factors.append(
            dict(metric='regan_flagged_games', z_score=regan_flag_count, weight=2.0)
        )

if 'time' in player_metrics:
    timing_flag_count = player_metrics['time'].get('suspicious_games', 0)
    # Timing only counts once more than five games look suspicious.
    if timing_flag_count > 5:
        risk_factors.append(
            dict(metric='suspicious_timing_games', z_score=timing_flag_count, weight=1.5)
        )

# Calculate weighted risk score
# Weighted mean of the risk factors' 'z_score' values; 0 when nothing was flagged.
risk_score = 0
if risk_factors:
    total_weight = sum(factor['weight'] for factor in risk_factors)
    weighted_sum = sum(factor['z_score'] * factor['weight'] for factor in risk_factors)
    if total_weight > 0:
        risk_score = weighted_sum / total_weight

# Determine risk level
# Bands are checked in ascending order; each bound is exclusive, and anything
# not below the last bound is "VERY HIGH".
for upper_bound, band_label in ((1, "LOW"), (2, "MODERATE"), (3, "HIGH")):
    if risk_score < upper_bound:
        risk_level = band_label
        break
else:
    risk_level = "VERY HIGH"

# Summary of the assessment: score to two decimals, level, and factor count.
print("\nRisk Assessment:")
print(f"  Risk Score: {risk_score:.2f}")
print(f"  Risk Level: {risk_level}")
factor_count = len(risk_factors)
print(f"  Risk Factors: {factor_count}")

# Itemise each contributing factor, if there are any.
if factor_count:
    print("\nRisk factors identified:")
    for rf in risk_factors:
        print(f"  - {rf['metric']}: z={rf['z_score']:.2f} (weight={rf['weight']})")

In [None]:
# Save outputs

# Record the size of the baselines that were used. A falsy cheater baseline
# is reported as zero players / zero games.
cheater_info = cheater_baseline if cheater_baseline else {}
baselines_used = {
    "trusted_players": trusted_baseline.get('num_players', 0),
    "trusted_games": trusted_baseline.get('total_games', 0),
    "cheater_players": cheater_info.get('num_players', 0),
    "cheater_games": cheater_info.get('total_games', 0),
}

# cheater_comparison.json: raw player metrics plus every baseline comparison.
comparison_output = dict(
    username=username,
    player_metrics=player_metrics,
    comparisons=comparisons,
    baselines_used=baselines_used,
)
save_phase_output(username, "phase7", "cheater_comparison.json", comparison_output)

# risk_assessment.json: the overall verdict and what contributed to it.
suspicious_metric_names = [c['metric'] for c in comparisons if c.get('suspicious')]
risk_output = dict(
    username=username,
    risk_score=risk_score,
    risk_level=risk_level,
    risk_factors=risk_factors,
    suspicious_metrics=suspicious_metric_names,
)
save_phase_output(username, "phase7", "risk_assessment.json", risk_output)

print("\nPhase 7 complete!")

In [None]:
# Visualization
import matplotlib.pyplot as plt

if comparisons:
    # Split the comparison records into the parallel series the chart needs;
    # suspicious metrics are drawn in red, the others in green.
    metrics, z_scores, colors = [], [], []
    for record in comparisons:
        metrics.append(record['metric'])
        z_scores.append(record['z_vs_trusted'])
        colors.append('red' if record['suspicious'] else 'green')

    fig, ax = plt.subplots(figsize=(10, 6))
    bars = ax.barh(metrics, z_scores, color=colors)

    # Dashed guides at +/-2 mark the suspicious threshold (only the first is
    # labelled so the legend shows a single entry); a faint line marks zero.
    threshold_style = dict(color='red', linestyle='--', alpha=0.5)
    ax.axvline(2, label='Suspicious threshold', **threshold_style)
    ax.axvline(-2, **threshold_style)
    ax.axvline(0, color='black', linestyle='-', alpha=0.3)

    ax.set_xlabel('Z-Score vs Trusted Baseline')
    ax.set_title(f'Player Comparison to Baseline: {username}')
    ax.legend()

    plt.tight_layout()
    plt.show()