# Phase 5: Time Analysis

Analyze time usage patterns to detect bot-like behavior.

**Inputs:**
- Phase 1: `games.parquet` plus the cached raw games (their PGNs supply the per-move clock data)

**Outputs:**
- `time_patterns.parquet` - Time usage per game
- `time_analysis.json` - Aggregated time patterns

In [None]:
# Parameters (injected by Papermill)
# `username` selects the user data directory that is loaded and is compared
# (case-insensitively) with the PGN "White" header to decide the player's colour.
username = "default_user"  # Chess.com username

In [None]:
# Setup
import sys
sys.path.insert(0, '..')
from common import (
    setup_notebook, validate_parameters, print_section, print_subsection,
    get_user_data_dir, save_phase_output, load_phase_output,
    load_dataset_parquet, load_cached_games_v2, load_baseline,
    extract_clock_times, analyze_time_patterns, detect_bot_patterns,
    classify_time_spent, analyze_time_distribution,
)
import chess
import chess.pgn
import io
import json
import pandas as pd
import numpy as np
from tqdm import tqdm

# Shared notebook initialisation from the project's `common` module
# (its exact effects are not visible in this notebook).
setup_notebook()
# Presumably rejects a missing/invalid username before any data is loaded —
# TODO confirm against `common.validate_parameters`.
validate_parameters(username)

In [None]:
# Load data
print_section(f"TIME ANALYSIS: {username}")

# Everything for this phase is read from the user's data directory
user_data_dir = get_user_data_dir(username)

# Game table written by Phase 1
games_path = user_data_dir / "games.parquet"
games_df = pd.DataFrame(load_dataset_parquet(games_path))
print(f"Games loaded: {len(games_df)}")

# Raw game records carry the PGN text that the clock data is extracted from;
# the loader returns a pair and only its first element is used here
raw_cache = load_cached_games_v2(user_data_dir)
all_games_raw, _ = raw_cache
print(f"Raw games loaded: {len(all_games_raw)}")

# Trusted baseline, consulted later in the bot-pattern cell
trusted_baseline = load_baseline("trusted")

In [None]:
# Helper to get PGN from raw game
def get_game_pgn(game_id: str, all_games: list) -> tuple:
    """Find a raw game by ID and parse its PGN.

    A game matches when its ``url`` is exactly ``game_id`` or when the final
    path segment of the URL equals ``game_id``. Plain substring matching is
    deliberately avoided: an empty ID is a substring of every URL, and ID
    ``123`` is a substring of ``.../1234``, so either would silently return
    the wrong game.

    Args:
        game_id: Identifier of the game; it is compared as a string.
        all_games: Raw game dicts; their ``url`` and ``pgn`` keys are read.

    Returns:
        ``(parsed_game, raw_game)`` for the first matching entry that has a
        non-empty ``pgn``, or ``(None, None)`` when no such entry exists.
    """
    wanted = '' if game_id is None else str(game_id).strip()
    if not wanted:
        # Nothing to look up; never fall through to "matches everything".
        return None, None

    suffix = f'/{wanted}'
    for game in all_games:
        url = game.get('url', '') or ''
        # Tolerate a trailing slash on the stored URL when comparing segments.
        if url != wanted and not url.rstrip('/').endswith(suffix):
            continue
        pgn_str = game.get('pgn', '')
        if pgn_str:
            return chess.pgn.read_game(io.StringIO(pgn_str)), game
    return None, None

# Extract time data from all games: one summary record per game that has
# at least one usable move time for the analysed player
time_results = []

for _, row in tqdm(games_df.iterrows(), total=len(games_df), desc="Extracting time data"):
    game_id = row.get('game_id', '')
    pgn, raw_game = get_game_pgn(game_id, all_games_raw)
    if pgn is None:
        # No raw game with a PGN could be matched to this row
        continue

    # The player has White when the PGN header equals the username (case-insensitive)
    player_is_white = username.lower() == pgn.headers.get('White', '').lower()

    # One entry per move; the 'ply' and 'time_spent' keys are read below
    clock_data = extract_clock_times(pgn)
    if not clock_data:
        continue

    # Keep the player's own moves (odd plies when White, even plies otherwise)
    # and drop entries whose time is missing or negative
    move_times = []
    for move in clock_data:
        if (move['ply'] % 2 == 1) != player_is_white:
            continue
        spent = move.get('time_spent')
        if spent is not None and spent >= 0:
            move_times.append(spent)

    if not move_times:
        continue

    # Threshold counts; note that "quick" (< 3s) also includes "instant" (< 1s)
    instant_count = sum(1 for t in move_times if t < 1)
    quick_count = sum(1 for t in move_times if t < 3)
    long_count = sum(1 for t in move_times if t > 30)

    # A single move has no spread, so its std is reported as 0
    spread = np.std(move_times) if len(move_times) > 1 else 0

    time_results.append({
        'game_id': game_id,
        'time_class': row.get('time_class', ''),
        'player_result': row.get('player_result', ''),
        'total_moves': len(move_times),
        'avg_move_time': np.mean(move_times),
        'std_move_time': spread,
        'min_move_time': np.min(move_times),
        'max_move_time': np.max(move_times),
        'median_move_time': np.median(move_times),
        'instant_moves': instant_count,
        'quick_moves': quick_count,
        'long_thinks': long_count,
    })

print(f"\nGames with time data: {len(time_results)}")

In [None]:
# Analyze time patterns
print_subsection("TIME PATTERNS")

if not time_results:
    # Later cells test `time_df.empty`, so an empty frame is still created
    time_df = pd.DataFrame()
    print("No time data available.")
else:
    time_df = pd.DataFrame(time_results)

    # Rates are pooled over all moves rather than averaged per game
    moves_overall = time_df['total_moves'].sum()
    instant_overall = time_df['instant_moves'].sum() / moves_overall
    quick_overall = time_df['quick_moves'].sum() / moves_overall

    print(f"\nOverall statistics:")
    print(f"  Average move time: {time_df['avg_move_time'].mean():.2f}s")
    print(f"  Time consistency (std): {time_df['std_move_time'].mean():.2f}s")
    print(f"  Instant move rate: {instant_overall:.1%}")
    print(f"  Quick move rate: {quick_overall:.1%}")

    # By time class, in order of first appearance
    print(f"\nBy time class:")
    for tc in time_df['time_class'].unique():
        tc_data = time_df[time_df['time_class'] == tc]
        tc_avg = tc_data['avg_move_time'].mean()
        tc_instant = tc_data['instant_moves'].sum() / tc_data['total_moves'].sum()
        print(f"  {tc}: avg {tc_avg:.2f}s, instant rate {tc_instant:.1%}")

In [None]:
# Detect bot-like patterns
print_subsection("BOT PATTERN DETECTION")

if not time_df.empty:
    # Per-game flags: a game is suspicious only when BOTH hold
    #   - std of move times below 2 seconds (very consistent timing)
    #   - more than half of the player's moves took under 1 second
    per_game_instant_share = time_df['instant_moves'] / time_df['total_moves']
    time_df['low_variance'] = time_df['std_move_time'] < 2
    time_df['high_instant_rate'] = per_game_instant_share > 0.5
    time_df['suspicious_timing'] = time_df['low_variance'] & time_df['high_instant_rate']

    suspicious_count = time_df['suspicious_timing'].sum()
    suspicious_share = suspicious_count / len(time_df)
    print(f"Games with suspicious timing patterns: {suspicious_count} ({suspicious_share:.1%})")

    if suspicious_count > 0:
        print(f"\nSuspicious games:")
        report_columns = ['game_id', 'avg_move_time', 'std_move_time', 'instant_moves', 'total_moves']
        suspicious = time_df[time_df['suspicious_timing']]
        print(suspicious[report_columns].to_string())

    # Compare to baseline
    if trusted_baseline:
        # Placeholder: the comparison of instant move rates against the
        # baseline is not implemented yet
        pass

In [None]:
# Time distribution analysis
print_subsection("TIME DISTRIBUTION")

if not time_df.empty:
    # Share of all moves per category, pooled across games
    total_moves = time_df['total_moves'].sum()
    instant_total = time_df['instant_moves'].sum()

    instant_share = instant_total / total_moves
    # 'quick_moves' counts everything under 3s, so the instant moves are removed
    quick_share = (time_df['quick_moves'].sum() - instant_total) / total_moves
    long_share = time_df['long_thinks'].sum() / total_moves

    # NOTE: 'short' and 'very_long' are never computed in this notebook and
    # always report 0; 'normal' is whatever remains after the other shares
    time_classifications = {
        'instant': instant_share,
        'quick': quick_share,
        'short': 0,
        'normal': 1 - instant_share - quick_share - long_share,
        'long': long_share,
        'very_long': 0,
    }

    print("Move time distribution:")
    for category in time_classifications:
        rate = time_classifications[category]
        print(f"  {category}: {rate:.1%}")

In [None]:
# Save outputs
has_time_data = not time_df.empty

# The per-game table is only written when there is something in it
if has_time_data:
    save_phase_output(username, "phase5", "time_patterns.parquet", time_df)

# Save analysis summary: start from the "no time data" values and fill in
# whatever can be computed
time_analysis = {
    "username": username,
    "games_with_time_data": 0,
    "avg_move_time": 0,
    "avg_std_move_time": 0,
    "instant_move_rate": 0,
    "quick_move_rate": 0,
    "suspicious_games": 0,
}

if has_time_data:
    time_analysis["games_with_time_data"] = len(time_df)
    time_analysis["avg_move_time"] = float(time_df['avg_move_time'].mean())
    time_analysis["avg_std_move_time"] = float(time_df['std_move_time'].mean())

    # Pooled rates need a non-zero move count
    moves_total = time_df['total_moves'].sum()
    if moves_total > 0:
        time_analysis["instant_move_rate"] = float(time_df['instant_moves'].sum() / moves_total)
        time_analysis["quick_move_rate"] = float(time_df['quick_moves'].sum() / moves_total)

    # The flag column exists only if the bot-pattern cell added it
    if 'suspicious_timing' in time_df:
        time_analysis["suspicious_games"] = int(time_df['suspicious_timing'].sum())

save_phase_output(username, "phase5", "time_analysis.json", time_analysis)

print(f"\nPhase 5 complete!")

In [None]:
# Visualization
import matplotlib.pyplot as plt

if not time_df.empty:
    fig, axes = plt.subplots(1, 3, figsize=(15, 4))

    # Instant move rate per game
    instant_rate = time_df['instant_moves'] / time_df['total_moves']

    # One row per panel: data, bin count, bar colour, x label, title, mean-line label
    panels = [
        (
            # Average move time distribution
            time_df['avg_move_time'], 30, 'steelblue',
            'Average Move Time (s)', 'Average Move Time Distribution',
            f"Mean: {time_df['avg_move_time'].mean():.1f}s",
        ),
        (
            # Time consistency (std dev)
            time_df['std_move_time'], 30, 'green',
            'Std Dev of Move Time (s)', 'Time Consistency Distribution',
            f"Mean: {time_df['std_move_time'].mean():.1f}s",
        ),
        (
            # Instant move rate per game
            instant_rate, 20, 'orange',
            'Instant Move Rate', 'Instant Move Rate Distribution',
            f"Mean: {instant_rate.mean():.1%}",
        ),
    ]

    for ax, (values, bin_count, bar_color, x_label, title, mean_label) in zip(axes, panels):
        ax.hist(values, bins=bin_count, color=bar_color, edgecolor='white')
        # Dashed red line marks the mean of the plotted values
        ax.axvline(values.mean(), color='red', linestyle='--', label=mean_label)
        ax.set_xlabel(x_label)
        ax.set_ylabel('Games')
        ax.set_title(title)
        ax.legend()

    plt.tight_layout()
    plt.show()