In [1]:
# 🎮 Pokémon Tournament Simulator
# Complete tournament simulation with skill-based outcomes for season-end projections

import pandas as pd
import numpy as np
import random
from typing import List, Dict, Set, Tuple, Union, Optional
from player_database import PlayerDatabase, Player, RatingZone
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
from collections import defaultdict, Counter

# Set random seed for reproducibility (can change for different simulation runs)
random.seed(42)
np.random.seed(42)

print("🎮 Complete Tournament Simulator")
print("Built for running thousands of simulations to project season outcomes")
print("All logic in notebook for easy parameter tuning")

🎮 Complete Tournament Simulator
Built for running thousands of simulations to project season outcomes
All logic in notebook for easy parameter tuning


In [2]:
# 📊 Tournament Configuration
# Modify these parameters for different scenarios

TOURNAMENT_CONFIG = {
    'db_path': 'custom_tournament_players.db',  # NEW: Custom field composition
    'num_players': 3700,           # Total field size (all players in custom DB)
    'day1_rounds': 9,              # Day 1 Swiss rounds
    'day2_rounds': 4,              # Day 2 Swiss rounds
    'points_to_advance': 19,       # Points needed to advance to Day 2
    'skill_factor': 0.85,          # 0.0 = pure random, 1.0 = pure skill (INCREASED for more skill impact)
    'global_tie_rate': 0.08,       # Base tie rate (8%) - Much lower for decisive matches
    'num_simulations': 1,          # Number of tournaments to simulate
}

print("📊 Tournament Configuration:")
for key, value in TOURNAMENT_CONFIG.items():
    print(f"   {key}: {value}")
    
print(f"\n🎯 CUSTOM FIELD: Real players from all regions + low-CP NA fillers")
print(f"🌍 Field composition: All 600 NA + Top 50 EU + Top 50 LA + Top 10 OC + Top 3 MESA")
print(f"📈 Skill advantage: Higher CP players are more likely to win")
print(f"🎯 Simulation Focus: Running {TOURNAMENT_CONFIG['num_simulations']} tournament(s)")
print(f"💡 Change 'num_simulations' to 1000+ for season projections")

📊 Tournament Configuration:
   db_path: custom_tournament_players.db
   num_players: 3700
   day1_rounds: 9
   day2_rounds: 4
   points_to_advance: 19
   skill_factor: 0.85
   global_tie_rate: 0.08
   num_simulations: 1

🎯 CUSTOM FIELD: Real players from all regions + low-CP NA fillers
🌍 Field composition: All 600 NA + Top 50 EU + Top 50 LA + Top 10 OC + Top 3 MESA
📈 Skill advantage: Higher CP players are more likely to win
🎯 Simulation Focus: Running 1 tournament(s)
💡 Change 'num_simulations' to 1000+ for season projections


In [3]:
# 🎲 Load Player Database
# Load and prepare players for tournament simulation

def load_players_for_tournament(db_path: str, num_players: Optional[int] = None) -> pd.DataFrame:
    """Load players from database and convert to tournament format"""
    
    db = PlayerDatabase(db_path)
    all_players = db.load_all_players()
    
    if num_players is not None:
        players = all_players[:num_players]
    else:
        players = all_players
    
    print(f"Loading {len(players):,} players for tournament...")
    
    # Convert to tournament format
    players_data = {
        'player_id': [],
        'name': [],
        'rating_zone': [],
        'cp': [],
        'match_points': [],
        'wins': [],
        'losses': [],
        'ties': [],
        'opponents_played': [],
        'received_bye': [],
        'is_active': []
    }
    
    for player in players:
        players_data['player_id'].append(player.player_id)
        players_data['name'].append(player.name)
        players_data['rating_zone'].append(player.rating_zone.value)
        players_data['cp'].append(player.cp)
        players_data['match_points'].append(0)
        players_data['wins'].append(0)
        players_data['losses'].append(0)
        players_data['ties'].append(0)
        players_data['opponents_played'].append(set())
        players_data['received_bye'].append(False)
        players_data['is_active'].append(True)
    
    df = pd.DataFrame(players_data)
    
    # Show field composition
    zone_counts = df['rating_zone'].value_counts()
    print(f"\\nTournament Field Composition:")
    for zone in ['NA', 'EU', 'LATAM', 'OCE', 'MESA']:
        if zone in zone_counts.index:
            count = zone_counts[zone]
            percentage = count / len(df) * 100
            print(f"   {zone}: {count:,} players ({percentage:.1f}%)")
    
    # Show CP statistics
    cp_values = df['cp'].values
    print(f"\\nCP Statistics:")
    print(f"   Range: {cp_values.min():,} - {cp_values.max():,}")
    print(f"   Average: {cp_values.mean():.0f}")
    print(f"   Median: {np.median(cp_values):.0f}")
    
    return df

# Load the player base
base_players_df = load_players_for_tournament(
    TOURNAMENT_CONFIG['db_path'], 
    TOURNAMENT_CONFIG['num_players']
)


Loading 3,700 players for tournament...
\nTournament Field Composition:
   NA: 3,587 players (96.9%)
   EU: 50 players (1.4%)
   LATAM: 50 players (1.4%)
   OCE: 10 players (0.3%)
   MESA: 3 players (0.1%)
\nCP Statistics:
   Range: 50 - 2,020
   Average: 269
   Median: 220


In [4]:
# ⚔️ Match Simulation Engine
# Core functions for simulating individual matches and rounds

def calculate_skill_level(cp: int) -> float:
    """
    Convert CP to skill level with realistic curve matching actual player tiers
    
    Skill curve design based on your player analysis:
    - Very low (0-331 CP): Terrible skill (0.05-0.25) - steep penalty
    - Real (332-500 CP): Baseline skill (0.25-0.45) - gradual improvement  
    - Pretty good (501-700 CP): Solid skill (0.45-0.65) - competitive
    - Good (701-1000 CP): High skill (0.65-0.80) - very competitive with top
    - A little better (1001-1300 CP): Very high skill (0.80-0.90) - slight edge
    - Great (1400+ CP): Elite skill (0.90-1.0) - top tier but beatable
    
    Returns:
        Skill level between 0.05 and 1.0
    """
    
    if cp <= 331:
        # Very low skill - EXTREME penalty for fake players
        # These players should almost never succeed against real players
        return 0.01 + 0.14 * (cp / 331) ** 4  # 0.01 to 0.15, with EXTREME steep curve
    elif cp <= 500:
        # Real players baseline - significant jump from fake players
        progress = (cp - 332) / 168  # 0 to 1 over 332-500 range
        return 0.30 + 0.20 * progress  # 0.30 to 0.50 (bigger gap from fake players)
    elif cp <= 700:
        # Pretty good - solid competitive level
        progress = (cp - 501) / 199  # 0 to 1 over 501-700 range
        return 0.45 + 0.20 * progress  # 0.45 to 0.65
    elif cp <= 1000:
        # Good - high skill, very competitive with top players
        progress = (cp - 701) / 299  # 0 to 1 over 701-1000 range
        return 0.65 + 0.15 * progress  # 0.65 to 0.80
    elif cp <= 1300:
        # A little better - very high skill with slight edge
        progress = (cp - 1001) / 299  # 0 to 1 over 1001-1300 range
        return 0.80 + 0.10 * progress  # 0.80 to 0.90
    else:
        # Great - elite skill but still beatable
        excess = min(cp - 1301, 699)  # Cap progression at 2000 CP
        return 0.90 + 0.10 * (excess / 699)  # 0.90 to 1.0

def skill_based_match_simulation(player1_row: pd.Series, player2_row: pd.Series, 
                               global_tie_rate: float = 0.15, 
                               skill_factor: float = 0.50) -> str:
    """
    Simulate match with realistic skill consideration based on CP
    
    Args:
        player1_row: Pandas Series for player 1
        player2_row: Pandas Series for player 2
        global_tie_rate: Base tie rate
        skill_factor: How much skill affects win probability (0.0-1.0)
        
    Returns:
        Match outcome: 'player1_wins', 'player2_wins', or 'tie'
    """
    
    p1_cp = player1_row['cp']
    p2_cp = player2_row['cp']
    
    # Calculate skill levels using realistic curve
    p1_skill = calculate_skill_level(p1_cp)
    p2_skill = calculate_skill_level(p2_cp)
    
    # Calculate skill difference
    skill_diff = p1_skill - p2_skill
    
    # Convert skill difference to win probability
    # Base probability is 50/50, then adjusted by skill
    base_p1_win = 0.5
    
    # Apply skill factor - how much skill matters
    skill_advantage = skill_diff * skill_factor
    
    # Calculate final win probability
    p1_win_prob = base_p1_win + skill_advantage
    
    # Clamp to extreme bounds (very low CP players should almost never win)
    # More extreme bounds for larger skill gaps
    if abs(skill_diff) > 0.4:  # Large skill gap
        p1_win_prob = max(0.02, min(0.98, p1_win_prob))
    else:
        p1_win_prob = max(0.05, min(0.95, p1_win_prob))
    
    # Simulate outcome
    rand_val = random.random()
    
    if rand_val < global_tie_rate:
        return 'tie'
    elif rand_val < global_tie_rate + p1_win_prob * (1 - global_tie_rate):
        return 'player1_wins'
    else:
        return 'player2_wins'

def update_player_stats(players_df: pd.DataFrame, player_id: int, 
                       match_points: int, wins: int = 0, losses: int = 0, ties: int = 0,
                       opponent_id: int = None, received_bye: bool = False):
    """Update player statistics after a match"""
    idx = players_df[players_df['player_id'] == player_id].index[0]
    
    players_df.at[idx, 'match_points'] += match_points
    players_df.at[idx, 'wins'] += wins
    players_df.at[idx, 'losses'] += losses
    players_df.at[idx, 'ties'] += ties
    
    if opponent_id is not None:
        players_df.at[idx, 'opponents_played'].add(opponent_id)
    
    if received_bye:
        players_df.at[idx, 'received_bye'] = True

print("⚔️ Match simulation engine loaded")


⚔️ Match simulation engine loaded


In [5]:
# 🔄 Pairing & Round Simulation
# Swiss pairing system and round execution

def pair_round(players_df: pd.DataFrame) -> List[Tuple[Union[int, str], Union[int, str]]]:
    """
    Generate pairings for a Swiss round
    
    Returns:
        List of tuples: (player1_id, player2_id) or (player_id, 'BYE')
    """
    # Filter active players and sort by match points (descending), then by player_id
    active_players = players_df[players_df['is_active']].copy()
    active_players = active_players.sort_values(['match_points', 'player_id'], 
                                               ascending=[False, True])
    
    pairings = []
    paired_players = set()
    
    player_list = active_players['player_id'].tolist()
    
    # Pair players
    i = 0
    while i < len(player_list):
        if player_list[i] in paired_players:
            i += 1
            continue
            
        player1_id = player_list[i]
        player1_opponents = active_players[active_players['player_id'] == player1_id]['opponents_played'].iloc[0]
        
        # Find a valid opponent
        paired = False
        for j in range(i + 1, len(player_list)):
            player2_id = player_list[j]
            if player2_id in paired_players:
                continue
                
            # Check if they've played before
            if player2_id not in player1_opponents:
                pairings.append((player1_id, player2_id))
                paired_players.add(player1_id)
                paired_players.add(player2_id)
                paired = True
                break
        
        # If no valid opponent found, pair with next available (allowing rematch if necessary)
        if not paired:
            for j in range(i + 1, len(player_list)):
                player2_id = player_list[j]
                if player2_id not in paired_players:
                    pairings.append((player1_id, player2_id))
                    paired_players.add(player1_id)
                    paired_players.add(player2_id)
                    paired = True
                    break
        
        # If still not paired and no more players, give bye
        if not paired:
            pairings.append((player1_id, 'BYE'))
            paired_players.add(player1_id)
            paired = True
        
        i += 1
    
    return pairings

def simulate_round(players_df: pd.DataFrame, round_number: int, 
                  global_tie_rate: float = 0.15, skill_factor: float = 0.50, 
                  verbose: bool = True):
    """
    Simulate a complete Swiss round
    
    Args:
        players_df: Tournament players DataFrame
        round_number: Current round number
        global_tie_rate: Base tie rate
        skill_factor: How much CP affects outcomes
        verbose: Whether to print round details
    """
    if verbose:
        print(f"\\n--- Round {round_number} ---")
    
    # Generate pairings
    pairings = pair_round(players_df)
    if verbose:
        print(f"Generated {len(pairings)} pairings")
    
    match_count = 0
    bye_count = 0
    
    for pairing in pairings:
        if pairing[1] == 'BYE':
            # Handle bye
            player_id = pairing[0]
            update_player_stats(
                players_df, player_id, 
                match_points=3, wins=1, received_bye=True
            )
            bye_count += 1
            if verbose:
                print(f"  Player {player_id} receives a BYE")
        else:
            # Simulate match with skill consideration
            player1_id, player2_id = pairing
            p1_row = players_df[players_df['player_id'] == player1_id].iloc[0]
            p2_row = players_df[players_df['player_id'] == player2_id].iloc[0]
            
            outcome = skill_based_match_simulation(p1_row, p2_row, global_tie_rate, skill_factor)
            
            if outcome == 'player1_wins':
                update_player_stats(players_df, player1_id, match_points=3, wins=1, opponent_id=player2_id)
                update_player_stats(players_df, player2_id, match_points=0, losses=1, opponent_id=player1_id)
            elif outcome == 'player2_wins':
                update_player_stats(players_df, player1_id, match_points=0, losses=1, opponent_id=player2_id)
                update_player_stats(players_df, player2_id, match_points=3, wins=1, opponent_id=player1_id)
            else:  # tie
                update_player_stats(players_df, player1_id, match_points=1, ties=1, opponent_id=player2_id)
                update_player_stats(players_df, player2_id, match_points=1, ties=1, opponent_id=player1_id)
            
            match_count += 1
    
    if verbose:
        print(f"Completed {match_count} matches and {bye_count} byes")
        
        # Show current standings summary
        active_players = players_df[players_df['is_active']]
        points_distribution = active_players['match_points'].value_counts().sort_index(ascending=False)
        print("Current points distribution:")
        for points, count in points_distribution.head(10).items():
            print(f"  {points} points: {count} players")

print("🔄 Swiss pairing system loaded")


🔄 Swiss pairing system loaded


In [6]:
# 🏆 Complete Tournament Simulation
# Main tournament logic with Day 1 + Day 2 structure

def simulate_single_tournament(players_df: pd.DataFrame, config: dict, verbose: bool = True) -> dict:
    """
    Simulate a complete tournament (Day 1 + Day 2)
    
    Args:
        players_df: Fresh copy of players DataFrame
        config: Tournament configuration dictionary
        verbose: Whether to print detailed progress
        
    Returns:
        Dictionary with tournament results
    """
    
    tournament_id = f"tournament_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{random.randint(1000,9999)}"
    
    if verbose:
        print(f"🎮 TOURNAMENT SIMULATION: {tournament_id}")
        print("=" * 70)
        print(f"Players: {len(players_df):,}")
        print(f"Structure: Day 1 ({config['day1_rounds']} rounds) + Day 2 ({config['day2_rounds']} rounds)")
        print(f"Skill Factor: {config['skill_factor']} (higher = more skill influence)")
    
    # Day 1 Simulation
    if verbose:
        print(f"\\n🌅 DAY 1: {config['day1_rounds']} Swiss Rounds")
        print("=" * 40)
    
    for round_num in range(1, config['day1_rounds'] + 1):
        simulate_round(players_df, round_num, config['global_tie_rate'], config['skill_factor'], verbose)
    
    # Day 1 Cut
    day1_standings = players_df.sort_values(['match_points', 'wins'], ascending=[False, False])
    advancing_players = day1_standings[day1_standings['match_points'] >= config['points_to_advance']].copy()
    
    if verbose:
        print(f"\\n📊 DAY 1 RESULTS:")
        print(f"Players with {config['points_to_advance']}+ points advance to Day 2: {len(advancing_players)}")
        
        # Show Day 1 zone performance
        if len(advancing_players) > 0:
            print(f"\\n📈 DAY 1 ADVANCEMENT BY ZONE:")
            zone_stats = advancing_players.groupby('rating_zone').agg({
                'match_points': ['count', 'mean'],
                'cp': 'mean',
                'wins': 'mean'
            }).round(1)
            
            total_advancing = len(advancing_players)
            for zone in ['NA', 'EU', 'LATAM', 'OCE', 'MESA']:
                if zone in zone_stats.index:
                    stats = zone_stats.loc[zone]
                    count = int(stats['match_points']['count'])
                    avg_pts = stats['match_points']['mean']
                    avg_cp = stats['cp']['mean']
                    avg_wins = stats['wins']['mean']
                    percentage = count / total_advancing * 100
                    print(f"  {zone}: {count} players ({percentage:.1f}%) | "
                          f"Avg: {avg_pts:.1f} pts, {avg_cp:.0f} CP, {avg_wins:.1f} wins")
    
    # Mark non-advancing players as inactive
    players_df.loc[players_df['match_points'] < config['points_to_advance'], 'is_active'] = False
    
    if len(advancing_players) == 0:
        if verbose:
            print("❌ No players advanced to Day 2!")
        return {'error': 'No advancement to Day 2', 'tournament_id': tournament_id}
    
    # Day 2 Simulation
    if verbose:
        print(f"\\n🌆 DAY 2: {config['day2_rounds']} Swiss Rounds")
        print("=" * 40)
        print(f"Starting with {len(advancing_players)} players")
    
    for round_num in range(config['day1_rounds'] + 1, config['day1_rounds'] + config['day2_rounds'] + 1):
        simulate_round(players_df, round_num, config['global_tie_rate'], config['skill_factor'], verbose)
    
    # Final standings
    final_standings = players_df[players_df['is_active']].sort_values(
        ['match_points', 'wins'], ascending=[False, False]
    ).reset_index(drop=True)
    
    if verbose and len(final_standings) > 0:
        print(f"\\n🏆 FINAL STANDINGS:")
        champion = final_standings.iloc[0]
        print(f"Champion: {champion['name']} ({champion['rating_zone']}, {champion['cp']} CP)")
        print(f"Record: {champion['wins']}-{champion['losses']}-{champion['ties']} ({champion['match_points']} pts)")
        
        # Top Cut Analysis
        print(f"\\n🏅 TOP CUT ANALYSIS:")
        for cut_size in [8, 16, 32, 64]:
            if cut_size <= len(final_standings):
                top_cut = final_standings.head(cut_size)
                intl_count = len(top_cut[top_cut['rating_zone'] != 'NA'])
                na_count = cut_size - intl_count
                intl_pct = intl_count / cut_size * 100
                print(f"  Top {cut_size:2d}: {na_count:2d} NA ({100-intl_pct:4.1f}%) | "
                      f"{intl_count:2d} International ({intl_pct:4.1f}%)")
    
    return {
        'tournament_id': tournament_id,
        'final_standings': final_standings,
        'day1_advancement': len(advancing_players),
        'champion': final_standings.iloc[0].to_dict() if len(final_standings) > 0 else None,
        'total_players': len(players_df)
    }

print("🏆 Tournament simulation engine loaded")


🏆 Tournament simulation engine loaded


In [7]:
# 🔄 Multi-Tournament Simulation & Analysis
# Run many tournaments and aggregate results for season projections

def run_multiple_tournaments(base_players_df: pd.DataFrame, config: dict) -> dict:
    """
    Run multiple tournament simulations and aggregate results
    
    Args:
        base_players_df: Clean player data to copy for each simulation
        config: Tournament configuration
        
    Returns:
        Aggregated results across all simulations
    """
    
    num_sims = config['num_simulations']
    print(f"🔄 Running {num_sims} tournament simulation(s)...")
    print(f"⚙️  Skill Factor: {config['skill_factor']}")
    print(f"👥 Field Size: {len(base_players_df):,} players")
    print()
    
    all_results = []
    champion_history = []
    zone_performance = defaultdict(list)
    placement_history = defaultdict(list)  # Track each player's placements
    
    for sim_num in range(num_sims):
        # Create fresh copy of players for this simulation
        players_df = base_players_df.copy()
        
        # Reset all tournament stats
        players_df['match_points'] = 0
        players_df['wins'] = 0 
        players_df['losses'] = 0
        players_df['ties'] = 0
        players_df['opponents_played'] = players_df['opponents_played'].apply(lambda x: set())
        players_df['received_bye'] = False
        players_df['is_active'] = True
        
        # Run tournament
        verbose = (num_sims <= 5)  # Only show details for small runs
        if verbose:
            print(f"\\n{'='*20} SIMULATION {sim_num + 1}/{num_sims} {'='*20}")
        elif sim_num % max(1, num_sims // 10) == 0:
            print(f"Completed {sim_num}/{num_sims} simulations...")
            
        result = simulate_single_tournament(players_df, config, verbose)
        
        if 'error' not in result:
            all_results.append(result)
            
            # Track champion
            champion = result['champion']
            champion_history.append({
                'name': champion['name'],
                'rating_zone': champion['rating_zone'],
                'cp': champion['cp'],
                'simulation': sim_num + 1
            })
            
            # Track zone performance in top cuts
            final_standings = result['final_standings']
            for cut_size in [8, 16, 32]:
                if cut_size <= len(final_standings):
                    top_cut = final_standings.head(cut_size)
                    zone_counts = top_cut['rating_zone'].value_counts()
                    for zone in ['NA', 'EU', 'LATAM', 'OCE', 'MESA']:
                        count = zone_counts.get(zone, 0)
                        percentage = count / cut_size * 100
                        zone_performance[f'{zone}_top{cut_size}'].append(percentage)
            
            # Track individual player placements (for top players)
            for idx, row in final_standings.head(100).iterrows():
                placement_history[row['player_id']].append(idx + 1)
    
    print(f"\\n✅ Completed {len(all_results)} successful tournaments!")
    
    return {
        'results': all_results,
        'champion_history': champion_history,
        'zone_performance': zone_performance,
        'placement_history': placement_history,
        'config': config
    }

def analyze_simulation_results(aggregated_results: dict):
    """
    Analyze and display results from multiple tournament simulations
    """
    
    results = aggregated_results['results']
    champion_history = aggregated_results['champion_history']
    zone_performance = aggregated_results['zone_performance']
    config = aggregated_results['config']
    
    num_sims = len(results)
    
    print(f"\\n📊 SEASON PROJECTION ANALYSIS")
    print(f"Based on {num_sims} tournament simulation(s)")
    print("=" * 60)
    
    # Champion Analysis
    print(f"\\n🏆 CHAMPIONSHIP PREDICTIONS:")
    if champion_history:
        champion_df = pd.DataFrame(champion_history)
        
        # Most frequent champions
        champion_counts = champion_df['name'].value_counts()
        print(f"Most likely champions:")
        for i, (name, count) in enumerate(champion_counts.head(10).items()):
            percentage = count / num_sims * 100
            zone = champion_df[champion_df['name'] == name]['rating_zone'].iloc[0]
            avg_cp = champion_df[champion_df['name'] == name]['cp'].mean()
            print(f"  {i+1:2d}. {name} ({zone}): {count}/{num_sims} ({percentage:.1f}%) | Avg CP: {avg_cp:.0f}")
        
        # Zone championship distribution
        print(f"\\n🌍 CHAMPIONSHIP BY ZONE:")
        zone_champions = champion_df['rating_zone'].value_counts()
        for zone in ['NA', 'EU', 'LATAM', 'OCE', 'MESA']:
            if zone in zone_champions.index:
                count = zone_champions[zone]
                percentage = count / num_sims * 100
                print(f"  {zone}: {count}/{num_sims} ({percentage:.1f}%)")
    
    # Top Cut Analysis
    if zone_performance and num_sims > 1:
        print(f"\\n🏅 TOP CUT PROJECTIONS (Average %):")
        for cut_size in [8, 16, 32]:
            print(f"\\n  Top {cut_size}:")
            for zone in ['NA', 'EU', 'LATAM', 'OCE', 'MESA']:
                key = f'{zone}_top{cut_size}'
                if key in zone_performance and zone_performance[key]:
                    avg_pct = np.mean(zone_performance[key])
                    std_pct = np.std(zone_performance[key])
                    print(f"    {zone}: {avg_pct:.1f}% ± {std_pct:.1f}%")
    
    # Player consistency analysis (if multiple simulations)
    if num_sims > 1:
        placement_history = aggregated_results['placement_history']
        if placement_history:
            print(f"\\n⭐ MOST CONSISTENT PERFORMERS:")
            consistency_data = []
            for player_id, placements in placement_history.items():
                if len(placements) >= max(3, num_sims * 0.3):  # Player made top 100 in at least 30% of sims
                    avg_placement = np.mean(placements)
                    std_placement = np.std(placements)
                    consistency_data.append({
                        'player_id': player_id,
                        'avg_placement': avg_placement,
                        'std_placement': std_placement,
                        'appearances': len(placements)
                    })
            
            if consistency_data:
                consistency_df = pd.DataFrame(consistency_data)
                # Sort by average placement
                top_consistent = consistency_df.nsmallest(20, 'avg_placement')
                
                print("Top 20 most consistent high performers:")
                for idx, row in top_consistent.iterrows():
                    player_name = base_players_df[base_players_df['player_id'] == row['player_id']]['name'].iloc[0]
                    player_zone = base_players_df[base_players_df['player_id'] == row['player_id']]['rating_zone'].iloc[0]
                    print(f"  {player_name} ({player_zone}): Avg {row['avg_placement']:.1f} ± {row['std_placement']:.1f} ({row['appearances']}/{num_sims} appearances)")

print("🔄 Multi-tournament analysis system loaded")


🔄 Multi-tournament analysis system loaded


In [8]:
# 🚀 RUN SIMULATION
# Execute tournament simulation(s) based on configuration

# Modify TOURNAMENT_CONFIG above to change parameters before running

print(f"🚀 STARTING TOURNAMENT SIMULATION")
print(f"Configuration: {TOURNAMENT_CONFIG['num_simulations']} simulation(s) with skill factor {TOURNAMENT_CONFIG['skill_factor']}")
print()

# Run the simulation(s)
aggregated_results = run_multiple_tournaments(base_players_df, TOURNAMENT_CONFIG)

# Analyze results
analyze_simulation_results(aggregated_results)

print(f"\\n✅ Simulation complete!")
print(f"💡 To run more simulations:")
print(f"   1. Change TOURNAMENT_CONFIG['num_simulations'] to 1000+ for season projections")
print(f"   2. Adjust 'skill_factor' to test different scenarios (0.0-1.0)")
print(f"   3. Re-run this cell")


🚀 STARTING TOURNAMENT SIMULATION
Configuration: 1 simulation(s) with skill factor 0.85

🔄 Running 1 tournament simulation(s)...
⚙️  Skill Factor: 0.85
👥 Field Size: 3,700 players

🎮 TOURNAMENT SIMULATION: tournament_20250608_163734_2824
Players: 3,700
Structure: Day 1 (9 rounds) + Day 2 (4 rounds)
Skill Factor: 0.85 (higher = more skill influence)
\n🌅 DAY 1: 9 Swiss Rounds
\n--- Round 1 ---
Generated 1850 pairings
Completed 1850 matches and 0 byes
Current points distribution:
  3 points: 1715 players
  1 points: 270 players
  0 points: 1715 players
\n--- Round 2 ---
Generated 1850 pairings
Completed 1850 matches and 0 byes
Current points distribution:
  6 points: 791 players
  4 points: 254 players
  3 points: 1593 players
  2 points: 26 players
  1 points: 236 players
  0 points: 800 players
\n--- Round 3 ---
Generated 1850 pairings
Completed 1850 matches and 0 byes
Current points distribution:
  9 points: 365 players
  7 points: 176 players
  6 points: 1097 players
  5 points: 33 pla

In [9]:
# 🏆 TOURNAMENT STANDINGS DISPLAY
# Show detailed standings from the most recent tournament

def show_tournament_standings(results_dict, num_display: int = 50):
    """
    Display nice tournament standings table
    
    Args:
        results_dict: Results from a single tournament simulation
        num_display: Number of top players to show
    """
    
    if not results_dict or 'final_standings' not in results_dict:
        print("❌ No tournament results available. Run a simulation first!")
        return
    
    final_standings = results_dict['final_standings']
    
    print(f"🏆 TOURNAMENT STANDINGS")
    print("=" * 100)
    print(f"Tournament ID: {results_dict['tournament_id']}")
    print(f"Total Players: {len(final_standings):,}")
    print(f"Day 1 Advancement: {results_dict['day1_advancement']} players made Day 2")
    
    # Header
    print(f"\n{'Rank':<6} {'Name':<25} {'Zone':<8} {'CP':<6} {'Record':<12} {'Points':<8} {'Status'}")
    print("-" * 100)
    
    # Display standings
    for i in range(min(num_display, len(final_standings))):
        row = final_standings.iloc[i]
        rank = i + 1
        record = f"{row['wins']}-{row['losses']}-{row['ties']}"
        
        # Status indicators
        if rank == 1:
            status = "🥇 CHAMPION"
        elif rank <= 8:
            status = "🏆 Top Cut"
        elif rank <= 16:
            status = "⭐ Top 16"
        elif rank <= 32:
            status = "💫 Top 32"
        elif row['match_points'] >= 19:
            status = "✅ Made Day 2"
        else:
            status = "❌ Day 1 Exit"
        
        # Truncate long names
        name = row['name'][:24] if len(row['name']) > 24 else row['name']
        
        print(f"{rank:<6} {name:<25} {row['rating_zone']:<8} {row['cp']:<6} {record:<12} {row['match_points']:<8} {status}")
    
    if len(final_standings) > num_display:
        print(f"\n... and {len(final_standings) - num_display:,} more players")
    
    # Summary stats
    print(f"\n📊 TOURNAMENT SUMMARY:")
    print("-" * 50)
    
    # Champion details
    champion = final_standings.iloc[0]
    print(f"🥇 Champion: {champion['name']} ({champion['rating_zone']})")
    print(f"   CP: {champion['cp']:,} | Record: {champion['wins']}-{champion['losses']}-{champion['ties']} ({champion['match_points']} pts)")
    
    # Zone performance in top cuts
    print(f"\n🌍 Zone Performance:")
    total_intl = 0
    total_na = 0
    
    for cut_size in [8, 16, 32]:
        if cut_size <= len(final_standings):
            top_cut = final_standings.head(cut_size)
            zone_counts = top_cut['rating_zone'].value_counts()
            
            # Calculate international vs NA
            intl_zones = ['EU', 'LATAM', 'OCE', 'MESA']
            intl_count = sum(zone_counts.get(zone, 0) for zone in intl_zones)
            na_count = zone_counts.get('NA', 0)
            intl_pct = intl_count / cut_size * 100
            
            if cut_size == 8:
                total_intl = intl_count
                total_na = na_count
            
            print(f"   Top {cut_size:2d}: {na_count:2d} NA, {intl_count:2d} International ({intl_pct:.1f}%)")
            
            # Show zone breakdown for Top 8
            if cut_size == 8 and intl_count > 0:
                print(f"           International breakdown:", end="")
                for zone in intl_zones:
                    count = zone_counts.get(zone, 0)
                    if count > 0:
                        print(f" {zone}:{count}", end="")
                print()
    
    # Real vs Fake player performance (based on CP threshold)
    real_threshold = 332  # CP threshold between real and fake players
    real_players_standings = final_standings[final_standings['cp'] >= real_threshold]
    fake_players_standings = final_standings[final_standings['cp'] < real_threshold]
    
    print(f"\n🎯 Real vs Fake Player Performance:")
    print(f"   Real players in Top 32: {len(real_players_standings.head(32))} / 32")
    print(f"   Real players in Top 100: {len(real_players_standings.head(100))} / 100")
    print(f"   Highest fake player rank: {len(final_standings[final_standings['cp'] >= real_threshold]) + 1}")
    
    return final_standings

def show_latest_standings(num_display: int = 50):
    """Show standings from the most recent tournament"""
    
    if 'aggregated_results' not in globals():
        print("❌ No simulation results found!")
        print("💡 Run the simulation cell first, then call: show_latest_standings()")
        return
    
    results = aggregated_results['results']
    if not results:
        print("❌ No tournament results available!")
        return
    
    # Show the latest tournament
    latest_result = results[-1]
    return show_tournament_standings(latest_result, num_display)

print("🏆 Tournament standings display functions loaded")
print("💡 After running simulation, use: show_latest_standings() to see detailed standings")


🏆 Tournament standings display functions loaded
💡 After running simulation, use: show_latest_standings() to see detailed standings


In [10]:
# 🧪 QUICK PARAMETER EXPERIMENTS
# Easy way to test different scenarios without modifying the main config

# Example: Compare different skill factors
def compare_skill_factors():
    """Compare tournament outcomes under different skill factors"""
    
    skill_factors_to_test = [0.2, 0.5, 0.8]  # Low, medium, high skill influence
    comparison_results = {}
    
    for skill_factor in skill_factors_to_test:
        print(f"\\n{'='*50}")
        print(f"TESTING SKILL FACTOR: {skill_factor}")
        print(f"{'='*50}")
        
        # Create temporary config
        temp_config = TOURNAMENT_CONFIG.copy()
        temp_config['skill_factor'] = skill_factor
        temp_config['num_simulations'] = 10  # Quick test with 10 simulations
        
        # Run simulation
        results = run_multiple_tournaments(base_players_df, temp_config)
        comparison_results[skill_factor] = results
        
        # Quick analysis
        champion_history = results['champion_history']
        if champion_history:
            champion_df = pd.DataFrame(champion_history)
            intl_champions = len(champion_df[champion_df['rating_zone'] != 'NA'])
            intl_pct = intl_champions / len(champion_df) * 100
            print(f"\\n🏆 Quick Summary for skill_factor={skill_factor}:")
            print(f"   International champions: {intl_champions}/10 ({intl_pct:.0f}%)")
            
            # Top 3 most frequent champions
            top_champions = champion_df['name'].value_counts().head(3)
            print(f"   Most frequent champions:")
            for name, count in top_champions.items():
                zone = champion_df[champion_df['name'] == name]['rating_zone'].iloc[0]
                print(f"     {name} ({zone}): {count}/10")
    
    return comparison_results

# Uncomment the line below to run skill factor comparison:
# comparison_results = compare_skill_factors()

print("🧪 Parameter experiment functions loaded")
print("💡 Uncomment 'comparison_results = compare_skill_factors()' above to test different skill levels")


🧪 Parameter experiment functions loaded
💡 Uncomment 'comparison_results = compare_skill_factors()' above to test different skill levels


In [11]:
show_latest_standings(256)

🏆 TOURNAMENT STANDINGS
Tournament ID: tournament_20250608_163734_2824
Total Players: 381
Day 1 Advancement: 381 players made Day 2

Rank   Name                      Zone     CP     Record       Points   Status
----------------------------------------------------------------------------------------------------
1      Jonah Alter               MESA     762    12-0-1       37       🥇 CHAMPION
2      Brent Tonisson            OCE      2020   11-1-1       34       🏆 Top Cut
3      Isabella Campbell         NA       328    11-1-1       34       🏆 Top Cut
4      Nathan Ramos              NA       251    11-1-1       34       🏆 Top Cut
5      Connor Jackson            NA       134    11-1-1       34       🏆 Top Cut
6      Lily Walker316            NA       161    11-1-1       34       🏆 Top Cut
7      Daniel X. Sanders         NA       200    11-1-1       34       🏆 Top Cut
8      Rahul Reddy               NA       1525   11-2-0       33       🏆 Top Cut
9      Julian Gort-Barry         NA     

Unnamed: 0,player_id,name,rating_zone,cp,match_points,wins,losses,ties,opponents_played,received_bye,is_active
0,712,Jonah Alter,MESA,762,37,12,0,1,"{771, 1220, 3395, 711, 713, 715, 2797, 912, 13...",False,True
1,701,Brent Tonisson,OCE,2020,34,11,1,1,"{704, 997, 712, 909, 656, 625, 786, 629, 696, ...",False,True
2,1867,Isabella Campbell,,328,34,11,1,1,"{1920, 2080, 1924, 1862, 1865, 1898, 1868, 190...",False,True
3,2080,Nathan Ramos,,251,34,11,1,1,"{2081, 3395, 1924, 2025, 1867, 2061, 2004, 207...",False,True
4,2797,Connor Jackson,,134,34,11,1,1,"{1442, 231, 712, 2795, 2701, 2798, 2990, 3152,...",False,True
...,...,...,...,...,...,...,...,...,...,...,...
376,526,Gregory Lim,,351,19,6,6,1,"{544, 549, 486, 524, 525, 398, 527, 528, 400, ...",False,True
377,971,Wyatt Young,,70,19,6,6,1,"{963, 968, 936, 970, 1033, 972, 974, 976, 978,...",False,True
378,1255,Landon Baker,,203,19,6,6,1,"{1251, 1284, 1253, 1256, 1257, 1258, 1195, 126...",False,True
379,1842,Nathaniel Nelson,,228,19,6,6,1,"{1891, 1829, 1575, 1801, 1836, 1742, 1839, 184...",False,True


In [12]:
# Test if the new skill function is loaded:
try:
    skill_50 = calculate_skill_level(50)
    skill_1600 = calculate_skill_level(1600)
    
    print(f"✅ New skill function loaded!")
    print(f"50 CP skill level: {skill_50:.3f} (should be ~0.106)")
    print(f"1600 CP skill level: {skill_1600:.3f} (should be ~0.879)")
    
    # Test win probability
    from pandas import Series
    p1 = Series({'cp': 50})
    p2 = Series({'cp': 1600})
    
    # Simulate 100 matches
    wins = 0
    for i in range(100):
        result = skill_based_match_simulation(p1, p2, skill_factor=0.70)
        if result == 'player1_wins':
            wins += 1
    
    win_rate = wins / 100
    print(f"50 CP vs 1600 CP win rate: {win_rate:.1%} (should be ~10-15%)")
    
    if win_rate < 0.25:
        print("🎉 NEW SYSTEM IS WORKING!")
    else:
        print("⚠️  Still using old system - win rate too high")
        
except NameError:
    print("❌ New skill function NOT loaded!")
    print("💡 You need to re-run Cell 3 (Match Simulation Engine)")

✅ New skill function loaded!
50 CP skill level: 0.010 (should be ~0.106)
1600 CP skill level: 0.943 (should be ~0.879)
50 CP vs 1600 CP win rate: 1.0% (should be ~10-15%)
🎉 NEW SYSTEM IS WORKING!
