<a href="https://colab.research.google.com/github/allwin107/Fantasy-Team-Simulation-using-Player-Selection-Probabilities/blob/main/Fantasy_Team_Simulation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Analyzing the sample player data to understand its structure, roles, and selection probabilities for further processing.

In [None]:
import pandas as pd
import numpy as np

# Try to load the player data sample.
# Only the read itself is guarded: a failure to load is reported, while a mistake in
# the summary code below (e.g. a misspelled column name) surfaces as a real error
# instead of being misreported as "Error loading data".
try:
    df = pd.read_csv("player_data_sample.csv")
except Exception as e:
    print(f"Error loading data: {e}")
else:
    print("Player data loaded successfully!")
    print(f"Dataset shape: {df.shape}")
    print("\nColumn names:")
    print(df.columns.tolist())
    print("\nFirst few rows:")
    print(df.head())
    print("\nData types:")
    print(df.dtypes)
    print("\nRole distribution:")
    print(df['role'].value_counts())
    print("\nTeam distribution:")
    print(df['team'].value_counts())
    print("\nSelection probability statistics:")
    # The column is named 'perc_selection' (with an underscore) in the data file
    print(df['perc_selection'].describe())

Player data loaded successfully!
Dataset shape: (22, 8)

Column names:
['match_code', 'player_code', 'player_name', 'role', 'team', 'perc_selection', 'perc_captain', 'perc_vice_captain']

First few rows:
   match_code  player_code player_name        role team  perc_selection  \
0       10001            1   Player_20          WK    A          0.3957   
1       10001            2    Player_2          WK    A          0.2131   
2       10001            3    Player_7  Allrounder    B          0.9191   
3       10001            4   Player_12  Allrounder    A          0.9522   
4       10001            5   Player_10  Allrounder    A          0.2145   

   perc_captain  perc_vice_captain  
0        0.0282             0.0285  
1        0.0052             0.0071  
2        0.2362             0.1779  
3        0.2945             0.2039  
4        0.0104             0.0151  

Data types:
match_code             int64
player_code            int64
player_name           object
role                  o

Analyzing detailed player roles, selection probabilities, and total team composition rules to accurately simulate and generate teams.


In [None]:
# Closer look at the selection probabilities and at who plays which role
print("Selection probability statistics:")
print(df['perc_selection'].describe())

# Roles are listed in the order they first appear in the data
print("\nPlayers by role and their selection probabilities:")
for role in df['role'].unique():
    role_players = df.loc[df['role'].eq(role)]
    print(f"\n{role} players:")
    for _, player in role_players.iterrows():
        print(f"  {player['player_name']} (Team {player['team']}): {player['perc_selection']:.4f}")

# Restate the task requirements, one requirement per output line
print("\nSummary of problem understanding:")
print("\n".join([
    "=" * 50,
    "1. Need to simulate 20,000 fantasy cricket teams",
    "2. Each team must have exactly 11 players",
    "3. Team composition rules:",
    "   - At least 1 Batsman",
    "   - At least 1 Bowler",
    "   - At least 1 WK (Wicket Keeper)",
    "   - At least 1 Allrounder",
    "4. Player selection should match their perc_selection probabilities",
    "5. Expected frequency = perc_selection * 20,000 teams",
    "6. Need to output teamdf.csv with specified columns",
    "7. Need to achieve maximum accuracy (minimize error between expected and actual selection frequencies)",
]))

Selection probability statistics:
count    22.000000
mean      0.474950
std       0.302204
min       0.028300
25%       0.257925
50%       0.378750
75%       0.685975
max       0.952200
Name: perc_selection, dtype: float64

Players by role and their selection probabilities:

WK players:
  Player_20 (Team A): 0.3957
  Player_2 (Team A): 0.2131

Allrounder players:
  Player_7 (Team B): 0.9191
  Player_12 (Team A): 0.9522
  Player_10 (Team A): 0.2145
  Player_8 (Team A): 0.5682
  Player_22 (Team B): 0.8919
  Player_11 (Team B): 0.2709

Batsman players:
  Player_21 (Team A): 0.6935
  Player_3 (Team A): 0.0283
  Player_18 (Team B): 0.4950
  Player_4 (Team B): 0.6590
  Player_9 (Team A): 0.3618
  Player_14 (Team A): 0.2536
  Player_17 (Team B): 0.9360
  Player_19 (Team B): 0.3518

Bowler players:
  Player_1 (Team B): 0.6634
  Player_13 (Team A): 0.1068
  Player_5 (Team B): 0.8348
  Player_16 (Team B): 0.3060
  Player_6 (Team B): 0.0330
  Player_15 (Team A): 0.3003

Summary of problem underst

Setting up the simulation process to generate teams that reflect the specified player selection probabilities and role constraints.


In [None]:
# Fantasy Team Simulation Solution
import pandas as pd
import numpy as np
import random
from collections import defaultdict, Counter
import itertools

# Simulation size
NUM_TEAMS = 20000
TEAM_SIZE = 11

# Seed both random number generators so repeated runs draw the same teams
random.seed(42)
np.random.seed(42)

# Load player data
df = pd.read_csv("player_data_sample.csv")

# One list of player records (dicts) per role, in a fixed role order
players_by_role = {
    role_name: df.loc[df['role'] == role_name].to_dict('records')
    for role_name in ('WK', 'Batsman', 'Bowler', 'Allrounder')
}

# Show each role's roster with the target selection probability of every player
for role, players in players_by_role.items():
    print(f"{role}: {len(players)} players")
    for player in players:
        print(f"  {player['player_name']} - {player['perc_selection']:.4f}")
    print()

print(
    f"Goal: Generate {NUM_TEAMS} teams of {TEAM_SIZE} players each",
    "Constraint: Each team must have at least 1 player from each role",
    "Objective: Match selection probabilities as closely as possible",
    sep="\n",
)

WK: 2 players
  Player_20 - 0.3957
  Player_2 - 0.2131

Batsman: 8 players
  Player_21 - 0.6935
  Player_3 - 0.0283
  Player_18 - 0.4950
  Player_4 - 0.6590
  Player_9 - 0.3618
  Player_14 - 0.2536
  Player_17 - 0.9360
  Player_19 - 0.3518

Bowler: 6 players
  Player_1 - 0.6634
  Player_13 - 0.1068
  Player_5 - 0.8348
  Player_16 - 0.3060
  Player_6 - 0.0330
  Player_15 - 0.3003

Allrounder: 6 players
  Player_7 - 0.9191
  Player_12 - 0.9522
  Player_10 - 0.2145
  Player_8 - 0.5682
  Player_22 - 0.8919
  Player_11 - 0.2709

Goal: Generate 20000 teams of 11 players each
Constraint: Each team must have at least 1 player from each role
Objective: Match selection probabilities as closely as possible


Implementing an advanced algorithm to generate fantasy teams that closely match player selection probabilities and role constraints.


In [None]:
# Advanced team generation algorithm with accuracy optimization

def _deficit_adjusted_weights(candidates, expected_selections, actual_selections, progress, gain):
    """Return one positive sampling weight per candidate player.

    Each weight starts from the player's ``perc_selection`` and is scaled up when
    the player has been picked less often than expected at this point of the run
    (``expected * progress``), or scaled down when picked more often. ``gain``
    controls how strongly the deficit moves the weight. The scale factor is
    floored at 0.1 and the final weight at 0.001 so every candidate stays drawable.
    """
    weights = []
    for player in candidates:
        code = player['player_code']
        expected_so_far = expected_selections[code] * progress

        adjustment = 1.0
        if expected_so_far > 0:
            deficit_ratio = (expected_so_far - actual_selections[code]) / expected_so_far
            adjustment = max(0.1, 1.0 + deficit_ratio * gain)

        weights.append(max(0.001, player['perc_selection'] * adjustment))
    return weights


def _weighted_pick(candidates, weights):
    """Draw one element of ``candidates`` with probability proportional to ``weights``.

    An index is sampled rather than the player dict itself, so NumPy does not have
    to convert the list of dicts into an object array on every draw.
    """
    probabilities = np.array(weights) / sum(weights)
    return candidates[np.random.choice(len(candidates), p=probabilities)]


def generate_teams_optimized(players_by_role, num_teams=20000, team_size=11):
    """
    Generate fantasy teams whose player frequencies track ``perc_selection``.

    Every team is built in two steps: first one player is drawn from each role
    (so each non-empty role is represented), then the remaining spots are filled
    from the whole pool. Both steps use deficit-adjusted weights, so players that
    are behind their expected count are favoured as the run progresses.

    Args:
        players_by_role: mapping of role name to a list of player dicts. Each
            dict must provide 'player_code' and 'perc_selection'.
        num_teams: number of teams to generate.
        team_size: number of players per team.

    Returns:
        A tuple ``(teams, actual_selections, expected_selections)`` where
        ``teams`` is a list of ``{'team_id': int, 'players': [player dict, ...]}``,
        ``actual_selections`` maps player_code to the number of teams the player
        was placed in, and ``expected_selections`` maps player_code to
        ``perc_selection * num_teams``.

    Note:
        Draws come from the global ``np.random`` state; seed it beforehand for
        reproducible output. Progress is printed every 2000 teams.
    """
    # Deficit-correction strength: stronger for the mandatory role picks than
    # for the free fill-up picks.
    role_pick_gain = 2.0
    fill_pick_gain = 1.5

    # Master player list across all roles
    all_players = [player for role_players in players_by_role.values() for player in role_players]

    # Expected number of teams each player should appear in
    expected_selections = {
        player['player_code']: player['perc_selection'] * num_teams for player in all_players
    }

    # Running count of teams each player has been placed in
    actual_selections = defaultdict(int)
    teams = []

    print("Starting team generation...")

    for team_id in range(1, num_teams + 1):
        if team_id % 2000 == 0:
            print(f"Generated {team_id} teams...")

        progress = team_id / num_teams
        team_players = []
        used_player_codes = set()

        # Step 1: one player from each role (mandatory constraint)
        for role_players in players_by_role.values():
            candidates = [p for p in role_players if p['player_code'] not in used_player_codes]
            if not candidates:
                continue  # a role with nobody left cannot contribute a player

            weights = _deficit_adjusted_weights(
                candidates, expected_selections, actual_selections, progress, role_pick_gain
            )
            selected_player = _weighted_pick(candidates, weights)
            team_players.append(selected_player)
            used_player_codes.add(selected_player['player_code'])
            actual_selections[selected_player['player_code']] += 1

        # Step 2: fill whatever spots remain after the mandatory picks
        remaining_spots = team_size - len(team_players)

        for _ in range(remaining_spots):
            candidates = [p for p in all_players if p['player_code'] not in used_player_codes]
            if not candidates:
                break  # pool exhausted: the team stays smaller than team_size

            weights = _deficit_adjusted_weights(
                candidates, expected_selections, actual_selections, progress, fill_pick_gain
            )
            selected_player = _weighted_pick(candidates, weights)
            team_players.append(selected_player)
            used_player_codes.add(selected_player['player_code'])
            actual_selections[selected_player['player_code']] += 1

        # Store team
        teams.append({
            'team_id': team_id,
            'players': team_players
        })

    print(f"Generated {len(teams)} teams successfully!")
    return teams, actual_selections, expected_selections

# Generate teams using the function's defaults (num_teams=20000, team_size=11).
# `teams` holds the generated line-ups; the two dicts map player_code to the
# actual and expected number of teams for each player.
teams, actual_selections, expected_selections = generate_teams_optimized(players_by_role)

Starting team generation...
Generated 2000 teams...
Generated 4000 teams...
Generated 6000 teams...
Generated 8000 teams...
Generated 10000 teams...
Generated 12000 teams...
Generated 14000 teams...
Generated 16000 teams...
Generated 18000 teams...
Generated 20000 teams...
Generated 20000 teams successfully!


Transforming the generated teams into the required CSV format and verifying team composition constraints.


In [None]:
# Convert teams to the required DataFrame format
def create_team_dataframe(teams, match_code=10001):
    """
    Convert team data to the required CSV format (one row per team/player pair).

    Args:
        teams: list of ``{'team_id': ..., 'players': [player dict, ...]}``. Each
            player dict must provide 'player_code', 'player_name', 'role',
            'team' and 'perc_selection'.
        match_code: value written to the 'match_code' column of every row.

    Returns:
        A DataFrame with the columns match_code, player_code, player_name, role,
        team, perc_selection and team_id, in that order. The columns are present
        even when ``teams`` is empty, so downstream column lookups do not fail.
    """
    columns = ['match_code', 'player_code', 'player_name', 'role', 'team',
               'perc_selection', 'team_id']

    rows = [
        {
            'match_code': match_code,
            'player_code': player['player_code'],
            'player_name': player['player_name'],
            'role': player['role'],
            'team': player['team'],
            'perc_selection': player['perc_selection'],
            'team_id': team['team_id'],
        }
        for team in teams
        for player in team['players']
    ]

    # Passing the schema explicitly keeps the columns for an empty row list
    return pd.DataFrame(rows, columns=columns)

# Flatten the generated teams into one row per (team, player) pair
print("Creating team DataFrame...")
team_df = create_team_dataframe(teams)

print(
    f"Team DataFrame shape: {team_df.shape}",
    f"Expected shape: ~{20000 * 11} rows",
    "\nFirst few rows:",
    sep="\n",
)
print(team_df.head(15))

print(
    f"\nUnique teams: {team_df['team_id'].nunique()}",
    f"Unique players: {team_df['player_code'].nunique()}",
    sep="\n",
)

# Every team should contain all four roles; count the distinct roles per team
print("\nVerifying team composition constraints:")
role_check = team_df.groupby('team_id')['role'].nunique()
teams_with_all_roles = role_check.eq(4).sum()
print(f"Teams with all 4 roles: {teams_with_all_roles}/{len(role_check)}")

# List the first few offending teams, if there are any
if teams_with_all_roles < len(role_check):
    problem_teams = role_check.loc[role_check.lt(4)]
    print(f"Teams missing roles: {len(problem_teams)}")
    for team_id, role_count in problem_teams.head().items():
        print(f"  Team {team_id} has only {role_count} roles")

# Write the required output file
team_df.to_csv('teamdf.csv', index=False)
print(f"\nSaved team data to 'teamdf.csv'", f"File contains {len(team_df)} rows", sep="\n")

Creating team DataFrame...
Team DataFrame shape: (220000, 7)
Expected shape: ~220000 rows

First few rows:
    match_code  player_code player_name        role team  perc_selection  \
0        10001            1   Player_20          WK    A          0.3957   
1        10001           16   Player_19     Batsman    B          0.3518   
2        10001           20   Player_16      Bowler    B          0.3060   
3        10001            6    Player_8  Allrounder    A          0.5682   
4        10001           11   Player_18     Batsman    B          0.4950   
5        10001           12    Player_4     Batsman    B          0.6590   
6        10001            9   Player_21     Batsman    A          0.6935   
7        10001            7   Player_22  Allrounder    B          0.8919   
8        10001           22   Player_15      Bowler    A          0.3003   
9        10001            3    Player_7  Allrounder    B          0.9191   
10       10001            2    Player_2          WK    A 

Assessing the generated teams' accuracy by comparing actual player selections to expected probabilities.


In [None]:
# Implement the accuracy evaluation function
def evaluate_team_accuracy(team_df):
    """
    Evaluate the accuracy of team generation against expected selection probabilities.

    Args:
        team_df: one row per (team, player) pair with the columns match_code,
            player_code, player_name, role, team, perc_selection and team_id.

    Returns:
        A DataFrame sorted by player_code with, per player:
        expected_team_count (``perc_selection * number of teams``, rounded),
        actual_team_count (distinct teams containing the player),
        actual_perc_selection (actual share of teams, in percent) and
        perc_error (relative deviation of actual from expected count, in percent).

    Side effects:
        Prints a summary of the data and the accuracy KPIs.
    """
    print("Evaluating Fantasy Team Accuracy...")
    print(f"teamdf shape: {team_df.shape}")

    total_teams = team_df['team_id'].nunique()
    total_players = team_df['player_code'].nunique()

    print(f"Total unique teams: {total_teams}")
    print(f"Total unique players: {total_players}")

    # Check role composition (a complete team covers 4 distinct roles)
    roles_per_team = team_df.groupby('team_id')['role'].nunique()
    missing_role_teams = (roles_per_team < 4).sum()
    print(f"Teams missing at least one role: {missing_role_teams}")

    # Get player reference data (unique player info)
    player_ref = team_df.drop_duplicates(subset=['player_code'])[
        ['match_code', 'player_code', 'player_name', 'role', 'team', 'perc_selection']
    ]

    # Calculate actual team counts per player
    team_counts = team_df.groupby('player_code')['team_id'].nunique().reset_index()
    team_counts.columns = ['player_code', 'actual_team_count']

    # Merge with player reference data
    merged = player_ref.merge(team_counts, on='player_code')

    # Calculate expected team count and accuracy metrics
    merged['expected_team_count'] = (merged['perc_selection'] * total_teams).round(0).astype(int)
    # Relative error in percent. The ratio is scaled by 100 exactly once; scaling
    # it a second time would report every error 100 times too large.
    merged['perc_error'] = ((merged['actual_team_count'] - merged['expected_team_count']) /
                           merged['expected_team_count'] * 100).round(2)
    merged['actual_perc_selection'] = (merged['actual_team_count'] / total_teams * 100).round(2)

    # Create accuracy summary
    accuracy_df = merged[['player_code', 'player_name', 'role', 'team', 'perc_selection',
                         'expected_team_count', 'actual_team_count', 'actual_perc_selection', 'perc_error']].copy()
    accuracy_df = accuracy_df.sort_values('player_code')

    # Calculate accuracy KPIs
    within_5 = accuracy_df[accuracy_df['perc_error'].abs() <= 5]

    print("\nAccuracy KPIs:")
    print(f"Players within ±5% relative error: {within_5.shape[0]}/{accuracy_df.shape[0]}")
    print(f"Minimum error: {accuracy_df['perc_error'].min():.2f}%")
    print(f"Maximum error: {accuracy_df['perc_error'].max():.2f}%")
    print(f"Mean absolute error: {accuracy_df['perc_error'].abs().mean():.2f}%")
    print(f"Standard deviation of errors: {accuracy_df['perc_error'].std():.2f}%")

    # Show players outside ±5% error range
    outside_5 = accuracy_df[accuracy_df['perc_error'].abs() > 5]
    if not outside_5.empty:
        print(f"\nPlayers with >±5% error:")
        for _, player in outside_5.iterrows():
            print(f"  {player['player_name']}: {player['perc_error']:.2f}% error")
    else:
        print(f"\nAll players within ±5% error range!")

    return accuracy_df

# Score the generated teams against the target selection probabilities
accuracy_summary = evaluate_team_accuracy(team_df)

# Full per-player table, rendered without the index column
print("\nDetailed Accuracy Results:", "=" * 80, sep="\n")
print(accuracy_summary.to_string(index=False))

# Persist the per-player table
accuracy_summary.to_csv('accuracy_summary.csv', index=False)
print(f"\nSaved accuracy summary to 'accuracy_summary.csv'")

Evaluating Fantasy Team Accuracy...
teamdf shape: (220000, 7)
Total unique teams: 20000
Total unique players: 22
Teams missing at least one role: 0

Accuracy KPIs:
Players within ±5% relative error: 0/22
Minimum error: -1036.02%
Maximum error: 7358.05%
Mean absolute error: 1624.05%
Standard deviation of errors: 2121.44%

Players with >±5% error:
  Player_20: 6875.16% error
  Player_2: 7358.05% error
  Player_7: -919.92% error
  Player_12: -1036.02% error
  Player_10: 1494.17% error
  Player_8: 509.50% error
  Player_22: -752.89% error
  Player_11: 1376.89% error
  Player_21: 151.41% error
  Player_3: 1448.76% error
  Player_18: 734.34% error
  Player_4: 226.10% error
  Player_9: 1111.11% error
  Player_14: 1315.06% error
  Player_17: -919.34% error
  Player_19: 1179.65% error
  Player_1: 690.38% error
  Player_13: 2116.10% error
  Player_5: -29.35% error
  Player_16: 1815.36% error
  Player_6: 1954.55% error
  Player_15: 1714.95% error

Detailed Accuracy Results:
 player_code player_na

Correcting the accuracy evaluation to properly compare actual and expected player selection frequencies.


In [None]:
# Fix the accuracy calculation error
def evaluate_team_accuracy_fixed(team_df):
    """
    Compare how often each player was actually picked with the expected frequency.

    Expects one row per (team, player) pair with the columns match_code,
    player_code, player_name, role, team, perc_selection and team_id. Prints a
    KPI summary and returns a per-player table sorted by player_code, where
    perc_error is the relative deviation of the actual from the expected team
    count in percent and actual_perc_selection is the actual share of teams
    expressed as a fraction.
    """
    print("Evaluating Fantasy Team Accuracy...")
    print(f"teamdf shape: {team_df.shape}")

    total_teams = team_df['team_id'].nunique()
    total_players = team_df['player_code'].nunique()
    print(f"Total unique teams: {total_teams}")
    print(f"Total unique players: {total_players}")

    # A complete team covers 4 distinct roles
    missing_role_teams = team_df.groupby('team_id')['role'].nunique().lt(4).sum()
    print(f"Teams missing at least one role: {missing_role_teams}")

    # Number of distinct teams each player appears in
    appearances = (
        team_df.groupby('player_code')['team_id']
        .nunique()
        .rename('actual_team_count')
        .reset_index()
    )

    # One reference row per player, joined with that player's appearance count
    reference_columns = ['match_code', 'player_code', 'player_name', 'role', 'team', 'perc_selection']
    per_player = (
        team_df.drop_duplicates(subset=['player_code'])[reference_columns]
        .merge(appearances, on='player_code')
    )

    expected_counts = (per_player['perc_selection'] * total_teams).round(0).astype(int)
    observed_counts = per_player['actual_team_count']
    per_player = per_player.assign(
        expected_team_count=expected_counts,
        # relative error in percent: scaled by 100 exactly once
        perc_error=((observed_counts - expected_counts) / expected_counts * 100).round(2),
        actual_perc_selection=(observed_counts / total_teams).round(4),
    )

    report_columns = ['player_code', 'player_name', 'role', 'team', 'perc_selection',
                      'expected_team_count', 'actual_team_count', 'actual_perc_selection', 'perc_error']
    accuracy_df = per_player[report_columns].sort_values('player_code')

    # Split the players at the ±5% relative error threshold
    error_magnitude = accuracy_df['perc_error'].abs()
    within_5 = accuracy_df[error_magnitude <= 5]
    outside_5 = accuracy_df[error_magnitude > 5]

    print("\nAccuracy KPIs:")
    print(f"Players within ±5% relative error: {within_5.shape[0]}/{accuracy_df.shape[0]}")
    print(f"Minimum error: {accuracy_df['perc_error'].min():.2f}%")
    print(f"Maximum error: {accuracy_df['perc_error'].max():.2f}%")
    print(f"Mean absolute error: {accuracy_df['perc_error'].abs().mean():.2f}%")
    print(f"Standard deviation of errors: {accuracy_df['perc_error'].std():.2f}%")

    # Name the players that miss the threshold, or confirm that none do
    if outside_5.empty:
        print(f"\nAll players within ±5% error range!")
    else:
        print(f"\nPlayers with >±5% error:")
        for _, player in outside_5.iterrows():
            print(f"  {player['player_name']}: {player['perc_error']:.2f}% error")

    return accuracy_df

# Score the same teams again, this time with the corrected error calculation
accuracy_summary_fixed = evaluate_team_accuracy_fixed(team_df)

# Full per-player table, rendered without the index column
print("\nDetailed Accuracy Results (Corrected):", "=" * 100, sep="\n")
print(accuracy_summary_fixed.to_string(index=False))

# Overwrite the earlier summary file with the corrected numbers
accuracy_summary_fixed.to_csv('accuracy_summary.csv', index=False)
print(f"\nSaved corrected accuracy summary to 'accuracy_summary.csv'")

Evaluating Fantasy Team Accuracy...
teamdf shape: (220000, 7)
Total unique teams: 20000
Total unique players: 22
Teams missing at least one role: 0

Accuracy KPIs:
Players within ±5% relative error: 3/22
Minimum error: -10.36%
Maximum error: 73.58%
Mean absolute error: 16.24%
Standard deviation of errors: 21.21%

Players with >±5% error:
  Player_20: 68.75% error
  Player_2: 73.58% error
  Player_7: -9.20% error
  Player_12: -10.36% error
  Player_10: 14.94% error
  Player_8: 5.10% error
  Player_22: -7.53% error
  Player_11: 13.77% error
  Player_3: 14.49% error
  Player_18: 7.34% error
  Player_9: 11.11% error
  Player_14: 13.15% error
  Player_17: -9.19% error
  Player_19: 11.80% error
  Player_1: 6.90% error
  Player_13: 21.16% error
  Player_16: 18.15% error
  Player_6: 19.55% error
  Player_15: 17.15% error

Detailed Accuracy Results (Corrected):
 player_code player_name       role team  perc_selection  expected_team_count  actual_team_count  actual_perc_selection  perc_error
   

Analyzing role distribution constraints and designing an improved algorithm to better balance player selections across teams.


In [None]:
# Quantify how the "at least one player per role" rule interacts with the
# target selection probabilities

print("PROBLEM ANALYSIS:", "=" * 50, sep="\n")

# Wicket keepers: expected number of selections per player over 20,000 teams
wk_players = df.loc[df['role'].eq('WK')]
print("Wicket Keeper Players:")
for _, player in wk_players.iterrows():
    expected = player['perc_selection'] * 20000
    print(f"  {player['player_name']}: {player['perc_selection']:.4f} → {expected:.0f} expected selections")

# Compare the role's total expected selections with one mandatory pick per team
total_wk_expected = wk_players['perc_selection'].sum() * 20000
print(
    f"\nTotal expected WK selections: {total_wk_expected:.0f}",
    f"Required WK selections (1 per team): {20000}",
    f"Constraint violation: Need {20000 - total_wk_expected:.0f} additional WK selections",
    sep="\n",
)

# Same comparison for every role
print("\nROLE ANALYSIS:", "=" * 50, sep="\n")
for role in ['WK', 'Batsman', 'Bowler', 'Allrounder']:
    role_players = df.loc[df['role'].eq(role)]
    total_expected = role_players['perc_selection'].sum() * 20000
    min_required = 20000  # At least 1 per team

    print(
        f"{role}:",
        f"  Players: {len(role_players)}",
        f"  Total expected selections: {total_expected:.0f}",
        f"  Minimum required: {min_required}",
        f"  Surplus/Deficit: {total_expected - min_required:.0f}",
        "",
        sep="\n",
    )

# Outline of the approach taken in the following cells
print(
    "SOLUTION APPROACH:",
    "=" * 50,
    "1. The constraint issue cannot be avoided - some roles will be over/under-selected",
    "2. Need to redistribute the 'forced' selections as fairly as possible",
    "3. Implement a constraint-aware probability adjustment",
    sep="\n",
)

# Improved algorithm with constraint-aware adjustments
def generate_teams_constraint_aware(players_by_role, num_teams=20000, team_size=11):
    """
    Compute, per role, the factor needed to lift the role to one pick per team.

    Despite its name this function does not build teams: it only returns the
    adjustment factors. A role whose players' combined ``perc_selection`` yields
    fewer than ``num_teams`` expected selections gets the factor
    ``num_teams / expected selections`` (and the factor is printed); every other
    role gets 1.0.

    Args:
        players_by_role: mapping of role name to a list of player dicts, each
            providing 'perc_selection'.
        num_teams: number of teams, i.e. the minimum number of selections each
            role needs.
        team_size: accepted for signature compatibility; not used.

    Returns:
        dict mapping role name to its boost factor (1.0 means no adjustment).
    """
    role_adjustments = {}
    min_required_selections = num_teams  # At least 1 per team

    for role, role_players in players_by_role.items():
        total_role_prob = sum(p['perc_selection'] for p in role_players)
        expected_role_selections = total_role_prob * num_teams

        if expected_role_selections <= 0:
            # Empty role or zero total probability: there is nothing to scale,
            # and dividing by it would raise ZeroDivisionError.
            print(f"{role}: No expected selections; boost factor left at 1.000")
            role_adjustments[role] = 1.0
        elif expected_role_selections < min_required_selections:
            # This role needs boosting
            boost_factor = min_required_selections / expected_role_selections
            role_adjustments[role] = boost_factor
            print(f"{role}: Boost factor = {boost_factor:.3f}")
        else:
            role_adjustments[role] = 1.0

    return role_adjustments

# Calculate the needed adjustments: one boost factor per role, using the
# function's default of 20,000 teams. No teams are generated by this call.
print("\nCalculating constraint-aware adjustments...")
adjustments = generate_teams_constraint_aware(players_by_role)

PROBLEM ANALYSIS:
Wicket Keeper Players:
  Player_20: 0.3957 → 7914 expected selections
  Player_2: 0.2131 → 4262 expected selections

Total expected WK selections: 12176
Required WK selections (1 per team): 20000
Constraint violation: Need 7824 additional WK selections

ROLE ANALYSIS:
WK:
  Players: 2
  Total expected selections: 12176
  Minimum required: 20000
  Surplus/Deficit: -7824

Batsman:
  Players: 8
  Total expected selections: 75580
  Minimum required: 20000
  Surplus/Deficit: 55580

Bowler:
  Players: 6
  Total expected selections: 44886
  Minimum required: 20000
  Surplus/Deficit: 24886

Allrounder:
  Players: 6
  Total expected selections: 76336
  Minimum required: 20000
  Surplus/Deficit: 56336

SOLUTION APPROACH:
1. The constraint issue cannot be avoided - some roles will be over/under-selected
2. Need to redistribute the 'forced' selections as fairly as possible
3. Implement a constraint-aware probability adjustment

Calculating constraint-aware adjustments...
WK: Boos

Implementing an advanced constraint-aware algorithm to generate balanced fantasy teams matching player probabilities.


In [None]:
# Implement an advanced constraint-aware team generation algorithm

def generate_teams_advanced(players_by_role, num_teams=20000, team_size=11):
    """
    Generate teams with a two-phase selection (forced role picks, then free picks).

    Phase 1 draws one player from every non-empty role, weighted by
    ``perc_selection``. Phase 2 fills the remaining spots from the whole pool,
    again weighted by ``perc_selection``. The number of roles is taken from
    ``players_by_role`` rather than assumed to be four.

    Args:
        players_by_role: mapping of role name to a list of player dicts, each
            providing 'player_code' and 'perc_selection'.
        num_teams: number of teams to generate.
        team_size: number of players per team.

    Returns:
        A tuple ``(teams, total_selections)`` where ``teams`` is a list of
        ``{'team_id': int, 'players': [player dict, ...]}`` and
        ``total_selections`` maps player_code to the number of teams the player
        was placed in. Players picked in phase 2 are copies that additionally
        carry 'adjusted_prob_forced' and 'adjusted_prob_flexible'.

    Note:
        The per-role adjustment factor is the same for every player of a role,
        so it cancels when the phase-1 weights are normalized; it is printed and
        stored on the player copies but does not change which player is drawn.
        Draws come from the global ``np.random`` state.
    """

    # Step 1: Calculate constraint-aware base probabilities
    print("Step 1: Calculating constraint-aware probabilities...")

    # Each team needs at least one pick per role, i.e. num_teams picks per role
    min_required = num_teams

    role_forced_adjustments = {}
    for role, role_players in players_by_role.items():
        total_role_prob = sum(p['perc_selection'] for p in role_players)
        # Expected picks of this role within its own share of the forced spots
        expected_in_forced = total_role_prob * num_teams

        if expected_in_forced <= 0:
            # Empty role or zero total probability: nothing to scale, and the
            # ratio below would divide by zero.
            role_forced_adjustments[role] = 1.0
        elif expected_in_forced < min_required:
            # Need to boost this role's probability for forced selections
            role_forced_adjustments[role] = min_required / expected_in_forced
        else:
            role_forced_adjustments[role] = 1.0

        print(f"{role}: Adjustment factor = {role_forced_adjustments[role]:.3f}")

    # Step 2: Create adjusted player probabilities
    all_players = []
    for role, role_players in players_by_role.items():
        for player in role_players:
            adjusted_player = player.copy()
            # Adjust probability for forced selections
            adjusted_player['adjusted_prob_forced'] = player['perc_selection'] * role_forced_adjustments[role]
            # Keep original probability for flexible selections
            adjusted_player['adjusted_prob_flexible'] = player['perc_selection']
            all_players.append(adjusted_player)

    # Step 3: Generate teams using two-phase selection
    print("Step 2: Generating teams with two-phase selection...")

    teams = []
    total_selections = defaultdict(int)

    for team_id in range(1, num_teams + 1):
        if team_id % 4000 == 0:
            print(f"Generated {team_id} teams...")

        team_players = []
        used_codes = set()

        # Phase 1: Forced selections (1 from each role)
        for role, role_players in players_by_role.items():
            available = [p for p in role_players if p['player_code'] not in used_codes]
            if not available:
                continue  # nobody left in this role

            weights_array = np.array(
                [p['perc_selection'] * role_forced_adjustments[role] for p in available]
            )
            weights_array = weights_array / weights_array.sum()  # Normalize
            # Sample an index so NumPy never has to wrap the list of dicts
            selected = available[np.random.choice(len(available), p=weights_array)]
            team_players.append(selected)
            used_codes.add(selected['player_code'])
            total_selections[selected['player_code']] += 1

        # Phase 2: Flexible selections (whatever is left after the forced picks)
        for _ in range(team_size - len(team_players)):
            available = [p for p in all_players if p['player_code'] not in used_codes]
            if not available:
                break  # pool exhausted: the team stays smaller than team_size

            weights_array = np.array([p['perc_selection'] for p in available])
            weights_array = weights_array / weights_array.sum()  # Normalize
            selected = available[np.random.choice(len(available), p=weights_array)]
            team_players.append(selected)
            used_codes.add(selected['player_code'])
            total_selections[selected['player_code']] += 1

        teams.append({
            'team_id': team_id,
            'players': team_players
        })

    print(f"Generated {len(teams)} teams successfully!")
    return teams, total_selections

# Run the two-phase generator on the same player pool
print("Generating teams with advanced constraint-aware algorithm...", "=" * 60, sep="\n")
teams_advanced, selections_advanced = generate_teams_advanced(players_by_role)

# Flatten the result into the one-row-per-player output format
team_df_advanced = create_team_dataframe(teams_advanced)
print(f"\nAdvanced algorithm team DataFrame shape: {team_df_advanced.shape}")

# Overwrite the earlier output file with the teams from this run
team_df_advanced.to_csv('teamdf.csv', index=False)
print("Saved improved team data to 'teamdf.csv'")

Generating teams with advanced constraint-aware algorithm...
Step 1: Calculating constraint-aware probabilities...
WK: Adjustment factor = 1.643
Batsman: Adjustment factor = 1.000
Bowler: Adjustment factor = 1.000
Allrounder: Adjustment factor = 1.000
Step 2: Generating teams with two-phase selection...
Generated 4000 teams...
Generated 8000 teams...
Generated 12000 teams...
Generated 16000 teams...
Generated 20000 teams...
Generated 20000 teams successfully!

Advanced algorithm team DataFrame shape: (220000, 7)
Saved improved team data to 'teamdf.csv'


Assessing the accuracy of the constraint-aware algorithm by comparing its player selection frequencies against the expected probabilities and against the results of the first algorithm.


In [None]:
# Evaluate the improved algorithm's accuracy
# (per-player errors of the two-phase generator, side by side with the first run)
print("EVALUATING IMPROVED ALGORITHM ACCURACY")
print("="*50)

accuracy_advanced = evaluate_team_accuracy_fixed(team_df_advanced)

print("\nComparison with Previous Algorithm:")
print("="*50)

# Reuse the in-memory results of the first algorithm (nothing is read from disk)
previous_accuracy = accuracy_summary_fixed

print("ACCURACY COMPARISON:")
print(f"{'Player':<12} {'Role':<10} {'Previous Error':<15} {'Improved Error':<15} {'Improvement':<12}")
print("-" * 75)

# Running totals, filled in by the loop below
total_improvement = 0
players_improved = 0
players_within_5_old = 0
players_within_5_new = 0

for i, row in accuracy_advanced.iterrows():
    player_code = row['player_code']
    # Look up the same player's error in the first run
    old_error = previous_accuracy[previous_accuracy['player_code'] == player_code]['perc_error'].iloc[0]
    new_error = row['perc_error']
    # Positive means the absolute error shrank; negative means it grew
    improvement = abs(old_error) - abs(new_error)

    if abs(old_error) <= 5:
        players_within_5_old += 1
    if abs(new_error) <= 5:
        players_within_5_new += 1

    # Only gains are accumulated; players that got worse add nothing to the total
    if improvement > 0:
        players_improved += 1
        total_improvement += improvement

    print(f"{row['player_name']:<12} {row['role']:<10} {old_error:>10.2f}% {new_error:>13.2f}% {improvement:>10.2f}%")

print("\nSUMMARY:")
print(f"Players improved: {players_improved}/{len(accuracy_advanced)}")
print(f"Total improvement: {total_improvement:.2f} percentage points")
# NOTE(review): the total contains only the positive improvements but is divided
# by the number of all players, so this is not a net average change per player.
print(f"Average improvement per player: {total_improvement/len(accuracy_advanced):.2f} percentage points")
print(f"Players within ±5% (Previous): {players_within_5_old}/{len(accuracy_advanced)}")
print(f"Players within ±5% (Improved): {players_within_5_new}/{len(accuracy_advanced)}")

# Calculate additional accuracy metrics
# (mean absolute error and standard deviation of the per-player percentage errors)
mae_old = previous_accuracy['perc_error'].abs().mean()
mae_new = accuracy_advanced['perc_error'].abs().mean()
std_old = previous_accuracy['perc_error'].std()
std_new = accuracy_advanced['perc_error'].std()

# A negative "Reduction" means the metric got worse with the new algorithm
print(f"\nMean Absolute Error:")
print(f"  Previous: {mae_old:.2f}%")
print(f"  Improved: {mae_new:.2f}%")
print(f"  Reduction: {mae_old - mae_new:.2f} percentage points")

print(f"\nStandard Deviation:")
print(f"  Previous: {std_old:.2f}%")
print(f"  Improved: {std_new:.2f}%")
print(f"  Reduction: {std_old - std_new:.2f} percentage points")

# Save final accuracy results (overwrites the summary file written earlier)
accuracy_advanced.to_csv('accuracy_summary.csv', index=False)
print(f"\nSaved final accuracy summary to 'accuracy_summary.csv'")

EVALUATING IMPROVED ALGORITHM ACCURACY
Evaluating Fantasy Team Accuracy...
teamdf shape: (220000, 7)
Total unique teams: 20000
Total unique players: 22
Teams missing at least one role: 0

Accuracy KPIs:
Players within ±5% relative error: 4/22
Minimum error: -18.80%
Maximum error: 134.87%
Mean absolute error: 30.06%
Standard deviation of errors: 38.63%

Players with >±5% error:
  Player_20: 97.98% error
  Player_2: 134.87% error
  Player_7: -17.70% error
  Player_12: -18.70% error
  Player_10: 28.14% error
  Player_22: -16.47% error
  Player_11: 22.06% error
  Player_3: 40.28% error
  Player_18: 6.17% error
  Player_9: 14.73% error
  Player_14: 24.84% error
  Player_17: -18.80% error
  Player_19: 17.89% error
  Player_13: 50.70% error
  Player_5: -7.12% error
  Player_16: 32.78% error
  Player_6: 62.73% error
  Player_15: 35.26% error

Comparison with Previous Algorithm:
ACCURACY COMPARISON:
Player       Role       Previous Error  Improved Error  Improvement 
---------------------------

Implementing a refined, iterative optimization algorithm to produce teams with maximum accuracy to desired player selection probabilities.


In [None]:
# Let's implement a more refined version of the original algorithm with better optimization

def generate_teams_optimized_v2(players_by_role, num_teams=20000, team_size=11, max_iterations=3):
    """
    Enhanced team generation with iterative optimization for maximum accuracy

    The whole generation is repeated ``max_iterations`` times, each time with a
    different fixed seed, and the run with the lowest mean absolute relative
    error between actual and expected selection counts is returned.

    Every team is built in two phases:
      1. one player is drawn from each non-empty role (these picks are made even
         if ``team_size`` is smaller than the number of roles);
      2. the remaining ``team_size - len(team)`` spots are drawn from all players
         that are not in the team yet.
    A player's draw weight is ``perc_selection`` multiplied by a deficit
    correction that grows when the player is behind the pace needed to reach
    ``perc_selection * num_teams`` selections and shrinks (down to 0.05) when ahead.

    Parameters:
    - players_by_role: dict mapping role name -> list of player dicts; each dict
      must provide 'player_code' and 'perc_selection'
    - num_teams: number of teams generated per iteration
    - team_size: target number of players per team
    - max_iterations: number of seeded runs to compare

    Returns:
    - best_teams: list of {'team_id': int, 'players': [player dict, ...]}
    - best_selections: defaultdict(int) mapping player_code -> selection count
      of the best run
    Both are None when ``max_iterations`` is smaller than 1.

    Raises:
    - ValueError: if ``players_by_role`` contains no players at all.

    Note: line-ups are not checked against each other, so two teams may contain
    the same set of players.
    """
    # Local import: this cell must not depend on an import made in another cell.
    from collections import defaultdict

    # Create master player list (role order, then order within the role)
    all_players = [player for role_players in players_by_role.values() for player in role_players]
    if not all_players:
        # Previously this surfaced only after generating every (empty) team,
        # as a ZeroDivisionError in the error average.
        raise ValueError("players_by_role must contain at least one player")

    # Expected selection counts depend only on the inputs, not on the iteration.
    expected_selections = {
        player['player_code']: player['perc_selection'] * num_teams
        for player in all_players
    }

    def deficit_weights(candidates, selections, progress, strength):
        """Return the unnormalised draw weight of every candidate as a NumPy array."""
        weights = []
        for player in candidates:
            code = player['player_code']
            current = selections[code]
            expected_so_far = expected_selections[code] * progress

            if expected_so_far > 0:
                deficit_ratio = (expected_so_far - current) / expected_so_far
                # Floor at 0.05 so an over-selected player keeps a small chance.
                adjustment = max(0.05, 1.0 + deficit_ratio * strength)
            else:
                adjustment = 1.0

            weights.append(player['perc_selection'] * adjustment)
        return np.array(weights)

    def draw_player(candidates, selections, progress, strength, rng):
        """Draw one candidate by weight; None if there is nobody drawable."""
        if not candidates:
            return None
        weights = deficit_weights(candidates, selections, progress, strength)
        total = weights.sum()
        if not total > 0:
            return None
        # Drawing an index avoids converting the list of dicts into an object
        # array on every call; the random stream consumed is the same.
        return candidates[rng.choice(len(candidates), p=weights / total)]

    best_teams = None
    best_accuracy = float('inf')
    best_selections = None

    for iteration in range(max_iterations):
        print(f"\nIteration {iteration + 1}/{max_iterations}")

        # Track actual selections
        actual_selections = defaultdict(int)
        teams = []

        # Private generator with the same per-iteration seed as before: results
        # are reproducible and the global np.random state is left untouched.
        rng = np.random.RandomState(42 + iteration * 100)

        for team_id in range(1, num_teams + 1):
            if team_id % 5000 == 0:
                print(f"  Team {team_id}")

            progress = team_id / num_teams
            team_players = []
            used_codes = set()

            # Phase 1: Mandatory role selections (strong deficit correction)
            for role_players in players_by_role.values():
                candidates = [p for p in role_players if p['player_code'] not in used_codes]
                chosen = draw_player(candidates, actual_selections, progress, 3.0, rng)
                if chosen is not None:
                    team_players.append(chosen)
                    used_codes.add(chosen['player_code'])
                    actual_selections[chosen['player_code']] += 1

            # Phase 2: Fill remaining spots (slightly weaker correction)
            for _ in range(team_size - len(team_players)):
                candidates = [p for p in all_players if p['player_code'] not in used_codes]
                chosen = draw_player(candidates, actual_selections, progress, 2.5, rng)
                if chosen is not None:
                    team_players.append(chosen)
                    used_codes.add(chosen['player_code'])
                    actual_selections[chosen['player_code']] += 1

            teams.append({
                'team_id': team_id,
                'players': team_players
            })

        # Calculate accuracy for this iteration: mean relative deviation over
        # all players (players with an expected count of 0 contribute 0).
        total_error = 0
        for player in all_players:
            expected = expected_selections[player['player_code']]
            actual = actual_selections[player['player_code']]
            if expected > 0:
                total_error += abs((actual - expected) / expected)

        mean_absolute_error = total_error / len(all_players)
        print(f"  Mean Absolute Error: {mean_absolute_error:.4f}")

        if mean_absolute_error < best_accuracy:
            best_accuracy = mean_absolute_error
            best_teams = teams
            best_selections = actual_selections
            print(f"  *** New best accuracy! ***")

    print(f"\nBest accuracy achieved: {best_accuracy:.4f}")
    return best_teams, best_selections

# Generate optimized teams
# Runs the generator defined above on the `players_by_role` mapping created in an
# earlier cell; num_teams and team_size are left at the function defaults.
print("GENERATING OPTIMIZED TEAMS (V2)")
print("="*50)

teams_optimized, selections_optimized = generate_teams_optimized_v2(players_by_role, max_iterations=3)

# Create DataFrame and evaluate
# `create_team_dataframe` must already be defined by an earlier cell at this point.
team_df_optimized = create_team_dataframe(teams_optimized)
# NOTE: writes to the fixed file name 'teamdf.csv' in the working directory.
team_df_optimized.to_csv('teamdf.csv', index=False)

print(f"\nOptimized team DataFrame shape: {team_df_optimized.shape}")
print("Evaluating final accuracy...")

# `evaluate_team_accuracy_fixed` comes from an earlier cell; presumably it returns
# one row per player, since its length is reported as a player count below —
# TODO confirm.
final_accuracy = evaluate_team_accuracy_fixed(team_df_optimized)

# Save final results
# NOTE: writes to the same 'accuracy_summary.csv' file name used by the previous evaluation.
final_accuracy.to_csv('accuracy_summary.csv', index=False)
print(f"\nFINAL RESULTS SAVED:")
print(f"- teamdf.csv: {team_df_optimized.shape[0]} rows")
print(f"- accuracy_summary.csv: {len(final_accuracy)} players")

GENERATING OPTIMIZED TEAMS (V2)

Iteration 1/3
  Team 5000
  Team 10000
  Team 15000
  Team 20000
  Mean Absolute Error: 0.1393
  *** New best accuracy! ***

Iteration 2/3
  Team 5000
  Team 10000
  Team 15000
  Team 20000
  Mean Absolute Error: 0.1406

Iteration 3/3
  Team 5000
  Team 10000
  Team 15000
  Team 20000
  Mean Absolute Error: 0.1418

Best accuracy achieved: 0.1393

Optimized team DataFrame shape: (220000, 7)
Evaluating final accuracy...
Evaluating Fantasy Team Accuracy...
teamdf shape: (220000, 7)
Total unique teams: 20000
Total unique players: 22
Teams missing at least one role: 0

Accuracy KPIs:
Players within ±5% relative error: 4/22
Minimum error: -8.23%
Maximum error: 69.31%
Mean absolute error: 13.93%
Standard deviation of errors: 19.71%

Players with >±5% error:
  Player_20: 66.72% error
  Player_2: 69.31% error
  Player_7: -6.75% error
  Player_12: -8.23% error
  Player_10: 11.52% error
  Player_22: -5.51% error
  Player_11: 10.78% error
  Player_3: 12.54% error
 

Generating the comprehensive solution code for automated team creation and accuracy evaluation.


In [None]:

"""
Fantasy Team Simulation using Player Selection Probabilities
===========================================================

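This script generates 20,000 fantasy cricket teams of 11 players each (every team gets its
own team_id; identical line-ups are not filtered out), using selection probabilities from a
CSV file while respecting team composition constraints (at least one player from every role).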

"""

import pandas as pd
import numpy as np
import random
from collections import defaultdict

def load_player_data(filename='player_data_sample.csv'):
    """Read the player CSV and bucket its rows by playing role.

    Parameters:
    - filename: path of the CSV file; it must contain a 'role' column

    Returns:
    - players_by_role: dict with the keys 'WK', 'Batsman', 'Bowler' and
      'Allrounder' (in that order), each holding the matching rows as a list
      of record dicts; rows with any other role appear only in the frame
    - the complete, unfiltered DataFrame
    """
    player_table = pd.read_csv(filename)

    # The key order is kept fixed because callers iterate over the dict.
    role_order = ('WK', 'Batsman', 'Bowler', 'Allrounder')
    grouped = {}
    for role_name in role_order:
        role_rows = player_table.loc[player_table['role'] == role_name]
        grouped[role_name] = role_rows.to_dict('records')

    return grouped, player_table

def generate_teams_optimized(players_by_role, num_teams=20000, team_size=11, max_iterations=3):
    """
    Generate fantasy teams with optimized accuracy to match selection probabilities

    The generation is repeated ``max_iterations`` times with a different fixed
    seed per run; the run with the lowest mean absolute relative error between
    actual and expected selection counts wins.

    Each team is built in two phases: first one player per non-empty role
    (these picks happen even if ``team_size`` is smaller than the number of
    roles), then the remaining spots from every player not yet in the team.
    Draw weights are ``perc_selection`` scaled by a deficit correction that
    favours players who are behind their expected pace.

    Parameters:
    - players_by_role: Dictionary of players organized by role; every player
      dict must provide 'player_code' and 'perc_selection'
    - num_teams: Number of teams to generate (default: 20000)
    - team_size: Number of players per team (default: 11)
    - max_iterations: Number of optimization iterations (default: 3)

    Returns:
    - teams: List of generated teams, each {'team_id': int, 'players': [...]}
    - actual_selections: Dictionary of actual selection counts per player
    Both are None when ``max_iterations`` is smaller than 1.

    Raises:
    - ValueError: if ``players_by_role`` contains no players at all.

    Note: line-ups are not compared with each other, so duplicates can occur.
    """

    # Create master player list (role order, then order within the role)
    all_players = [player for role_players in players_by_role.values() for player in role_players]
    if not all_players:
        # Fail early with a clear message instead of a ZeroDivisionError in the
        # accuracy average after every (empty) team has been generated.
        raise ValueError("players_by_role must contain at least one player")

    # Expected selection counts are the same for every iteration.
    expected_selections = {
        player['player_code']: player['perc_selection'] * num_teams
        for player in all_players
    }

    def selection_weights(candidates, selections, progress, strength):
        """Return the unnormalised, deficit-corrected weight of each candidate."""
        weights = []
        for player in candidates:
            code = player['player_code']
            current = selections[code]
            expected_so_far = expected_selections[code] * progress

            if expected_so_far > 0:
                deficit_ratio = (expected_so_far - current) / expected_so_far
                # Never drop below 5% of the base weight.
                adjustment = max(0.05, 1.0 + deficit_ratio * strength)
            else:
                adjustment = 1.0

            weights.append(player['perc_selection'] * adjustment)
        return np.array(weights)

    def pick_player(candidates, selections, progress, strength, rng):
        """Pick one candidate at random by weight; None if no pick is possible."""
        if not candidates:
            return None
        weights = selection_weights(candidates, selections, progress, strength)
        total = weights.sum()
        if not total > 0:
            return None
        # Choose an index rather than an element so NumPy does not have to wrap
        # the player dicts in an object array for every single draw.
        return candidates[rng.choice(len(candidates), p=weights / total)]

    best_teams = None
    best_accuracy = float('inf')
    best_selections = None

    print(f"Generating {num_teams} teams with {max_iterations} optimization iterations...")

    for iteration in range(max_iterations):
        print(f"\nIteration {iteration + 1}/{max_iterations}")

        # Track actual selections
        actual_selections = defaultdict(int)
        teams = []

        # Dedicated generator seeded exactly like the former global seed, so the
        # teams are unchanged while np.random's global state stays untouched.
        rng = np.random.RandomState(42 + iteration * 100)

        for team_id in range(1, num_teams + 1):
            if team_id % 5000 == 0:
                print(f"  Generated {team_id} teams...")

            progress = team_id / num_teams
            team_players = []
            used_codes = set()

            # Phase 1: Mandatory role selections (ensure at least 1 from each role)
            for role_players in players_by_role.values():
                candidates = [p for p in role_players if p['player_code'] not in used_codes]
                chosen = pick_player(candidates, actual_selections, progress, 3.0, rng)
                if chosen is not None:
                    team_players.append(chosen)
                    used_codes.add(chosen['player_code'])
                    actual_selections[chosen['player_code']] += 1

            # Phase 2: Fill remaining spots
            for _ in range(team_size - len(team_players)):
                candidates = [p for p in all_players if p['player_code'] not in used_codes]
                chosen = pick_player(candidates, actual_selections, progress, 2.5, rng)
                if chosen is not None:
                    team_players.append(chosen)
                    used_codes.add(chosen['player_code'])
                    actual_selections[chosen['player_code']] += 1

            teams.append({
                'team_id': team_id,
                'players': team_players
            })

        # Calculate accuracy for this iteration: mean relative deviation over
        # all players (players with an expected count of 0 contribute 0).
        total_error = 0
        for player in all_players:
            expected = expected_selections[player['player_code']]
            actual = actual_selections[player['player_code']]
            if expected > 0:
                total_error += abs((actual - expected) / expected)

        mean_absolute_error = total_error / len(all_players)
        print(f"  Mean Absolute Error: {mean_absolute_error:.4f}")

        if mean_absolute_error < best_accuracy:
            best_accuracy = mean_absolute_error
            best_teams = teams
            best_selections = actual_selections
            print(f"  *** New best accuracy achieved! ***")

    print(f"\nOptimization complete. Best accuracy: {best_accuracy:.4f}")
    return best_teams, best_selections

def create_team_dataframe(teams, match_code=10001):
    """Convert teams to the required CSV format

    Parameters:
    - teams: iterable of {'team_id': ..., 'players': [player dict, ...]}; every
      player dict must provide 'player_code', 'player_name', 'role', 'team'
      and 'perc_selection'
    - match_code: value written to the 'match_code' column of every row

    Returns:
    - DataFrame with one row per (team, player) pair and the columns
      match_code, player_code, player_name, role, team, perc_selection, team_id.
      The columns are present even when ``teams`` is empty, so downstream code
      that indexes them (and the CSV header) keeps working.
    """
    columns = ['match_code', 'player_code', 'player_name', 'role', 'team',
               'perc_selection', 'team_id']

    rows = [
        {
            'match_code': match_code,
            'player_code': player['player_code'],
            'player_name': player['player_name'],
            'role': player['role'],
            'team': player['team'],
            'perc_selection': player['perc_selection'],
            'team_id': team['team_id'],
        }
        for team in teams
        for player in team['players']
    ]

    # Passing the schema explicitly only matters for the empty case; for
    # non-empty input it matches the key order of the row dicts.
    return pd.DataFrame(rows, columns=columns)

def evaluate_team_accuracy(team_df):
    """Evaluate the accuracy of team generation against expected selection probabilities

    Parameters:
    - team_df: DataFrame with one row per (team, player) pair and the columns
      match_code, player_code, player_name, role, team, perc_selection, team_id

    Returns:
    - DataFrame sorted by player_code with, per player, the expected team count
      (``perc_selection * number of teams``, rounded), the actual team count,
      the actual selection share and ``perc_error``, the relative deviation of
      actual from expected in percent. ``perc_error`` is NaN when the expected
      count rounds to 0, because the relative error is undefined there.

    Only players that occur in ``team_df`` are evaluated; a player who was never
    selected has no row here and therefore does not appear in the result.
    A summary of the KPIs is printed as a side effect.
    """
    print("Evaluating Fantasy Team Accuracy...")
    print(f"teamdf shape: {team_df.shape}")

    total_teams = team_df['team_id'].nunique()
    total_players = team_df['player_code'].nunique()

    print(f"Total unique teams: {total_teams}")
    print(f"Total unique players: {total_players}")

    # Check role composition (4 = number of roles a complete team must cover)
    roles_per_team = team_df.groupby('team_id')['role'].nunique()
    missing_role_teams = (roles_per_team < 4).sum()
    print(f"Teams missing at least one role: {missing_role_teams}")

    # Get player reference data (first occurrence of every player)
    player_ref = team_df.drop_duplicates(subset=['player_code'])[
        ['match_code', 'player_code', 'player_name', 'role', 'team', 'perc_selection']
    ]

    # Calculate actual team counts per player
    team_counts = team_df.groupby('player_code')['team_id'].nunique().reset_index()
    team_counts.columns = ['player_code', 'actual_team_count']

    # Merge with player reference data
    merged = player_ref.merge(team_counts, on='player_code')

    # Calculate expected team count and accuracy metrics
    merged['expected_team_count'] = (merged['perc_selection'] * total_teams).round(0).astype(int)
    # A zero expected count would turn the relative error into +/-inf and with
    # it the mean and standard deviation below; treat it as undefined instead.
    nonzero_expected = merged['expected_team_count'].where(merged['expected_team_count'] != 0)
    merged['perc_error'] = ((merged['actual_team_count'] - merged['expected_team_count']) /
                           nonzero_expected * 100).round(2)
    merged['actual_perc_selection'] = (merged['actual_team_count'] / total_teams).round(4)

    # Create accuracy summary
    accuracy_df = merged[['player_code', 'player_name', 'role', 'team', 'perc_selection',
                         'expected_team_count', 'actual_team_count', 'actual_perc_selection', 'perc_error']].copy()
    accuracy_df = accuracy_df.sort_values('player_code')

    # Calculate accuracy KPIs (NaN errors never count as "within 5%")
    within_5 = accuracy_df[accuracy_df['perc_error'].abs() <= 5]

    print("\nAccuracy KPIs:")
    print(f"Players within ±5% relative error: {within_5.shape[0]}/{accuracy_df.shape[0]}")
    print(f"Minimum error: {accuracy_df['perc_error'].min():.2f}%")
    print(f"Maximum error: {accuracy_df['perc_error'].max():.2f}%")
    print(f"Mean absolute error: {accuracy_df['perc_error'].abs().mean():.2f}%")
    print(f"Standard deviation of errors: {accuracy_df['perc_error'].std():.2f}%")

    undefined_errors = int(accuracy_df['perc_error'].isna().sum())
    if undefined_errors:
        print(f"Players with undefined error (expected team count of 0): {undefined_errors}")

    return accuracy_df

def main():
    """Main execution function

    Loads the player data from the default CSV, generates the teams, writes
    them to 'teamdf.csv', evaluates the selection accuracy, writes the result
    to 'accuracy_summary.csv' and prints a short summary. The final summary
    reports the role-composition status as measured on the generated teams.
    """
    print("FANTASY TEAM SIMULATION SYSTEM")
    print("=" * 50)

    # Load player data
    print("Loading player data...")
    players_by_role, df = load_player_data()

    print(f"Loaded {len(df)} players:")
    for role, players in players_by_role.items():
        print(f"  {role}: {len(players)} players")

    # Generate teams (the per-player selection counts are not needed here)
    teams, _ = generate_teams_optimized(players_by_role)

    # Create DataFrame
    print("\nCreating team DataFrame...")
    team_df = create_team_dataframe(teams)

    # Save to CSV
    team_df.to_csv('teamdf.csv', index=False)
    print(f"Saved {len(team_df)} team records to 'teamdf.csv'")

    # Evaluate accuracy
    print("\nEvaluating accuracy...")
    accuracy_df = evaluate_team_accuracy(team_df)

    # Save accuracy results
    accuracy_df.to_csv('accuracy_summary.csv', index=False)
    print("Saved accuracy results to 'accuracy_summary.csv'")

    # Check the role constraint instead of asserting it blindly: every team
    # must contain each role for which at least one player was loaded.
    required_roles = sum(1 for players in players_by_role.values() if players)
    roles_per_team = team_df.groupby('team_id')['role'].nunique()
    incomplete_teams = int((roles_per_team < required_roles).sum())

    print("\nPROCESS COMPLETE!")
    print(f"Generated {team_df['team_id'].nunique()} teams with {team_df['player_code'].nunique()} unique players")
    if incomplete_teams == 0:
        print(f"All teams satisfy role composition constraints")
    else:
        print(f"WARNING: {incomplete_teams} teams are missing at least one role")
    print(f"Mean absolute error: {accuracy_df['perc_error'].abs().mean():.2f}%")

if __name__ == "__main__":
    main()

FANTASY TEAM SIMULATION SYSTEM
Loading player data...
Loaded 22 players:
  WK: 2 players
  Batsman: 8 players
  Bowler: 6 players
  Allrounder: 6 players
Generating 20000 teams with 3 optimization iterations...

Iteration 1/3
  Generated 5000 teams...
  Generated 10000 teams...
  Generated 15000 teams...
  Generated 20000 teams...
  Mean Absolute Error: 0.1393
  *** New best accuracy achieved! ***

Iteration 2/3
  Generated 5000 teams...
  Generated 10000 teams...
  Generated 15000 teams...
  Generated 20000 teams...
  Mean Absolute Error: 0.1406

Iteration 3/3
  Generated 5000 teams...
  Generated 10000 teams...
  Generated 15000 teams...
  Generated 20000 teams...
  Mean Absolute Error: 0.1418

Optimization complete. Best accuracy: 0.1393

Creating team DataFrame...
Saved 220000 team records to 'teamdf.csv'

Evaluating accuracy...
Evaluating Fantasy Team Accuracy...
teamdf shape: (220000, 7)
Total unique teams: 20000
Total unique players: 22
Teams missing at least one role: 0

Accura

In [None]:
%pip install -U kaleido
%pip install -U plotly



Kaleido needs Google Chrome, but Colab doesn’t come with Chrome pre-installed in the way Kaleido expects.

In [None]:
# Install a Chromium build for Kaleido to drive when exporting static images.
!apt-get install -y chromium-browser
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install -U kaleido
!kaleido_get_chrome

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  apparmor libfuse3-3 libudev1 snapd squashfs-tools systemd-hwe-hwdb udev
Suggested packages:
  apparmor-profiles-extra apparmor-utils fuse3 zenity | kdialog
The following NEW packages will be installed:
  apparmor chromium-browser libfuse3-3 snapd squashfs-tools systemd-hwe-hwdb
  udev
The following packages will be upgraded:
  libudev1
1 upgraded, 7 newly installed, 0 to remove and 34 not upgraded.
Need to get 30.3 MB of archives.
After this operation, 123 MB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 apparmor amd64 3.0.4-2ubuntu2.4 [598 kB]
Get:2 http://archive.ubuntu.com/ubuntu jammy/main amd64 squashfs-tools amd64 1:4.5-3build1 [159 kB]
Get:3 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 libudev1 amd64 249.11-0ubuntu3.16 [76.7 kB]
Get:4 http://archive.ubuntu.com/

Creating a comprehensive accuracy analysis chart to visualize player selection errors across roles in the fantasy team simulation.


In [None]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio

# Static-export defaults. Newer plotly releases expose them on `pio.defaults`
# (setting them on `pio.kaleido.scope` is deprecated there); fall back to the
# old location when `pio.defaults` does not exist.
export_defaults = pio.defaults if hasattr(pio, "defaults") else pio.kaleido.scope
export_defaults.default_format = "png"
export_defaults.default_width = 700
export_defaults.default_height = 500

# Accuracy summary embedded inline: one list entry per player, aligned by position.
data = {
    "player_code": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22],
    "player_name": ["Player_20", "Player_2", "Player_7", "Player_12", "Player_10", "Player_8", "Player_22", "Player_11", "Player_21", "Player_3", "Player_18", "Player_4", "Player_9", "Player_14", "Player_17", "Player_19", "Player_1", "Player_13", "Player_5", "Player_16", "Player_6", "Player_15"],
    "role": ["WK", "WK", "Allrounder", "Allrounder", "Allrounder", "Allrounder", "Allrounder", "Allrounder", "Batsman", "Batsman", "Batsman", "Batsman", "Batsman", "Batsman", "Batsman", "Batsman", "Bowler", "Bowler", "Bowler", "Bowler", "Bowler", "Bowler"],
    "team": ["A", "A", "B", "A", "A", "A", "B", "B", "A", "A", "B", "B", "A", "A", "B", "B", "B", "A", "B", "B", "B", "A"],
    "perc_selection": [0.3957, 0.2131, 0.9191, 0.9522, 0.2145, 0.5682, 0.8919, 0.2709, 0.6935, 0.0283, 0.495, 0.659, 0.3618, 0.2536, 0.936, 0.3518, 0.6634, 0.1068, 0.8348, 0.306, 0.033, 0.3003],
    "expected_team_count": [7914, 4262, 18382, 19044, 4290, 11364, 17838, 5418, 13870, 566, 9900, 13180, 7236, 5072, 18720, 7036, 13268, 2136, 16696, 6120, 660, 6006],
    "actual_team_count": [13283, 7213, 17141, 17476, 4784, 11574, 16856, 6002, 13179, 637, 10532, 12714, 7894, 5609, 17406, 7684, 14135, 2450, 15522, 6970, 753, 6783],
    "actual_perc_selection": [0.6641, 0.3607, 0.8571, 0.8738, 0.2392, 0.5787, 0.8428, 0.3001, 0.659, 0.0319, 0.5266, 0.6357, 0.3947, 0.2805, 0.8703, 0.3842, 0.7068, 0.1225, 0.7761, 0.3485, 0.0377, 0.3392],
    "perc_error": [67.83, 69.23, -6.75, -8.23, 11.52, 1.85, -5.51, 10.78, -4.98, 12.54, 6.38, -3.54, 9.09, 10.59, -7.02, 9.21, 6.53, 14.7, -7.03, 13.89, 14.09, 12.94]
}

df = pd.DataFrame(data)

# Define brand colors for each role
role_colors = {
    'WK': '#1FB8CD',        # Strong cyan
    'Batsman': '#DB4545',   # Bright red
    'Allrounder': '#2E8B57', # Sea green
    'Bowler': '#5D878F'     # Cyan
}

# Create shorter player names for better readability
# regex=False: 'Player_' is a literal prefix, not a pattern.
df['short_name'] = df['player_name'].str.replace('Player_', 'P', regex=False)

# Identify players within ±5% error range
df['within_bounds'] = (df['perc_error'] >= -5) & (df['perc_error'] <= 5)

# Create the bar chart
fig = go.Figure()

# Add bars for each role: players within the ±5% band are drawn fully opaque,
# the others at reduced opacity. Both traces of a role share one legend group.
for role in ['WK', 'Batsman', 'Allrounder', 'Bowler']:
    role_data = df[df['role'] == role]

    # The legend entry goes to the first trace that actually exists for the
    # role; otherwise a role without any in-bounds player would be missing
    # from the legend altogether.
    legend_pending = True
    for in_bounds, opacity in ((True, 1.0), (False, 0.6)):
        subset = role_data[role_data['within_bounds'] == in_bounds]
        if subset.empty:
            continue
        fig.add_trace(go.Bar(
            x=subset['short_name'],
            y=subset['perc_error'],
            name=role,
            marker_color=role_colors[role],
            marker_opacity=opacity,
            cliponaxis=False,
            showlegend=legend_pending,
            legendgroup=role
        ))
        legend_pending = False

# Add horizontal reference lines at ±5% error bounds with higher visibility
fig.add_hline(y=5, line_dash="dash", line_color="black", line_width=2, opacity=0.8)
fig.add_hline(y=-5, line_dash="dash", line_color="black", line_width=2, opacity=0.8)

# Update layout
fig.update_layout(
    title="Fantasy Accuracy Analysis",
    xaxis_title="Player",
    yaxis_title="Error (%)",
    legend=dict(orientation='h', yanchor='bottom', y=1.05, xanchor='center', x=0.5),
    showlegend=True
)

# Update axes for better readability
fig.update_xaxes(tickangle=45, tickfont=dict(size=10))
# Fixed range chosen for the embedded data (errors span -8.23 .. 69.23).
fig.update_yaxes(range=[-15, 75])

# Save the chart
fig.write_image("fantasy_accuracy_chart.png")



Use of plotly.io.kaleido.scope.default_format is deprecated and support will be removed after September 2025.
Please use plotly.io.defaults.default_format instead.




Use of plotly.io.kaleido.scope.default_width is deprecated and support will be removed after September 2025.
Please use plotly.io.defaults.default_width instead.




Use of plotly.io.kaleido.scope.default_height is deprecated and support will be removed after September 2025.
Please use plotly.io.defaults.default_height instead.


