In [7]:
import math
import random
from collections import Counter

import pandas as pd

# Keep Player and Pitcher class definitions (simplified)
class Player:
    """A batter whose plate-appearance probabilities are derived from AVG/OBP/SLG.

    Attributes set on every instance:
        name: Display name; suffixed with " (default_stats)" when the supplied
            stats could not be used.
        hit_chance: Probability of a hit (the batting average).
        on_base_chance: On-base percentage, stored as given.
        extra_base_chance: Very rough extra-base-hit rate, (SLG - AVG) / 3.
        strikeout_chance: Fixed generic strikeout rate.
        walk_chance: Walk rate approximated as OBP - AVG.
    """

    def __init__(self, name, avg, obp, slg):
        """Build a batter from raw stats.

        Args:
            name: Player name.
            avg: Batting average (number or numeric string).
            obp: On-base percentage (number or numeric string).
            slg: Slugging percentage (number or numeric string).

        If any stat is missing (None), non-numeric, NaN or infinite, the
        player falls back to league-generic defaults and the name is tagged.
        """
        self.name = name
        # For simplicity, only very basic probabilities are derived here.
        # A real simulation would use K%, BB%, HR rate, etc.
        try:
            avg, obp, slg = float(avg), float(obp), float(slg)
            # float() happily accepts NaN/inf (e.g. empty CSV cells read by
            # pandas), which would otherwise leak into the probabilities.
            if not (math.isfinite(avg) and math.isfinite(obp) and math.isfinite(slg)):
                raise ValueError("batting stats must be finite numbers")
        except (TypeError, ValueError):  # None, empty, non-numeric or NaN data
            self.name = f"{name} (default_stats)"  # Mark as using default values
            self.hit_chance = 0.250
            self.on_base_chance = 0.320
            self.extra_base_chance = 0.100
            self.strikeout_chance = 0.22
            self.walk_chance = 0.08
            return

        self.hit_chance = avg
        self.on_base_chance = obp
        # Very rough estimate of extra-base hit rate; 0.05 when SLG <= AVG.
        self.extra_base_chance = (slg - avg) / 3 if slg > avg else 0.05
        # Generic strikeout rate (could be derived from more detailed data).
        self.strikeout_chance = 0.20
        # Walks approximated by the OBP/AVG gap; 0.08 when OBP <= AVG.
        self.walk_chance = (obp - avg) if obp > avg else 0.08

    def __str__(self):
        """Return the (possibly tagged) player name."""
        return self.name

class Pitcher:
    """A pitcher reduced to two ERA-derived modifiers.

    Attributes set on every instance:
        name: Display name; suffixed with " (default_stats)" when the ERA
            could not be used.
        control_factor: (5 - min(ERA, 5)) / 5, i.e. 1.0 at ERA 0 and 0.0 at
            ERA 5 or above.
        strikeout_bonus: (5 - min(ERA, 5)) * 0.02, added to the batter's
            strikeout chance.

    NOTE(review): control_factor grows as ERA drops, and the at-bat code in
    this file multiplies the batter's hit chance by it, so a lower ERA makes
    hits MORE likely. That looks inverted relative to the stated intent
    ("harder to hit") -- confirm the intended formula before relying on it.
    """

    def __init__(self, name, era):
        """Build a pitcher from a raw ERA.

        Args:
            name: Pitcher name.
            era: Earned run average (number or numeric string).

        If the ERA is missing (None), non-numeric, NaN or infinite, default
        modifiers are used and the name is tagged.
        """
        self.name = name
        try:
            era = float(era)
            # NaN/inf pass float() (e.g. empty CSV cells read by pandas) and
            # would otherwise turn both modifiers into NaN.
            if not math.isfinite(era):
                raise ValueError("ERA must be a finite number")
        except (TypeError, ValueError):  # None, empty, non-numeric or NaN data
            self.name = f"{name} (default_stats)"
            self.control_factor = 0.9  # Default control factor
            self.strikeout_bonus = 0.01
            return

        # ERA is capped at 5.0 so both modifiers bottom out at zero.
        era_margin = 5.0 - min(era, 5.0)
        self.control_factor = era_margin / 5.0
        # Low-ERA pitchers get a small strikeout bonus.
        self.strikeout_bonus = era_margin * 0.02

    def __str__(self):
        """Return the (possibly tagged) pitcher name."""
        return self.name

class GameState:
    """Mutable scoreboard for one simulated game.

    Tracks the inning, which half is being played, outs, base occupancy
    (a three-slot list for 1st/2nd/3rd, 1 = occupied), both scores, and each
    team's position in its batting order.
    """

    def __init__(self):
        # Game clock
        self.inning = 1
        self.top_of_inning = True
        self.outs = 0
        # Base occupancy: index 0 = 1st, 1 = 2nd, 2 = 3rd
        self.bases = [0, 0, 0]
        # Scoreboard
        self.home_score = 0
        self.away_score = 0
        # Next batter slot for each team
        self.current_batter_index_home = 0
        self.current_batter_index_away = 0

    def record_out(self):
        """Add one out to the current half-inning."""
        self.outs += 1

    def _advance_on_walk(self):
        """Apply forced advancement for a walk/HBP; return runs scored."""
        bases = self.bases
        forced_in = 0
        if bases[0] == 1:
            if bases[1] != 1:
                # Only the runner on 1st is forced: he moves up to 2nd.
                bases[1] = 1
            elif bases[2] == 1:
                # Bases loaded: runner from 3rd is forced home, bases stay full.
                forced_in = 1
            else:
                # 1st and 2nd occupied: the lead runner is pushed to 3rd.
                bases[2] = 1
        # The batter always takes 1st.
        bases[0] = 1
        return forced_in

    def _advance_on_hit(self, hit_type):
        """Apply the simplified advancement for a non-HR hit; return runs scored."""
        bases = self.bases
        had_first = bases[0] == 1
        had_second = bases[1] == 1
        had_third = bases[2] == 1
        crossed = 0

        # Runner on 3rd scores on any hit.
        if had_third:
            crossed += 1
            bases[2] = 0

        # Runner on 2nd scores on a double or better, else takes 3rd on a single.
        if had_second:
            if hit_type >= 2:
                crossed += 1
                bases[1] = 0
            elif hit_type == 1:
                bases[2] = 1
                bases[1] = 0

        # Runner on 1st scores on a triple or better, goes to 3rd on a double,
        # to 2nd on a single.
        if had_first:
            if hit_type >= 3:
                crossed += 1
                bases[0] = 0
            elif hit_type == 2:
                bases[2] = 1
                bases[0] = 0
            elif hit_type == 1:
                bases[1] = 1
                bases[0] = 0

        # Finally the batter occupies the base matching the hit.
        if hit_type > 0:
            bases[hit_type - 1] = 1
        return crossed

    def advance_runners(self, hit_type):
        """Move runners for a plate-appearance result and credit the runs.

        hit_type: 0 = walk/HBP, 1 = single, 2 = double, 3 = triple,
        4 = home run. Runs are added to the away score during the top half
        and to the home score otherwise. Returns the runs scored on the play.
        """
        if hit_type == 4:
            # Home run: everyone on base plus the batter scores.
            runs = sum(self.bases) + 1
            self.bases = [0, 0, 0]
        elif hit_type == 0:
            runs = self._advance_on_walk()
        else:
            runs = self._advance_on_hit(hit_type)

        if self.top_of_inning:
            self.away_score += runs
        else:
            self.home_score += runs
        return runs

    def new_inning_half(self):
        """Clear outs and bases, then flip to the next half-inning."""
        self.outs = 0
        self.bases = [0, 0, 0]
        if self.top_of_inning:
            # Top half finished: stay in the same inning, switch to bottom.
            self.top_of_inning = False
            return
        # Bottom half finished: move on to the top of the next inning.
        self.inning += 1
        self.top_of_inning = True

def simulate_at_bat_no_print(batter, pitcher, game_state):
    """Simulate a single at-bat without printing details.

    Args:
        batter: Object exposing hit_chance, walk_chance, strikeout_chance and
            extra_base_chance.
        pitcher: Object exposing control_factor and strikeout_bonus.
        game_state: Object exposing record_out() and advance_runners(hit_type).

    The outcome is applied to game_state: an out is recorded for strikeouts
    and other outs; walks call advance_runners(0); hits call
    advance_runners with 1-4 (single, double, triple, home run). One random
    number picks the outcome, and a second one picks the hit type for hits.
    """
    control = pitcher.control_factor
    adj_hit_chance = batter.hit_chance * control
    # A non-positive control factor is replaced by 1 to prevent division by zero.
    adj_walk_chance = batter.walk_chance / (control if control > 0 else 1)
    adj_strikeout_chance = batter.strikeout_chance + pitcher.strikeout_bonus

    # Clamp each probability into a reasonable range.
    adj_hit_chance = max(0.01, min(adj_hit_chance, 0.9))
    adj_walk_chance = max(0.01, min(adj_walk_chance, 0.5))
    adj_strikeout_chance = max(0.05, min(adj_strikeout_chance, 0.7))

    # Remaining probability mass goes to "other outs", with a 5% floor.
    total_prob_event = adj_hit_chance + adj_walk_chance + adj_strikeout_chance
    other_out_chance = max(0.05, 1.0 - total_prob_event)

    # Normalize so the outcomes sum to 1. The clamps above keep every term
    # strictly positive, so the total can never be zero.
    total_prob_normalized = (
        adj_hit_chance + adj_walk_chance + adj_strikeout_chance + other_out_chance
    )
    adj_hit_chance /= total_prob_normalized
    adj_walk_chance /= total_prob_normalized
    adj_strikeout_chance /= total_prob_normalized

    outcome_rand = random.random()
    walk_cutoff = adj_strikeout_chance + adj_walk_chance
    hit_cutoff = adj_strikeout_chance + adj_walk_chance + adj_hit_chance

    if outcome_rand < adj_strikeout_chance:
        game_state.record_out()
    elif outcome_rand < walk_cutoff:
        game_state.advance_runners(0)  # 0 represents a walk
    elif outcome_rand < hit_cutoff:
        hit_rand = random.random()
        # Share of hits that go for extra bases; falls back to 5% when the
        # batter has no usable hit chance. Simplified: these splits should
        # be based on more detailed player data.
        if batter.hit_chance > 0:
            extra_base_share = batter.extra_base_chance / batter.hit_chance
        else:
            extra_base_share = 0.05

        if hit_rand < extra_base_share * 0.2:  # Small probability for home run
            game_state.advance_runners(4)
        elif hit_rand < extra_base_share * 0.5:  # Small probability for triple
            game_state.advance_runners(3)
        elif hit_rand < extra_base_share:  # Small probability for double
            game_state.advance_runners(2)
        else:
            game_state.advance_runners(1)  # Single
    else:  # Other outs
        game_state.record_out()


def simulate_inning_half_no_print(batting_team_lineup, pitching_team_pitcher, game_state, current_batter_index):
    """Play one half-inning silently and report who bats next.

    Batters are taken from the lineup in order, wrapping around, starting at
    current_batter_index, until game_state.outs reaches 3. An empty lineup
    ends the half immediately (outs forced to 3) and hands back the index
    unchanged. Otherwise the returned value is the lineup slot of the team's
    next batter.
    """
    if not batting_team_lineup:
        # Nobody can bat: close out the half right away.
        game_state.outs = 3
        return current_batter_index

    lineup_size = len(batting_team_lineup)
    slot = current_batter_index
    while game_state.outs < 3:
        # Modulo keeps the slot inside the lineup as the order wraps around.
        hitter = batting_team_lineup[slot % lineup_size]
        simulate_at_bat_no_print(hitter, pitching_team_pitcher, game_state)
        slot += 1
    return slot % lineup_size


def simulate_game_no_print(home_team_name, away_team_name, home_lineup, away_lineup, home_pitcher, away_pitcher, num_innings=9, max_extra_innings=0):
    """Simulate a complete game without printing the process.

    Args:
        home_team_name: Name echoed back for the home team.
        away_team_name: Name echoed back for the away team.
        home_lineup: Batting order of the home team.
        away_lineup: Batting order of the away team.
        home_pitcher: Pitcher facing the away lineup.
        away_pitcher: Pitcher facing the home lineup.
        num_innings: Number of regulation innings.
        max_extra_innings: Extra innings to play while the score is tied
            after regulation. The default of 0 means a game tied after
            num_innings is returned as a tie.

    Returns:
        (home_score, away_score, home_team_name, away_team_name). When a
        lineup or pitcher is missing the scores are (-1, -1) to mark the
        game as invalid.

    From the last regulation inning onward, the bottom half is skipped when
    the home team already leads after the top half. A bottom half that is
    played always runs to three outs (no walk-off cut-off).
    """
    # If any team's lineup or pitcher is missing, the game cannot proceed.
    if not home_lineup or not away_lineup or not home_pitcher or not away_pitcher:
        return -1, -1, home_team_name, away_team_name  # Indicate invalid game

    game = GameState()
    last_inning = num_innings + max(0, max_extra_innings)

    while game.inning <= last_inning:
        # True for the last regulation inning and every extra inning.
        deciding_inning = game.inning >= num_innings

        # Top half: away team bats against the home pitcher.
        game.top_of_inning = True
        game.outs = 0
        game.bases = [0, 0, 0]
        game.current_batter_index_away = simulate_inning_half_no_print(
            away_lineup, home_pitcher, game, game.current_batter_index_away
        )

        # Home team already ahead: it does not need its last at-bat.
        if deciding_inning and game.home_score > game.away_score:
            break

        # Bottom half: home team bats against the away pitcher.
        game.top_of_inning = False
        game.outs = 0
        game.bases = [0, 0, 0]
        game.current_batter_index_home = simulate_inning_half_no_print(
            home_lineup, away_pitcher, game, game.current_batter_index_home
        )

        # A completed deciding inning with a leader ends the game.
        if deciding_inning and game.home_score != game.away_score:
            break

        game.inning += 1

    return game.home_score, game.away_score, home_team_name, away_team_name

def load_batting_data(filepath, num_players=9):
    """Load a starting lineup from a batting CSV.

    The CSV must provide 'Player', 'BA', 'OBP' and 'SLG' columns. Rows whose
    player name contains "Team Totals" are dropped, and the first
    num_players remaining rows become Player objects. When fewer rows are
    available the existing rows are reused (the table is doubled until it is
    long enough).

    A row whose stats are missing (NaN/empty) or non-numeric gets the default
    line 0.250 / 0.320 / 0.400 for all three stats.

    Returns:
        A list of Player objects, or an empty list when the file is missing,
        empty, lacks a required column, or has no player rows.
    """
    default_line = (0.250, 0.320, 0.400)  # BA, OBP, SLG fallbacks
    try:
        df = pd.read_csv(filepath)
        # Filter out 'Team Totals' rows
        df = df[~df['Player'].str.contains("Team Totals", na=False)]
        if df.empty:
            return []
        # Not enough players: reuse existing rows (simplified handling)
        while len(df) < num_players:
            df = pd.concat([df, df], ignore_index=True)

        batters = []
        # Select first num_players players as starting lineup
        for _, row in df.head(num_players).iterrows():
            try:
                stat_line = (float(row['BA']), float(row['OBP']), float(row['SLG']))
            except (TypeError, ValueError):
                stat_line = default_line
            else:
                # Empty cells arrive as NaN, which float() accepts silently.
                if any(pd.isna(value) for value in stat_line):
                    stat_line = default_line
            batters.append(Player(row['Player'], *stat_line))
        return batters
    except FileNotFoundError:
        print(f"Error: File not found {filepath}")
        return []
    except KeyError as e:
        print(f"Error: CSV file {filepath} is missing column: {e}")
        return []
    except pd.errors.EmptyDataError:
        print(f"Error: CSV file {filepath} contains no data")
        return []


def load_pitching_data(filepath):
    """Load the starting pitcher from a pitching CSV.

    The CSV must provide 'Player' and 'ERA' columns. Rows whose player name
    contains "Team Totals" are dropped and the first remaining row is used
    as the starter (simplified handling). A missing (NaN/empty) or
    non-numeric ERA is replaced by 4.00.

    Returns:
        A Pitcher object, or None when the file is missing, empty, lacks a
        required column, or has no pitcher rows.
    """
    default_era = 4.00
    try:
        df = pd.read_csv(filepath)
        df = df[~df['Player'].str.contains("Team Totals", na=False)]
        if df.empty:
            return None
        # Select first pitcher as the starter (simplified handling)
        pitcher_row = df.iloc[0]
        try:
            era = float(pitcher_row['ERA'])
        except (TypeError, ValueError):
            era = default_era
        else:
            # Empty cells arrive as NaN, which float() accepts silently.
            if pd.isna(era):
                era = default_era
        return Pitcher(pitcher_row['Player'], era)
    except FileNotFoundError:
        print(f"Error: File not found {filepath}")
        return None
    except KeyError as e:
        print(f"Error: CSV file {filepath} is missing column: {e}")
        return None
    except pd.errors.EmptyDataError:
        print(f"Error: CSV file {filepath} contains no data")
        return None


# --- Monte Carlo run: simulate `num_simulations` games (currently 10,000) ---
if __name__ == "__main__":
    num_simulations = 10000
    home_team_wins = 0
    away_team_wins = 0

    # Data file names; they are resolved relative to the directory the script
    # is run from.
    # NOTE(review): base_path is defined but never joined onto the file names
    # below -- confirm whether the CSVs are meant to be read from it.
    base_path = "./Pitch-by-Pitch-Pro-e7f394f922d3273f744deeec630a25dce5b00571/data/clean/"
    cubs_batting_file = "cubs_standard_batting_clean.csv"
    cubs_pitching_file = "cubs_standard_pitching_clean.csv"
    sox_batting_file = "whitesox_standard_batting_clean.csv"
    sox_pitching_file = "whitesox_standard_pitching_clean.csv"

    # Load data once
    cubs_batters_roster = load_batting_data(cubs_batting_file)
    cubs_pitcher_staff = load_pitching_data(cubs_pitching_file)
    sox_batters_roster = load_batting_data(sox_batting_file)
    sox_pitcher_staff = load_pitching_data(sox_pitching_file)

    home_team_name_const = "Chicago White Sox"
    away_team_name_const = "Chicago Cubs"

    if not cubs_batters_roster or not sox_batters_roster or not cubs_pitcher_staff or not sox_pitcher_staff:
        print("Data loading failed, cannot run simulation. Please check file paths and contents.")
    else:
        # The same nine batters and the same starter are used in every game,
        # so they are selected once up front. A more complex simulation would
        # consider rotations, injuries, etc.
        current_cubs_lineup = cubs_batters_roster[:9]
        current_sox_lineup = sox_batters_roster[:9]
        current_cubs_pitcher = cubs_pitcher_staff
        current_sox_pitcher = sox_pitcher_staff

        print(f"Running {num_simulations} simulations...")
        for i in range(num_simulations):
            if (i + 1) % 100 == 0:  # Print progress every 100 simulations
                print(f"Completed {i + 1}/{num_simulations} simulations...")

            home_s, away_s, _, _ = simulate_game_no_print(
                home_team_name=home_team_name_const,
                away_team_name=away_team_name_const,
                home_lineup=current_sox_lineup,
                away_lineup=current_cubs_lineup,
                home_pitcher=current_sox_pitcher,
                away_pitcher=current_cubs_pitcher
            )

            if home_s == -1 and away_s == -1:  # Invalid game marker
                continue  # Skip this invalid simulation

            if home_s > away_s:
                home_team_wins += 1
            elif away_s > home_s:
                away_team_wins += 1
            # Tied games are counted for neither team.

        print("\n--- Simulation Results Prediction ---")
        print(f"Total simulations: {num_simulations}")
        if num_simulations > 0:
            # Win rates are computed over decided games only.
            valid_simulations = home_team_wins + away_team_wins
            if valid_simulations > 0:  # Ensure at least one valid game
                print(f"{home_team_name_const} wins: {home_team_wins} (Win rate: {(home_team_wins/valid_simulations)*100:.2f}%)")
                print(f"{away_team_name_const} wins: {away_team_wins} (Win rate: {(away_team_wins/valid_simulations)*100:.2f}%)")
                # Everything that is not a decided game: ties and skipped games.
                ties = num_simulations - valid_simulations
                if ties > 0:
                     print(f"Ties (still tied after 9 innings or incomplete due to data issues): {ties} ({(ties/num_simulations)*100:.2f}%)")
            else:
                print("No valid game results to calculate win rates.")
        else:
            print("No simulations were conducted.")

Running 10000 simulations...
Completed 100/10000 simulations...
Completed 200/10000 simulations...
Completed 300/10000 simulations...
Completed 400/10000 simulations...
Completed 500/10000 simulations...
Completed 600/10000 simulations...
Completed 700/10000 simulations...
Completed 800/10000 simulations...
Completed 900/10000 simulations...
Completed 1000/10000 simulations...
Completed 1100/10000 simulations...
Completed 1200/10000 simulations...
Completed 1300/10000 simulations...
Completed 1400/10000 simulations...
Completed 1500/10000 simulations...
Completed 1600/10000 simulations...
Completed 1700/10000 simulations...
Completed 1800/10000 simulations...
Completed 1900/10000 simulations...
Completed 2000/10000 simulations...
Completed 2100/10000 simulations...
Completed 2200/10000 simulations...
Completed 2300/10000 simulations...
Completed 2400/10000 simulations...
Completed 2500/10000 simulations...
Completed 2600/10000 simulations...
Completed 2700/10000 simulations...
Complete