In [5]:
import pandas as pd
import numpy as np
import random

In [9]:
# Read the batting statistics from the 'API Output-Batters.csv' file.
batter_data = pd.read_csv('API Output-Batters.csv')

# Singles are not a column of their own: they are what is left of the hit
# total after removing every extra-base hit.
batter_data["singles"] = (
    batter_data["hits"]
    - batter_data["doubles"]
    - batter_data["triples"]
    - batter_data["homeRuns"]
)

# Swap zero hit totals for NaN so the divisions below yield NaN for hitless
# players rather than dividing by zero.
batter_data["hits"] = batter_data["hits"].replace(0, np.nan)

# Express each hit type as a percentage of the player's total hits.
hit_type_columns = ["singles", "doubles", "triples", "homeRuns"]
pct_columns = [f"{hit_type}_pct" for hit_type in hit_type_columns]
for hit_type, pct_column in zip(hit_type_columns, pct_columns):
    batter_data[pct_column] = (batter_data[hit_type] / batter_data["hits"]) * 100

# Turn the NaN percentages of hitless players (and the NaN hit totals
# introduced above) back into 0.
# NOTE(review): this fills NaN in *every* column, not only the percentages --
# likely why some `age` values display as 0.0. Confirm that is intended.
batter_data = batter_data.fillna(0)

# Keep one decimal place on the percentages for readability.
batter_data[pct_columns] = batter_data[pct_columns].round(1)

batter_data['TeamName'].unique()
batter_data.columns



Index(['PlayerId', 'PlayerName', 'TeamName', 'Position', 'age', 'gamesPlayed',
       'groundOuts', 'airOuts', 'runs', 'doubles', 'triples', 'homeRuns',
       'strikeOuts', 'baseOnBalls', 'intentionalWalks', 'hits', 'hitByPitch',
       'avg', 'atBats', 'obp', 'slg', 'ops', 'caughtStealing', 'stolenBases',
       'stolenBasePercentage', 'caughtStealingPercentage',
       'groundIntoDoublePlay', 'numberOfPitches', 'plateAppearances',
       'totalBases', 'rbi', 'leftOnBase', 'sacBunts', 'sacFlies', 'babip',
       'groundOutsToAirouts', 'catchersInterference', 'atBatsPerHomeRun',
       'singles', 'singles_pct', 'doubles_pct', 'triples_pct', 'homeRuns_pct'],
      dtype='object')

In [12]:
# Hard-coded list of the 30 team names.
# NOTE(review): presumably copied from batter_data['TeamName'].unique() --
# confirm it stays in sync with the data.
team_names = [
    'Athletics',
    'Pittsburgh Pirates',
    'San Diego Padres',
    'Seattle Mariners',
    'San Francisco Giants',
    'St. Louis Cardinals',
    'Tampa Bay Rays',
    'Texas Rangers',
    'Toronto Blue Jays',
    'Minnesota Twins',
    'Philadelphia Phillies',
    'Atlanta Braves',
    'Chicago White Sox',
    'Miami Marlins',
    'New York Yankees',
    'Milwaukee Brewers',
    'Los Angeles Angels',
    'Arizona Diamondbacks',
    'Baltimore Orioles',
    'Boston Red Sox',
    'Chicago Cubs',
    'Cincinnati Reds',
    'Cleveland Guardians',
    'Colorado Rockies',
    'Detroit Tigers',
    'Houston Astros',
    'Kansas City Royals',
    'Los Angeles Dodgers',
    'Washington Nationals',
    'New York Mets',
]

# Split the batting table into one DataFrame per team, keyed by team name.
# Iterating a groupby yields (group name, group frame) pairs.
team_dfs = dict(iter(batter_data.groupby("TeamName")))


team_dfs["Milwaukee Brewers"]


Unnamed: 0,PlayerId,PlayerName,TeamName,Position,age,gamesPlayed,groundOuts,airOuts,runs,doubles,...,sacFlies,babip,groundOutsToAirouts,catchersInterference,atBatsPerHomeRun,singles,singles_pct,doubles_pct,triples_pct,homeRuns_pct
244,683734,Andrew Vaughn,Milwaukee Brewers,First Base,0.0,112.0,110.0,120.0,35.0,22.0,...,6.0,0.28,0.92,0.0,29.00,67.0,65.0,21.4,0.0,13.6
245,655316,Andruw Monasterio,Milwaukee Brewers,Shortstop,28.0,68.0,19.0,41.0,19.0,9.0,...,0.0,0.333,0.46,0.0,31.50,21.0,61.8,26.5,0.0,11.8
246,678011,Anthony Seigler,Milwaukee Brewers,Second Base,26.0,34.0,19.0,17.0,6.0,1.0,...,1.0,0.255,1.12,0.0,-.--,11.0,91.7,8.3,0.0,0.0
247,663368,Blake Perkins,Milwaukee Brewers,Outfielder,28.0,54.0,41.0,32.0,25.0,6.0,...,0.0,0.305,1.28,0.0,51.67,24.0,68.6,17.1,5.7,8.6
248,663604,Brandon Lockridge,Milwaukee Brewers,Outfielder,0.0,67.0,44.0,28.0,17.0,7.0,...,2.0,0.31,1.57,0.0,-.--,23.0,74.2,22.6,3.2,0.0
249,668930,Brice Turang,Milwaukee Brewers,Second Base,25.0,156.0,132.0,141.0,97.0,28.0,...,5.0,0.356,0.94,0.0,32.44,120.0,71.4,16.7,1.2,10.7
250,702332,Caleb Durbin,Milwaukee Brewers,Third Base,25.0,136.0,134.0,154.0,60.0,25.0,...,4.0,0.265,0.87,0.0,40.46,78.0,68.4,21.9,0.0,9.6
251,592885,Christian Yelich,Milwaukee Brewers,Outfielder,33.0,150.0,174.0,82.0,88.0,21.0,...,1.0,0.323,2.12,0.0,19.76,101.0,66.9,13.9,0.0,19.2
252,686555,Isaac Collins,Milwaukee Brewers,Outfielder,27.0,130.0,91.0,96.0,56.0,22.0,...,3.0,0.326,0.95,0.0,41.33,64.0,65.3,22.4,3.1,9.2
253,694192,Jackson Chourio,Milwaukee Brewers,Outfielder,21.0,131.0,129.0,158.0,88.0,35.0,...,6.0,0.308,0.82,0.0,26.14,88.0,59.5,23.6,2.7,14.2


In [8]:
# Read the pitching statistics from the 'API Output-Pitchers.csv' file.
pitcher_data = pd.read_csv('API Output-Pitchers.csv')

# Display the column labels to see which pitching stats are available.
pitcher_data.columns


Index(['PlayerId', 'PlayerName', 'TeamName', 'Position', 'age', 'gamesPlayed',
       'gamesStarted', 'groundOuts', 'airOuts', 'runs', 'doubles', 'triples',
       'homeRuns', 'strikeOuts', 'baseOnBalls', 'intentionalWalks', 'hits',
       'hitByPitch', 'avg', 'atBats', 'obp', 'slg', 'ops', 'caughtStealing',
       'stolenBases', 'stolenBasePercentage', 'caughtStealingPercentage',
       'groundIntoDoublePlay', 'numberOfPitches', 'era', 'inningsPitched',
       'wins', 'losses', 'saves', 'saveOpportunities', 'holds', 'blownSaves',
       'earnedRuns', 'whip', 'battersFaced', 'outs', 'gamesPitched',
       'completeGames', 'shutouts', 'strikes', 'strikePercentage',
       'hitBatsmen', 'balks', 'wildPitches', 'pickoffs', 'totalBases',
       'groundOutsToAirouts', 'winPercentage', 'pitchesPerInning',
       'gamesFinished', 'strikeoutWalkRatio', 'strikeoutsPer9Inn',
       'walksPer9Inn', 'hitsPer9Inn', 'runsScoredPer9', 'homeRunsPer9',
       'inheritedRunners', 'inheritedRunnersScored

In [25]:
# import pandas as pd
# import numpy as np
# from typing import Dict, List, Tuple
# from dataclasses import dataclass
# from collections import defaultdict

# @dataclass
# class GameState:
#     """Track the current state of the game"""
#     inning: int = 1
#     outs: int = 0
#     bases: List[bool] = None  # [1st, 2nd, 3rd]
#     home_score: int = 0
#     away_score: int = 0
    
#     def __post_init__(self):
#         if self.bases is None:
#             self.bases = [False, False, False]
    
#     def clear_bases(self):
#         self.bases = [False, False, False]
    
#     def advance_runners(self, bases_advanced: int):
#         """Advance runners and return runs scored"""
#         runs = 0
#         # Go backwards through bases to avoid double-counting
#         for i in range(2, -1, -1):
#             if self.bases[i]:
#                 new_base = i + bases_advanced
#                 if new_base >= 3:
#                     runs += 1
#                     self.bases[i] = False
#                 else:
#                     self.bases[new_base] = True
#                     self.bases[i] = False
#         return runs

# @dataclass
# class PlayerGameStats:
#     """Track individual player stats during simulation"""
#     player_name: str
#     at_bats: int = 0
#     hits: int = 0
#     singles: int = 0
#     doubles: int = 0
#     triples: int = 0
#     home_runs: int = 0
#     walks: int = 0
#     strikeouts: int = 0
#     rbis: int = 0
#     runs: int = 0

# class BaseballGameSimulator:
#     def __init__(self, pitcher_df: pd.DataFrame, batter_df: pd.DataFrame):
#         self.pitcher_df = pitcher_df
#         self.batter_df = batter_df
        
#     def get_pitcher_stats(self, pitcher_name: str) -> Dict:
#         """Extract relevant pitcher stats by name"""
#         pitcher_matches = self.pitcher_df.loc[self.pitcher_df['PlayerName'] == pitcher_name]
        
#         if len(pitcher_matches) == 0:
#             raise ValueError(f"Pitcher '{pitcher_name}' not found in pitcher dataframe")
        
#         pitcher = pitcher_matches.iloc[0]
        
#         # Calculate per-batter-faced probabilities
#         bf = pitcher['battersFaced']
#         if bf == 0:
#             bf = 1  # Avoid division by zero
            
#         return {
#             'name': pitcher['PlayerName'],
#             'k_rate': pitcher['strikeOuts'] / bf,
#             'bb_rate': pitcher['baseOnBalls'] / bf,
#             'hit_rate': pitcher['hits'] / bf,
#             'hr_rate': pitcher['homeRuns'] / bf,
#             'double_rate': pitcher['doubles'] / bf,
#             'triple_rate': pitcher['triples'] / bf,
#         }
    
#     def get_batter_stats(self, batter_name: str) -> Dict:
#         """Extract relevant batter stats by name"""
#         batter_matches = self.batter_df.loc[self.batter_df['PlayerName'] == batter_name]
        
#         if len(batter_matches) == 0:
#             raise ValueError(f"Batter '{batter_name}' not found in batter dataframe")
        
#         batter = batter_matches.iloc[0]
        
#         # Calculate probabilities
#         pa = batter['plateAppearances']
#         if pa == 0:
#             pa = 1
            
#         return {
#             'name': batter['PlayerName'],
#             'k_rate': batter['strikeOuts'] / pa,
#             'bb_rate': batter['baseOnBalls'] / pa,
#             'hit_rate': batter['hits'] / pa,
#             'hr_rate': batter['homeRuns'] / pa,
#             'double_rate': batter['doubles'] / pa,
#             'triple_rate': batter['triples'] / pa,
#             'single_rate': batter.get('singles', 0) / pa if 'singles' in batter else 0,
#         }
    
#     def simulate_plate_appearance(self, batter_stats: Dict, pitcher_stats: Dict) -> Tuple[str, int]:
#         """
#         Simulate a single plate appearance
#         Returns: (outcome, bases_advanced)
#         Outcomes: 'K', 'BB', 'single', 'double', 'triple', 'HR', 'out'
#         """
#         # Blend batter and pitcher stats (60% batter, 40% pitcher weight)
#         k_prob = 0.6 * batter_stats['k_rate'] + 0.4 * pitcher_stats['k_rate']
#         bb_prob = 0.6 * batter_stats['bb_rate'] + 0.4 * pitcher_stats['bb_rate']
#         hr_prob = 0.6 * batter_stats['hr_rate'] + 0.4 * pitcher_stats['hr_rate']
#         double_prob = 0.6 * batter_stats['double_rate'] + 0.4 * pitcher_stats['double_rate']
#         triple_prob = 0.6 * batter_stats['triple_rate'] + 0.4 * pitcher_stats['triple_rate']
        
#         # Single rate (hits minus extra base hits)
#         single_prob = (0.6 * batter_stats['hit_rate'] + 0.4 * pitcher_stats['hit_rate']) - \
#                       (hr_prob + double_prob + triple_prob)
#         single_prob = max(0, single_prob)  # Can't be negative
        
#         # Remaining probability is outs (includes ground outs, fly outs, etc.)
#         total_prob = k_prob + bb_prob + hr_prob + double_prob + triple_prob + single_prob
#         out_prob = max(0, 1 - total_prob)
        
#         # Simulate outcome
#         rand = np.random.random()
#         cumulative = 0
        
#         cumulative += k_prob
#         if rand < cumulative:
#             return 'K', 0
        
#         cumulative += bb_prob
#         if rand < cumulative:
#             return 'BB', 1
        
#         cumulative += hr_prob
#         if rand < cumulative:
#             return 'HR', 4
        
#         cumulative += triple_prob
#         if rand < cumulative:
#             return 'triple', 3
        
#         cumulative += double_prob
#         if rand < cumulative:
#             return 'double', 2
        
#         cumulative += single_prob
#         if rand < cumulative:
#             return 'single', 1
        
#         return 'out', 0
    
#     def simulate_half_inning(self, batting_lineup: List[str], pitcher_name: str, 
#                             batter_idx: int, game_state: GameState, 
#                             player_stats: Dict[str, PlayerGameStats]) -> Tuple[int, int]:
#         """
#         Simulate a half inning
#         Returns: (runs_scored, next_batter_idx)
#         """
#         runs = 0
#         outs = 0
#         game_state.clear_bases()
        
#         pitcher_stats = self.get_pitcher_stats(pitcher_name)
        
#         while outs < 3:
#             batter_name = batting_lineup[batter_idx]
#             batter_stats = self.get_batter_stats(batter_name)
            
#             # Get or create player stats
#             if batter_name not in player_stats:
#                 player_stats[batter_name] = PlayerGameStats(
#                     player_name=batter_name
#                 )
            
#             outcome, bases_advanced = self.simulate_plate_appearance(batter_stats, pitcher_stats)
            
#             if outcome == 'K':
#                 outs += 1
#                 player_stats[batter_name].at_bats += 1
#                 player_stats[batter_name].strikeouts += 1
                
#             elif outcome == 'out':
#                 outs += 1
#                 player_stats[batter_name].at_bats += 1
                
#             elif outcome == 'BB':
#                 player_stats[batter_name].walks += 1
#                 # Walk advances runners if bases loaded
#                 if all(game_state.bases):
#                     runs += 1
#                     player_stats[batter_name].rbis += 1
#                 else:
#                     # Walk with force
#                     if game_state.bases[0]:
#                         if game_state.bases[1]:
#                             if game_state.bases[2]:
#                                 runs += 1
#                                 player_stats[batter_name].rbis += 1
#                             else:
#                                 game_state.bases[2] = True
#                         else:
#                             game_state.bases[1] = True
#                     game_state.bases[0] = True
                    
#             elif outcome == 'HR':
#                 player_stats[batter_name].at_bats += 1
#                 player_stats[batter_name].hits += 1
#                 player_stats[batter_name].home_runs += 1
#                 # Count runners on base
#                 runners_on = sum(game_state.bases)
#                 runs += runners_on + 1  # Runners plus batter
#                 player_stats[batter_name].rbis += runners_on + 1
#                 player_stats[batter_name].runs += 1
#                 game_state.clear_bases()
                
#             elif outcome in ['single', 'double', 'triple']:
#                 player_stats[batter_name].at_bats += 1
#                 player_stats[batter_name].hits += 1
                
#                 if outcome == 'single':
#                     player_stats[batter_name].singles += 1
#                 elif outcome == 'double':
#                     player_stats[batter_name].doubles += 1
#                 else:
#                     player_stats[batter_name].triples += 1
                
#                 # Advance runners
#                 runs_scored = game_state.advance_runners(bases_advanced)
#                 runs += runs_scored
#                 player_stats[batter_name].rbis += runs_scored
                
#                 # Place batter on base
#                 game_state.bases[bases_advanced - 1] = True
            
#             # Move to next batter
#             batter_idx = (batter_idx + 1) % len(batting_lineup)
        
#         return runs, batter_idx
    
#     def simulate_game(self, away_lineup: List[str], away_pitcher: str,
#                      home_lineup: List[str], home_pitcher: str,
#                      innings: int = 9) -> Dict:
#         """
#         Simulate a complete game
#         Returns dictionary with game results
#         """
#         game_state = GameState()
#         away_batter_idx = 0
#         home_batter_idx = 0
        
#         # Track player stats
#         player_stats = {}
        
#         inning_scores = {'away': [], 'home': []}
        
#         # Simulate regulation innings
#         for inning in range(1, innings + 1):
#             # Away team bats (top of inning)
#             runs, away_batter_idx = self.simulate_half_inning(
#                 away_lineup, home_pitcher, away_batter_idx, game_state, player_stats
#             )
#             game_state.away_score += runs
#             inning_scores['away'].append(runs)
            
#             # Home team bats (bottom of inning)
#             # Don't bat in bottom of 9th if already winning
#             if inning == innings and game_state.home_score > game_state.away_score:
#                 inning_scores['home'].append(0)
#                 break
                
#             runs, home_batter_idx = self.simulate_half_inning(
#                 home_lineup, away_pitcher, home_batter_idx, game_state, player_stats
#             )
#             game_state.home_score += runs
#             inning_scores['home'].append(runs)
            
#             # Check for walk-off
#             if inning >= innings and game_state.home_score > game_state.away_score:
#                 break
        
#         # Extra innings if tied
#         extra_inning = innings + 1
#         while game_state.away_score == game_state.home_score:
#             # Away team
#             runs, away_batter_idx = self.simulate_half_inning(
#                 away_lineup, home_pitcher, away_batter_idx, game_state, player_stats
#             )
#             game_state.away_score += runs
#             inning_scores['away'].append(runs)
            
#             # Home team
#             runs, home_batter_idx = self.simulate_half_inning(
#                 home_lineup, away_pitcher, home_batter_idx, game_state, player_stats
#             )
#             game_state.home_score += runs
#             inning_scores['home'].append(runs)
            
#             if game_state.home_score > game_state.away_score:
#                 break
                
#             extra_inning += 1
#             if extra_inning > 15:  # Safety limit
#                 break
        
#         # Calculate total strikeouts
#         total_strikeouts = sum(ps.strikeouts for ps in player_stats.values())
        
#         return {
#             'away_score': game_state.away_score,
#             'home_score': game_state.home_score,
#             'winner': 'home' if game_state.home_score > game_state.away_score else 'away',
#             'total_strikeouts': total_strikeouts,
#             'inning_scores': inning_scores,
#             'player_stats': player_stats,
#             'innings_played': len(inning_scores['away'])
#         }
    
#     def simulate_multiple_games(self, away_lineup: List[str], away_pitcher: str,
#                                home_lineup: List[str], home_pitcher: str,
#                                n_simulations: int = 1000) -> Dict:
#         """
#         Simulate multiple games and aggregate results
#         """
#         results = {
#             'away_wins': 0,
#             'home_wins': 0,
#             'away_scores': [],
#             'home_scores': [],
#             'total_strikeouts': [],
#             'player_hit_frequency': defaultdict(int),
#             'player_avg_stats': defaultdict(lambda: {
#                 'at_bats': 0, 'hits': 0, 'hr': 0, 'rbi': 0, 'k': 0
#             })
#         }
        
#         for _ in range(n_simulations):
#             game_result = self.simulate_game(away_lineup, away_pitcher, 
#                                             home_lineup, home_pitcher)
            
#             # Aggregate results
#             results['away_scores'].append(game_result['away_score'])
#             results['home_scores'].append(game_result['home_score'])
#             results['total_strikeouts'].append(game_result['total_strikeouts'])
            
#             if game_result['winner'] == 'home':
#                 results['home_wins'] += 1
#             else:
#                 results['away_wins'] += 1
            
#             # Track player stats
#             for player_name, stats in game_result['player_stats'].items():
#                 if stats.hits > 0:
#                     results['player_hit_frequency'][player_name] += 1
                
#                 results['player_avg_stats'][player_name]['at_bats'] += stats.at_bats
#                 results['player_avg_stats'][player_name]['hits'] += stats.hits
#                 results['player_avg_stats'][player_name]['hr'] += stats.home_runs
#                 results['player_avg_stats'][player_name]['rbi'] += stats.rbis
#                 results['player_avg_stats'][player_name]['k'] += stats.strikeouts
        
#         # Calculate averages
#         results['away_win_pct'] = results['away_wins'] / n_simulations
#         results['home_win_pct'] = results['home_wins'] / n_simulations
#         results['avg_away_score'] = np.mean(results['away_scores'])
#         results['avg_home_score'] = np.mean(results['home_scores'])
#         results['avg_total_strikeouts'] = np.mean(results['total_strikeouts'])
        
#         # Convert player stats to averages
#         for player_name in results['player_avg_stats']:
#             for stat in results['player_avg_stats'][player_name]:
#                 results['player_avg_stats'][player_name][stat] /= n_simulations
        
#         return results
import pandas as pd
import numpy as np
from typing import Dict, List, Tuple
from dataclasses import dataclass
from collections import defaultdict

@dataclass
class GameState:
    """Mutable score and base-occupancy state for one simulated game.

    Attributes:
        inning: Inning counter, defaults to 1.
        outs: Out counter, defaults to 0.
        bases: Occupancy flags ordered ``[first, second, third]``. When left
            as ``None`` each instance receives its own empty-bases list.
        home_score: Runs scored by the home team.
        away_score: Runs scored by the away team.
    """
    inning: int = 1
    outs: int = 0
    # ``None`` is only a sentinel: a list default would be shared between
    # instances, so the real list is created in ``__post_init__``.
    bases: List[bool] = None  # [1st, 2nd, 3rd]
    home_score: int = 0
    away_score: int = 0

    def __post_init__(self):
        """Give the instance its own empty bases when none were supplied."""
        if self.bases is None:
            self.bases = [False, False, False]

    def clear_bases(self):
        """Remove every runner by installing a fresh all-empty base list."""
        self.bases = [False, False, False]

    def advance_runners(self, bases_advanced: int):
        """Move every runner forward by ``bases_advanced`` bases.

        Runners who reach or pass home (index 3 or beyond) score and leave
        the bases. The batter is not placed on base here.

        Args:
            bases_advanced: Number of bases each runner moves. ``0`` leaves
                the runners where they are.

        Returns:
            The number of runners who scored.

        Raises:
            ValueError: If ``bases_advanced`` is negative.
        """
        if bases_advanced < 0:
            raise ValueError(
                f"bases_advanced must be non-negative, got {bases_advanced}"
            )

        runs = 0
        # Build the new occupancy separately so a runner's destination can
        # never be confused with his origin (a zero advance used to wipe the
        # runner out because the same slot was set and then cleared).
        updated = [False, False, False]
        for base, occupied in enumerate(self.bases):
            if not occupied:
                continue
            destination = base + bases_advanced
            if destination >= 3:
                runs += 1
            else:
                updated[destination] = True

        # Update in place so existing references to the list stay valid.
        self.bases[:] = updated
        return runs

@dataclass
class PlayerGameStats:
    """Per-batter counting stats accumulated during one simulated game.

    Every counter starts at 0 and is incremented by
    ``BaseballGameSimulator.simulate_half_inning`` as plate appearances are
    resolved.
    """
    player_name: str  # batter's name; also the key of the player_stats dict
    at_bats: int = 0  # charged on strikeouts, other outs and hits; not on walks
    hits: int = 0  # singles, doubles, triples and home runs combined
    singles: int = 0
    doubles: int = 0
    triples: int = 0
    home_runs: int = 0
    walks: int = 0
    strikeouts: int = 0
    rbis: int = 0  # runs driven in, including a bases-loaded walk
    runs: int = 0  # NOTE(review): in the code visible here only a home-run batter is credited

@dataclass
class PitcherGameStats:
    """Per-pitcher counters accumulated during one simulated game.

    Every counter starts at 0 and is updated by
    ``BaseballGameSimulator.simulate_half_inning``.
    """
    pitcher_name: str  # pitcher's name; also the key of the pitcher tracker dict
    strikeouts: int = 0
    hits_allowed: int = 0  # includes home runs
    walks_allowed: int = 0
    runs_allowed: int = 0  # increased by the half inning's run total once it ends
    # NOTE(review): incremented once per plate appearance, so this is
    # effectively batters faced rather than a true pitch count.
    pitches_thrown: int = 0

class BaseballGameSimulator:
    def __init__(self, pitcher_df: pd.DataFrame, batter_df: pd.DataFrame):
        """Store the stat tables the simulator reads its rates from.

        Args:
            pitcher_df: Pitching stats. ``get_pitcher_stats`` reads the
                columns ``PlayerName``, ``battersFaced``, ``strikeOuts``,
                ``baseOnBalls``, ``hits``, ``homeRuns``, ``doubles`` and
                ``triples``.
            batter_df: Batting stats. ``get_batter_stats`` reads the columns
                ``PlayerName``, ``plateAppearances``, ``strikeOuts``,
                ``baseOnBalls``, ``hits``, ``homeRuns``, ``doubles``,
                ``triples`` and, when present, ``singles``.
        """
        self.pitcher_df = pitcher_df
        self.batter_df = batter_df
        
    def get_pitcher_stats(self, pitcher_name: str) -> Dict:
        """Look up a pitcher by name and turn his totals into per-batter rates.

        Args:
            pitcher_name: Value matched exactly against ``PlayerName``.

        Returns:
            Dict holding the pitcher's ``name`` plus ``k_rate``, ``bb_rate``,
            ``hit_rate``, ``hr_rate``, ``double_rate`` and ``triple_rate``,
            each being the raw count divided by ``battersFaced``.

        Raises:
            ValueError: If no row carries that ``PlayerName``.
        """
        name_mask = self.pitcher_df['PlayerName'] == pitcher_name
        candidates = self.pitcher_df[name_mask]
        if not len(candidates):
            raise ValueError(f"Pitcher '{pitcher_name}' not found in pitcher dataframe")

        # When several rows share the name, the first one is used.
        record = candidates.iloc[0]

        # A zero batters-faced total would divide by zero; fall back to 1.
        # NOTE(review): a NaN total is not caught here and would yield NaN
        # rates -- confirm the pitcher table has no missing values.
        batters_faced = record['battersFaced']
        denominator = 1 if batters_faced == 0 else batters_faced

        count_columns = {
            'k_rate': 'strikeOuts',
            'bb_rate': 'baseOnBalls',
            'hit_rate': 'hits',
            'hr_rate': 'homeRuns',
            'double_rate': 'doubles',
            'triple_rate': 'triples',
        }
        rates = {'name': record['PlayerName']}
        for rate_key, column in count_columns.items():
            rates[rate_key] = record[column] / denominator
        return rates
    
    def get_batter_stats(self, batter_name: str) -> Dict:
        """Look up a batter by name and turn his totals into per-PA rates.

        Args:
            batter_name: Value matched exactly against ``PlayerName``.

        Returns:
            Dict holding the batter's ``name`` plus ``k_rate``, ``bb_rate``,
            ``hit_rate``, ``hr_rate``, ``double_rate``, ``triple_rate`` and
            ``single_rate``, each being the raw count divided by
            ``plateAppearances``. ``single_rate`` is 0 when the table has no
            ``singles`` column.

        Raises:
            ValueError: If no row carries that ``PlayerName``.
        """
        name_mask = self.batter_df['PlayerName'] == batter_name
        candidates = self.batter_df[name_mask]
        if not len(candidates):
            raise ValueError(f"Batter '{batter_name}' not found in batter dataframe")

        # When several rows share the name, the first one is used.
        record = candidates.iloc[0]

        # A zero plate-appearance total would divide by zero; fall back to 1.
        plate_appearances = record['plateAppearances']
        denominator = 1 if plate_appearances == 0 else plate_appearances

        count_columns = {
            'k_rate': 'strikeOuts',
            'bb_rate': 'baseOnBalls',
            'hit_rate': 'hits',
            'hr_rate': 'homeRuns',
            'double_rate': 'doubles',
            'triple_rate': 'triples',
        }
        rates = {'name': record['PlayerName']}
        for rate_key, column in count_columns.items():
            rates[rate_key] = record[column] / denominator

        # ``singles`` is a derived column that may not exist in every table.
        if 'singles' in record:
            rates['single_rate'] = record['singles'] / denominator
        else:
            rates['single_rate'] = 0
        return rates
    
    def simulate_plate_appearance(self, batter_stats: Dict, pitcher_stats: Dict) -> Tuple[str, int]:
        """Draw the result of one batter-versus-pitcher plate appearance.

        Each rate is a 60/40 blend of the batter's and the pitcher's value.
        A single uniform draw from ``np.random.random()`` is then compared
        against the running sum of the outcome probabilities.

        Args:
            batter_stats: Rates as returned by ``get_batter_stats``.
            pitcher_stats: Rates as returned by ``get_pitcher_stats``.

        Returns:
            ``(outcome, bases_advanced)`` where outcome is one of ``'K'``,
            ``'BB'``, ``'HR'``, ``'triple'``, ``'double'``, ``'single'`` or
            ``'out'``, and bases_advanced is 0, 1, 4, 3, 2, 1 or 0
            respectively.
        """
        def blended(rate_key):
            # Batter carries 60% of the weight, pitcher the remaining 40%.
            return 0.6 * batter_stats[rate_key] + 0.4 * pitcher_stats[rate_key]

        k_prob = blended('k_rate')
        bb_prob = blended('bb_rate')
        hr_prob = blended('hr_rate')
        double_prob = blended('double_rate')
        triple_prob = blended('triple_rate')

        # Singles are the blended hit rate minus every extra-base hit,
        # floored at zero in case the inputs are inconsistent.
        single_prob = max(0, blended('hit_rate') - (hr_prob + double_prob + triple_prob))

        # Outcomes in the order their probability bands are stacked; whatever
        # probability is left over after the last band is a ball-in-play out.
        outcome_table = (
            ('K', 0, k_prob),
            ('BB', 1, bb_prob),
            ('HR', 4, hr_prob),
            ('triple', 3, triple_prob),
            ('double', 2, double_prob),
            ('single', 1, single_prob),
        )

        draw = np.random.random()
        threshold = 0
        for outcome, bases_advanced, probability in outcome_table:
            threshold += probability
            if draw < threshold:
                return outcome, bases_advanced

        return 'out', 0
    
    def simulate_half_inning(self, batting_lineup: List[str], pitcher_name: str,
                            batter_idx: int, game_state: GameState,
                            player_stats: Dict[str, PlayerGameStats],
                            pitcher_stats_tracker: Dict[str, PitcherGameStats]) -> Tuple[int, int]:
        """Play plate appearances until three outs are recorded.

        The bases are cleared first, then batters come up in lineup order
        (wrapping around) against a single pitcher. Batter and pitcher
        counters are updated in place as each plate appearance is resolved.

        Args:
            batting_lineup: Batter names in batting order.
            pitcher_name: Name of the pitcher faced for the whole half inning.
            batter_idx: Index into ``batting_lineup`` of the first batter up.
            game_state: Supplies and receives the base occupancy; scores are
                not touched here.
            player_stats: Per-batter stat lines keyed by name; missing
                entries are created on first appearance.
            pitcher_stats_tracker: Per-pitcher stat lines keyed by name; a
                missing entry is created before the first batter.

        Returns:
            ``(runs_scored, next_batter_idx)`` where ``next_batter_idx`` is
            the lineup index due up in the team's next half inning.

        Raises:
            ValueError: Propagated from ``get_pitcher_stats`` or
                ``get_batter_stats`` when a name is not in the stat tables.
        """
        runs = 0
        outs = 0
        game_state.clear_bases()

        pitcher_stats = self.get_pitcher_stats(pitcher_name)

        # Make sure the pitcher has a stat line, then keep a direct handle on it.
        if pitcher_name not in pitcher_stats_tracker:
            pitcher_stats_tracker[pitcher_name] = PitcherGameStats(pitcher_name=pitcher_name)
        pitcher_line = pitcher_stats_tracker[pitcher_name]

        while outs < 3:
            batter_name = batting_lineup[batter_idx]
            batter_stats = self.get_batter_stats(batter_name)

            # Make sure the batter has a stat line, then keep a handle on it.
            if batter_name not in player_stats:
                player_stats[batter_name] = PlayerGameStats(player_name=batter_name)
            batter_line = player_stats[batter_name]

            outcome, bases_advanced = self.simulate_plate_appearance(batter_stats, pitcher_stats)

            # NOTE(review): counted once per plate appearance, so this is
            # effectively batters faced rather than a pitch count.
            pitcher_line.pitches_thrown += 1

            if outcome == 'K':
                outs += 1
                batter_line.at_bats += 1
                batter_line.strikeouts += 1
                pitcher_line.strikeouts += 1

            elif outcome == 'out':
                outs += 1
                batter_line.at_bats += 1

            elif outcome == 'BB':
                # A walk is not an at-bat.
                batter_line.walks += 1
                pitcher_line.walks_allowed += 1

                bases = game_state.bases
                if all(bases):
                    # Bases loaded: the runner on third is forced home and
                    # the bases stay loaded.
                    runs += 1
                    batter_line.rbis += 1
                else:
                    # Only runners forced by the batter move up. Third base
                    # must be empty whenever first and second are both
                    # occupied here, because the loaded case is handled above.
                    if bases[0]:
                        if bases[1]:
                            bases[2] = True
                        else:
                            bases[1] = True
                    bases[0] = True

            elif outcome == 'HR':
                batter_line.at_bats += 1
                batter_line.hits += 1
                batter_line.home_runs += 1
                pitcher_line.hits_allowed += 1

                # Every runner scores along with the batter.
                runs_on_play = sum(game_state.bases) + 1
                runs += runs_on_play
                batter_line.rbis += runs_on_play
                batter_line.runs += 1
                game_state.clear_bases()

            elif outcome in ['single', 'double', 'triple']:
                batter_line.at_bats += 1
                batter_line.hits += 1
                pitcher_line.hits_allowed += 1

                if outcome == 'single':
                    batter_line.singles += 1
                elif outcome == 'double':
                    batter_line.doubles += 1
                else:
                    batter_line.triples += 1

                # Runners move up exactly as many bases as the batter takes.
                runs_scored = game_state.advance_runners(bases_advanced)
                runs += runs_scored
                batter_line.rbis += runs_scored

                # The batter ends up on the base matching the hit type.
                game_state.bases[bases_advanced - 1] = True

            # Next batter, wrapping around to the top of the order.
            batter_idx = (batter_idx + 1) % len(batting_lineup)

        # Charge the whole half inning's runs to the pitcher.
        pitcher_line.runs_allowed += runs

        return runs, batter_idx
    
    def simulate_game(self, away_lineup: List[str], away_pitcher: str,
                     home_lineup: List[str], home_pitcher: str,
                     innings: int = 9) -> Dict:
        """
        Simulate a complete game
        Returns dictionary with game results
        """
        game_state = GameState()
        away_batter_idx = 0
        home_batter_idx = 0
        
        # Track player stats
        player_stats = {}
        pitcher_stats_tracker = {}
        
        inning_scores = {'away': [], 'home': []}
        
        # Simulate regulation innings
        for inning in range(1, innings + 1):
            # Away team bats (top of inning)
            runs, away_batter_idx = self.simulate_half_inning(
                away_lineup, home_pitcher, away_batter_idx, game_state, player_stats, pitcher_stats_tracker
            )
            game_state.away_score += runs
            inning_scores['away'].append(runs)
            
            # Home team bats (bottom of inning)
            # Don't bat in bottom of 9th if already winning
            if inning == innings and game_state.home_score > game_state.away_score:
                inning_scores['home'].append(0)
                break
                
            runs, home_batter_idx = self.simulate_half_inning(
                home_lineup, away_pitcher, home_batter_idx, game_state, player_stats, pitcher_stats_tracker
            )
            game_state.home_score += runs
            inning_scores['home'].append(runs)
            
            # Check for walk-off
            if inning >= innings and game_state.home_score > game_state.away_score:
                break
        
        # Extra innings if tied
        extra_inning = innings + 1
        while game_state.away_score == game_state.home_score:
            # Away team
            runs, away_batter_idx = self.simulate_half_inning(
                away_lineup, home_pitcher, away_batter_idx, game_state, player_stats, pitcher_stats_tracker
            )
            game_state.away_score += runs
            inning_scores['away'].append(runs)
            
            # Home team
            runs, home_batter_idx = self.simulate_half_inning(
                home_lineup, away_pitcher, home_batter_idx, game_state, player_stats, pitcher_stats_tracker
            )
            game_state.home_score += runs
            inning_scores['home'].append(runs)
            
            if game_state.home_score > game_state.away_score:
                break
                
            extra_inning += 1
            if extra_inning > 15:  # Safety limit
                break
        
        # Calculate total strikeouts
        total_strikeouts = sum(ps.strikeouts for ps in player_stats.values())
        
        return {
            'away_score': game_state.away_score,
            'home_score': game_state.home_score,
            'winner': 'home' if game_state.home_score > game_state.away_score else 'away',
            'total_strikeouts': total_strikeouts,
            'inning_scores': inning_scores,
            'player_stats': player_stats,
            'pitcher_stats': pitcher_stats_tracker,
            'innings_played': len(inning_scores['away'])
        }
    
    def simulate_multiple_games(self, away_lineup: List[str], away_pitcher: str,
                               home_lineup: List[str], home_pitcher: str,
                               n_simulations: int = 1000) -> Dict:
        """
        Simulate multiple games and aggregate results
        """
        results = {
            'away_wins': 0,
            'home_wins': 0,
            'away_scores': [],
            'home_scores': [],
            'total_strikeouts': [],
            'player_hit_frequency': defaultdict(int),
            'player_avg_stats': defaultdict(lambda: {
                'at_bats': 0, 'hits': 0, 'hr': 0, 'rbi': 0, 'k': 0
            }),
            'pitcher_avg_stats': defaultdict(lambda: {
                'strikeouts': 0, 'hits_allowed': 0, 'walks_allowed': 0, 'runs_allowed': 0
            })
        }
        
        for _ in range(n_simulations):
            game_result = self.simulate_game(away_lineup, away_pitcher, 
                                            home_lineup, home_pitcher)
            
            # Aggregate results
            results['away_scores'].append(game_result['away_score'])
            results['home_scores'].append(game_result['home_score'])
            results['total_strikeouts'].append(game_result['total_strikeouts'])
            
            if game_result['winner'] == 'home':
                results['home_wins'] += 1
            else:
                results['away_wins'] += 1
            
            # Track player stats
            for player_name, stats in game_result['player_stats'].items():
                if stats.hits > 0:
                    results['player_hit_frequency'][player_name] += 1
                
                results['player_avg_stats'][player_name]['at_bats'] += stats.at_bats
                results['player_avg_stats'][player_name]['hits'] += stats.hits
                results['player_avg_stats'][player_name]['hr'] += stats.home_runs
                results['player_avg_stats'][player_name]['rbi'] += stats.rbis
                results['player_avg_stats'][player_name]['k'] += stats.strikeouts
            
            # Track pitcher stats
            for pitcher_name, stats in game_result['pitcher_stats'].items():
                results['pitcher_avg_stats'][pitcher_name]['strikeouts'] += stats.strikeouts
                results['pitcher_avg_stats'][pitcher_name]['hits_allowed'] += stats.hits_allowed
                results['pitcher_avg_stats'][pitcher_name]['walks_allowed'] += stats.walks_allowed
                results['pitcher_avg_stats'][pitcher_name]['runs_allowed'] += stats.runs_allowed
        
        # Calculate averages
        results['away_win_pct'] = results['away_wins'] / n_simulations
        results['home_win_pct'] = results['home_wins'] / n_simulations
        results['avg_away_score'] = np.mean(results['away_scores'])
        results['avg_home_score'] = np.mean(results['home_scores'])
        results['avg_total_strikeouts'] = np.mean(results['total_strikeouts'])
        
        # Convert player stats to averages
        for player_name in results['player_avg_stats']:
            for stat in results['player_avg_stats'][player_name]:
                results['player_avg_stats'][player_name][stat] /= n_simulations
        
        # Convert pitcher stats to averages
        for pitcher_name in results['pitcher_avg_stats']:
            for stat in results['pitcher_avg_stats'][pitcher_name]:
                results['pitcher_avg_stats'][pitcher_name][stat] /= n_simulations
        
        return results



In [27]:

# Example usage with player names:
# Seed the global `random` and NumPy generators so the figures printed below
# can be reproduced on a fresh Restart & Run All.
# NOTE(review): this assumes the simulator draws from one of these two global
# generators -- confirm; if it owns its own generator, seed that instead.
SEED = 42
N_SIMULATIONS = 1000

random.seed(SEED)
np.random.seed(SEED)

simulator = BaseballGameSimulator(pitcher_data, batter_data)

away_lineup = ['Lawrence Butler', 'Brent Rooker', 'Nick Kurtz', 
               'Tyler Soderstrom', 'Jacob Wilson', 'Carlos Cortes',
               'Darell Hernaiz', 'Colby Thomas', 'Max Schuemann']

home_lineup = ['Oneil Cruz', 'Jared Triolo', 'Bryan Reynolds',
               'Spencer Horwitz', 'Alexander Canario', 'Nick Gonzales',
               'Jack Suwinski', 'Joey Bart', 'Nick Yorke']

# Each pitcher faces the opposing lineup
away_pitcher = 'Mitch Spence'
home_pitcher = 'Carmen Mlodzinski'


results = simulator.simulate_multiple_games(
    away_lineup=away_lineup,
    away_pitcher=away_pitcher,
    home_lineup=home_lineup,
    home_pitcher=home_pitcher,
    n_simulations=N_SIMULATIONS
)

# Headline results averaged over all simulated games
print(f"Home team wins: {results['home_win_pct']:.1%}")
print(f"Away team wins: {results['away_win_pct']:.1%}")
print(f"Average home score: {results['avg_home_score']:.2f}")
print(f"Average away score: {results['avg_away_score']:.2f}")
print(f"Average strikeouts: {results['avg_total_strikeouts']:.1f}")

# Per-game averages for each pitcher
print("\nPitcher Stats:")
for pitcher_name, stats in results['pitcher_avg_stats'].items():
    print(f"\n{pitcher_name}:")
    print(f"  Avg Strikeouts: {stats['strikeouts']:.2f}")
    print(f"  Avg Hits Allowed: {stats['hits_allowed']:.2f}")
    print(f"  Avg Walks Allowed: {stats['walks_allowed']:.2f}")
    print(f"  Avg Runs Allowed: {stats['runs_allowed']:.2f}")


Home team wins: 47.1%
Away team wins: 52.9%
Average home score: 3.22
Average away score: 3.69
Average strikeouts: 17.4

Pitcher Stats:

Carmen Mlodzinski:
  Avg Strikeouts: 8.72
  Avg Hits Allowed: 9.93
  Avg Walks Allowed: 3.21
  Avg Runs Allowed: 3.69

Mitch Spence:
  Avg Strikeouts: 8.68
  Avg Hits Allowed: 8.66
  Avg Walks Allowed: 3.22
  Avg Runs Allowed: 3.22
