In [1]:
# Import dependencies

import pandas as pd
import numpy as np

In [2]:
# Read in CSV files from FanGraphs (exported directly from site daily)

# Files contain pitching and hitting stats from two different time periods:
    # - Beginning of 2023 season - present ("year")
    # - Beginning of 2024 season - present ("season")
    
# Filtered stats include:
    # - Pitching stats vs. left- and right-handed batters the first time through the order
    # - Batting stats vs. left- and right-handed pitchers
    
pitching_vs_left_year_df = pd.read_csv('pitching_vs_left_year.csv')
pitching_vs_right_year_df = pd.read_csv('pitching_vs_right_year.csv')
pitching_vs_left_season_df = pd.read_csv('pitching_vs_left_season.csv')
pitching_vs_right_season_df = pd.read_csv('pitching_vs_right_season.csv')
batting_vs_left_year_df = pd.read_csv('batting_vs_left_year.csv')
batting_vs_right_year_df = pd.read_csv('batting_vs_right_year.csv')
batting_vs_left_season_df = pd.read_csv('batting_vs_left_season.csv')
batting_vs_right_season_df = pd.read_csv('batting_vs_right_season.csv')

# MLB season CSV contains league-wide stats from 2024 season to later compare K rate with home plate umpire's career K rate

mlb_season_df = pd.read_csv('MLB_season.csv')

In [3]:
# Create new columns:
    # - K Rate column for both pitchers and batters
    # - Opponent on-base percentage column for pitchers
    # - On-base percentage column for batters
    # - Singles, doubles, triples and home run rate columns for both pitchers and batters
    # - Walk rate column for both pitchers and batters

pitching_vs_left_year_df['K_Rate'] = pitching_vs_left_year_df['SO'] / pitching_vs_left_year_df['TBF']
pitching_vs_right_year_df['K_Rate'] = pitching_vs_right_year_df['SO'] / pitching_vs_right_year_df['TBF']
pitching_vs_left_season_df['K_Rate'] = pitching_vs_left_season_df['SO'] / pitching_vs_left_season_df['TBF']
pitching_vs_right_season_df['K_Rate'] = pitching_vs_right_season_df['SO'] / pitching_vs_right_season_df['TBF']

pitching_vs_left_year_df['Opp_OBP'] = (pitching_vs_left_year_df['BB'] + pitching_vs_left_year_df['H']) / pitching_vs_left_year_df['TBF']
pitching_vs_right_year_df['Opp_OBP'] = (pitching_vs_right_year_df['BB'] + pitching_vs_right_year_df['H']) / pitching_vs_right_year_df['TBF']
pitching_vs_left_season_df['Opp_OBP'] = (pitching_vs_left_season_df['BB'] + pitching_vs_left_season_df['H']) / pitching_vs_left_season_df['TBF']
pitching_vs_right_season_df['Opp_OBP'] = (pitching_vs_right_season_df['BB'] + pitching_vs_right_season_df['H']) / pitching_vs_right_season_df['TBF']

pitching_vs_left_year_df['Opp_1B'] = (pitching_vs_left_year_df['H'] - (pitching_vs_left_year_df['2B'] + pitching_vs_left_year_df['3B'] + pitching_vs_left_year_df['HR'])) / pitching_vs_left_year_df['TBF']
pitching_vs_right_year_df['Opp_1B'] = (pitching_vs_right_year_df['H'] - (pitching_vs_right_year_df['2B'] + pitching_vs_right_year_df['3B'] + pitching_vs_right_year_df['HR'])) / pitching_vs_right_year_df['TBF']
pitching_vs_left_season_df['Opp_1B'] = (pitching_vs_left_season_df['H'] - (pitching_vs_left_season_df['2B'] + pitching_vs_left_season_df['3B'] + pitching_vs_left_season_df['HR'])) / pitching_vs_left_season_df['TBF']
pitching_vs_right_season_df['Opp_1B'] = (pitching_vs_right_season_df['H'] - (pitching_vs_right_season_df['2B'] + pitching_vs_right_season_df['3B'] + pitching_vs_right_season_df['HR'])) / pitching_vs_right_season_df['TBF']

batting_vs_left_year_df['K_Rate'] = batting_vs_left_year_df['SO'] / batting_vs_left_year_df['PA']
batting_vs_right_year_df['K_Rate'] = batting_vs_right_year_df['SO'] / batting_vs_right_year_df['PA']
batting_vs_right_season_df['K_Rate'] = batting_vs_right_season_df['SO'] / batting_vs_right_season_df['PA']
batting_vs_left_season_df['K_Rate'] = batting_vs_left_season_df['SO'] / batting_vs_left_season_df['PA']

batting_vs_left_year_df['OBP'] = (batting_vs_left_year_df['H'] + batting_vs_left_year_df['BB'] + batting_vs_left_year_df['HBP']) / batting_vs_left_year_df['PA']
batting_vs_right_year_df['OBP'] = (batting_vs_right_year_df['H'] + batting_vs_right_year_df['BB'] + batting_vs_right_year_df['HBP']) / batting_vs_right_year_df['PA']
batting_vs_right_season_df['OBP'] = (batting_vs_right_season_df['H'] + batting_vs_right_season_df['BB'] + batting_vs_right_season_df['HBP']) / batting_vs_right_season_df['PA']
batting_vs_left_season_df['OBP'] = (batting_vs_left_season_df['H'] + batting_vs_left_season_df['BB'] + batting_vs_left_season_df['HBP']) / batting_vs_left_season_df['PA']

batting_vs_left_year_df['1B_Rate'] = batting_vs_left_year_df['1B'] / batting_vs_left_year_df['PA']
batting_vs_right_year_df['1B_Rate'] = batting_vs_right_year_df['1B'] / batting_vs_right_year_df['PA']
batting_vs_right_season_df['1B_Rate'] = batting_vs_right_season_df['1B'] / batting_vs_right_season_df['PA']
batting_vs_left_season_df['1B_Rate'] = batting_vs_left_season_df['1B'] / batting_vs_left_season_df['PA']

pitching_vs_left_year_df['BB_Rate'] = pitching_vs_left_year_df['BB'] / pitching_vs_left_year_df['TBF']
pitching_vs_right_year_df['BB_Rate'] = pitching_vs_right_year_df['BB'] / pitching_vs_right_year_df['TBF']
pitching_vs_left_season_df['BB_Rate'] = pitching_vs_left_season_df['BB'] / pitching_vs_left_season_df['TBF']
pitching_vs_right_season_df['BB_Rate'] = pitching_vs_right_season_df['BB'] / pitching_vs_right_season_df['TBF']

batting_vs_left_year_df['BB_Rate'] = batting_vs_left_year_df['BB'] / batting_vs_left_year_df['PA']
batting_vs_right_year_df['BB_Rate'] = batting_vs_right_year_df['BB'] / batting_vs_right_year_df['PA']
batting_vs_right_season_df['BB_Rate'] = batting_vs_right_season_df['BB'] / batting_vs_right_season_df['PA']
batting_vs_left_season_df['BB_Rate'] = batting_vs_left_season_df['BB'] / batting_vs_left_season_df['PA']

pitching_vs_left_year_df['2B_Rate'] = pitching_vs_left_year_df['2B'] / pitching_vs_left_year_df['TBF']
pitching_vs_right_year_df['2B_Rate'] = pitching_vs_right_year_df['2B'] / pitching_vs_right_year_df['TBF']
pitching_vs_left_season_df['2B_Rate'] = pitching_vs_left_season_df['2B'] / pitching_vs_left_season_df['TBF']
pitching_vs_right_season_df['2B_Rate'] = pitching_vs_right_season_df['2B'] / pitching_vs_right_season_df['TBF']

batting_vs_left_year_df['2B_Rate'] = batting_vs_left_year_df['2B'] / batting_vs_left_year_df['PA']
batting_vs_right_year_df['2B_Rate'] = batting_vs_right_year_df['2B'] / batting_vs_right_year_df['PA']
batting_vs_right_season_df['2B_Rate'] = batting_vs_right_season_df['2B'] / batting_vs_right_season_df['PA']
batting_vs_left_season_df['2B_Rate'] = batting_vs_left_season_df['2B'] / batting_vs_left_season_df['PA']

pitching_vs_left_year_df['3B_Rate'] = pitching_vs_left_year_df['3B'] / pitching_vs_left_year_df['TBF']
pitching_vs_right_year_df['3B_Rate'] = pitching_vs_right_year_df['3B'] / pitching_vs_right_year_df['TBF']
pitching_vs_left_season_df['3B_Rate'] = pitching_vs_left_season_df['3B'] / pitching_vs_left_season_df['TBF']
pitching_vs_right_season_df['3B_Rate'] = pitching_vs_right_season_df['3B'] / pitching_vs_right_season_df['TBF']

batting_vs_left_year_df['3B_Rate'] = batting_vs_left_year_df['3B'] / batting_vs_left_year_df['PA']
batting_vs_right_year_df['3B_Rate'] = batting_vs_right_year_df['3B'] / batting_vs_right_year_df['PA']
batting_vs_right_season_df['3B_Rate'] = batting_vs_right_season_df['3B'] / batting_vs_right_season_df['PA']
batting_vs_left_season_df['3B_Rate'] = batting_vs_left_season_df['3B'] / batting_vs_left_season_df['PA']

pitching_vs_left_year_df['HR_Rate'] = pitching_vs_left_year_df['HR'] / pitching_vs_left_year_df['TBF']
pitching_vs_right_year_df['HR_Rate'] = pitching_vs_right_year_df['HR'] / pitching_vs_right_year_df['TBF']
pitching_vs_left_season_df['HR_Rate'] = pitching_vs_left_season_df['HR'] / pitching_vs_left_season_df['TBF']
pitching_vs_right_season_df['HR_Rate'] = pitching_vs_right_season_df['HR'] / pitching_vs_right_season_df['TBF']

batting_vs_left_year_df['HR_Rate'] = batting_vs_left_year_df['HR'] / batting_vs_left_year_df['PA']
batting_vs_right_year_df['HR_Rate'] = batting_vs_right_year_df['HR'] / batting_vs_right_year_df['PA']
batting_vs_right_season_df['HR_Rate'] = batting_vs_right_season_df['HR'] / batting_vs_right_season_df['PA']
batting_vs_left_season_df['HR_Rate'] = batting_vs_left_season_df['HR'] / batting_vs_left_season_df['PA']

mlb_season_df['K_Rate'] = mlb_season_df['SO'] / mlb_season_df['TBF']

# Display example pitching DataFrame (pitching stats the first time through the order vs. left-handed batters during the 2024 season)

pitching_vs_left_season_df.head()

Unnamed: 0,Season,Name,Tm,G,TBF,ERA,H,2B,3B,R,...,SLG,wOBA,playerId,K_Rate,Opp_OBP,Opp_1B,BB_Rate,2B_Rate,3B_Rate,HR_Rate
0,Total,Adam Ottavino,NYM,31,55,5.25,11,2,1,8,...,0.456522,0.348188,1247,0.290909,0.309091,0.109091,0.109091,0.036364,0.018182,0.036364
1,Total,Matt Moore,LAA,32,57,5.926829,10,1,0,10,...,0.46,0.329402,1890,0.157895,0.298246,0.087719,0.122807,0.017544,0.0,0.070175
2,Total,Lance Lynn,STL,17,81,6.230769,17,5,0,15,...,0.528571,0.375912,2520,0.296296,0.333333,0.08642,0.123457,0.061728,0.0,0.061728
3,Total,Kenley Jansen,BOS,26,58,1.227273,12,2,0,2,...,0.269231,0.248079,3096,0.310345,0.293103,0.172414,0.086207,0.034483,0.0,0.0
4,Total,Max Scherzer,TEX,3,13,0.0,0,0,0,0,...,0.0,0.05318,3137,0.230769,0.076923,0.0,0.076923,0.0,0.0,0.0


In [4]:
# Display example batting DataFrame (batting stats vs. left-handed pitchers since the beginning of the 2023 season)

batting_vs_left_year_df.head()

Unnamed: 0,Season,Name,Tm,G,PA,AB,H,1B,2B,3B,...,CS,AVG,playerId,K_Rate,OBP,1B_Rate,BB_Rate,2B_Rate,3B_Rate,HR_Rate
0,Total,Miguel Cabrera,DET,47,96,83,22,17,5,0,...,0,0.26506,1744,0.239583,0.354167,0.177083,0.114583,0.052083,0.0,0.0
1,Total,David Peralta,2 Tms,36,44,41,13,11,2,0,...,0,0.317073,2136,0.25,0.363636,0.25,0.022727,0.045455,0.0,0.0
2,Total,Adam Wainwright,STL,1,1,1,0,0,0,0,...,0,0.0,2233,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Total,Carlos Santana,3 Tms,116,242,214,59,36,12,0,...,0,0.275701,2396,0.144628,0.35124,0.14876,0.103306,0.049587,0.0,0.045455
4,Total,Nelson Cruz,SDP,37,86,80,20,14,3,0,...,0,0.25,2434,0.325581,0.290698,0.162791,0.046512,0.034884,0.0,0.034884


In [5]:
# Merge pitching DataFrames together and format columns 

pitchers_combined_df = pd.merge(pitching_vs_left_year_df, pitching_vs_right_year_df, on='Name', how='outer')
pitchers_combined_df = pitchers_combined_df.rename(columns={'HR_Rate_x': 'Year_HR_Rate_LHH', 'HR_Rate_y': 'Year_HR_Rate_RHH','3B_Rate_x': 'Year_3B_Rate_LHH', '3B_Rate_y': 'Year_3B_Rate_RHH','2B_Rate_x': 'Year_2B_Rate_LHH', '2B_Rate_y': 'Year_2B_Rate_RHH','BB_Rate_x': 'Year_BB_Rate_LHH', 'BB_Rate_y': 'Year_BB_Rate_RHH', 'Opp_1B_x': 'Year_Opp_1B_LHH', 'Opp_1B_y': 'Year_Opp_1B_RHH', 'Opp_OBP_x': 'Year_Opp_OBP_LHH', 'Opp_OBP_y': 'Year_Opp_OBP_RHH', 'K_Rate_x' : 'Year_K%_LHH', 'K_Rate_y' : 'Year_K%_RHH'})
pitchers_combined_df = pd.merge(pitchers_combined_df, pitching_vs_left_season_df, on='Name', how='outer')
pitchers_combined_df = pitchers_combined_df.rename(columns={'HR_Rate': 'Season_HR_Rate_LHH','3B_Rate': 'Season_3B_Rate_LHH','2B_Rate': 'Season_2B_Rate_LHH','BB_Rate': 'Season_BB_Rate_LHH', 'Opp_1B': 'Season_Opp_1B_LHH', 'Opp_OBP': 'Season_Opp_OBP_LHH', 'K_Rate' : 'Season_K%_LHH'})
pitchers_combined_df = pitchers_combined_df.drop(columns=['playerId_x','Season_x', 'Season_y', 'Tm_x', 'Tm_y', 'G_x', 'G_y', 'TBF_x', 'TBF_y', 'ERA_x', 'ERA_y', 'H_x', 'H_y', '2B_x', '2B_y', '3B_x', '3B_y', 'R_x', 'R_y', 'ER_x', 'ER_y', 'HR_x', 'HR_y', 'BB_x', 'BB_y', 'IBB_x', 'IBB_y', 'HBP_x', 'HBP_y', 'SO_x', 'SO_y', 'AVG_x', 'AVG_y', 'OBP_x', 'OBP_y', 'SLG_x', 'SLG_y', 'wOBA_x', 'wOBA_y'])
pitchers_combined_df = pd.merge(pitchers_combined_df, pitching_vs_right_season_df, on='Name', how='outer')
pitchers_combined_df = pitchers_combined_df.rename(columns={'HR_Rate': 'Season_HR_Rate_RHH','3B_Rate': 'Season_3B_Rate_RHH','2B_Rate': 'Season_2B_Rate_RHH', 'Opp_1B': 'Season_Opp_1B_RHH', 'BB_Rate': 'Season_BB_Rate_RHH', 'Opp_1B': 'Season_Opp_1B_RHH', 'Opp_OBP': 'Season_Opp_OBP_RHH', 'K_Rate' : 'Season_K%_RHH'})
pitchers_combined_df = pitchers_combined_df.drop(columns=['playerId_y','playerId_x','Season_x', 'Season_y', 'Tm_x', 'Tm_y', 'G_x', 'G_y', 'TBF_x', 'TBF_y', 'ERA_x', 'ERA_y', 'H_x', 'H_y', '2B_x', '2B_y', '3B_x', '3B_y', 'R_x', 'R_y', 'ER_x', 'ER_y', 'HR_x', 'HR_y', 'BB_x', 'BB_y', 'IBB_x', 'IBB_y', 'HBP_x', 'HBP_y', 'SO_x', 'SO_y', 'AVG_x', 'AVG_y', 'OBP_x', 'OBP_y', 'SLG_x', 'SLG_y', 'wOBA_x', 'wOBA_y'])

In [6]:
# Merge batting DataFrames together and format columns

batters_combined_df = pd.merge(batting_vs_left_year_df, batting_vs_right_year_df, on='Name', how='outer')
batters_combined_df = batters_combined_df.rename(columns={'HR_Rate_x': 'Year_HR_Rate_LHP','HR_Rate_y': 'Year_HR_Rate_RHP','3B_Rate_x': 'Year_3B_Rate_LHP','3B_Rate_y': 'Year_3B_Rate_RHP','2B_Rate_x': 'Year_2B_Rate_LHP','2B_Rate_y': 'Year_2B_Rate_RHP', 'BB_Rate_x': 'Year_BB_Rate_LHP', 'BB_Rate_y': 'Year_BB_Rate_RHP', '1B_Rate_x': 'Year_1B_Rate_LHP', '1B_Rate_y': 'Year_1B_Rate_RHP', 'OBP_x': 'Year_OBP_LHP', 'OBP_y': 'Year_OBP_RHP', 'K_Rate_x' : 'Year_K%_LHP', 'K_Rate_y' : 'Year_K%_RHP'})
batters_combined_df = pd.merge(batters_combined_df, batting_vs_left_season_df, on='Name', how='outer')
batters_combined_df = batters_combined_df.rename(columns={'HR_Rate': 'Season_HR_Rate_LHP','3B_Rate': 'Season_3B_Rate_LHP','2B_Rate': 'Season_2B_Rate_LHP','BB_Rate': 'Season_BB_Rate_LHP', '1B_Rate': 'Season_1B_Rate_LHP', 'OBP': 'Season_OBP_LHP', 'K_Rate' : 'Season_K%_LHP'})
batters_combined_df = batters_combined_df.drop(columns=['playerId_x','Season_x', 'Season_y', 'Tm_x', 'Tm_y', 'G_x', 'G_y', 'PA_x', 'PA_y', 'AB_x', 'AB_y', 'H_x', 'H_y', '1B_x', '1B_y', '2B_x', '2B_y', '3B_x', '3B_y', 'R_x', 'R_y', 'RBI_x', 'RBI_y', 'HR_x', 'HR_y', 'BB_x', 'BB_y', 'IBB_x', 'IBB_y', 'HBP_x', 'HBP_y', 'SO_x', 'SO_y', 'AVG_x', 'AVG_y', 'SF_x', 'SF_y', 'SH_x', 'SH_y', 'GDP_x', 'GDP_y', 'SB_x', 'SB_y', 'CS_x', 'CS_y'])
batters_combined_df = pd.merge(batters_combined_df, batting_vs_right_season_df, on='Name', how='outer')
batters_combined_df = batters_combined_df.rename(columns={'HR_Rate': 'Season_HR_Rate_RHP','3B_Rate': 'Season_3B_Rate_RHP','2B_Rate': 'Season_2B_Rate_RHP','BB_Rate': 'Season_BB_Rate_RHP', '1B_Rate': 'Season_1B_Rate_RHP', 'OBP': 'Season_OBP_RHP', 'K_Rate' : 'Season_K%_RHP'})
batters_combined_df = batters_combined_df.drop(columns=['playerId_y', 'playerId_x','Season_x', 'Season_y', 'Tm_x', 'Tm_y', 'G_x', 'G_y', 'PA_x', 'PA_y', 'AB_x', 'AB_y', 'H_x', 'H_y', '1B_x', '1B_y', '2B_x', '2B_y', '3B_x', '3B_y', 'R_x', 'R_y', 'RBI_x', 'RBI_y', 'HR_x', 'HR_y', 'BB_x', 'BB_y', 'IBB_x', 'IBB_y', 'HBP_x', 'HBP_y', 'SO_x', 'SO_y', 'AVG_x', 'AVG_y', 'SF_x', 'SF_y', 'SH_x', 'SH_y', 'GDP_x', 'GDP_y', 'SB_x', 'SB_y', 'CS_x', 'CS_y'])

In [7]:
# Read in additional CSV files from FanGraphs to retrieve the following:
# - Handedness information for each pitcher/batter
# - Ground ball rates for each pitcher/batter (for double play probability calculation)

RHH_df = pd.read_csv('RHH_data.csv')
LHH_df = pd.read_csv('LHH_data.csv')
switch_df = pd.read_csv('switch_data.csv')
RHP_df = pd.read_csv('RHP_data.csv')
LHP_df = pd.read_csv('LHP_data.csv')
batting_gb_df = pd.read_csv('batting_gb_rates.csv')
pitching_gb_df = pd.read_csv('pitching_gb_rates.csv')

# Add ground ball rate columns to combined DataFrames

batters_combined_df = batters_combined_df.merge(batting_gb_df[['Name', 'GB%']], on='Name', how='outer')
pitchers_combined_df = pitchers_combined_df.merge(pitching_gb_df[['Name', 'GB%']], on='Name', how='outer')

# Display advanced hitting data DataFrame

batting_gb_df.head()

Unnamed: 0,Name,Team,BABIP,GB/FB,LD%,GB%,FB%,IFFB%,HR/FB,RS,...,Pitches,Pull%,Cent%,Oppo%,Soft%,Med%,Hard%,NameASCII,PlayerId,MLBAMID
0,Hagen Danner,TOR,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0,...,7,0.0,0.0,1.0,0.0,0.0,1.0,Hagen Danner,22116,668470
1,Chris Vallimont,BAL,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0,...,11,1.0,0.0,0.0,0.0,0.0,1.0,Chris Vallimont,24935,681808
2,Miles Mastrobuoni,CHC,0.75,1.0,0.6,0.2,0.2,0.0,1.0,0,...,17,0.6,0.0,0.4,0.0,0.2,0.8,Miles Mastrobuoni,20017,670156
3,Riley Pint,COL,0.5,0.666667,0.0,0.4,0.6,0.0,0.333333,0,...,43,0.4,0.2,0.4,0.2,0.0,0.8,Riley Pint,21858,666207
4,Martín Maldonado,HOU,0.571429,0.25,0.375,0.125,0.5,0.0,0.25,0,...,30,0.5,0.375,0.125,0.0,0.25,0.75,Martin Maldonado,6887,455117


In [8]:
# Initialize Handedness columns in pitching and batting DataFrames

pitchers_combined_df['Handedness'] = np.nan
batters_combined_df['Handedness'] = np.nan

# Define a helper function to update handedness

def update_handedness(RHH_df, batters_combined_df, handedness):
    for name in RHH_df['Name']:
        batters_combined_df.loc[batters_combined_df['Name'] == name, 'Handedness'] = handedness
def update_handedness(LHH_df, batters_combined_df, handedness):
    for name in LHH_df['Name']:
        batters_combined_df.loc[batters_combined_df['Name'] == name, 'Handedness'] = handedness
def update_handedness(switch_df, batters_combined_df, handedness):
    for name in switch_df['Name']:
        batters_combined_df.loc[batters_combined_df['Name'] == name, 'Handedness'] = handedness
def update_handedness(RHP_df, pitchers_combined_df, handedness):
    for name in RHP_df['Name']:
        pitchers_combined_df.loc[pitchers_combined_df['Name'] == name, 'Handedness'] = handedness
def update_handedness(LHP_df, pitchers_combined_df, handedness):
    for name in LHP_df['Name']:
        pitchers_combined_df.loc[pitchers_combined_df['Name'] == name, 'Handedness'] = handedness

# Update handedness for pitchers

update_handedness(RHP_df, pitchers_combined_df, 'R')
update_handedness(LHP_df, pitchers_combined_df, 'L')

# Update handedness for batters

update_handedness(RHH_df, batters_combined_df, 'R')
update_handedness(LHH_df, batters_combined_df, 'L')
update_handedness(switch_df, batters_combined_df, 'S')

# Display pitching DataFrame

pitchers_combined_df.head()

Unnamed: 0,Name,Year_K%_LHH,Year_Opp_OBP_LHH,Year_Opp_1B_LHH,Year_BB_Rate_LHH,Year_2B_Rate_LHH,Year_3B_Rate_LHH,Year_HR_Rate_LHH,Year_K%_RHH,Year_Opp_OBP_RHH,...,Season_HR_Rate_LHH,Season_K%_RHH,Season_Opp_OBP_RHH,Season_Opp_1B_RHH,Season_BB_Rate_RHH,Season_2B_Rate_RHH,Season_3B_Rate_RHH,Season_HR_Rate_RHH,GB%,Handedness
0,Tommy Hunter,0.236842,0.315789,0.078947,0.078947,0.0,0.026316,0.131579,0.195652,0.326087,...,,,,,,,,,0.468354,R
1,Matt Bush,0.24,0.4,0.08,0.16,0.04,0.0,0.12,0.173913,0.304348,...,,,,,,,,,0.28125,R
2,Adam Ottavino,0.265823,0.322785,0.101266,0.132911,0.037975,0.006329,0.044304,0.260163,0.256098,...,0.036364,0.317073,0.231707,0.121951,0.073171,0.02439,0.0,0.012195,0.514286,R
3,Matt Moore,0.176471,0.310924,0.117647,0.10084,0.016807,0.008403,0.067227,0.259109,0.279352,...,0.070175,0.175824,0.307692,0.131868,0.10989,0.021978,0.0,0.043956,0.338776,L
4,Zack Greinke,0.136752,0.350427,0.222222,0.059829,0.042735,0.0,0.025641,0.229167,0.256944,...,,,,,,,,,0.43133,R


In [9]:
# Display batting DataFrame

batters_combined_df.head()

Unnamed: 0,Name,Year_K%_LHP,Year_OBP_LHP,Year_1B_Rate_LHP,Year_BB_Rate_LHP,Year_2B_Rate_LHP,Year_3B_Rate_LHP,Year_HR_Rate_LHP,Year_K%_RHP,Year_OBP_RHP,...,Season_HR_Rate_LHP,Season_K%_RHP,Season_OBP_RHP,Season_1B_Rate_RHP,Season_BB_Rate_RHP,Season_2B_Rate_RHP,Season_3B_Rate_RHP,Season_HR_Rate_RHP,GB%,Handedness
0,Miguel Cabrera,0.239583,0.354167,0.177083,0.114583,0.052083,0.0,0.0,0.188889,0.311111,...,,,,,,,,,,R
1,David Peralta,0.25,0.363636,0.25,0.022727,0.045455,0.0,0.0,0.170732,0.286031,...,0.0,0.194444,0.277778,0.152778,0.083333,0.027778,0.0,0.013889,,L
2,Adam Wainwright,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,,,,,,,,,0.401042,R
3,Carlos Santana,0.144628,0.35124,0.14876,0.103306,0.049587,0.0,0.045455,0.172166,0.311334,...,0.065789,0.15678,0.317797,0.127119,0.105932,0.04661,0.0,0.029661,,S
4,Nelson Cruz,0.325581,0.290698,0.162791,0.046512,0.034884,0.0,0.034884,0.274194,0.290323,...,,,,,,,,,,R


In [10]:
# Input starting pitcher and opposing team's lineup

pitcher_name = input("Enter the name of the starting pitcher: ")
batter_1 = input("Enter the name of the first batter: ")
batter_2 = input("Enter the name of the second batter: ")
batter_3 = input("Enter the name of the third batter: ")
batter_4 = input("Enter the name of the fourth batter: ")
batter_5 = input("Enter the name of the fifth batter: ")
batter_6 = input("Enter the name of the sixth batter: ")
batter_7 = input("Enter the name of the seventh batter: ")
batter_8 = input("Enter the name of the eighth batter: ")
batter_9 = input("Enter the name of the ninth batter: ")

Enter the name of the starting pitcher: Allan Winans
Enter the name of the first batter: Jonathan India
Enter the name of the second batter: Elly De La Cruz
Enter the name of the third batter: Jeimer Candelario
Enter the name of the fourth batter: Spencer Steer
Enter the name of the fifth batter: Tyler Stephenson
Enter the name of the sixth batter: Jake Fraley
Enter the name of the seventh batter: Noelvi Marte
Enter the name of the eighth batter: Rece Hinds
Enter the name of the ninth batter: Will Benson


In [11]:
# Store inputted batter names as strings

batter_names = [str(batter_1), str(batter_2), str(batter_3), str(batter_4), str(batter_5), str(batter_6), str(batter_7), str(batter_8), str(batter_9)]

In [12]:
# Pull relevant names for combined pitching/batting DataFrame

pitcher_data = pitchers_combined_df[pitchers_combined_df['Name'] == pitcher_name]
batter_data = batters_combined_df[batters_combined_df['Name'].isin(batter_names)]

# Order batter data by lineup

batter_data['batting_order'] = pd.Categorical(batter_data['Name'], categories=batter_names, ordered=True)
batter_data = batter_data.sort_values('batting_order').drop(columns='batting_order')

# Concatenate data vertically to stack rows and create combined DataFrame of starting pitcher and opposing team's lineup

model_data = pd.concat([pitcher_data, batter_data], axis=0, ignore_index=True)
model_data
model_data.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  batter_data['batting_order'] = pd.Categorical(batter_data['Name'], categories=batter_names, ordered=True)


Unnamed: 0,Name,Year_K%_LHH,Year_Opp_OBP_LHH,Year_Opp_1B_LHH,Year_BB_Rate_LHH,Year_2B_Rate_LHH,Year_3B_Rate_LHH,Year_HR_Rate_LHH,Year_K%_RHH,Year_Opp_OBP_RHH,...,Season_2B_Rate_LHP,Season_3B_Rate_LHP,Season_HR_Rate_LHP,Season_K%_RHP,Season_OBP_RHP,Season_1B_Rate_RHP,Season_BB_Rate_RHP,Season_2B_Rate_RHP,Season_3B_Rate_RHP,Season_HR_Rate_RHP
0,Allan Winans,0.225,0.35,0.15,0.1,0.05,0.0,0.05,0.217391,0.26087,...,,,,,,,,,,
1,Jonathan India,,,,,,,,,,...,0.047619,0.009524,0.009524,0.216216,0.36036,0.13964,0.117117,0.058559,0.0,0.022523
2,Elly De La Cruz,,,,,,,,,,...,0.038462,0.0,0.030769,0.320988,0.333333,0.111111,0.106996,0.045267,0.020576,0.045267
3,Jeimer Candelario,,,,,,,,,,...,0.067961,0.0,0.0,0.247573,0.271845,0.106796,0.038835,0.048544,0.009709,0.063107
4,Spencer Steer,,,,,,,,,,...,0.1,0.0,0.03,0.219608,0.317647,0.12549,0.098039,0.039216,0.003922,0.039216


In [13]:
# Input home plate umpire's career K rate

#umpire_K_rate = float(input("Enter the home plate umpire's average career K rate: "))

# Display league-wide K rate in 2024 season

#mlb_K_rate = mlb_season_df.loc[mlb_season_df['League'] == 'MLB', 'K_Rate'].values[0]
#mlb_K_rate

In [14]:
# Subtract umpire's career K rate from league-wide K rate to calculate umpire factor

#umpire_data = mlb_K_rate - umpire_K_rate

In [18]:
def simulate_inning(model_data, n_simulations=10000):
    no_strikeout_counts = 0
    strikeout_counts = 0
    no_hit_counts = 0
    one_hit_counts = 0
    two_hit_counts = 0
    three_hit_counts = 0
    four_plus_hit_counts = 0
    
    # Define average values for each statistic
    average_k_rate = 0.227  # Example average K rate
    average_BB_rate = 0.086  # Example average BB rate
    average_1B_rate = 0.141  # Example average 1B rate
    average_2B_rate = 0.045  # Example average 2B rate
    average_3B_rate = 0.004  # Example average 3B rate
    average_HR_rate = 0.032  # Example average HR rate
    average_GB_rate = 0.425  # Example average GB rate
    
    for _ in range(n_simulations):
        
        outs = 0
        strikeouts = 0
        hits = 0
        
        runner_on_first = False
        runner_on_second = False
        runner_on_third = False
        batter_idx = 1  # Start with the second row (first batter)

        while outs < 3:
            pitcher_row = model_data.iloc[0]
            pitcher_handedness = pitcher_row['Handedness']
            pitcher_GB_rate = pitcher_row['GB%']
            batter_row = model_data.iloc[batter_idx]
            batter_handedness = batter_row['Handedness']
            batter_GB_rate = batter_row['GB%']

            # Determine column names based on handedness
            if batter_handedness == 'L':
                if pitcher_handedness == 'R':
                    batter_stats = {
                        'Year_K_rate': 'Year_K%_RHP',
                        'Year_OBP': 'Year_OBP_RHP',
                        'Year_BB_rate': 'Year_BB_Rate_RHP',
                        'Year_1B_rate': 'Year_1B_Rate_RHP',
                        'Year_2B_rate': 'Year_2B_Rate_RHP',
                        'Year_3B_rate': 'Year_3B_Rate_RHP',
                        'Year_HR_rate': 'Year_HR_Rate_RHP',
                        'Season_K_rate': 'Season_K%_RHP',
                        'Season_OBP': 'Season_OBP_RHP',
                        'Season_BB_rate': 'Season_BB_Rate_RHP',
                        'Season_1B_rate': 'Season_1B_Rate_RHP',
                        'Season_2B_rate': 'Season_2B_Rate_RHP',
                        'Season_3B_rate': 'Season_3B_Rate_RHP',
                        'Season_HR_rate': 'Season_HR_Rate_RHP'
                    }
                    pitcher_stats = {
                        'Year_K_rate': 'Year_K%_LHH',
                        'Year_OBP': 'Year_Opp_OBP_LHH',
                        'Year_BB_rate': 'Year_BB_Rate_LHH',
                        'Year_1B_rate': 'Year_Opp_1B_LHH',
                        'Year_2B_rate': 'Year_2B_Rate_LHH',
                        'Year_3B_rate': 'Year_3B_Rate_LHH',
                        'Year_HR_rate': 'Year_HR_Rate_LHH',
                        'Season_K_rate': 'Season_K%_LHH',
                        'Season_OBP': 'Season_Opp_OBP_LHH',
                        'Season_BB_rate': 'Season_BB_Rate_LHH',
                        'Season_1B_rate': 'Season_Opp_1B_LHH',
                        'Season_2B_rate': 'Season_2B_Rate_LHH',
                        'Season_3B_rate': 'Season_3B_Rate_LHH',
                        'Season_HR_rate': 'Season_HR_Rate_LHH'
                    }
                else:
                    batter_stats = {
                        'Year_K_rate': 'Year_K%_LHP',
                        'Year_OBP': 'Year_OBP_LHP',
                        'Year_BB_rate': 'Year_BB_Rate_LHP',
                        'Year_1B_rate': 'Year_1B_Rate_LHP',
                        'Year_2B_rate': 'Year_2B_Rate_LHP',
                        'Year_3B_rate': 'Year_3B_Rate_LHP',
                        'Year_HR_rate': 'Year_HR_Rate_LHP',
                        'Season_K_rate': 'Season_K%_LHP',
                        'Season_OBP': 'Season_OBP_LHP',
                        'Season_BB_rate': 'Season_BB_Rate_LHP',
                        'Season_1B_rate': 'Season_1B_Rate_LHP',
                        'Season_2B_rate': 'Season_2B_Rate_LHP',
                        'Season_3B_rate': 'Season_3B_Rate_LHP',
                        'Season_HR_rate': 'Season_HR_Rate_LHP'
                    }
                    pitcher_stats = {
                       'Year_K_rate': 'Year_K%_LHH',
                        'Year_OBP': 'Year_Opp_OBP_LHH',
                        'Year_BB_rate': 'Year_BB_Rate_LHH',
                        'Year_1B_rate': 'Year_Opp_1B_LHH',
                        'Year_2B_rate': 'Year_2B_Rate_LHH',
                        'Year_3B_rate': 'Year_3B_Rate_LHH',
                        'Year_HR_rate': 'Year_HR_Rate_LHH',
                        'Season_K_rate': 'Season_K%_LHH',
                        'Season_OBP': 'Season_Opp_OBP_LHH',
                        'Season_BB_rate': 'Season_BB_Rate_LHH',
                        'Season_1B_rate': 'Season_Opp_1B_LHH',
                        'Season_2B_rate': 'Season_2B_Rate_LHH',
                        'Season_3B_rate': 'Season_3B_Rate_LHH',
                        'Season_HR_rate': 'Season_HR_Rate_LHH'
                    }
            elif batter_handedness == 'R':
                if pitcher_handedness == 'R':
                    batter_stats = {
                        'Year_K_rate': 'Year_K%_RHP',
                        'Year_OBP': 'Year_OBP_RHP',
                        'Year_BB_rate': 'Year_BB_Rate_RHP',
                        'Year_1B_rate': 'Year_1B_Rate_RHP',
                        'Year_2B_rate': 'Year_2B_Rate_RHP',
                        'Year_3B_rate': 'Year_3B_Rate_RHP',
                        'Year_HR_rate': 'Year_HR_Rate_RHP',
                        'Season_K_rate': 'Season_K%_RHP',
                        'Season_OBP': 'Season_OBP_RHP',
                        'Season_BB_rate': 'Season_BB_Rate_RHP',
                        'Season_1B_rate': 'Season_1B_Rate_RHP',
                        'Season_2B_rate': 'Season_2B_Rate_RHP',
                        'Season_3B_rate': 'Season_3B_Rate_RHP',
                        'Season_HR_rate': 'Season_HR_Rate_RHP'
                    }
                    pitcher_stats = {
                        'Year_K_rate': 'Year_K%_RHH',
                        'Year_OBP': 'Year_Opp_OBP_RHH',
                        'Year_BB_rate': 'Year_BB_Rate_RHH',
                        'Year_1B_rate': 'Year_Opp_1B_RHH',
                        'Year_2B_rate': 'Year_2B_Rate_RHH',
                        'Year_3B_rate': 'Year_3B_Rate_RHH',
                        'Year_HR_rate': 'Year_HR_Rate_RHH',
                        'Season_K_rate': 'Season_K%_RHH',
                        'Season_OBP': 'Season_Opp_OBP_RHH',
                        'Season_BB_rate': 'Season_BB_Rate_RHH',
                        'Season_1B_rate': 'Season_Opp_1B_RHH',
                        'Season_2B_rate': 'Season_2B_Rate_RHH',
                        'Season_3B_rate': 'Season_3B_Rate_RHH',
                        'Season_HR_rate': 'Season_HR_Rate_RHH'
                    }
                else:
                    batter_stats = {
                        'Year_K_rate': 'Year_K%_LHP',
                        'Year_OBP': 'Year_OBP_LHP',
                        'Year_BB_rate': 'Year_BB_Rate_LHP',
                        'Year_1B_rate': 'Year_1B_Rate_LHP',
                        'Year_2B_rate': 'Year_2B_Rate_LHP',
                        'Year_3B_rate': 'Year_3B_Rate_LHP',
                        'Year_HR_rate': 'Year_HR_Rate_LHP',
                        'Season_K_rate': 'Season_K%_LHP',
                        'Season_OBP': 'Season_OBP_LHP',
                        'Season_BB_rate': 'Season_BB_Rate_LHP',
                        'Season_1B_rate': 'Season_1B_Rate_LHP',
                        'Season_2B_rate': 'Season_2B_Rate_LHP',
                        'Season_3B_rate': 'Season_3B_Rate_LHP',
                        'Season_HR_rate': 'Season_HR_Rate_LHP'
                    }
                    pitcher_stats = {
                        'Year_K_rate': 'Year_K%_RHH',
                        'Year_OBP': 'Year_Opp_OBP_RHH',
                        'Year_BB_rate': 'Year_BB_Rate_RHH',
                        'Year_1B_rate': 'Year_Opp_1B_RHH',
                        'Year_2B_rate': 'Year_2B_Rate_RHH',
                        'Year_3B_rate': 'Year_3B_Rate_RHH',
                        'Year_HR_rate': 'Year_HR_Rate_RHH',
                        'Season_K_rate': 'Season_K%_RHH',
                        'Season_OBP': 'Season_Opp_OBP_RHH',
                        'Season_BB_rate': 'Season_BB_Rate_RHH',
                        'Season_1B_rate': 'Season_Opp_1B_RHH',
                        'Season_2B_rate': 'Season_2B_Rate_RHH',
                        'Season_3B_rate': 'Season_3B_Rate_RHH',
                        'Season_HR_rate': 'Season_HR_Rate_RHH'
                    }
            else:
                if pitcher_handedness == 'R':
                    batter_stats = {
                        'Year_K_rate': 'Year_K%_RHP',
                        'Year_OBP': 'Year_OBP_RHP',
                        'Year_BB_rate': 'Year_BB_Rate_RHP',
                        'Year_1B_rate': 'Year_1B_Rate_RHP',
                        'Year_2B_rate': 'Year_2B_Rate_RHP',
                        'Year_3B_rate': 'Year_3B_Rate_RHP',
                        'Year_HR_rate': 'Year_HR_Rate_RHP',
                        'Season_K_rate': 'Season_K%_RHP',
                        'Season_OBP': 'Season_OBP_RHP',
                        'Season_BB_rate': 'Season_BB_Rate_RHP',
                        'Season_1B_rate': 'Season_1B_Rate_RHP',
                        'Season_2B_rate': 'Season_2B_Rate_RHP',
                        'Season_3B_rate': 'Season_3B_Rate_RHP',
                        'Season_HR_rate': 'Season_HR_Rate_RHP'
                    }
                    pitcher_stats = {
                        'Year_K_rate': 'Year_K%_LHH',
                        'Year_OBP': 'Year_Opp_OBP_LHH',
                        'Year_BB_rate': 'Year_BB_Rate_LHH',
                        'Year_1B_rate': 'Year_Opp_1B_LHH',
                        'Year_2B_rate': 'Year_2B_Rate_LHH',
                        'Year_3B_rate': 'Year_3B_Rate_LHH',
                        'Year_HR_rate': 'Year_HR_Rate_LHH',
                        'Season_K_rate': 'Season_K%_LHH',
                        'Season_OBP': 'Season_Opp_OBP_LHH',
                        'Season_BB_rate': 'Season_BB_Rate_LHH',
                        'Season_1B_rate': 'Season_Opp_1B_LHH',
                        'Season_2B_rate': 'Season_2B_Rate_LHH',
                        'Season_3B_rate': 'Season_3B_Rate_LHH',
                        'Season_HR_rate': 'Season_HR_Rate_LHH'
                    }
                else:
                    batter_stats = {
                        'Year_K_rate': 'Year_K%_LHP',
                        'Year_OBP': 'Year_OBP_LHP',
                        'Year_BB_rate': 'Year_BB_Rate_LHP',
                        'Year_1B_rate': 'Year_1B_Rate_LHP',
                        'Year_2B_rate': 'Year_2B_Rate_LHP',
                        'Year_3B_rate': 'Year_3B_Rate_LHP',
                        'Year_HR_rate': 'Year_HR_Rate_LHP',
                        'Season_K_rate': 'Season_K%_LHP',
                        'Season_OBP': 'Season_OBP_LHP',
                        'Season_BB_rate': 'Season_BB_Rate_LHP',
                        'Season_1B_rate': 'Season_1B_Rate_LHP',
                        'Season_2B_rate': 'Season_2B_Rate_LHP',
                        'Season_3B_rate': 'Season_3B_Rate_LHP',
                        'Season_HR_rate': 'Season_HR_Rate_LHP'
                    }
                    pitcher_stats = {
                        'Year_K_rate': 'Year_K%_RHH',
                        'Year_OBP': 'Year_Opp_OBP_RHH',
                        'Year_BB_rate': 'Year_BB_Rate_RHH',
                        'Year_1B_rate': 'Year_Opp_1B_RHH',
                        'Year_2B_rate': 'Year_2B_Rate_RHH',
                        'Year_3B_rate': 'Year_3B_Rate_RHH',
                        'Year_HR_rate': 'Year_HR_Rate_RHH',
                        'Season_K_rate': 'Season_K%_RHH',
                        'Season_OBP': 'Season_Opp_OBP_RHH',
                        'Season_BB_rate': 'Season_BB_Rate_RHH',
                        'Season_1B_rate': 'Season_Opp_1B_RHH',
                        'Season_2B_rate': 'Season_2B_Rate_RHH',
                        'Season_3B_rate': 'Season_3B_Rate_RHH',
                        'Season_HR_rate': 'Season_HR_Rate_RHH'
                    }
            
            # Define a function to calculate combined stats with weights
            def get_weighted_stat(year_stat, season_stat, year_weight=0.4, season_weight=0.6):
                return (year_stat * year_weight) + (season_stat * season_weight)
            
            # Define a function to combine batter and pitcher stats with equal weights
            def get_combined_stat(batter_stat, pitcher_stat, average_stat, batter_weight=0.5, pitcher_weight=0.5):
                if np.isnan(batter_stat):
                    batter_stat = average_stat  # Or handle appropriately
                if np.isnan(pitcher_stat):
                    pitcher_stat = average_stat
                return (batter_stat * batter_weight) + (pitcher_stat * pitcher_weight)
            
            combined_batter_k_rate = get_weighted_stat(batter_row[batter_stats['Year_K_rate']], batter_row[batter_stats['Season_K_rate']])
            combined_pitcher_k_rate = get_weighted_stat(pitcher_row[pitcher_stats['Year_K_rate']], pitcher_row[pitcher_stats['Season_K_rate']])
            combined_batter_BB_rate = get_weighted_stat(batter_row[batter_stats['Year_BB_rate']], batter_row[batter_stats['Season_BB_rate']])
            combined_pitcher_BB_rate = get_weighted_stat(pitcher_row[pitcher_stats['Year_BB_rate']], pitcher_row[pitcher_stats['Season_BB_rate']])
            combined_batter_1B_rate = get_weighted_stat(batter_row[batter_stats['Year_1B_rate']], batter_row[batter_stats['Season_1B_rate']])
            combined_pitcher_1B_rate = get_weighted_stat(pitcher_row[pitcher_stats['Year_1B_rate']], pitcher_row[pitcher_stats['Season_1B_rate']])
            combined_batter_2B_rate = get_weighted_stat(batter_row[batter_stats['Year_2B_rate']], batter_row[batter_stats['Season_2B_rate']])
            combined_pitcher_2B_rate = get_weighted_stat(pitcher_row[pitcher_stats['Year_2B_rate']], pitcher_row[pitcher_stats['Season_2B_rate']])
            combined_batter_3B_rate = get_weighted_stat(batter_row[batter_stats['Year_3B_rate']], batter_row[batter_stats['Season_3B_rate']])
            combined_pitcher_3B_rate = get_weighted_stat(pitcher_row[pitcher_stats['Year_3B_rate']], pitcher_row[pitcher_stats['Season_3B_rate']])
            combined_batter_HR_rate = get_weighted_stat(batter_row[batter_stats['Year_HR_rate']], batter_row[batter_stats['Season_HR_rate']])
            combined_pitcher_HR_rate = get_weighted_stat(pitcher_row[pitcher_stats['Year_HR_rate']], pitcher_row[pitcher_stats['Season_HR_rate']])
        
            combined_k_rate = get_combined_stat(combined_batter_k_rate, combined_pitcher_k_rate, average_k_rate)
            combined_BB_rate = get_combined_stat(combined_batter_BB_rate, combined_pitcher_BB_rate, average_BB_rate)
            combined_1B_rate = get_combined_stat(combined_batter_1B_rate, combined_pitcher_1B_rate, average_1B_rate)
            combined_2B_rate = get_combined_stat(combined_batter_2B_rate, combined_pitcher_2B_rate, average_2B_rate)
            combined_3B_rate = get_combined_stat(combined_batter_3B_rate, combined_pitcher_3B_rate, average_3B_rate)
            combined_HR_rate = get_combined_stat(combined_batter_HR_rate, combined_pitcher_HR_rate, average_HR_rate)
            combined_in_play_rate = 1 - (combined_k_rate + combined_BB_rate)
            
            total_rate = (combined_k_rate + combined_BB_rate + combined_in_play_rate)
            
            combined_k_rate /= total_rate
            combined_BB_rate /= total_rate
            combined_1B_rate /= total_rate
            combined_2B_rate /= total_rate
            combined_3B_rate /= total_rate
            combined_HR_rate /= total_rate
            combined_in_play_rate /= total_rate
            
            # Generate a random number to determine the outcome
            outcome = np.random.rand()
            
            if outcome < combined_k_rate:
                strikeouts += 1
                outs += 1
                
            elif outcome < combined_k_rate + combined_BB_rate:
                if runner_on_first and runner_on_second and runner_on_third:
                    runner_on_first = True
                    runner_on_second = True
                    runner_on_third = True
                elif runner_on_first and runner_on_second:
                    runner_on_first = True
                    runner_on_second = True
                    runner_on_third = True
                elif runner_on_first:
                    runner_on_first = True
                    runner_on_second = True
                    runner_on_third = False
                else:
                    runner_on_first = True
                    runner_on_second = False
                    runner_on_third = False
            
            else:
                # Calculate the range of probabilities for hits
                in_play_outcome = (outcome - combined_k_rate - combined_BB_rate) / combined_in_play_rate

                if in_play_outcome < combined_1B_rate / combined_in_play_rate:
                    hits += 1
                    if runner_on_first and runner_on_second and runner_on_third:
                        runner_on_first = True
                        runner_on_second = True
                        runner_on_third = True
                    elif runner_on_first and runner_on_second:
                        runner_on_first = True
                        runner_on_second = True
                        runner_on_third = True
                    elif runner_on_first:
                        runner_on_first = True
                        runner_on_second = True
                        runner_on_third = False
                    else:
                        runner_on_first = True
                        runner_on_second = False
                        runner_on_third = False
                elif in_play_outcome < (combined_1B_rate + combined_2B_rate) / combined_in_play_rate:
                    hits += 1
                    if runner_on_first and runner_on_second and runner_on_third:
                        runner_on_first = False
                        runner_on_second = True
                        runner_on_third = True
                    elif runner_on_first and runner_on_second:
                        runner_on_first = False
                        runner_on_second = True
                        runner_on_third = True
                    elif runner_on_first:
                        runner_on_first = False
                        runner_on_second = True
                        runner_on_third = True
                    else:
                        runner_on_first = False
                        runner_on_second = True
                        runner_on_third = False
                elif in_play_outcome < (combined_1B_rate + combined_2B_rate + combined_3B_rate) / combined_in_play_rate:
                    hits += 1
                    runner_on_first = False
                    runner_on_second = False
                    runner_on_third = True
                elif in_play_outcome < (combined_1B_rate + combined_2B_rate + combined_3B_rate + combined_HR_rate) / combined_in_play_rate:
                    hits += 1
                    runner_on_first = False
                    runner_on_second = False
                    runner_on_third = False
                else:
                    if runner_on_first and outs < 2:
                        if np.random.rand() < 0.064:
                            outs += 2
                            runner_on_first = False
                            runner_on_second = False
                            runner_on_third = False
                        else:
                            outs += 1
                            if runner_on_first and runner_on_second and runner_on_third:
                                runner_on_first = True
                                runner_on_second = True
                                runner_on_third = True
                            elif runner_on_first and runner_on_second:
                                runner_on_first = True
                                runner_on_second = True
                                runner_on_third = False
                            elif runner_on_first:
                                runner_on_first = True
                                runner_on_second = False
                                runner_on_third = False
                            else:
                                runner_on_first = False
                                runner_on_second = False
                                runner_on_third = False
                    else:
                        outs += 1
                    
            batter_idx = (batter_idx + 1) % 9
        
        # Add 1 to "no strikeout" count if applicable
        if strikeouts == 0:
            no_strikeout_counts += 1
        else:
            strikeout_counts += 1
        if hits == 0:
            no_hit_counts += 1
        elif hits == 1:
            one_hit_counts += 1
        elif hits == 2:
            two_hit_counts += 1
        elif hits == 3:
            three_hit_counts += 1
        else:
            four_plus_hit_counts +=1
    
    # Return rate of "no strikeout" occurrences in 10,000 sims
    probability_no_strikeouts = no_strikeout_counts / n_simulations
    probability_no_hits = no_hit_counts / n_simulations
    probability_one_hit = one_hit_counts / n_simulations
    probability_two_hits = two_hit_counts / n_simulations
    probability_three_hits = three_hit_counts / n_simulations
    
    return probability_no_strikeouts, probability_no_hits, probability_one_hit, probability_two_hits, probability_three_hits

# Run the simulation and display estimated probability of no strikeouts occurring in decimal form

probability_no_strikeouts, probability_no_hits, probability_one_hit, probability_two_hits, probability_three_hits = simulate_inning(model_data)
print(f"Estimated Probability of Zero Strikeouts: {probability_no_strikeouts:.4f}")
print(f"Estimated Probability of Zero Hits: {probability_no_hits:.4f}")
print(f"Estimated Probability of One Hit: {probability_one_hit:.4f}")
print(f"Estimated Probability of Two Hits: {probability_two_hits:.4f}")
print(f"Estimated Probability of Three Hits: {probability_three_hits:.4f}")

Estimated Probability of Zero Strikeouts: 0.3286
Estimated Probability of Zero Hits: 0.4279
Estimated Probability of One Hit: 0.3194
Estimated Probability of Two Hits: 0.1511
Estimated Probability of Three Hits: 0.0655
