# B02. Simulations
- Simulate matchups

In [2]:
%run "C:\Users\james\Documents\MLB\Code\U1. Imports.ipynb"
%run "C:\Users\james\Documents\MLB\Code\U2. Utilities.ipynb"
%run "C:\Users\james\Documents\MLB\Code\U3. Classes.ipynb"


# Root folder of the local MLB database (absolute, machine-specific Windows path).
baseball_path = r'C:\Users\james\Documents\MLB\Database'

### Scale Inputs

In [2]:
def _scale_split_columns(df, split_columns, model_columns, scaler):
    """Standardize one group of split-specific columns of `df` in place.

    The split columns are temporarily renamed to `model_columns` (presumably
    the names the scaler was fit on -- confirm against the training code),
    transformed, and then renamed back.
    """
    df.rename(columns=dict(zip(split_columns, model_columns)), inplace=True)
    df[model_columns] = scaler.transform(df[model_columns])
    df.rename(columns=dict(zip(model_columns, split_columns)), inplace=True)


def _fill_from_projected_lineup(batter_df, projected_lineups):
    """Overwrite `batting_order` with the projected spot looked up by player id."""
    lineup_by_id = projected_lineups[[' mlb id', ' batting order']].set_index(' mlb id')[' batting order'].to_dict()

    def projected_spot(row):
        # Players absent from the projection get no batting order.
        if row['id'] in lineup_by_id:
            return int(lineup_by_id[row['id']])
        return None

    batter_df['batting_order'] = batter_df.apply(projected_spot, axis=1)


def _impute_batting_order(batter_df, side):
    """Give every unfilled lineup spot (1-9) to a batter without a batting order.

    Batters are considered in descending `pa_b_r` order and receive the open
    spots from lowest to highest, so the result is deterministic.
    """
    open_spots = sorted(set(range(1, 10)) - set(batter_df['batting_order'].dropna().unique()))
    if not open_spots:
        return

    print(f"Spots in {side} batting order are imputed.")
    batter_df.sort_values(by='pa_b_r', ascending=False, inplace=True)

    # zip stops as soon as either the open spots or the unassigned batters run out
    unassigned = batter_df.index[batter_df['batting_order'].isna()]
    for index, spot in zip(unassigned, open_spots):
        batter_df.at[index, 'batting_order'] = spot


def clean_matchups(away_batter_df, away_pitcher_df, home_batter_df, home_pitcher_df, batter_stats_scaler, batter_stats_fg_scaler, pitcher_stats_scaler, pitcher_stats_fg_scaler, projected_lineups, event_share=False):
    """Clean the four roster frames for one game and standardize their stats.

    Steps, in order:
      1. Flag confirmed players (batters with a batting order, pitchers with
         Leverage == 1).
      2. Force the pitcher at index label 0 to be the starter when none is
         marked, and default a zero `IP_start` to 5.
      3. Blank out duplicated batting-order values, fall back to the projected
         lineups when a side has no batting order at all, and impute any spot
         1-9 that is still open.
      4. Drop pitchers without Leverage and batters without a batting order.
      5. Optionally convert stats with `pa_share` (when `event_share` is true).
      6. Standardize the _l / _r split stats and the FanGraphs-style (`*_fg`)
         stats with the supplied scalers.

    The input frames are modified in place. Note: the projected-lineup fallback
    does not account for double headers.

    Parameters
    ----------
    away_batter_df, away_pitcher_df, home_batter_df, home_pitcher_df : DataFrame
        Roster frames for the game.
    batter_stats_scaler, batter_stats_fg_scaler, pitcher_stats_scaler, pitcher_stats_fg_scaler
        Fitted objects exposing `.transform`.
    projected_lineups : DataFrame or None
        Frame with ' mlb id' and ' batting order' columns, or None to disable
        the fallback.
    event_share : bool
        When true, apply `pa_share` to every split before scaling.

    Returns
    -------
    tuple
        (away_batter_df, away_pitcher_df, home_batter_df, home_pitcher_df)
    """
    batter_frames = (("away", away_batter_df), ("home", home_batter_df))
    pitcher_frames = (("away", away_pitcher_df), ("home", home_pitcher_df))

    # Confirmed starters
    for _, batter_df in batter_frames:
        batter_df['confirmed'] = batter_df['batting_order'].notnull()
    for _, pitcher_df in pitcher_frames:
        pitcher_df['confirmed'] = (pitcher_df['Leverage'] == 1).astype(int)

    ### Pitchers
    # Assign starter if missing
    for side, pitcher_df in pitcher_frames:
        if pitcher_df.loc[0, 'Leverage'] != 1:
            print(f"No {side} starter")
            pitcher_df.loc[0, 'Leverage'] = 1

    # Assign IP_start if missing
    for _, pitcher_df in pitcher_frames:
        pitcher_df['IP_start'] = np.where(pitcher_df['IP_start'] == 0, 5, pitcher_df['IP_start'])

    ### Batters
    # Drop duplicated batting order values (every member of a duplicated group is blanked)
    for _, batter_df in batter_frames:
        is_duplicated = batter_df.duplicated(subset='batting_order', keep=False)
        batter_df['batting_order'] = np.where(is_duplicated, np.nan, batter_df['batting_order'])

    # Fill in batting orders from the projection if all are missing
    # Note: this doesn't currently account for double headers
    for side, batter_df in batter_frames:
        if batter_df['batting_order'].sum() == 0 and projected_lineups is not None:
            print(f"{side.capitalize()} batters use Baseball Monster projected lineups")
            _fill_from_projected_lineup(batter_df, projected_lineups)

    # Ensure every batting order spot exists
    for side, batter_df in batter_frames:
        _impute_batting_order(batter_df, side)

    # Drop observations for players not in game/in depth chart
    away_pitcher_df.dropna(subset=['Leverage'], inplace=True)
    home_pitcher_df.dropna(subset=['Leverage'], inplace=True)
    away_batter_df.dropna(subset=['batting_order'], inplace=True)
    home_batter_df.dropna(subset=['batting_order'], inplace=True)

    if event_share:
        ### Convert to share of outs/safe (so = so/outs instead of so/pa)
        for window in ("", "_long"):
            for hand in ("_l", "_r"):
                away_batter_df = pa_share(away_batter_df, "_b", window, hand)
                home_batter_df = pa_share(home_batter_df, "_b", window, hand)
                away_pitcher_df = pa_share(away_pitcher_df, "_p", window, hand)
                home_pitcher_df = pa_share(home_pitcher_df, "_p", window, hand)

    ### Standardize stats
    for batter_df in (away_batter_df, home_batter_df):
        _scale_split_columns(batter_df, batter_stats_l, batter_inputs, batter_stats_scaler)
        _scale_split_columns(batter_df, batter_stats_r, batter_inputs, batter_stats_scaler)
        batter_df[batter_stats_fg] = batter_stats_fg_scaler.transform(batter_df[batter_stats_fg])

    for pitcher_df in (away_pitcher_df, home_pitcher_df):
        _scale_split_columns(pitcher_df, pitcher_stats_l, pitcher_inputs, pitcher_stats_scaler)
        _scale_split_columns(pitcher_df, pitcher_stats_r, pitcher_inputs, pitcher_stats_scaler)
        pitcher_df[pitcher_stats_fg] = pitcher_stats_fg_scaler.transform(pitcher_df[pitcher_stats_fg])

    return away_batter_df, away_pitcher_df, home_batter_df, home_pitcher_df

### Impute Inputs

##### Option 1: Steamer

In [3]:
def impute_batters(batter_df, batter_imputations_model):
    """Impute split stats for batters flagged as needing imputation.

    For each opposing pitcher hand, the `b_L` / `p_L` dummies are set on the
    frame, a missing imputation flag is treated as "impute" (1), and the rows
    with flag == 1 get their split stats replaced by the model's prediction
    from the `batter_stats_fg_imp` columns. Any value still missing afterwards
    is filled with 0.

    The frame is modified in place and also returned. On return, `b_L` and
    `p_L` hold the values written for the vs. LHP pass.

    Parameters
    ----------
    batter_df : DataFrame
        Needs `batSide`, `imp_b_r`, `imp_b_l`, and the stat columns.
    batter_imputations_model
        Fitted object exposing `.predict`.
    """
    # (opposing pitcher hand, batter-is-lefty mask, p_L dummy, flag column, stat columns)
    # Vs. RHP the lefty dummy includes switch hitters; vs. LHP it does not.
    splits = (
        ("RHP", batter_df['batSide'] != 'Right', 0, 'imp_b_r', batter_stats_r),
        ("LHP", batter_df['batSide'] == 'Left', 1, 'imp_b_l', batter_stats_l),
    )

    for pitcher_hand, bats_left, pitcher_is_lefty, flag_column, stat_columns in splits:
        batter_df['b_L'] = bats_left.astype('int')
        batter_df['p_L'] = pitcher_is_lefty

        # If the flag is missing, impute. Assign back instead of using inplace on a
        # column selection, which does not reliably write through to the frame.
        batter_df[flag_column] = batter_df[flag_column].fillna(1)
        needs_imputation = batter_df[flag_column] == 1
        if not needs_imputation.any():
            # Nothing to impute for this split
            continue

        try:
            prediction = batter_imputations_model.predict(batter_df.loc[needs_imputation, batter_stats_fg_imp])
        except Exception as exc:
            # Best effort: report the failure and leave these stats missing (filled with 0 below)
            print(f"Batter imputation vs {pitcher_hand} failed: {exc}")
            prediction = None

        # Replace the flagged rows' batter stats with the predicted values
        batter_df.loc[needs_imputation, stat_columns] = prediction

    # Fill in missings
    batter_df.fillna(0, inplace=True)

    return batter_df

In [4]:
def impute_pitchers(pitcher_df, pitcher_imputations_model):
    """Impute split stats for pitchers flagged as needing imputation.

    For each opposing batter hand, the `p_L` / `b_L` dummies are set on the
    frame, a missing imputation flag is treated as "impute" (1), and the rows
    with flag == 1 get their split stats replaced by the model's prediction
    from the `pitcher_stats_fg_imp` columns. Any value still missing afterwards
    is filled with 0.

    The frame is modified in place and also returned. On return, `b_L` holds
    the value written for the vs. LHB pass.

    Parameters
    ----------
    pitcher_df : DataFrame
        Needs `pitchHand`, `imp_p_r`, `imp_p_l`, and the stat columns.
    pitcher_imputations_model
        Fitted object exposing `.predict`.
    """
    # (opposing batter hand, b_L dummy, flag column, stat columns)
    splits = (
        ("RHB", 0, 'imp_p_r', pitcher_stats_r),
        ("LHB", 1, 'imp_p_l', pitcher_stats_l),
    )

    for batter_hand, batter_is_lefty, flag_column, stat_columns in splits:
        # Create is lefty dummy
        pitcher_df['p_L'] = (pitcher_df['pitchHand'] == 'Left').astype('int')
        pitcher_df['b_L'] = batter_is_lefty

        # If the flag is missing, impute. Assign back instead of using inplace on a
        # column selection, which does not reliably write through to the frame.
        pitcher_df[flag_column] = pitcher_df[flag_column].fillna(1)
        needs_imputation = pitcher_df[flag_column] == 1
        if not needs_imputation.any():
            # Nothing to impute for this split
            continue

        try:
            prediction = pitcher_imputations_model.predict(pitcher_df.loc[needs_imputation, pitcher_stats_fg_imp])
        except Exception as exc:
            # Best effort: report the failure and leave these stats missing (filled with 0 below)
            print(f"Pitcher imputation vs {batter_hand} failed: {exc}")
            prediction = None

        # Replace the flagged rows' pitcher stats with the predicted values
        pitcher_df.loc[needs_imputation, stat_columns] = prediction

    # Fill in missings
    pitcher_df.fillna(0, inplace=True)

    return pitcher_df

##### Option 2: 0s

In [5]:
# # This doesn't actually use the model anymore
# def impute_batters(batter_df, batter_imputations_model):
#     # Fill in missings
#     batter_df[batter_stats_l].fillna(0, inplace=True)
#     batter_df[batter_stats_r].fillna(0, inplace=True)
#     batter_df[['pa_b_l', 'pa_b_r']].fillna(0, inplace=True)
    
#     # Take weighted average of existing values and 0 
#     # This can be simplified but I want to spell it out for clarity
#     for col in batter_stats_l:
#         batter_df[col] = (batter_df[col] * batter_df['pa_b_l'] + 0 * (50-batter_df['pa_b_l']))/50
#     for col in batter_stats_r:
#         batter_df[col] = (batter_df[col] * batter_df['pa_b_r'] + 0 * (50-batter_df['pa_b_r']))/50

#     batter_df.fillna(0, inplace=True)
        
#     return batter_df

In [6]:
# # This doesn't actually use the model anymore
# def impute_pitchers(pitcher_df, pitcher_imputations_model):
#     # Fill in missings
#     pitcher_df[pitcher_stats_l].fillna(0, inplace=True)
#     pitcher_df[pitcher_stats_r].fillna(0, inplace=True)
#     pitcher_df[['pa_p_l', 'pa_p_r']].fillna(0, inplace=True)
    
#     # Take weighted average of existing values and 0 
#     # This can be simplified but I want to spell it out for clarity
#     for col in pitcher_stats_l:
#         pitcher_df[col] = (pitcher_df[col] * pitcher_df['pa_p_l'] + 0 * (50-pitcher_df['pa_p_l']))/50
#     for col in pitcher_stats_r:
#         pitcher_df[col] = (pitcher_df[col] * pitcher_df['pa_p_r'] + 0 * (50-pitcher_df['pa_p_r']))/50

#     pitcher_df.fillna(0, inplace=True)
        
#     return pitcher_df

### Create Matchup Objects

In [7]:
# Create team-position objects
def create_matchup(matchup_path, batter_stats_scaler, batter_stats_fg_scaler, batter_imputations_model, pitcher_stats_scaler, pitcher_stats_fg_scaler, pitcher_imputations_model, projected_lineups):
    """Build the Batter and Pitcher objects for one matchup workbook.

    Reads the AwayBatters / HomeBatters / AwayPitchers / HomePitchers sheets of
    `matchup_path`, cleans and scales them with `clean_matchups`
    (event_share=False), imputes stats with `impute_batters` and
    `impute_pitchers`, and turns every remaining row into a player object.

    Each object is built from the columns present in the *away* sheet of its
    position group (captured before cleaning) plus the 'confirmed' column that
    `clean_matchups` adds.

    Returns
    -------
    tuple of lists
        (AwayBatters, HomeBatters, AwayPitchers, HomePitchers)
    """
    ### Read in data
    # Passing a list of sheet names opens and parses the workbook once instead of once per sheet
    sheets = pd.read_excel(
        matchup_path,
        sheet_name=['AwayBatters', 'HomeBatters', 'AwayPitchers', 'HomePitchers'],
        engine='openpyxl',
    )
    away_batter_df = sheets['AwayBatters']
    home_batter_df = sheets['HomeBatters']
    away_pitcher_df = sheets['AwayPitchers']
    home_pitcher_df = sheets['HomePitchers']

    # Column names (captured before cleaning adds or changes columns)
    batter_columns = away_batter_df.columns.tolist()
    pitcher_columns = away_pitcher_df.columns.tolist()

    ### Scale inputs
    away_batter_df, away_pitcher_df, home_batter_df, home_pitcher_df = clean_matchups(away_batter_df, away_pitcher_df, home_batter_df, home_pitcher_df, batter_stats_scaler, batter_stats_fg_scaler, pitcher_stats_scaler, pitcher_stats_fg_scaler, projected_lineups, event_share=False)

    ### Impute inputs
    away_batter_df = impute_batters(away_batter_df, batter_imputations_model)
    home_batter_df = impute_batters(home_batter_df, batter_imputations_model)
    away_pitcher_df = impute_pitchers(away_pitcher_df, pitcher_imputations_model)
    home_pitcher_df = impute_pitchers(home_pitcher_df, pitcher_imputations_model)

    ### Create player objects
    def build_players(player_df, player_class, columns):
        # One object per row, passing each listed column as a keyword argument
        attributes = columns + ['confirmed']
        return [
            player_class(**{attr: row[attr] for attr in attributes})
            for _, row in player_df.iterrows()
        ]

    # Lists of player objects, by Away/Home status and position group
    AwayBatters = build_players(away_batter_df, Batter, batter_columns)
    HomeBatters = build_players(home_batter_df, Batter, batter_columns)
    AwayPitchers = build_players(away_pitcher_df, Pitcher, pitcher_columns)
    HomePitchers = build_players(home_pitcher_df, Pitcher, pitcher_columns)

    return AwayBatters, HomeBatters, AwayPitchers, HomePitchers

### Calculate Pull Odds

In [8]:
# Calculate odds of being pulled
def pull_odds(game, model_pulls, year):
    """Return the model's probability that the current pitcher is pulled.

    Side effect: sets the dummy attributes year_2015 ... year_2024 on `game`
    (1 for the season equal to int(year), 0 otherwise). The year features fed
    to the model are then read from `game` using the names in `year_inputs`.

    Parameters
    ----------
    game
        Game state; `game.pitching` is the pitcher being evaluated.
    model_pulls
        Fitted classifier exposing `.predict_proba`; column 1 is used as the
        probability of a pull.
    year
        Season, convertible with int().
    """
    # Scores from the point of view of the batting and the fielding team
    away_is_batting = game.top_bot == "Top"
    batter_score = game.away_score if away_is_batting else game.home_score
    pitcher_score = game.home_score if away_is_batting else game.away_score

    # Year dummies
    season = int(year)
    for candidate in range(2015, 2025):
        setattr(game, f'year_{candidate}', int(candidate == season))

    year_inputs_pull = [getattr(game, dummy_name) for dummy_name in year_inputs]

    print(year_inputs_pull)

    # Pull model inputs: pitcher line so far, score, game state, then year dummies
    pitcher = game.pitching
    pitcher_line = [pitcher.B1, pitcher.B2, pitcher.B3, pitcher.HR, pitcher.BB, pitcher.HBP, pitcher.SO,
                    pitcher.faced, pitcher.OUT]
    game_state = [pitcher_score, batter_score, game.inning, game.outs, game.faced_inning, game.br_inning,
                  game.onFirst, game.onSecond, game.onThird, pitcher.IP_start]
    features = pitcher_line + game_state + year_inputs_pull

    # Reshape to the single-row 2D layout the model expects
    model_inputs = pd.Series(features).values.reshape(1, -1)

    # First (only) row, second column: probability that pull = 1
    return model_pulls.predict_proba(model_inputs).tolist()[0][1]

### Determine PA Matchup

In [9]:
def _pick_starter(pitchers):
    """Return a random pitcher with Leverage == 1, or None when the staff is empty."""
    return random.choice([pitcher for pitcher in pitchers if pitcher.Leverage == 1]) if pitchers else None


def _starter_gets_pulled(game, model_pulls, year, starter, opener_list):
    """Roll whether the current starter is pulled before this plate appearance.

    `game.pitching` must already point at the starter because `pull_odds`
    reads it. Openers are always pulled from the third inning on.
    """
    if year is None:
        # Backward compatibility: older callers relied on a module-level `year`.
        try:
            year = globals()['year']
        except KeyError:
            raise NameError("name 'year' is not defined") from None

    pull_prob = pull_odds(game, model_pulls, year)
    print("Pull%: ", round(pull_prob, 3))
    pulled = random.random() < pull_prob

    # If they're an opener and it's the third inning or later, pull them
    if game.inning >= 3 and starter.Name in opener_list:
        pulled = True

    return pulled


def _relief_leverage(game, fielding_lead):
    """Return the bullpen leverage tier: 2 (low), 3 (late innings) or 4 (save situation)."""
    leverage = 2
    # Medium leverage if it's late
    if game.inning > game.innings - 3:
        leverage = 3
    # High leverage in a save situation: tied or leading by 1-3 in the final scheduled inning
    if 0 <= fielding_lead < 4 and game.inning == game.innings:
        leverage = 4
    return leverage


def _pick_reliever(pitchers, leverage):
    """Pick a reliever at `leverage`, weighted by relief_IP.

    Falls back to low-leverage (2) arms when nobody matches, and to a uniform
    pick when all weights are zero (random.choices rejects a zero total).
    Returns None when no pitcher is eligible.
    """
    eligible = [pitcher for pitcher in pitchers if pitcher.Leverage == leverage]
    if not eligible:
        # Assume low leverage
        eligible = [pitcher for pitcher in pitchers if pitcher.Leverage == 2]
    if not eligible:
        return None

    weights = [pitcher.relief_IP for pitcher in eligible]
    if sum(weights) <= 0:
        return random.choice(eligible)
    return random.choices(eligible, weights=weights)[0]


def choose_pa_matchup(game, model_pulls, opener_list, year=None):
    """Set the pitcher and batter for the next plate appearance on `game`.

    While the fielding team's starter is still in, the starter pitches and a
    pull is rolled with `pull_odds`; once pulled, a reliever is drawn for the
    current leverage tier on every call. The batter is the one whose
    batting_order equals the batting team's current order.

    Updates `game.pitching`, `game.ab`, `game.ab.pitcher` and the fielding
    side's `*_pitcher_up`, `*_starter_pulled` and `*_leverage` attributes.
    If `game.top_bot` is neither "Top" nor "Bot", nothing is changed.

    Parameters
    ----------
    game
        Game state object.
    model_pulls
        Classifier passed through to `pull_odds`.
    opener_list
        Names of pitchers treated as openers.
    year
        Season passed to `pull_odds`. When None, the module-level `year`
        variable is used, as before this parameter existed.

    Returns
    -------
    The same `game` object.
    """
    ### Top of the inning: home team pitches, away team bats
    if game.top_bot == "Top":
        print("Home Starter Pulled:", game.home_starter_pulled)
        # If the starter is still in the game
        if not game.home_starter_pulled:
            game.home_pitcher_up = _pick_starter(game.home_pitchers)
            game.pitching = game.home_pitcher_up
            print(game.pitching.Name)

            if _starter_gets_pulled(game, model_pulls, year, game.home_starter, opener_list):
                game.home_starter_pulled = True

        # If the home starter has been pulled
        if game.home_starter_pulled:
            game.home_leverage = _relief_leverage(game, game.home_score - game.away_score)
            game.home_pitcher_up = _pick_reliever(game.home_pitchers, game.home_leverage)

        # Determine matchup
        game.pitching = game.home_pitcher_up
        game.ab = next(batter for batter in game.away_batters if batter.batting_order == game.away_order)
        # Assign pitcher
        game.ab.pitcher = game.pitching

    ### Bottom of the inning: away team pitches, home team bats
    elif game.top_bot == "Bot":
        # If the starter is still in the game
        if not game.away_starter_pulled:
            game.away_pitcher_up = _pick_starter(game.away_pitchers)
            game.pitching = game.away_pitcher_up

            if _starter_gets_pulled(game, model_pulls, year, game.away_starter, opener_list):
                game.away_starter_pulled = True

        # If the away starter has been pulled
        if game.away_starter_pulled:
            game.away_leverage = _relief_leverage(game, game.away_score - game.home_score)
            game.away_pitcher_up = _pick_reliever(game.away_pitchers, game.away_leverage)

        # Determine matchup
        game.pitching = game.away_pitcher_up
        game.ab = next(batter for batter in game.home_batters if batter.batting_order == game.home_order)
        # Assign pitcher
        game.ab.pitcher = game.pitching

    return game

### Probabilities

In [None]:
### Testing

In [None]:
# Testing only: load the table that adjust_out_df reads
# (columns is_out_pred_max, is_out_mean, is_out_pred_mean).
# NOTE(review): model_path is not defined in this section; pickle files should
# only be loaded from trusted locations.
out_df = pd.read_pickle(os.path.join(model_path, 'out_df.pkl'))

In [1]:
def adjust_out_df(out_df, value):
    """Rescale a predicted out probability using the first matching row of `out_df`.

    The matching row is the first one whose `is_out_pred_max` is strictly
    greater than `value`; when no row qualifies, the last row is used. The
    result is value * (is_out_mean / is_out_pred_mean) taken from that row.
    """
    above_value = out_df[out_df['is_out_pred_max'] > value]
    # Fall back to the last row when the value exceeds every maximum
    bucket = above_value.iloc[0] if len(above_value) > 0 else out_df.iloc[-1]
    return value * (bucket['is_out_mean'] / bucket['is_out_pred_mean'])


In [None]:
### End Testing

In [10]:
# Create PA event probability
def probabilities(game, model_binary, model_outs, model_safe, weather_df, year):
    """Return cumulative event thresholds for the current plate appearance.

    Two-model approach: the binary model gives the probability of "safe"
    (column 0 of its output); a random roll decides safe vs. out, and then only
    the corresponding model (safe types or out types) is evaluated. Events of
    the other group get probability 0. The per-event probabilities are turned
    into running totals so that a single uniform roll can select the event.

    (Alternatives that were tried and are currently disabled: rescaling the
    out probability with `adjust_out_df`, and evaluating both the safe and the
    out model and weighting them by the binary probability.)

    Side effects on `game`: sets onFirst / onSecond / onThird, top and
    score_diff; copies the split-specific stats and imputation flag onto
    `game.ab` and `game.pitching` under their unsuffixed names, and sets
    `game.ab.b_L` / `game.pitching.p_L`. Also prints the safe/out result and
    the elapsed model time.

    Parameters
    ----------
    game
        Game state; `game.ab` is the batter and `game.pitching` the pitcher.
    model_binary, model_outs, model_safe
        Fitted classifiers exposing `.predict_proba`.
    weather_df : DataFrame
        First row supplies the `<event>_factor_l` / `<event>_factor_r` inputs.
    year
        Not used by this function.

    Returns
    -------
    tuple
        (hbp, bb, b1, b2, b3, hr, so, lo, po, go, fo) as cumulative
        thresholds; the last one is forced to exactly 1.
    """
    batter = game.ab
    pitcher = game.pitching

    pitcher_is_lefty = pitcher.pitchHand == "Left"
    # Switch hitters count as lefties against right-handed pitchers
    batter_hits_lefty = batter.batSide == "Left" or (batter.batSide == "Switch" and pitcher.pitchHand == "Right")

    # Batter attributes specific to pitcher hand
    batter_split = '_l' if pitcher_is_lefty else '_r'
    for stat in batter_inputs + ['imp_b']:
        setattr(batter, stat, getattr(batter, f'{stat}{batter_split}'))

    # Pitcher attributes specific to batter hand
    pitcher_split = '_l' if batter_hits_lefty else '_r'
    for stat in pitcher_inputs + ['imp_p']:
        setattr(pitcher, stat, getattr(pitcher, f'{stat}{pitcher_split}'))

    # Handedness dummies
    batter.b_L = int(batter_hits_lefty)
    pitcher.p_L = int(pitcher_is_lefty)

    # On base dummies
    game.onFirst = int(game.on_1b is not None)
    game.onSecond = int(game.on_2b is not None)
    game.onThird = int(game.on_3b is not None)

    # Top of the inning dummy
    game.top = int(game.top_bot == "Top")

    # Score differential from the batting team's point of view
    if game.top == 1:
        game.score_diff = game.away_score - game.home_score
    else:
        game.score_diff = game.home_score - game.away_score

    ### Inputs
    batter_inputs_pa = [getattr(batter, stat) for stat in batter_inputs]
    pitcher_inputs_pa = [getattr(pitcher, stat) for stat in pitcher_inputs]
    hand_inputs_pa = [pitcher.p_L, batter.b_L]
    game_state_inputs_pa = [game.onFirst, game.onSecond, game.onThird, game.inning, game.top, game.outs, game.score_diff]
    imp_inputs_pa = [batter.imp_b, pitcher.imp_p]

    # Is the fielding team's starter still in the game?
    if game.top == 1:
        starter = int(game.home_starter_pulled == False)
    else:
        starter = int(game.away_starter_pulled == False)

    # Event factors for the batter's side
    factor_side = 'l' if batter.b_L == 1 else 'r'
    factor_columns = [f'{event}_factor_{factor_side}'
                      for event in ('b1', 'b2', 'b3', 'hr', 'bb', 'hbp', 'so', 'fo', 'go', 'lo', 'po')]
    event_factor_inputs = weather_df[factor_columns].iloc[0].to_list()

    ### Create inputs
    inputs_pa = batter_inputs_pa + pitcher_inputs_pa + hand_inputs_pa + game_state_inputs_pa + imp_inputs_pa + [starter] + event_factor_inputs
    model_inputs = pd.Series(inputs_pa).values.reshape(1, -1)

    start_time = time.time()

    ### Run models
    # Column 0 of the binary model is used as the probability of being safe
    is_safe = model_binary.predict_proba(model_inputs).tolist()[0][0]

    # Roll to determine if safe, then predict probabilities specific to safe or out
    b1 = b2 = b3 = bb = hbp = hr = 0
    fo = go = lo = po = so = 0
    if random.random() < is_safe:
        print("He's safe")
        # Safe types, in the model's column order
        b1, b2, b3, bb, hbp, hr = model_safe.predict_proba(model_inputs).tolist()[0][:6]
    else:
        print("He's out")
        # Out types, in the model's column order
        fo, go, lo, po, so = model_outs.predict_proba(model_inputs).tolist()[0][:5]

    # Running totals: each value is the upper end of that event's slice of [0, 1]
    thresholds = [hbp]
    for rate in (bb, b1, b2, b3, hr, so, lo, po, go, fo):
        thresholds.append(thresholds[-1] + rate)
    # The final threshold should already be ~1; pin it so a roll can never fall past the end
    thresholds[-1] = 1

    elapsed_time = time.time() - start_time
    print("Elapsed time:", elapsed_time)

    return tuple(thresholds)

### Errors

In [11]:
def is_error(game, event):
    """Roll whether an error occurs on this play.

    Builds one dummy per event code plus the three on-base dummies from
    `game`, asks `model_errors` for the error probability (column 1), prints
    it, and returns 1 if a uniform roll falls below it, else 0.
    """
    # One 0/1 dummy per event, in the order the model expects
    event_codes = ("b1", "b2", "b3", "bb", "fo", "go", "hbp", "hr", "lo", "po", "so")
    features = [int(event == code) for code in event_codes]
    features += [game.onFirst, game.onSecond, game.onThird]

    model_inputs = pd.Series(features).values.reshape(1, -1)

    # Probability of an error
    error_prob = model_errors.predict_proba(model_inputs).tolist()[0][1]
    print("Error%: ", round(error_prob, 4))

    # Determine if there's an error (including on hits)
    return int(random.random() < error_prob)

### Double Play

In [12]:
def is_dp(game, event):
    """Roll whether a double play occurs on this play.

    Builds one dummy per event code plus the outs and the three on-base
    dummies from `game`, asks `model_dp` for the double-play probability
    (column 1), prints it, and returns 1 if a uniform roll falls below it,
    else 0.
    """
    # One 0/1 dummy per event, in the order the model expects
    event_codes = ("b1", "b2", "b3", "bb", "fo", "go", "hbp", "hr", "lo", "po", "so")
    features = [int(event == code) for code in event_codes]
    features += [game.outs, game.onFirst, game.onSecond, game.onThird]

    model_inputs = pd.Series(features).values.reshape(1, -1)

    # Probability of a double play
    dp_prob = model_dp.predict_proba(model_inputs).tolist()[0][1]
    print("DP%:", round(dp_prob, 4))

    # Determine if there's a DP (including on hits)
    return int(random.random() < dp_prob)

### Outs

In [13]:
def choose_outs(game, event, error, double_play):
    """Randomly decide who (batter and/or runners) is put out on the play.

    ``model_out_bases`` is queried once for the batter and once for each
    occupied base to get that participant's out probability. Those values,
    together with a "no one is out" remainder, become the weights of a random
    draw. On a double play a second draw is made with the first pick's weight
    set to zero.

    Args:
        game: Game state; reads ``on_1b``/``on_2b``/``on_3b``, ``outs`` and
            the ``onFirst``/``onSecond``/``onThird`` flags.
        event: Plate-appearance result code ("b1", "go", "so", ...).
        error: 1 if an error occurred on the play, else 0.
        double_play: 1 if the play is a double play, else 0.

    Returns:
        Tuple ``(out_ab, out_1b, out_2b, out_3b)`` of 0/1 ints flagging which
        participants are out.
    """
    # Create event_dummies
    event_codes = ("b1", "b2", "b3", "bb", "fo", "go", "hbp", "hr", "lo", "po", "so")
    event_dummies = [int(event == code) for code in event_codes]

    # Situation features shared by every query
    state_inputs = [game.outs, game.onFirst, game.onSecond, game.onThird, error, double_play]

    def out_probability(start_dummies):
        # start_dummies one-hot encodes where the participant started: [AB, 1B, 2B, 3B]
        out_input_list = event_dummies + start_dummies + state_inputs
        model_inputs = pd.Series(out_input_list).values.reshape(1, -1)
        return model_out_bases.predict_proba(model_inputs).tolist()[0][1]

    ##### Model Approach
    # An empty base cannot produce an out, so the model is only queried for occupied bases
    out_3b = out_probability([0, 0, 0, 1]) if game.on_3b is not None else 0
    out_2b = out_probability([0, 0, 1, 0]) if game.on_2b is not None else 0
    out_1b = out_probability([0, 1, 0, 0]) if game.on_1b is not None else 0
    out_ab = out_probability([1, 0, 0, 0])

    # Determine probability of being safe
    safe_probability = (1 + double_play) - np.sum([out_ab, out_1b, out_2b, out_3b])
    # safe_probability is for hits, not outs. Can't be negative.
    # While there is some chance of no one being out on an out event, this will be classified as an error.
    if safe_probability < 0 or (event in ['so', 'go', 'lo', 'po', 'fo'] and error == 0):
        print("Calculated safe probability:", safe_probability)
        safe_probability = 0

    # List of probabilities, rounded and rescaled so they sum to 100
    probabilities = [out_ab, out_1b, out_2b, out_3b, safe_probability]
    probabilities = [round(item, 2) for item in probabilities]
    total = sum(probabilities)
    if total <= 0:
        # Every weight rounded to zero. This can only happen after the safe
        # probability was forced to 0 above (an out event without an error),
        # so someone has to be out: charge it to the batter instead of
        # dividing by zero.
        probabilities = [100, 0, 0, 0, 0]
    else:
        probabilities = [100 * x / total for x in probabilities]

    print(np.sum(probabilities))

    print(f"Out Odds:      AB: {float(probabilities[0]/100)}, 1B: {float(probabilities[1]/100)}, 2B: {float(probabilities[2]/100)}, 3B: {float(probabilities[3]/100)}, No One: {float(probabilities[4]/100)}")

    # Choose an item from the list of probabilities based on their odds
    chosen_index = random.choices(range(len(probabilities)), weights=probabilities)[0]
    # NaN never compares equal to an index, so "no second pick" matches nothing below
    chosen_index2 = np.nan

    # If there's a double play
    if double_play == 1:
        # Remove the out you already chose
        probabilities[chosen_index] = 0
        # Failsafe: if there's no chance of anyone else being out,
        if np.sum(probabilities) <= 0:
            # just assign a guaranteed chance to the batter.
            probabilities[0] = 100

        # And choose again
        chosen_index2 = random.choices(range(len(probabilities)), weights=probabilities)[0]

    # Assign outs (index 4, "no one", maps to no flag)
    out_ab = int(chosen_index == 0 or chosen_index2 == 0)
    out_1b = int(chosen_index == 1 or chosen_index2 == 1)
    out_2b = int(chosen_index == 2 or chosen_index2 == 2)
    out_3b = int(chosen_index == 3 or chosen_index2 == 3)

    print(f"Out Locations: AB: {float(out_ab)}, 1B: {float(out_1b)}, 2B: {float(out_2b)}, 3B: {float(out_3b)}")

    return out_ab, out_1b, out_2b, out_3b

### Events

In [14]:
def event_results(game, startInt, event, out_ab, out_1b, out_2b, out_3b, blocked_1b, blocked_2b, blocked_3b, error, double_play):
    """Roll the destination of one participant (batter or runner) on the play.

    ``model_events`` is queried with the event, the participant's starting
    base and the play situation. The first four class probabilities are
    rounded to two decimals and used as the odds of ending on first, second,
    third or scoring.

    Args:
        game: Game state; reads ``outs``, ``onFirst``, ``onSecond`` and
            ``onThird``.
        startInt: Starting base of the participant (0 = batter, 1-3 = base).
        event: Plate-appearance result code ("b1", "go", ...).
        out_ab, out_1b, out_2b, out_3b: 0/1 flags for who is out on the play.
        blocked_1b, blocked_2b, blocked_3b: 0/1 flags for bases already taken
            by a runner ahead.
        error: 1 if an error occurred on the play, else 0.
        double_play: 1 if the play is a double play, else 0.

    Returns:
        One of ``"to_1b"``, ``"to_2b"``, ``"to_3b"`` or ``"to_score"``.
    """
    # One-hot encodings of the event and of the starting base
    event_codes = ("b1", "b2", "b3", "bb", "fo", "go", "hbp", "hr", "lo", "po", "so")
    event_flags = [int(event == code) for code in event_codes]
    start_flags = [int(startInt == base) for base in range(4)]

    # Play situation shared with the other models
    situation = [
        game.outs, game.onFirst, game.onSecond, game.onThird,
        blocked_1b, blocked_2b, blocked_3b,
        out_ab, out_1b, out_2b, out_3b,
        error, double_play,
    ]
    model_inputs = pd.Series(event_flags + start_flags + situation).values.reshape(1, -1)

    # Single-row prediction; columns 0-3 are read as 1B, 2B, 3B, score
    row = model_events.predict_proba(model_inputs).tolist()[0]
    to_1b, to_2b, to_3b, to_score = (round(row[i], 2) for i in range(4))

    print("startInt", startInt, "to_1b", to_1b, "to_2b", to_2b, "to_3b", to_3b, "to_score", to_score)

    # Turn the individual odds into cumulative thresholds
    through_2b = to_1b + to_2b
    through_3b = through_2b + to_3b

    # Determine where the runner went
    base_roll = random.random()
    if base_roll < to_1b:
        return "to_1b"
    if base_roll < through_2b:
        return "to_2b"
    if base_roll < through_3b:
        return "to_3b"
    # Scoring takes whatever probability mass is left (its threshold is 1,
    # which a draw from [0, 1) is always below)
    return "to_score"
    
    

In [15]:
# Simulate at bat
def sim_ab(game, model_binary, model_outs, model_safe, model_pulls, opener_list, weather_df, year, debug=False):
    """Simulate one plate appearance and update the game state in place.

    Steps, in order: pick the batter/pitcher matchup, roll the event, roll
    for an error and for a double play, choose who is out, advance the
    runners, then update outs, score and the batting order.

    Args:
        game: Mutable game state. Base occupancy is held both as runner
            objects (``on_1b``/``on_2b``/``on_3b``) and as 0/1 flags
            (``onFirst``/``onSecond``/``onThird``); the flags are refreshed
            at the end of this function.
        model_binary, model_outs, model_safe: Passed through to
            ``probabilities``.
        model_pulls, opener_list: Passed through to ``choose_pa_matchup``.
        weather_df, year: Passed through to ``probabilities``.
        debug: When truthy, print a text diagram of the current situation.

    Returns:
        The updated ``game``.
    """
    # Choose plate appearance matchup
    game = choose_pa_matchup(game, model_pulls, opener_list)

    # Add PA for batter and pitcher
    game.ab.PA += 1
    game.pitching.PA += 1

    # Set the zombie (will be last guy up)
    if game.top_bot == "Top":
        game.away_zombie = game.ab
    else:
        game.home_zombie = game.ab

    if debug:
        # Batting-order slot of each runner, 0 for an empty base
        order_1b = 0 if game.on_1b is None else int(game.on_1b.batting_order)
        order_2b = 0 if game.on_2b is None else int(game.on_2b.batting_order)
        order_3b = 0 if game.on_3b is None else int(game.on_3b.batting_order)

        # Calculate batter stats
        game.ab, game = calculate_batter(game.ab, game)

        print("\n")
        print(game.top_bot, game.inning, "Outs: ", game.outs)
        print(f"       {order_2b}")
        print("    /     \\")
        print(f"   {order_3b}   {int(game.pitching.Leverage)}   {order_1b}  {game.pitching.position} {game.pitching.fullName}: {game.pitching.FP}")
        print("    \\     /         vs.")
        print(f"       {int(game.ab.batting_order)}      {game.ab.position} {game.ab.fullName}: {game.ab.FP}  ")
        print(f"Away {game.away_score} - {game.home_score} Home")

    # Calculate probabilities; the values are used below as increasing
    # cumulative thresholds for a single roll
    hbp, bb, b1, b2, b3, hr, so, lo, po, go, fo = probabilities(game, model_binary, model_outs, model_safe, weather_df, year)

    # Roll
    pa_roll = random.random()
    # Event 1: HBP
    if pa_roll < hbp:
        event = "hbp"
    # Event 2: BB
    elif pa_roll < bb:
        event = "bb"
    # Event 3: Single
    elif pa_roll < b1:
        event = "b1"
    # Event 4: Double
    elif pa_roll < b2:
        event = "b2"
    # Event 5: Triple
    elif pa_roll < b3:
        event = "b3"
    # Event 6: Home Run
    elif pa_roll < hr:
        event = "hr"
    # Event 7: Strikeout
    elif pa_roll < so:
        event = "so"
    # Event 8: Line drive out
    elif pa_roll < lo:
        event = "lo"
    # Event 9: Pop out
    elif pa_roll < po:
        event = "po"
    # Event 10: Groundball out
    elif pa_roll < go:
        event = "go"
    # Event 11: Fly out (everything left over)
    else:
        event = "fo"

    print("Result:", event)

    # Carry out event: on-base events count for batter and pitcher,
    # out events only for the pitcher
    if event == "b1":
        game.ab.B1 += 1
        game.pitching.B1 += 1
    elif event == "b2":
        game.ab.B2 += 1
        game.pitching.B2 += 1
    elif event == "b3":
        game.ab.B3 += 1
        game.pitching.B3 += 1
    elif event == "hr":
        game.ab.HR += 1
        game.pitching.HR += 1
    elif event == "bb":
        game.ab.BB += 1
        game.pitching.BB += 1
    elif event == "hbp":
        game.ab.HBP += 1
        game.pitching.HBP += 1
    elif event == "so":
        game.pitching.SO += 1
    elif event == "fo":
        game.pitching.FO += 1
    elif event == "go":
        game.pitching.GO += 1
    elif event == "lo":
        game.pitching.LO += 1
    elif event == "po":
        game.pitching.PO += 1

    # Assume run will be charged to pitcher
    game.ab.charged = 1

    # Determine if there's an error on the play
    error = is_error(game, event)
    # If there is an error
    if error == 1:
        print("There's an error!")
        # And if the event was supposed to be an out
        if event in ['so', 'go', 'fo', 'lo', 'po']:
            # The batter will not be charged to the pitcher
            game.ab.charged = 0

    # If there's an error with two outs and the event would have ended the inning (been an out), all future runs are unearned
    if error == 1 and event in ['so', 'go', 'fo', 'lo', 'po'] and game.outs == 2:
        game.error_extended = True
        print("An error has extended the inning.")

    # Once the inning has been extended by an error, nobody is charged to the pitcher
    if game.error_extended:
        _clear_charges(game)

    ### TESTING:
    # Aggressive unearned runs: If there's an error in an inning, every runner on base will not be charged (even if they might have scored anyway)
    # Note that if there's an error with less than two outs, baserunners post-error could still be earned.
    if error == 1:
        _clear_charges(game)
    ### TESTING ENDS

    # Determine if there's a double play
    double_play = is_dp(game, event)
    if double_play == 1:
        print("There's a double play!")

    # Determine where outs occur
    out_ab, out_1b, out_2b, out_3b = choose_outs(game, event, error, double_play)

    # Outs on play
    outs_on_play = out_ab + out_1b + out_2b + out_3b

    # Runs on play
    runs = 0

    # If the inning isn't over (or if it is over, but it was on a hit/bb/hbp so we still have to determine whether runners scored)
    if (game.outs + outs_on_play < 3) or (event in ['b1', 'b2', 'b3', 'hr', 'bb', 'hbp']):
        # Determine where the runners go, lead runner first so that trailing
        # runners know which bases are already taken ("blocked")

        # Runner on 3B
        if out_3b == 1:
            base_3b = "out"
        elif game.on_3b is not None:
            blocked_1b = 0
            blocked_2b = 0
            blocked_3b = 0
            base_3b = event_results(game, 3, event, out_ab, out_1b, out_2b, out_3b, blocked_1b, blocked_2b, blocked_3b, error, double_play)
        else:
            base_3b = "N/A"

        # Runner on 2B
        if out_2b == 1:
            base_2b = "out"
        elif game.on_2b is not None:
            blocked_1b = 0
            blocked_2b = 0
            blocked_3b = int(base_3b == "to_3b")
            base_2b = event_results(game, 2, event, out_ab, out_1b, out_2b, out_3b, blocked_1b, blocked_2b, blocked_3b, error, double_play)
        else:
            base_2b = "N/A"

        # Runner on 1B
        if out_1b == 1:
            base_1b = "out"
        elif game.on_1b is not None:
            blocked_1b = 0
            blocked_2b = int(base_2b == "to_2b")
            blocked_3b = int(base_3b == "to_3b" or base_2b == "to_3b")
            base_1b = event_results(game, 1, event, out_ab, out_1b, out_2b, out_3b, blocked_1b, blocked_2b, blocked_3b, error, double_play)
        else:
            base_1b = "N/A"

        # AB
        if out_ab == 1:
            base_ab = "out"
        elif game.ab is not None:
            blocked_1b = int(base_1b == "to_1b")
            blocked_2b = int(base_2b == "to_2b" or base_1b == "to_2b")
            blocked_3b = int(base_3b == "to_3b" or base_2b == "to_3b" or base_1b == "to_3b")
            base_ab = event_results(game, 0, event, out_ab, out_1b, out_2b, out_3b, blocked_1b, blocked_2b, blocked_3b, error, double_play)
        else:
            base_ab = "N/A"

        print(f"Advancements:  AB: {base_ab}, 1B: {base_1b}, 2B: {base_2b}, 3B: {base_3b}")

        # Move Runners, again lead runner first so a vacated base is free
        # before the trailing runner is placed on it.
        # Runner on 3B
        if game.on_3b is not None:
            if base_3b == "to_score":
                game.on_3b.R += 1
                game.on_3b.pitcher.ER += (1 * (1-error) * game.on_3b.charged) # Not an ER if event is error, player reached on error, or inning would be over if not for error.
                # No RBI is credited on an error or a double play
                game.ab.RBI += 1 * (1-error) * (1-double_play)
                game.on_3b.pitcher.R += 1
                runs += 1
                game.on_3b = None
            elif base_3b == "out":
                game.on_3b = None

        # Runner on 2B
        if game.on_2b is not None:
            if base_2b == "to_3b":
                game.on_3b = game.on_2b
                game.on_2b = None
            elif base_2b == "to_score":
                game.on_2b.R += 1
                game.on_2b.pitcher.ER += (1 * (1-error) * game.on_2b.charged)
                game.ab.RBI += 1 * (1-error) * (1-double_play)
                game.on_2b.pitcher.R += 1
                runs += 1
                game.on_2b = None
            elif base_2b == "out":
                game.on_2b = None

        # Runner on 1B
        if game.on_1b is not None:
            if base_1b == "to_2b":
                game.on_2b = game.on_1b
                game.on_1b = None
            elif base_1b == "to_3b":
                game.on_3b = game.on_1b
                game.on_1b = None
            elif base_1b == "to_score":
                game.on_1b.R += 1
                game.on_1b.pitcher.ER += (1 * (1-error) * game.on_1b.charged)
                game.ab.RBI += 1 * (1-error) * (1-double_play)
                game.on_1b.pitcher.R += 1
                runs += 1
                game.on_1b = None
            elif base_1b == "out":
                game.on_1b = None

        # AB
        if game.ab is not None:
            if base_ab == "to_1b":
                game.on_1b = game.ab
            elif base_ab == "to_2b":
                game.on_2b = game.ab
            elif base_ab == "to_3b":
                game.on_3b = game.ab
            elif base_ab == "to_score":
                game.ab.R += 1
                game.ab.pitcher.ER += (1 * (1-error) * game.ab.charged)
                game.ab.RBI += 1 * (1-error) * (1-double_play)
                game.ab.pitcher.R += 1
                runs += 1
            # base_ab == "out": the batter simply does not reach base

    # Determine bases: refresh the 0/1 flags from the runner objects
    game.onThird = 1 if game.on_3b is not None else 0
    game.onSecond = 1 if game.on_2b is not None else 0
    game.onFirst = 1 if game.on_1b is not None else 0

    # Add outs on play
    game.outs += outs_on_play
    game.pitching.OUT += outs_on_play

    # Add runs to the batting side's score
    if game.top_bot == "Top":
        game.away_score += runs
    else:
        game.home_score += runs
    # Add to number of batters faced
    game.pitching.faced += 1

    # Go to the next batter up, wrapping from the 9th slot back to the 1st
    if game.top_bot == "Top":
        game.away_order += 1
        if game.away_order == 10:
            game.away_order = 1
    else:
        game.home_order += 1
        if game.home_order == 10:
            game.home_order = 1

    print("Pitcher ER", game.pitching.ER)

    return game


def _clear_charges(game):
    """Mark the batter and every runner on base as not charged to the pitcher."""
    for runner in (game.on_3b, game.on_2b, game.on_1b):
        if runner is not None:
            runner.charged = 0
    game.ab.charged = 0

In [16]:
def sim_inning(game, model_pulls, model_binary, model_outs, model_safe, opener_list, weather_df, year, innings=9, debug=False):
    """Simulate one half inning and return the updated game.

    Resets the per-half-inning state, places the automatic runner on second
    from the 10th inning on, then loops until three outs: steal attempts
    first (third base, then second), followed by one plate appearance via
    ``sim_ab``. The winning-pitcher bookkeeping is updated after every plate
    appearance, and the loop stops early on a walk-off.

    The ``onFirst``/``onSecond``/``onThird`` flags are re-synced with the
    runner objects before each plate appearance, so the models called by
    ``sim_ab`` see the bases as they are after the automatic runner and any
    steal or caught stealing.

    Args:
        game: Mutable game state.
        model_pulls, model_binary, model_outs, model_safe, opener_list,
        weather_df: Passed through to ``sim_ab``.
        year: Season; compared as ``int(year)`` to build the year dummies and
            passed through to ``sim_ab``.
        innings: Not used here; ``game.innings`` is read instead.
        debug: When truthy, refresh both starters' stats before every plate
            appearance and pass the flag on to ``sim_ab``.

    Returns:
        The updated ``game``.
    """
    # Reset per-half-inning counters and clear the bases
    game.outs = 0
    game.faced_inning = 0
    game.br_inning = 0
    game.on_1b = None
    game.on_2b = None
    game.on_3b = None
    game.error_extended = False

    # Set zombie runner: from the 10th inning on, the stored "zombie" batter
    # of the side at bat starts on second and is never charged to the pitcher
    if game.inning >= 10:
        if game.top_bot == "Top":
            game.on_2b = game.away_zombie
        else:
            game.on_2b = game.home_zombie
        game.on_2b.charged = 0

    # Flags must reflect the zombie runner as well
    _sync_base_flags(game)

    # Year dummies
    for y in range(2015, 2025):
        setattr(game, f'year_{y}', 1 if y == int(year) else 0)

    # Year feature values in the order given by the global `year_inputs`
    year_inputs_pa = [getattr(game, year_col) for year_col in year_inputs]

    # Loop for each PA
    while game.outs < 3:
        if debug:
            # Calculate a pitcher's cumulative stats
            game.home_starter, game = calculate_pitcher(game.home_starter, game)
            game.away_starter, game = calculate_pitcher(game.away_starter, game)

        ### Steals
        # NOTE(review): outs on a caught stealing are credited to
        # `game.pitching`, which is whoever pitched the previous plate
        # appearance -- confirm that is the right pitcher for a steal
        # attempted before the first PA of a half inning.

        # Third base: only if third is empty and second is not
        if game.on_3b is None and game.on_2b is not None:
            caught = _attempt_steal(game, "on_2b", "on_3b", "3B", model_sba_3b, model_sb_3b, year_inputs_pa)
            # Check outs as this might end the inning
            if caught and game.outs == 3:
                break

        # Second base: only if second is empty and first is not
        if game.on_2b is None and game.on_1b is not None:
            caught = _attempt_steal(game, "on_1b", "on_2b", "2B", model_sba_2b, model_sb_2b, year_inputs_pa)
            # Check outs as this might end the inning
            if caught and game.outs == 3:
                break

        # Steals may have moved or removed runners; keep the flags consistent
        _sync_base_flags(game)

        # Simulate the AB
        game = sim_ab(game, model_binary, model_outs, model_safe, model_pulls, opener_list, weather_df, year, debug)

        # They faced an additional batter
        game.faced_inning += 1
        # They allowed an additional base runner (this will be cancelled out, if necessary, in the "out" function)
        game.br_inning += 1

        ### Who gets the win?
        # A win is only assigned from the bottom of the fifth onward
        win_eligible = (game.inning == 5 and game.top_bot == "Bot") or game.inning >= 6
        # If a side leads and the current winning pitcher is not already on
        # that side (none yet, or he is on the other team), the pitcher
        # currently up for that side becomes the winning pitcher
        if (game.away_score > game.home_score) and win_eligible and (game.winning_pitcher not in game.away_pitchers):
            game.winning_pitcher = game.away_pitcher_up
        elif (game.home_score > game.away_score) and win_eligible and (game.winning_pitcher not in game.home_pitchers):
            game.winning_pitcher = game.home_pitcher_up
        # A tie, or any point before the fifth inning, clears the winning pitcher
        elif (game.home_score == game.away_score) or game.inning < 5:
            game.winning_pitcher = None

        # Walk off: home side takes the lead in the bottom of the last scheduled inning
        if (game.inning == game.innings) and (game.top_bot == "Bot") and (game.home_score > game.away_score):
            game.winning_pitcher = game.home_pitcher_up
            break

    return game


def _sync_base_flags(game):
    """Set the 0/1 base-occupancy flags from the runner objects."""
    game.onThird = 1 if game.on_3b is not None else 0
    game.onSecond = 1 if game.on_2b is not None else 0
    game.onFirst = 1 if game.on_1b is not None else 0


def _attempt_steal(game, from_attr, to_attr, label, attempt_model, success_model, year_inputs_pa):
    """Roll a steal attempt for the runner on ``from_attr``; return True if he is caught."""
    runner = getattr(game, from_attr)

    # Steal model inputs (the same row feeds the attempt and the success model)
    steal_input_list = [game.outs, runner.sba_imp, runner.sbr] + year_inputs_pa
    model_inputs = pd.Series(steal_input_list).values.reshape(1, -1)

    # Stolen base attempt roll, drawn before the rate is computed
    attempt_roll = random.random()
    attempt_rate = attempt_model.predict_proba(model_inputs).tolist()[0][1]
    print(f"{label} Attempt Rate: {attempt_rate}")

    # No attempt unless the roll is below the attempt rate
    if not (attempt_roll < attempt_rate):
        return False

    # Stolen base success roll
    success_roll = random.random()
    success_rate = success_model.predict_proba(model_inputs).tolist()[0][1]
    print(f"{label} Success Rate: {success_rate}")

    if success_roll < success_rate:
        # They succeed: credit the steal and move the runner up
        print("Stolen base")
        runner.SB += 1
        setattr(game, to_attr, runner)
        setattr(game, from_attr, None)
        return False

    # They're out: remove the runner and record the out
    print("Caught stealing")
    setattr(game, from_attr, None)
    game.pitching.OUT += 1
    game.outs += 1
    return True

In [17]:
# Calculate batter fantasy points
def calculate_batter(batter, game):
    """Compute a batter's fantasy points and store them on ``batter.FP``.

    Args:
        batter: Object with the counting stats ``B1``, ``B2``, ``B3``,
            ``HR``, ``RBI``, ``R``, ``BB``, ``HBP`` and ``SB``.
        game: Returned unchanged; not read here.

    Returns:
        Tuple ``(batter, game)``.
    """
    # Points awarded per unit of each stat, summed in this order
    scoring = (
        ("B1", 3),
        ("B2", 5),
        ("B3", 8),
        ("HR", 10),
        ("RBI", 2),
        ("R", 2),
        ("BB", 2),
        ("HBP", 2),
        ("SB", 5),
    )
    batter.FP = sum(getattr(batter, stat) * points for stat, points in scoring)

    return batter, game

In [18]:
# Calculate pitcher fantasy points
def calculate_pitcher(pitcher, game):
    """Compute a pitcher's derived stats and fantasy points.

    Sets ``H``, ``W``, ``CG``, ``CGSO``, ``NH`` and ``FP`` on ``pitcher``.
    The complete-game flags are recomputed on every call, so calling this
    repeatedly during a game (as the debug path does) cannot leave a bonus
    from an earlier call in place after the pitcher's line has changed.

    Args:
        pitcher: Object with ``B1``, ``B2``, ``B3``, ``HR``, ``OUT``, ``ER``,
            ``SO``, ``BB`` and ``HBP``.
        game: Game state; ``winning_pitcher`` is compared to ``pitcher``.

    Returns:
        Tuple ``(pitcher, game)``.
    """
    # Calculate hits allowed
    pitcher.H = pitcher.B1 + pitcher.B2 + pitcher.B3 + pitcher.HR

    # If they're the winning pitcher, they get a win
    pitcher.W = 1 if game.winning_pitcher == pitcher else 0

    # Determine CG, CGSO, NH. A complete game is exactly 27 outs recorded.
    # NOTE(review): the shutout test uses earned runs (ER); a shutout is
    # normally judged on all runs allowed -- confirm ER is intended.
    complete_game = pitcher.OUT == 27
    pitcher.CG = int(complete_game)
    pitcher.CGSO = int(complete_game and pitcher.ER == 0)
    pitcher.NH = int(complete_game and pitcher.H == 0)

    pitcher.FP = (
                pitcher.OUT * 0.75 +
                pitcher.SO * 2 +
                pitcher.W * 4 +
                pitcher.ER * -2 +
                pitcher.H * -0.6 +
                pitcher.BB * -0.6 +
                pitcher.HBP * -0.6 +
                pitcher.CG * 2.5 +
                pitcher.CGSO * 2.5 +
                pitcher.NH * 5
                )

    return pitcher, game

In [19]:
# Simulate a whole game     
def sim_game(game_template, model_pulls, model_binary, model_outs, model_safe, opener_list, weather_df, year, innings=9, debug=False):
    """Play one full game on a deep copy of ``game_template`` and return it.

    Half innings are simulated with ``sim_inning`` until ``game.inning``
    exceeds ``game.innings``. In the last scheduled inning the game stops
    early when the home side leads after the top half or either side leads
    after the bottom half; a tie after the bottom half adds another inning.
    Fantasy points are computed for every batter and pitcher at the end.

    Args:
        game_template: Prepared game object; it is deep-copied, not mutated.
        model_pulls, model_binary, model_outs, model_safe, opener_list,
        weather_df, year, innings, debug: Passed through to ``sim_inning``.

    Returns:
        The simulated game object.
    """
    # Work on a private copy so the template can be reused
    game = deepcopy(game_template)

    # Each side's starter is its first pitcher with Leverage == 1
    game.home_starter = next(arm for arm in game.home_pitchers if arm.Leverage == 1)
    game.away_starter = next(arm for arm in game.away_pitchers if arm.Leverage == 1)

    # Loop over every half inning
    while game.inning <= game.innings:
        game = sim_inning(game, model_pulls, model_binary, model_outs, model_safe, opener_list, weather_df, year, innings, debug)

        # End-of-game checks only apply in the last scheduled inning
        if game.inning == game.innings:
            if game.top_bot == "Top":
                # Home side already ahead after the top half
                if game.home_score > game.away_score:
                    print("No need to play the bottom of the inning")
                    break
            elif game.top_bot == "Bot":
                # Someone leads after the full inning
                if game.away_score != game.home_score:
                    print("No need to play extra innings")
                    break
                # Still level: schedule one more inning
                if game.home_score == game.away_score:
                    print("Extra innings!")
                    game.innings += 1

        # Advance half inning; the inning number only moves after a bottom half
        top_just_played = game.top_bot == "Top"
        game.top_bot = "Bot" if top_just_played else "Top"
        if not top_just_played:
            game.inning += 1

    # Calculate stats: home side first, batters before pitchers
    for side in ("home", "away"):
        for batter in getattr(game, f"{side}_batters"):
            _, game = calculate_batter(batter, game)
    for side in ("home", "away"):
        for pitcher in getattr(game, f"{side}_pitchers"):
            _, game = calculate_pitcher(pitcher, game)

    return game

In [None]:
def sim_game_batch(game_template, model_pulls, model_binary, model_outs, model_safe, opener_list, weather_df, year, innings=9, debug=False, batch_size=50):
    """Simulate ``batch_size`` independent games from the same template.

    Args:
        game_template, model_pulls, model_binary, model_outs, model_safe,
        opener_list, weather_df, year: Passed through to ``sim_game``.
        innings: Forwarded to ``sim_game`` (previously ignored in favour of a
            hard-coded 9).
        debug: Forwarded to ``sim_game`` (previously ignored in favour of a
            hard-coded False).
        batch_size: Number of games to simulate.

    Returns:
        List of simulated game objects, one per simulation.
    """
    return [
        sim_game(game_template, model_pulls, model_binary, model_outs, model_safe, opener_list, weather_df, year, innings=innings, debug=debug)
        for _ in range(batch_size)
    ]

In [20]:
# Retrieve all values for attribute across all players across all simulations
def create_players_dataframe(game_list, attribute='FP', player='batter'):
    """Collect one attribute for every player across all simulated games.

    Args:
        game_list: Simulated games. Each game's away players are listed
            before its home players.
        attribute: Name of the player attribute to collect.
        player: ``'batter'`` or ``'pitcher'``.

    Returns:
        DataFrame with a ``fullName`` column followed by one column per game
        named ``f'{attribute}{i}'``. Names are taken from the last game in
        ``game_list``. An empty ``game_list`` gives an empty frame that only
        has the ``fullName`` column.

    Raises:
        ValueError: If ``player`` is neither ``'batter'`` nor ``'pitcher'``.
    """
    if player == 'batter':
        rosters = [game.away_batters + game.home_batters for game in game_list]
    elif player == 'pitcher':
        rosters = [game.away_pitchers + game.home_pitchers for game in game_list]
    else:
        raise ValueError(f"player must be 'batter' or 'pitcher', got {player!r}")

    # One column of attribute values per simulated game
    data = {
        f'{attribute}{i}': [getattr(player_obj, attribute) for player_obj in roster]
        for i, roster in enumerate(rosters)
    }
    players_df = pd.DataFrame(data)

    # Player names come from the last game; no games means no rows
    name_list = [player_obj.fullName for player_obj in rosters[-1]] if rosters else []
    players_df['fullName'] = name_list

    # Reorder columns so the name comes first
    cols = ['fullName'] + [col for col in players_df.columns if col != 'fullName']
    players_df = players_df[cols]

    return players_df

In [21]:
# Create dataframe with scores
def extract_scores(game_list):
    """Build a DataFrame of final scores, one row per simulated game.

    Args:
        game_list: Games exposing ``away_score`` and ``home_score``.

    Returns:
        DataFrame with the columns ``away_score`` and ``home_score``, in that
        order.
    """
    score_columns = ('away_score', 'home_score')
    # Column name and attribute name are the same for both sides
    data = {
        column: [getattr(game, column) for game in game_list]
        for column in score_columns
    }

    return pd.DataFrame(data)