# B02. Simulations
- Simulate matchups

### Batting Orders

##### MLB Stats API (Locked)

In [None]:
def create_order_api(date, team, game_id):
    """Load the MLB Stats API batting order saved for one team in one game.

    Reads the batting-order CSV for the given date/team/game, derives the
    lineup slot from the hundreds part of `order`, keeps the first row found
    for each slot and returns a frame with `id`, `confirmed` (always "Y") and
    `batting_order`. Prints a notice when the slots do not sum to 45
    (1 + 2 + ... + 9); the frame is returned either way.
    """
    csv_path = os.path.join(
        baseball_path,
        "A05. Rosters",
        "1. Batting Orders",
        f"Batting Orders {date}",
        f"Batting Order {team} {game_id}.csv",
    )
    raw_order = pd.read_csv(csv_path, encoding='iso-8859-1')

    lineup = (
        raw_order.sort_values('order', ascending=True)
        # 100, 101, 200, ... -> 1, 1, 2, ...
        .assign(batting_order=lambda frame: frame['order'] // 100)
        # The lowest `order` in each slot wins because of the sort above
        .drop_duplicates(subset=['batting_order'], keep='first')
        .dropna(subset=['batting_order'])
        .assign(
            batting_order=lambda frame: frame['batting_order'].astype(int),
            confirmed="Y",
        )
    )

    # Keep relevant columns
    lineup = lineup[['id', 'confirmed', 'batting_order']].reset_index(drop=True)

    # A complete lineup has slots 1..9, which sum to 45
    slot_total = lineup['batting_order'].sum()
    if slot_total != 45:
        print("MLB API batting orders do not add up to 45")

    return lineup

##### Baseball Monster (Projected)

In [None]:
def create_order_bm(daily_order_bm_df, team, game_num):
    """Extract one team's projected Baseball Monster batting order for a game.

    Parameters:
        daily_order_bm_df: frame with at least the columns `BBREFTEAM`,
            `game_number`, `id`, `confirmed` and `batting_order`, or None
            when no Baseball Monster data is available.
        team: value matched against `BBREFTEAM`.
        game_num: value matched against `game_number`.

    Returns:
        A new frame with `id`, `confirmed` and integer `batting_order` for the
        rows whose batting order is numeric, or None when the input is None.
        Prints a notice when the slots do not sum to 45 (1 + 2 + ... + 9).
    """
    if daily_order_bm_df is None:
        print("Missing Baseball Monster order.")
        return None

    # Select this team's game. The explicit copy keeps the column assignments
    # below from being applied to a slice of the caller's frame (which raises
    # SettingWithCopyWarning and leaves the write target ambiguous).
    is_this_game = (daily_order_bm_df['BBREFTEAM'] == team) & (daily_order_bm_df['game_number'] == game_num)
    order_bm_df = daily_order_bm_df.loc[is_this_game, ['id', 'confirmed', 'batting_order']].copy()

    # Clean and keep only batters: non-numeric batting orders become NaN and are dropped
    order_bm_df['batting_order'] = pd.to_numeric(order_bm_df['batting_order'], errors='coerce')
    order_bm_df = order_bm_df.dropna(subset=['batting_order']).reset_index(drop=True)
    order_bm_df['batting_order'] = order_bm_df['batting_order'].astype(int)

    # Confirm batting_order adds up to 45
    if order_bm_df['batting_order'].sum() != 45:
        print("Baseball Monster batting orders do not add up to 45")

    return order_bm_df

##### Impute batting order

In [None]:
def fill_missing_batting_order(df):
    """Assign the unused lineup slots (1-9) to rows that have no batting order.

    Rows with a missing `batting_order` are ranked by `pa_b_long_r`, then
    `b1_b_long_r`, both descending, and receive the open slots from lowest to
    highest. Rows left over once the open slots run out stay missing.
    The frame is modified and sorted by `batting_order` in place, then returned.
    """
    # Slots nobody occupies yet, lowest first
    taken_slots = set(df['batting_order'].dropna().unique())
    open_slots = sorted(set(range(1, 10)) - taken_slots)

    # Candidates for those slots, best-ranked first
    needs_slot = df['batting_order'].isna()
    candidates = df[needs_slot].sort_values(by=['pa_b_long_r', 'b1_b_long_r'], ascending=False)

    # Pair candidates with open slots; zip stops at the shorter of the two
    for row_label, slot in zip(candidates.index, open_slots):
        df.loc[row_label, 'batting_order'] = slot

    df.sort_values('batting_order', ascending=True, inplace=True)

    return df

### Impute Inputs

##### Option 1: Steamer

In [None]:
def impute_batters(batter_df, impute_batter_stats):
    """Fill in batter split stats flagged for imputation using a fitted model.

    For each pitcher hand (vs. RHP, then vs. LHP) the handedness dummies `b_L`
    and `p_L` are set, missing imputation flags default to 1, and the rows
    flagged 1 get their split stat columns replaced by
    `impute_batter_stats.predict(...)` on the `batter_stats_fg_imp` columns.
    Any values still missing afterwards are set to 0.

    Parameters:
        batter_df: frame with `batSide`, `imp_b_r`, `imp_b_l`, the
            `batter_stats_fg_imp` feature columns and the `batter_stats_r` /
            `batter_stats_l` target columns. Modified in place.
        impute_batter_stats: fitted model exposing `predict`.

    Returns:
        The same `batter_df`, with `b_L` / `p_L` left at their vs. LHP values.
    """
    # (imputation flag, stat columns to fill, pitcher-is-lefty dummy, batter-is-lefty dummy)
    splits = (
        # Vs. RHP: switch hitters bat left, so everyone who is not 'Right' counts as lefty
        ('imp_b_r', batter_stats_r, 0, (batter_df['batSide'] != 'Right').astype('int')),
        # Vs. LHP: switch hitters bat right, so only 'Left' counts as lefty
        ('imp_b_l', batter_stats_l, 1, (batter_df['batSide'] == 'Left').astype('int')),
    )

    for flag_col, target_cols, pitcher_is_lefty, batter_is_lefty in splits:
        batter_df['b_L'] = batter_is_lefty
        batter_df['p_L'] = pitcher_is_lefty

        # If the flag is missing, impute. Assigned back rather than using
        # `df[col].fillna(..., inplace=True)`, which is chained assignment and
        # does not update the frame under pandas copy-on-write.
        batter_df[flag_col] = batter_df[flag_col].fillna(1)
        needs_imputation = batter_df[flag_col] == 1

        # With nothing flagged there is nothing to predict on
        prediction = None
        if needs_imputation.any():
            try:
                prediction = impute_batter_stats.predict(batter_df.loc[needs_imputation, batter_stats_fg_imp])
            except Exception as exc:
                # Best effort, as before: the flagged stats become missing and
                # are zero-filled below, but the failure is no longer silent.
                print(f"Batter imputation failed for {flag_col}: {exc}")

        # Replace the flagged rows' split stats with the predicted values
        batter_df.loc[needs_imputation, target_cols] = prediction

    # Fill in missings (this covers the split stat columns as well)
    batter_df.fillna(0, inplace=True)

    return batter_df

In [None]:
def impute_pitchers(pitcher_df, impute_pitcher_stats):
    """Fill in pitcher split stats flagged for imputation using a fitted model.

    For each batter side (vs. RHB, then vs. LHB) the handedness dummies `p_L`
    and `b_L` are set, missing imputation flags default to 1, and the rows
    flagged 1 get their split stat columns replaced by
    `impute_pitcher_stats.predict(...)` on the `pitcher_stats_fg_imp` columns.
    Any values still missing afterwards are set to 0.

    Parameters:
        pitcher_df: frame with `pitchHand`, `imp_p_r`, `imp_p_l`, the
            `pitcher_stats_fg_imp` feature columns and the `pitcher_stats_r` /
            `pitcher_stats_l` target columns. Modified in place.
        impute_pitcher_stats: fitted model exposing `predict`.

    Returns:
        The same `pitcher_df`, with `b_L` left at its vs. LHB value (1).
    """
    # The pitcher's own hand does not depend on which batter side is being imputed
    pitcher_df['p_L'] = (pitcher_df['pitchHand'] == 'Left').astype('int')

    # (imputation flag, stat columns to fill, batter-is-lefty dummy)
    splits = (
        ('imp_p_r', pitcher_stats_r, 0),  # Vs. RHB
        ('imp_p_l', pitcher_stats_l, 1),  # Vs. LHB
    )

    for flag_col, target_cols, batter_is_lefty in splits:
        pitcher_df['b_L'] = batter_is_lefty

        # If the flag is missing, impute. Assigned back rather than using
        # `df[col].fillna(..., inplace=True)`, which is chained assignment and
        # does not update the frame under pandas copy-on-write.
        pitcher_df[flag_col] = pitcher_df[flag_col].fillna(1)
        needs_imputation = pitcher_df[flag_col] == 1

        # With nothing flagged there is nothing to predict on
        prediction = None
        if needs_imputation.any():
            try:
                prediction = impute_pitcher_stats.predict(pitcher_df.loc[needs_imputation, pitcher_stats_fg_imp])
            except Exception as exc:
                # Best effort, as before: the flagged stats become missing and
                # are zero-filled below, but the failure is no longer silent.
                print(f"Pitcher imputation failed for {flag_col}: {exc}")

        # Replace the flagged rows' split stats with the predicted values
        pitcher_df.loc[needs_imputation, target_cols] = prediction

    # Fill in missings (this covers the split stat columns as well)
    pitcher_df.fillna(0, inplace=True)

    return pitcher_df

##### Option 2: 0s

Note: This is currently unused, but may be a solid point of comparison

In [None]:
def impute_batters2(batter_df, batter_imputations_model):
    """Shrink batter split stats toward 0 in proportion to plate appearances.

    Each stat in `batter_stats_l` / `batter_stats_r` is replaced by
    `stat * pa / 50`, using `pa_b_l` / `pa_b_r` respectively; missing stats and
    plate appearances count as 0. `batter_df` is modified in place and
    returned. `batter_imputations_model` is accepted but not used.
    """
    # Fill in missings. Assigned back explicitly: `df[cols].fillna(0, inplace=True)`
    # only fills a temporary copy of the selected columns.
    batter_df[batter_stats_l] = batter_df[batter_stats_l].fillna(0)
    batter_df[batter_stats_r] = batter_df[batter_stats_r].fillna(0)
    batter_df[['pa_b_l', 'pa_b_r']] = batter_df[['pa_b_l', 'pa_b_r']].fillna(0)

    # Take weighted average of existing values and 0
    # This can be simplified but I want to spell it out for clarity
    # NOTE(review): with more than 50 PA the weight exceeds 1 and the stat is
    # scaled up rather than shrunk - confirm whether PA should be capped at 50.
    for col in batter_stats_l:
        batter_df[col] = (batter_df[col] * batter_df['pa_b_l'] + 0 * (50 - batter_df['pa_b_l'])) / 50
    for col in batter_stats_r:
        batter_df[col] = (batter_df[col] * batter_df['pa_b_r'] + 0 * (50 - batter_df['pa_b_r'])) / 50

    # If still missing, fill with 0
    batter_df.fillna(0, inplace=True)

    return batter_df

In [None]:
def impute_pitchers2(pitcher_df, pitcher_imputations_model):
    """Shrink pitcher split stats toward 0 in proportion to plate appearances.

    Each stat in `pitcher_stats_l` / `pitcher_stats_r` is replaced by
    `stat * pa / 50`, using `pa_p_l` / `pa_p_r` respectively; missing stats and
    plate appearances count as 0. `pitcher_df` is modified in place and
    returned. `pitcher_imputations_model` is accepted but not used.
    """
    # Fill in missings. Assigned back explicitly: `df[cols].fillna(0, inplace=True)`
    # only fills a temporary copy of the selected columns.
    pitcher_df[pitcher_stats_l] = pitcher_df[pitcher_stats_l].fillna(0)
    pitcher_df[pitcher_stats_r] = pitcher_df[pitcher_stats_r].fillna(0)
    pitcher_df[['pa_p_l', 'pa_p_r']] = pitcher_df[['pa_p_l', 'pa_p_r']].fillna(0)

    # Take weighted average of existing values and 0
    # This can be simplified but I want to spell it out for clarity
    # NOTE(review): with more than 50 PA the weight exceeds 1 and the stat is
    # scaled up rather than shrunk - confirm whether PA should be capped at 50.
    for col in pitcher_stats_l:
        pitcher_df[col] = (pitcher_df[col] * pitcher_df['pa_p_l'] + 0 * (50 - pitcher_df['pa_p_l'])) / 50
    for col in pitcher_stats_r:
        pitcher_df[col] = (pitcher_df[col] * pitcher_df['pa_p_r'] + 0 * (50 - pitcher_df['pa_p_r'])) / 50

    # If still missing, fill with 0
    pitcher_df.fillna(0, inplace=True)

    return pitcher_df

### Matchup Objects

##### Create Batter Objects

In [None]:
def create_batter_objects(batter_df, order_df, scale_batter_stats, scale_batter_stats_steamer, impute_batter_stats):
    """Build the list of starting `Batter` objects for one team.

    Steps: merge the batting order onto the batter stats, impute missing
    lineup slots when the slots do not sum to 45, keep only rows with a
    batting order, scale the split stats and the Steamer stats, impute
    flagged stats, then create one `Batter` per remaining row.

    Parameters:
        batter_df: batter stats, one row per player, keyed by `id`.
        order_df: batting order with `id`, `confirmed` and `batting_order`.
        scale_batter_stats: fitted scaler (`transform`) for the `batter_inputs` columns.
        scale_batter_stats_steamer: fitted scaler for the `batter_stats_fg` columns.
        impute_batter_stats: fitted model passed on to `impute_batters`.

    Returns:
        List of `Batter` objects in the row order of the processed frame.
    """
    # Merge on batting order
    batter_df = pd.merge(batter_df, order_df, on=['id'], how='left')

    # Fill in missing batting orders, if necessary (slots 1..9 sum to 45)
    if batter_df['batting_order'].sum() != 45:
        print("Batting orders: imputed")
        batter_df = fill_missing_batting_order(batter_df)

    # Keep starting batters. Copy so the in-place renames and column
    # assignments below act on an independent frame instead of a filtered
    # slice (which triggers SettingWithCopyWarning).
    batter_df = batter_df[~batter_df['batting_order'].isna()].copy()

    ### Scale stats
    ## Model inputs
    # The scaler is applied to columns named like `batter_inputs`, so each
    # split is temporarily renamed to those names, scaled, then renamed back.
    for split_cols in (batter_stats_l, batter_stats_r):
        batter_df.rename(columns=dict(zip(split_cols, batter_inputs)), inplace=True)
        batter_df[batter_inputs] = scale_batter_stats.transform(batter_df[batter_inputs])
        batter_df.rename(columns=dict(zip(batter_inputs, split_cols)), inplace=True)

    ## Steamer inputs
    batter_df[batter_stats_fg] = scale_batter_stats_steamer.transform(batter_df[batter_stats_fg])

    ### Impute stats
    batter_df = impute_batters(batter_df, impute_batter_stats)

    ### Create player objects
    batter_attrs = batter_columns + ['confirmed']
    Batters = [
        Batter(**{attr: row[attr] for attr in batter_attrs})
        for _, row in batter_df.iterrows()
    ]

    return Batters

##### Create Pitcher Objects

In [None]:
def create_pitcher_objects(pitcher_df, scale_pitcher_stats, scale_pitcher_stats_steamer, impute_pitcher_stats):
    """Build the list of `Pitcher` objects for one team.

    Steps: drop pitchers without a `Leverage`, make sure each leverage level
    1-4 is represented, default `IP_start` and `relief_IP`, scale the split
    stats and the Steamer stats, impute flagged stats, then create one
    `Pitcher` per row.

    Parameters:
        pitcher_df: pitcher stats, one row per pitcher. Modified in place.
        scale_pitcher_stats: fitted scaler (`transform`) for the `pitcher_inputs` columns.
        scale_pitcher_stats_steamer: fitted scaler for the `pitcher_stats_fg` columns.
        impute_pitcher_stats: fitted model passed on to `impute_pitchers`.

    Returns:
        List of `Pitcher` objects in row order.
    """
    # Drop if missing Leverage
    pitcher_df.dropna(subset=['Leverage'], inplace=True)

    # Ensure at least one pitcher at each Leverage.
    # Rows are addressed by position: after the dropna above the index labels
    # may have gaps, so a label lookup such as `.loc[0, ...] = 1` could append
    # a new all-NaN row (or hit the wrong pitcher) instead of updating one.
    n_pitchers = len(pitcher_df)
    leverage_pos = pitcher_df.columns.get_loc('Leverage')
    fallback_rows = ((1, 0), (2, 1), (3, n_pitchers - 2), (4, n_pitchers - 1))
    for leverage, row_pos in fallback_rows:
        # Skip positions that do not exist on very short staffs
        if leverage not in pitcher_df['Leverage'].values and 0 <= row_pos < n_pitchers:
            pitcher_df.iloc[row_pos, leverage_pos] = leverage

    # Assign IP_start if missing (a value of 0 is treated as missing here)
    pitcher_df['IP_start'] = np.where(pitcher_df['IP_start'] == 0, 5, pitcher_df['IP_start'])

    # Assign relief_IP if missing
    # This is necessary because relievers with empty relief_IP may break the code (won't be able to randomly select any pitchers)
    # A better solution is likely possible, but this is only really going to affect early season data when steamer is wonky
    pitcher_df['relief_IP'] = pitcher_df['relief_IP'].fillna(1)

    ### Scale stats
    ## Model inputs
    # The scaler is applied to columns named like `pitcher_inputs`, so each
    # split is temporarily renamed to those names, scaled, then renamed back.
    for split_cols in (pitcher_stats_l, pitcher_stats_r):
        pitcher_df.rename(columns=dict(zip(split_cols, pitcher_inputs)), inplace=True)
        pitcher_df[pitcher_inputs] = scale_pitcher_stats.transform(pitcher_df[pitcher_inputs])
        pitcher_df.rename(columns=dict(zip(pitcher_inputs, split_cols)), inplace=True)

    ## Steamer inputs
    pitcher_df[pitcher_stats_fg] = scale_pitcher_stats_steamer.transform(pitcher_df[pitcher_stats_fg])

    ### Impute stats
    pitcher_df = impute_pitchers(pitcher_df, impute_pitcher_stats)

    ### Create player objects
    Pitchers = [
        Pitcher(**{attr: row[attr] for attr in pitcher_columns})
        for _, row in pitcher_df.iterrows()
    ]

    return Pitchers

### Calculate Pull Odds

In [None]:
# Calculate odds of being pulled
def pull_odds(game, predict_pulls):
    """Return the modelled probability that the current pitcher is pulled.

    Parameters:
        game: game state exposing `top_bot`, `away_score`, `home_score`,
            `inning`, `outs`, `onFirst`, `onSecond`, `onThird` and `pitching`
            (the current pitcher with per-inning and per-game event counts,
            `OUT`, `IP_start` and `imp_p_either`).
        predict_pulls: fitted classifier exposing `predict_proba`.

    Returns:
        The second column of `predict_proba` for the single input row, i.e.
        the probability of the "pulled" class.

    The 48 features are passed in this order (it must match the order the pull
    model was trained with): 13 per-inning counts (B1, B2, B3, HR, BB, HBP, SO,
    FO, GO, LO, PO, faced, reached), the same 13 counts for the game, game OUT,
    inning dummies 1-10 plus "11 or later", out dummies 0-2, pitcher score,
    batter score, onFirst, onSecond, onThird, IP_start, imp_p_either.
    """
    # Determine batting and fielding team scores for use as inputs
    if game.top_bot == "Top":
        batter_score = game.away_score
        pitcher_score = game.home_score
    else:
        batter_score = game.home_score
        pitcher_score = game.away_score

    pitcher = game.pitching

    # Inning dummies: one per inning 1-10, plus a shared one for inning 11 or more.
    # Built as plain lists; the values are no longer written into globals().
    inning_dummies = [int(game.inning == inning) for inning in range(1, 11)]
    inning_dummies.append(int(game.inning >= 11))

    # Out dummies
    out_dummies = [int(game.outs == out) for out in range(0, 3)]

    # Event counts, for the current inning and for the game so far
    event_stats = ('B1', 'B2', 'B3', 'HR', 'BB', 'HBP', 'SO', 'FO', 'GO', 'LO', 'PO', 'faced', 'reached')
    inning_counts = [getattr(pitcher, f'{stat}_inning') for stat in event_stats]
    game_counts = [getattr(pitcher, stat) for stat in event_stats]

    # Pull model inputs
    inputs_pull = (
        inning_counts
        + game_counts
        # Outs are included in aggregate, not for the inning (the inning is de-facto included via out dummies)
        + [pitcher.OUT]
        + inning_dummies
        + out_dummies
        + [pitcher_score, batter_score]
        + [game.onFirst, game.onSecond, game.onThird]
        + [pitcher.IP_start, pitcher.imp_p_either]
    )

    # Reshape to match what models expect (a single row)
    model_inputs = pd.Series(inputs_pull).values.reshape(1, -1)

    # Choose the probability that pull = 1
    odds = predict_pulls.predict_proba(model_inputs).tolist()[0][1]

    return odds

### Determine Pitcher

In [None]:
def choose_pitcher(game, predict_pulls, predict_leverage, opener_list):
    """Decide who pitches the current half-inning and store it on `game`.

    The home staff pitches when `game.top_bot` is "Top", the away staff when
    it is "Bot"; for any other value the game is returned untouched.

    While the starter has not been pulled, a Leverage-1 pitcher is put on the
    mound and a random roll against `pull_odds` decides whether he is pulled;
    a starter whose `Name` is in `opener_list` is always pulled from the third
    inning on. Once the starter is pulled, `predict_leverage` gives the
    probabilities of leverage 2/3/4 for the situation, one leverage is drawn,
    and a reliever of that leverage is drawn with weights `relief_IP`
    (falling back to Leverage 2 when none has the drawn leverage).

    Parameters:
        game: mutable game state; `<side>_pitcher_up`, `<side>_starter_pulled`,
            `<side>_leverage` and `pitching` are updated.
        predict_pulls: classifier passed on to `pull_odds`.
        predict_leverage: classifier exposing `predict_proba` and `classes_`
            (the classes 2, 3 and 4 are looked up by label).
        opener_list: collection of starter names treated as openers.

    Returns:
        The same `game` object.
    """
    # Which staff is on the mound
    if game.top_bot == "Top":
        side, opponent = "home", "away"
    elif game.top_bot == "Bot":
        side, opponent = "away", "home"
    else:
        return game

    pulled_attr = f"{side}_starter_pulled"
    pitcher_up_attr = f"{side}_pitcher_up"
    staff = getattr(game, f"{side}_pitchers")

    ### Starter still in the game
    # Equality comparisons (rather than truthiness) are kept so non-bool flag
    # values behave exactly as before.
    if getattr(game, pulled_attr) == False:
        # Pitcher is the starter
        starter = random.choice([pitcher for pitcher in staff if pitcher.Leverage == 1]) if staff else None
        setattr(game, pitcher_up_attr, starter)
        # pull_odds reads the current pitcher from game.pitching
        game.pitching = starter

        # Roll to see if they'll be pulled
        pull_prob = pull_odds(game, predict_pulls)
        pull_roll = random.random()
        if pull_roll < pull_prob:
            setattr(game, pulled_attr, True)
        # If they're an opener and it's the third inning or later, pull them
        if game.inning >= 3 and getattr(game, f"{side}_starter").Name in opener_list:
            setattr(game, pulled_attr, True)

    ### Starter pulled (possibly during this very call, hence a second `if`)
    if getattr(game, pulled_attr) == True:
        # Pitcher's lead (can be negative)
        pitcher_lead = getattr(game, f"{side}_score") - getattr(game, f"{opponent}_score")
        # Top of the inning
        top = int(game.top_bot == "Top")
        # Inning dummies: innings 1-10, plus one for inning 11 or more.
        # Built as a plain list; the values are no longer written into globals().
        inning_dummies = [int(game.inning == inning) for inning in range(1, 11)]
        inning_dummies.append(int(game.inning >= 11))

        # Predict leverage probabilities
        leverage_inputs = pd.Series([pitcher_lead, top] + inning_dummies).values.reshape(1, -1)
        predictions_proba = predict_leverage.predict_proba(leverage_inputs)
        leverage_df = pd.DataFrame(predictions_proba, columns=predict_leverage.classes_)

        leverage_weights = [leverage_df[2][0], leverage_df[3][0], leverage_df[4][0]]
        selected_leverage = random.choices([2, 3, 4], weights=leverage_weights, k=1)[0]
        setattr(game, f"{side}_leverage", selected_leverage)

        # Choose a relief pitcher at the given leverage
        eligible_pitchers = [pitcher for pitcher in staff if pitcher.Leverage == selected_leverage]
        if not eligible_pitchers:
            # If there isn't one at that leverage, assume low leverage
            eligible_pitchers = [pitcher for pitcher in staff if pitcher.Leverage == 2]

        # Select eligible pitcher weighted by relief_IP
        if eligible_pitchers:
            reliever = random.choices(eligible_pitchers, weights=[pitcher.relief_IP for pitcher in eligible_pitchers])[0]
        else:
            reliever = None
        setattr(game, pitcher_up_attr, reliever)

    # Determine pitcher
    game.pitching = getattr(game, pitcher_up_attr)

    return game

### Probabilities

### Testing

In [None]:
# out_df = pd.read_pickle(os.path.join(model_path, 'out_df.pkl'))

In [None]:
def adjust_out_df(out_df, value):
    """Rescale `value` by the actual/predicted out-rate ratio of its bucket.

    The bucket is the first row of `out_df` whose `is_out_pred_max` is strictly
    greater than `value`; when no row qualifies, the last row is used. The
    result is `value * is_out_mean / is_out_pred_mean` for that row.
    """
    below_cap = (value < out_df['is_out_pred_max']).to_numpy()

    # argmax on a boolean array gives the first True; -1 selects the last row
    bucket_pos = int(below_cap.argmax()) if below_cap.any() else -1
    bucket = out_df.iloc[bucket_pos]

    ratio = bucket['is_out_mean'] / bucket['is_out_pred_mean']
    return value * ratio


### End Testing

In [None]:
# Create PA event probability
def probabilities(game, predict_binary, predict_outs, predict_safe, predict_all, park_object):
    """Build the plate-appearance feature row and return cumulative event thresholds.

    Side effects on the passed objects:
      * every stat in `batter_inputs` plus `imp_b` is copied onto `game.ab`
        from its `_l` / `_r` variant, chosen by the pitcher's hand;
      * every stat in `pitcher_inputs` plus `imp_p` is copied onto
        `game.pitching` from its `_l` / `_r` variant, chosen by the side the
        batter hits from (switch hitters bat left against right-handers);
      * `game.ab.b_L`, `game.pitching.p_L`, `game.onFirst`, `game.onSecond`,
        `game.onThird`, `game.top` and `game.score_diff` are set.

    Parameters:
        game: mutable game state (current batter `ab`, current pitcher
            `pitching`, bases, score, inning, outs, starter-pulled flags).
        predict_binary: out/safe classifier; it is still called, but its
            result is not used by the one-model approach below.
        predict_outs, predict_safe: classifiers of the disabled three-model
            approach; unused here.
        predict_all: classifier whose `predict_proba` columns are read in the
            order b1, b2, b3, bb, fo, go, hbp, hr, lo, po, so.
        park_object: park with `venue_id` and per-event `*_wfx_l` / `*_wfx_r`
            multipliers.

    Returns:
        `(hbp, bb, b1, b2, b3, hr, so, lo, po, go, fo, pa_summary)`: running
        sums of the event probabilities in that order, so each value is the
        upper end of that event's range on [0, 1] (`fo` is forced to exactly
        1), followed by `pa_summary` - the raw probabilities in the same event
        order plus the model input row, kept for debugging.
    """
    batter = game.ab
    pitcher = game.pitching

    pitcher_is_lefty = pitcher.pitchHand == "Left"
    # Switch hitters bat from the left side against right-handed pitchers
    batter_bats_left = batter.batSide == "Left" or (batter.batSide == "Switch" and pitcher.pitchHand == "Right")

    # Batter attributes
    # Set batter attributes specific to pitcher hand
    batter_split = 'l' if pitcher_is_lefty else 'r'
    for stat in batter_inputs + ['imp_b']:
        setattr(batter, stat, getattr(batter, f'{stat}_{batter_split}'))

    # Pitcher attributes
    # Set pitcher attributes specific to batter hand
    pitcher_split = 'l' if batter_bats_left else 'r'
    for stat in pitcher_inputs + ['imp_p']:
        setattr(pitcher, stat, getattr(pitcher, f'{stat}_{pitcher_split}'))

    # Make compatible with model
    batter.b_L = int(batter_bats_left)
    pitcher.p_L = int(pitcher_is_lefty)

    # On base dummies
    game.onFirst = int(game.on_1b is not None)
    game.onSecond = int(game.on_2b is not None)
    game.onThird = int(game.on_3b is not None)

    # Top of the inning dummy
    game.top = int(game.top_bot == "Top")

    # Score differential from the batting team's point of view
    if game.top == 1:
        game.score_diff = game.away_score - game.home_score
    else:
        game.score_diff = game.home_score - game.away_score

    ### Inputs
    # Batters
    batter_inputs_pa = [getattr(batter, stat) for stat in batter_inputs]

    # Pitchers
    pitcher_inputs_pa = [getattr(pitcher, stat) for stat in pitcher_inputs]

    # Player hands
    hand_inputs_pa = [pitcher.p_L, batter.b_L]

    # Imputation flags
    imp_inputs_pa = [batter.imp_b, pitcher.imp_p]

    # Starter input: 1 while the pitching team's starter has not been pulled
    if game.top == 1:
        starter = int(game.home_starter_pulled == False)
    else:
        starter = int(game.away_starter_pulled == False)
    starter_inputs_pa = [starter]

    # Cumulative inputs, for the current inning and for the game so far
    cumulative_stats = ('B1', 'B2', 'B3', 'HR', 'BB', 'HBP', 'SO', 'FO', 'GO', 'LO', 'PO',
                        'H', 'TB', 'reached', 'faced', 'OUT')
    cumulative_inning_inputs = [getattr(pitcher, f'{stat}_inning') for stat in cumulative_stats]
    cumulative_game_inputs = [getattr(pitcher, stat) for stat in cumulative_stats]

    # Game state
    # Score for team pitching
    pitcher_score = int(game.away_score if game.top_bot == "Bot" else game.home_score)
    # Score for team at the plate
    batter_score = int(game.away_score if game.top_bot == "Top" else game.home_score)
    # Batting team is leading dummy
    winning = int(batter_score > pitcher_score)
    # Batting team is leading by more than 3 dummy
    winning_big = int(batter_score > pitcher_score + 3)
    # Times pitcher has faced batter (approximated as batters faced // 9)
    times_faced = pitcher.faced // 9

    game_state_inputs_pa = [game.onFirst, game.onSecond, game.onThird, game.top, game.score_diff,
                            pitcher_score, batter_score, winning, winning_big, times_faced]

    # Inning dummies: innings 1-10, plus one for inning 11 or more.
    # Built as plain lists; the values are no longer written into globals().
    inning_inputs_pa = [int(game.inning == inning) for inning in range(1, 11)]
    inning_inputs_pa.append(int(game.inning >= 11))

    # Out dummies
    out_inputs_pa = [int(game.outs == out) for out in range(0, 3)]

    # Venue dummies
    # Ensure that these match M03. Plate Appearances and are in the correct order
    venue_list = [1, 2, 3, 4, 5, 7, 10, 12, 14, 15, 17, 19, 22, 31, 32, 680, 2392, 2394,
                  2395, 2602, 2680, 2681, 2889, 3289, 3309, 3312, 3313, 4169, 4705, 5325]
    venue_dummy_inputs = [int(venue == park_object.venue_id) for venue in venue_list]

    # Park/Weather Multipliers, for the side the batter is hitting from
    wfx_side = 'l' if batter.b_L == 1 else 'r'
    wfx_events = ('b1', 'b2', 'b3', 'hr', 'bb', 'hbp', 'so', 'fo', 'go', 'lo', 'po')
    wfx_inputs_pa = [getattr(park_object, f'{event}_wfx_{wfx_side}') for event in wfx_events]

    ### Create inputs
    inputs_pa = (
        batter_inputs_pa + pitcher_inputs_pa + hand_inputs_pa + imp_inputs_pa + starter_inputs_pa
        + cumulative_inning_inputs + cumulative_game_inputs + game_state_inputs_pa
        + inning_inputs_pa + out_inputs_pa + venue_dummy_inputs + wfx_inputs_pa
    )
    model_inputs = pd.Series(inputs_pa).values.reshape(1, -1)

    ## Run models
    # Out or safe. Column 0 is read as the probability of "safe".
    # NOTE(review): `is_safe` is not used by the one-model approach; the call is
    # kept for the disabled experiments (rescaling the out probability by a
    # constant or via adjust_out_df, and the three-model approach that rolls
    # safe/out first and then uses predict_safe / predict_outs).
    binary_list = predict_binary.predict_proba(model_inputs).tolist()
    is_safe = binary_list[0][0]

    ### One Model approach
    all_list = predict_all.predict_proba(model_inputs).tolist()

    b1 = all_list[0][0]
    b2 = all_list[0][1]
    b3 = all_list[0][2]
    bb = all_list[0][3]
    fo = all_list[0][4]
    go = all_list[0][5]
    hbp = all_list[0][6]
    hr = all_list[0][7]
    lo = all_list[0][8]
    po = all_list[0][9]
    so = all_list[0][10]

    # Output list pre-adding stats together to create range
    # This could be cleaned up to be done outside function, probably!!
    output_list = [hbp, bb, b1, b2, b3, hr, so, lo, po, go, fo]

    # Probabilities
    # Add stats together so they represent the end point of a range with a probability of being selected equal to their rate
    bb = hbp + bb
    b1 = bb + b1
    b2 = b1 + b2
    b3 = b2 + b3
    hr = b3 + hr
    so = hr + so
    lo = so + lo
    po = lo + po
    go = po + go
    # The running sum ends at ~1 by construction; pin the last threshold to
    # exactly 1 so floating point error can never leave a roll unmatched.
    fo = 1

    # pa_summary included for debugging
    pa_summary = output_list + model_inputs.tolist()[0]

    return hbp, bb, b1, b2, b3, hr, so, lo, po, go, fo, pa_summary

### Errors

In [None]:
def is_error(game, event):
    """Roll for whether an error occurs on this plate appearance.

    Builds a one-row feature vector (event dummies followed by the
    first/second/third occupancy flags read from ``game``), asks the
    module-level ``predict_errors`` classifier for a probability, and compares
    it against a uniform random draw.

    Args:
        game: Game state exposing ``onFirst``, ``onSecond`` and ``onThird``.
        event: Plate-appearance outcome code (e.g. "b1", "go", "so").

    Returns:
        1 if an error is rolled, otherwise 0.
    """
    # Column order of the event dummies fed to the model
    event_codes = ("b1", "b2", "b3", "bb", "fo", "go", "hbp", "hr", "lo", "po", "so")
    features = [int(event == code) for code in event_codes]
    features += [game.onFirst, game.onSecond, game.onThird]
    model_inputs = pd.Series(features).values.reshape(1, -1)

    # The second predict_proba column is used as the error probability
    error_probability = predict_errors.predict_proba(model_inputs).tolist()[0][1]

    # An error can be rolled on any event, hits included
    return int(random.random() < error_probability)

### Double Play

In [None]:
def is_dp(game, event):
    """Roll for whether a double play occurs on this plate appearance.

    Builds a one-row feature vector (event dummies, then the current out count
    and the first/second/third occupancy flags read from ``game``), asks the
    module-level ``predict_dp`` classifier for a probability, and compares it
    against a uniform random draw.

    Args:
        game: Game state exposing ``outs``, ``onFirst``, ``onSecond`` and
            ``onThird``.
        event: Plate-appearance outcome code (e.g. "b1", "go", "so").

    Returns:
        1 if a double play is rolled, otherwise 0.
    """
    # Column order of the event dummies fed to the model
    event_codes = ("b1", "b2", "b3", "bb", "fo", "go", "hbp", "hr", "lo", "po", "so")
    features = [int(event == code) for code in event_codes]
    features += [game.outs, game.onFirst, game.onSecond, game.onThird]
    model_inputs = pd.Series(features).values.reshape(1, -1)

    # The second predict_proba column is used as the double-play probability
    dp_probability = predict_dp.predict_proba(model_inputs).tolist()[0][1]

    # A double play can be rolled on any event, hits included
    return int(random.random() < dp_probability)

### Outs

In [None]:
def choose_outs(game, event, error, double_play):
    """Pick who is put out on the play: batter, a runner, or no one.

    For the batter and for each occupied base, the module-level
    ``predict_out_bases`` classifier is queried for that participant's
    probability of being out. Those values, plus a "no one is out" weight, are
    rounded to two decimals, normalised, and sampled from. On a double play a
    second draw is made with the first pick's weight set to zero.

    Args:
        game: Game state; reads ``on_1b``/``on_2b``/``on_3b`` (runner objects
            or None) and ``outs``/``onFirst``/``onSecond``/``onThird``.
        event: Plate-appearance outcome code (e.g. "b1", "go", "so").
        error: 1 if an error was rolled on the play, else 0.
        double_play: 1 if a double play was rolled on the play, else 0.

    Returns:
        Tuple ``(out_ab, out_1b, out_2b, out_3b)`` of 0/1 flags.
    """
    # Column order of the event dummies fed to the model
    event_codes = ("b1", "b2", "b3", "bb", "fo", "go", "hbp", "hr", "lo", "po", "so")
    event_dummies = [int(event == code) for code in event_codes]
    # Situation features shared by every query below
    situation = [game.outs, game.onFirst, game.onSecond, game.onThird, error, double_play]

    def _out_probability(start_dummies):
        # start_dummies flags whose out-probability is requested: [AB, 1B, 2B, 3B]
        out_input_list = event_dummies + start_dummies + situation
        model_inputs = pd.Series(out_input_list).values.reshape(1, -1)
        return predict_out_bases.predict_proba(model_inputs).tolist()[0][1]

    # Runners are only queried when the base is occupied; an empty base can't produce an out
    out_3b = _out_probability([0, 0, 0, 1]) if game.on_3b is not None else 0
    out_2b = _out_probability([0, 0, 1, 0]) if game.on_2b is not None else 0
    out_1b = _out_probability([0, 1, 0, 0]) if game.on_1b is not None else 0
    out_ab = _out_probability([1, 0, 0, 0])

    # Weight left over for "no one is out" (a double play needs two outs, hence 1 + double_play)
    safe_probability = (1 + double_play) - np.sum([out_ab, out_1b, out_2b, out_3b])
    # It can't be negative, and on an out event someone must be out unless there was an error
    if safe_probability < 0 or (event in ['so', 'go', 'lo', 'po', 'fo'] and error == 0):
        safe_probability = 0

    # Order: AB, 1B, 2B, 3B, no one
    probabilities = [round(item, 2) for item in [out_ab, out_1b, out_2b, out_3b, safe_probability]]
    total = sum(probabilities)
    if total > 0:
        probabilities = [100 * x / total for x in probabilities]
    else:
        # Every weight rounded to zero (only reachable once the "no one" weight was forced to 0).
        # Normalising would divide by zero, so charge the out to the batter, as the
        # double-play failsafe below does.
        probabilities = [100, 0, 0, 0, 0]

    # Draw the (first) out
    indices = range(len(probabilities))
    chosen_index = random.choices(indices, weights=probabilities)[0]
    chosen_index2 = None

    if double_play == 1:
        # The same pick can't be drawn twice
        probabilities[chosen_index] = 0
        # Failsafe: if nothing else has any weight left, give the batter a guaranteed weight
        if np.sum(probabilities) <= 0:
            probabilities[0] = 100
        chosen_index2 = random.choices(indices, weights=probabilities)[0]

    # Translate the drawn indices into per-participant 0/1 flags (index 4 means no one)
    out_ab = int(chosen_index == 0 or chosen_index2 == 0)
    out_1b = int(chosen_index == 1 or chosen_index2 == 1)
    out_2b = int(chosen_index == 2 or chosen_index2 == 2)
    out_3b = int(chosen_index == 3 or chosen_index2 == 3)

    return out_ab, out_1b, out_2b, out_3b

### Events

In [None]:
def event_results(game, startInt, event, out_ab, out_1b, out_2b, out_3b, blocked_1b, blocked_2b, blocked_3b, error, double_play):
    """Roll the destination of one participant who was not put out.

    Builds a one-row feature vector and asks the module-level
    ``predict_events`` classifier for four class probabilities, which are read
    in the order first base, second base, third base, scoring. The first three
    are rounded to two decimals and accumulated into thresholds; scoring takes
    whatever remains up to 1.

    Args:
        game: Game state exposing ``outs``, ``onFirst``, ``onSecond`` and
            ``onThird``.
        startInt: Starting position, compared against 0, 1, 2 and 3 to build
            the start dummies.
        event: Plate-appearance outcome code (e.g. "b1", "go", "so").
        out_ab, out_1b, out_2b, out_3b: 0/1 flags for who is out on the play.
        blocked_1b, blocked_2b, blocked_3b: 0/1 flags passed to the model as
            base-blocked indicators.
        error: 1 if an error was rolled on the play, else 0.
        double_play: 1 if a double play was rolled on the play, else 0.

    Returns:
        One of "to_1b", "to_2b", "to_3b" or "to_score".
    """
    # Column order of the event dummies fed to the model
    event_codes = ("b1", "b2", "b3", "bb", "fo", "go", "hbp", "hr", "lo", "po", "so")
    features = [int(event == code) for code in event_codes]
    # Start dummies: one flag per possible starting position 0..3
    features += [int(startInt == position) for position in range(4)]
    features += [
        game.outs, game.onFirst, game.onSecond, game.onThird,
        blocked_1b, blocked_2b, blocked_3b,
        out_ab, out_1b, out_2b, out_3b,
        error, double_play,
    ]
    model_inputs = pd.Series(features).values.reshape(1, -1)

    # Destination probabilities, rounded to two decimals
    class_probs = predict_events.predict_proba(model_inputs).tolist()[0]
    p_first, p_second, p_third, _p_home = [round(class_probs[i], 2) for i in range(4)]

    # Cumulative thresholds; the scoring threshold is fixed at 1 so every roll lands somewhere
    reach_2b = p_first + p_second
    reach_3b = reach_2b + p_third

    # Roll and return the first threshold the roll falls under
    base_roll = random.random()
    if base_roll < p_first:
        return "to_1b"
    if base_roll < reach_2b:
        return "to_2b"
    if base_roll < reach_3b:
        return "to_3b"
    return "to_score"

In [1]:
### TESTING A LOG
def log_pa_summary(pa_summary, baseball_path, filename='pa_summary_log.csv'):
    """Append one plate-appearance summary row to a CSV log.

    Args:
        pa_summary: Sequence of values written as a single CSV row.
        baseball_path: Directory that holds the log file.
        filename: Name of the log file inside ``baseball_path``.

    The first write to a file that does not exist yet is preceded by a generic
    header ``col_1 .. col_N`` sized to ``pa_summary``.
    """
    filepath = os.path.join(baseball_path, filename)

    # Decide about the header before opening: append mode creates the file
    needs_header = not os.path.isfile(filepath)

    rows = []
    if needs_header:
        rows.append([f'col_{position}' for position in range(1, len(pa_summary) + 1)])
    rows.append(pa_summary)

    with open(filepath, mode='a', newline='') as file:
        csv.writer(file).writerows(rows)

In [None]:
# Simulate at bat
def sim_ab(game, predict_binary, predict_outs, predict_safe, predict_all, predict_pulls, predict_leverage, opener_list, park_object, debug=False):
    """Simulate a single plate appearance and update the game state in place.

    Steps, in order:
      1. Possibly pull the starter (``choose_pitcher``) and pick the batter
         whose ``batting_order`` matches the batting team's current slot.
      2. Get cumulative outcome thresholds from ``probabilities`` and roll
         the event (hbp, bb, b1, b2, b3, hr, so, lo, po, go, fo).
      3. Credit the event to batter and pitcher counters.
      4. Roll an error (``is_error``) and a double play (``is_dp``), then
         decide who is out (``choose_outs``).
      5. Unless the play ends the inning on an out event, roll each
         participant's destination (``event_results``) and move runners,
         crediting runs, earned runs and RBI.
      6. Refresh base-occupancy flags, outs, score, batters faced, pitcher
         totals (``calculate_pitcher``) and advance the batting order.

    Args:
        game: Mutable game state; read and modified throughout.
        predict_binary, predict_outs, predict_safe, predict_all: Models
            forwarded to ``probabilities``.
        predict_pulls, predict_leverage: Models forwarded to
            ``choose_pitcher``.
        opener_list: Forwarded to ``choose_pitcher``.
        park_object: Forwarded to ``probabilities``.
        debug: When True, base-runner batting orders are computed (for the
            commented-out diagram below) and ``calculate_batter`` is called
            for the batter before the event is rolled.

    Returns:
        The updated ``game``.
    """
    # NOTE(review): timestamp is captured but not referenced again in this function
    start = time.time()

    # Choose plate appearance matchup
    # If it's the top of the inning
    if game.top_bot == "Top":
        # And the home starter is still in the game
        if game.home_starter_pulled == False:
            # Consider pulling them
            game = choose_pitcher(game, predict_pulls, predict_leverage, opener_list)
        # Note: we only assign new relief pitchers to start an inning

        # Assign batter
        game.ab = next(batter for batter in game.away_batters if batter.batting_order == game.away_order)
        # Assign pitcher to the batter
        game.ab.pitcher = game.pitching

    # If it's the bottom of the inning
    if game.top_bot == "Bot":
        # And the away starter is still in the game
        if game.away_starter_pulled == False:
            # Consider pulling them
            game = choose_pitcher(game, predict_pulls, predict_leverage, opener_list)
        # Note: we only assign new relief pitchers to start an inning

        # Assign batter
        game.ab = next(batter for batter in game.home_batters if batter.batting_order == game.home_order)
        # Assign pitcher to the batter
        game.ab.pitcher = game.pitching


    # Add PA for batter and pitcher
    game.ab.PA += 1
    game.pitching.PA += 1

    # Set the zombie (will be last guy up); sim_inning places this player on second in inning 10+
    if game.top_bot == "Top":
        game.away_zombie = game.ab
    else:
        game.home_zombie = game.ab




    if debug == True:
        # Batting-order numbers of the runners (0 = empty base); only used by the commented-out diagram below
        if game.on_1b is None:
            order_1b = 0
        else:
            order_1b = int(game.on_1b.batting_order)
        if game.on_2b is None:
            order_2b = 0
        else:
            order_2b = int(game.on_2b.batting_order)
        if game.on_3b is None:
            order_3b = 0
        else:
            order_3b = int(game.on_3b.batting_order)

        # Calculate batter stats
        game.ab, game = calculate_batter(game.ab, game)

        # print("\n")
        # print(game.top_bot, game.inning, "Outs: ", game.outs)
        # print(f"       {order_2b}")
        # print("    /     \\")
        # print(f"   {order_3b}   {int(game.pitching.Leverage)}   {order_1b}  {game.pitching.position} {game.pitching.fullName}: {game.pitching.FP}")
        # print("    \     /         vs.")
        # print(f"       {int(game.ab.batting_order)}      {game.ab.position} {game.ab.fullName}: {game.ab.FP}  ")
        # print(f"Away {game.away_score} - {game.home_score} Home")


    # Calculate probabilities
    # Each returned value is a cumulative threshold (upper end of that event's range), in the order tested below
    hbp, bb, b1, b2, b3, hr, so, lo, po, go, fo, pa_summary = probabilities(game, predict_binary, predict_outs, predict_safe, predict_all, park_object)


    # Roll: the event is the first threshold the roll falls under
    pa_roll = random.random() 
    # Event 1: HBP
    if pa_roll < hbp:
        event = "hbp"
    # Event 2: BB
    elif pa_roll < bb:
        event = "bb"
    # Event 3: Single
    elif pa_roll < b1:
        event = "b1"
    # Event 4: Double    
    elif pa_roll < b2:
        event = "b2"
    # Event 5: Triple
    elif pa_roll < b3:
        event = "b3"
     # Event 6: Home Run
    elif pa_roll < hr:
        event = "hr"
    # Event 7: Strikeout
    elif pa_roll < so:
        event = "so"
    # Event 8: Line drive out
    elif pa_roll < lo:
        event = "lo"
    # Event 9: Pop out
    elif pa_roll < po:
        event = "po"
    # Event 10: Groundball out
    elif pa_roll < go:
        event = "go"
    # Event 11: Fly out (catch-all for anything above the groundout threshold)
    else:
        event = "fo"


    # print("Result:", event)    

    # Carry out event 
    # Hits, walks and HBP are credited to both batter and pitcher; out types are credited to the pitcher only
    if event == "b1":
        game.ab.B1 += 1
        game.pitching.B1 += 1
        game.pitching.B1_inning += 1
    elif event == "b2":
        game.ab.B2 += 1
        game.pitching.B2 += 1
        game.pitching.B2_inning += 1
    elif event == "b3":
        game.ab.B3 += 1
        game.pitching.B3 += 1
        game.pitching.B3_inning += 1
    elif event == "hr":
        game.ab.HR += 1
        game.pitching.HR += 1
        game.pitching.HR_inning += 1
    elif event == "bb":
        game.ab.BB += 1
        game.pitching.BB += 1
        game.pitching.BB_inning += 1
    elif event == "hbp":
        game.ab.HBP += 1
        game.pitching.HBP += 1
        game.pitching.HBP_inning += 1
    elif event == "so":
        game.pitching.SO += 1
        game.pitching.SO_inning += 1
    elif event == "fo":
        game.pitching.FO += 1
        game.pitching.FO_inning += 1
    elif event == "go":
        game.pitching.GO += 1
        game.pitching.GO_inning += 1
    elif event == "lo":
        game.pitching.LO += 1
        game.pitching.LO_inning += 1
    elif event == "po":
        game.pitching.PO += 1
        game.pitching.PO_inning += 1

    # Assume run will be charged to pitcher 
    # (charged multiplies into the ER credit when this player later scores)
    game.ab.charged = 1

    # Determine if there's an error on the play
    error = is_error(game, event)
    # If there is an error
    if error == 1:
        # print("There's an error!")
        # And if the event was supposed to be an out
        if event in ['so', 'go', 'fo', 'lo', 'po']:
            # The batter will not be charged to the pitcher 
            game.ab.charged = 0

    # If there's an error with two outs and the event would have ended the inning (been an out), all future runs are unearned
    if error == 1 and event in ['so', 'go', 'fo', 'lo', 'po'] and game.outs == 2:
        game.error_extended = True
        # print("An error has extended the inning.")

    # Set charged to pitcher variable for each base runner to 0
    # error_extended stays set until sim_inning resets it, so this applies to every later PA in the inning
    if game.error_extended == True:
        if game.on_3b is not None:
            game.on_3b.charged = 0
        if game.on_2b is not None:
            game.on_2b.charged = 0
        if game.on_1b is not None:
            game.on_1b.charged = 0
        game.ab.charged = 0

    ### TESTING:
    # Aggressive unearned runs: If there's an error in an inning, every runner on base will not be charged (even if they might have scored anyway)
    # Note that if there's an error with less than two outs, baserunners post-error could still be earned.
    if error == 1:
        if game.on_3b is not None:
            game.on_3b.charged = 0
        if game.on_2b is not None:
            game.on_2b.charged = 0
        if game.on_1b is not None:
            game.on_1b.charged = 0
        game.ab.charged = 0      
    ### TESTING ENDS


    # Determine if there's a double play
    double_play = is_dp(game, event)
    if double_play == 1:
        # print("There's a double play!")
        pass

    # Determine where outs occur
    out_ab, out_1b, out_2b, out_3b = choose_outs(game, event, error, double_play)

    # Outs on play
    outs_on_play = out_ab + out_1b + out_2b + out_3b

    # Runs on play
    runs = 0 

    # If the inning isn't over (or if it is over, but it was on a hit/bb/hbp so we still have to determine whether runners scored)
    if (game.outs + outs_on_play < 3) or (event in ['b1', 'b2', 'b3', 'hr', 'bb', 'hbp']):
        # Determine where the runners go
        # Destinations are rolled lead runner first so that each trailing participant can be
        # told which bases are already taken (the blocked_* flags)
        # Runner on 3B
        # If they're out
        if out_3b == 1:
            # Assign out to base variable
            base_3b = "out"
        # If not but they exist
        elif game.on_3b is not None:
            blocked_1b = 0
            blocked_2b = 0
            blocked_3b = 0
            # Figure out their base
            base_3b = event_results(game, 3, event, out_ab, out_1b, out_2b, out_3b, blocked_1b, blocked_2b, blocked_3b, error, double_play)
        else:
            base_3b = "N/A"

        # Runner on 2B
        # If they're out
        if out_2b == 1:
            # Assign out to base variable
            base_2b = "out"
        # If not but they exist
        elif game.on_2b is not None:
            blocked_1b = 0
            blocked_2b = 0
            blocked_3b = int(base_3b == "to_3b")
            # Figure out their base
            base_2b = event_results(game, 2, event, out_ab, out_1b, out_2b, out_3b, blocked_1b, blocked_2b, blocked_3b, error, double_play)
        else:
            base_2b = "N/A"

        # Runner on 1B
        # If they're out
        if out_1b == 1:
            # Assign out to base variable
            base_1b = "out"
        # If not but they exist
        elif game.on_1b is not None:
            blocked_1b = 0
            blocked_2b = int(base_2b == "to_2b")
            blocked_3b = int(base_3b == "to_3b" or base_2b == "to_3b")
            # Figure out their base
            base_1b = event_results(game, 1, event, out_ab, out_1b, out_2b, out_3b, blocked_1b, blocked_2b, blocked_3b, error, double_play)
        else:
            base_1b = "N/A"

        # AB
        # If they're out
        if out_ab == 1:
            # Assign out to base variable
            base_ab = "out"
        # If not but they exist
        elif game.ab is not None:
            blocked_1b = int(base_1b == "to_1b")
            blocked_2b = int(base_2b == "to_2b" or base_1b == "to_2b")
            blocked_3b = int(base_3b == "to_3b" or base_2b == "to_3b" or base_1b == "to_3b")
            # Figure out their base
            base_ab = event_results(game, 0, event, out_ab, out_1b, out_2b, out_3b, blocked_1b, blocked_2b, blocked_3b, error, double_play)
        else:
            base_ab = "N/A"

        # print(f"Advancements:  AB: {base_ab}, 1B: {base_1b}, 2B: {base_2b}, 3B: {base_3b}")

        # Move Runners:
        # Processed lead runner first so a base is vacated before the trailing runner is written into it.
        # A runner whose destination equals their current base falls through every branch and stays put.
        # Runner on 3B
        if game.on_3b is not None:
            if base_3b == "to_score":
                game.on_3b.R += 1
                game.on_3b.pitcher.ER += (1 * (1-error) * game.on_3b.charged) # Not an ER if event is error, player reached on error, or inning would be over if not for error.
                game.on_3b.pitcher.ER_inning += (1 * (1-error) * game.on_3b.charged) # Not an ER if event is error, player reached on error, or inning would be over if not for error.
                # No RBI is credited on an error or a double play
                game.ab.RBI += 1 * (1-error) * (1-double_play)
                # The run (earned or not) goes to the pitcher stored on the runner, not necessarily the current pitcher
                game.on_3b.pitcher.R += 1
                runs += 1
                game.on_3b = None
            elif base_3b == "out":
                game.on_3b = None

        # Runner on 2B
        if game.on_2b is not None:
            if base_2b == "to_3b":
                game.on_3b = game.on_2b
                game.on_2b = None
            elif base_2b == "to_score":
                game.on_2b.R += 1
                game.on_2b.pitcher.ER += (1 * (1-error) * game.on_2b.charged)
                game.on_2b.pitcher.ER_inning += (1 * (1-error) * game.on_2b.charged)
                game.ab.RBI += 1 * (1-error) * (1-double_play)
                game.on_2b.pitcher.R += 1
                runs += 1
                game.on_2b = None
            elif base_2b == "out":
                game.on_2b = None

        # Runner on 1B
        if game.on_1b is not None:
            if base_1b == "to_2b":
                game.on_2b = game.on_1b
                game.on_1b = None
            elif base_1b == "to_3b":
                game.on_3b = game.on_1b
                game.on_1b = None
            elif base_1b == "to_score":
                game.on_1b.R += 1
                game.on_1b.pitcher.ER += (1 * (1-error) * game.on_1b.charged)
                game.on_1b.pitcher.ER_inning += (1 * (1-error) * game.on_1b.charged)
                game.ab.RBI += 1 * (1-error) * (1-double_play)
                game.on_1b.pitcher.R += 1 
                runs += 1
                game.on_1b = None
            elif base_1b == "out":
                game.on_1b = None

        # AB
        if game.ab is not None:
            if base_ab == "to_1b":
                game.on_1b = game.ab
            elif base_ab == "to_2b":
                game.on_2b = game.ab
            elif base_ab == "to_3b":
                game.on_3b = game.ab
            elif base_ab == "to_score":
                game.ab.R += 1
                game.ab.pitcher.ER += (1 * (1-error) * game.ab.charged)
                game.ab.pitcher.ER_inning += (1 * (1-error) * game.ab.charged)
                game.ab.RBI += 1 * (1-error) * (1-double_play)
                game.ab.pitcher.R += 1
                runs += 1
            elif base_ab == "out":
                pass

    # Determine bases: refresh the 0/1 occupancy flags from the runner slots
    game.onThird = 1 if game.on_3b is not None else 0
    game.onSecond = 1 if game.on_2b is not None else 0
    game.onFirst = 1 if game.on_1b is not None else 0

    # Add outs on play
    game.outs += outs_on_play
    game.pitching.OUT += outs_on_play
    game.pitching.OUT_inning += outs_on_play

    # Add runs
    # Runs scored go to the batting team (away bats in the top half)
    if game.top_bot == "Top":
        game.away_score += runs
    else:
        game.home_score += runs

    # Add to number of batters faced
    game.pitching.faced += 1
    game.pitching.faced_inning += 1

    # Update pitching stats
    game.pitching, game = calculate_pitcher(game.pitching, game)

    # Go to the next batter up (slot 9 wraps back to slot 1)
    if game.top_bot == "Top":
        game.away_order += 1
        if game.away_order == 10:
            game.away_order = 1

    else:
        game.home_order += 1
        if game.home_order == 10:
            game.home_order = 1

    # print(f"faced {game.pitching.faced} faced_inning {game.pitching.faced_inning}")
    # print(f"reached {game.pitching.reached} reached_inning {game.pitching.reached_inning}")
    # print(f"TB {game.pitching.TB} TB_inning {game.pitching.TB_inning}")


    # Plate appearance summary: only consumed by the commented-out logging call below
    pa_summary = [event] + pa_summary

    # print("PA Summary", pa_summary)
    # log_pa_summary(pa_summary, baseball_path)

    return game

In [None]:
def _sync_base_flags(game):
    """Refresh the 0/1 base-occupancy flags from the runner slots on ``game``."""
    game.onFirst = 1 if game.on_1b is not None else 0
    game.onSecond = 1 if game.on_2b is not None else 0
    game.onThird = 1 if game.on_3b is not None else 0


def _attempt_steal(runner, outs, attempt_model, success_model):
    """Roll a stolen-base attempt for ``runner``; return "hold", "safe" or "out"."""
    # Model inputs: current outs plus the runner's attempt rate and success rate
    sba_imp = runner.sba / runner.sbo
    sb_imp = runner.sb / runner.sba
    model_inputs = pd.Series([outs, sba_imp, sb_imp]).values.reshape(1, -1)

    # Does the runner go?
    attempt_roll = random.random()
    attempt_rate = attempt_model.predict_proba(model_inputs).tolist()[0][1]
    if not attempt_roll < attempt_rate:
        return "hold"

    # The runner goes: is the steal successful?
    success_roll = random.random()
    success_rate = success_model.predict_proba(model_inputs).tolist()[0][1]
    return "safe" if success_roll < success_rate else "out"


def sim_inning(game, predict_pulls, predict_leverage, predict_binary, predict_outs, predict_safe, predict_all, opener_list, park_object, innings=9, debug=False):
    """Simulate one half inning and return the updated game.

    Resets outs, bases and the pitcher's per-inning counters, places the
    extra-inning runner on second from the 10th inning on, then alternates
    steal attempts and plate appearances (``sim_ab``) until three outs are
    recorded or the home team walks it off.

    Args:
        game: Mutable game state; read and modified throughout.
        predict_pulls, predict_leverage: Models forwarded to
            ``choose_pitcher`` and ``sim_ab``.
        predict_binary, predict_outs, predict_safe, predict_all: Models
            forwarded to ``sim_ab``.
        opener_list: Forwarded to ``choose_pitcher`` and ``sim_ab``.
        park_object: Forwarded to ``sim_ab``.
        innings: Not read here (the walk-off check uses ``game.innings``);
            kept for interface compatibility.
        debug: Forwarded to ``sim_ab``.

    Returns:
        The updated ``game``.
    """
    # Fresh half inning: no outs, empty bases, no error-extended status
    game.outs = 0
    game.on_1b = None
    game.on_2b = None
    game.on_3b = None
    game.error_extended = False

    # Set zombie runner: extra innings start with a runner on second who is not charged to the pitcher
    if game.inning >= 10:
        if game.top_bot == "Top":
            game.on_2b = game.away_zombie
        else:
            game.on_2b = game.home_zombie
        game.on_2b.charged = 0

    # Occupancy flags must reflect the zombie runner, since they are model inputs for the first PA
    _sync_base_flags(game)

    # Assign pitcher
    # Note: pulling a starter is handled inside sim_ab, so only relievers are (re)chosen here
    # to avoid rerolling the starter's pull probability
    if game.top_bot == "Top":
        # Home team pitches the top half
        game.pitching = game.home_pitcher_up
        if game.home_starter_pulled == True:
            game = choose_pitcher(game, predict_pulls, predict_leverage, opener_list)
    if game.top_bot == "Bot":
        # Away team pitches the bottom half
        game.pitching = game.away_pitcher_up
        if game.away_starter_pulled == True:
            game = choose_pitcher(game, predict_pulls, predict_leverage, opener_list)

    # Clear inning-specific stats for whoever is pitching
    inning_stats = (
        "HBP", "BB", "B1", "B2", "B3", "HR",
        "SO", "PO", "GO", "LO", "FO",
        "H", "faced", "reached", "TB", "OUT",
    )
    for stat in inning_stats:
        setattr(game.pitching, f"{stat}_inning", 0)

    # Loop for each PA
    while game.outs < 3:
        ### Steals
        # Third base: only possible when third is empty and second is occupied
        if game.on_3b is None and game.on_2b is not None:
            outcome = _attempt_steal(game.on_2b, game.outs, predict_sba_3b, predict_sb_3b)
            if outcome == "safe":
                game.on_2b.SB += 1
                game.on_3b = game.on_2b
                game.on_2b = None
            elif outcome == "out":
                # Caught stealing: the out counts toward both the game and inning totals
                game.on_2b = None
                game.pitching.OUT += 1
                game.pitching.OUT_inning += 1
                game.outs += 1
                # This might end the inning
                if game.outs == 3:
                    break

        # Second base: only possible when second is empty and first is occupied
        if game.on_2b is None and game.on_1b is not None:
            outcome = _attempt_steal(game.on_1b, game.outs, predict_sba_2b, predict_sb_2b)
            if outcome == "safe":
                game.on_1b.SB += 1
                game.on_2b = game.on_1b
                game.on_1b = None
            elif outcome == "out":
                # Caught stealing: the out counts toward both the game and inning totals
                game.on_1b = None
                game.pitching.OUT += 1
                game.pitching.OUT_inning += 1
                game.outs += 1
                # This might end the inning
                if game.outs == 3:
                    break

        # Steals may have moved or removed runners; keep the flags the PA models read in sync
        _sync_base_flags(game)

        # Simulate the AB
        game = sim_ab(game, predict_binary, predict_outs, predict_safe, predict_all, predict_pulls, predict_leverage, opener_list, park_object, debug)

        ### Who gets the win?
        # A team that leads from the bottom of the 5th onward gets a winning pitcher assigned,
        # unless the current winning pitcher already belongs to that team
        if (game.away_score > game.home_score) and ((game.inning == 5 and game.top_bot == "Bot") or game.inning >= 6) and (game.winning_pitcher not in game.away_pitchers):
            game.winning_pitcher = game.away_pitcher_up
        elif (game.home_score > game.away_score) and ((game.inning == 5 and game.top_bot == "Bot") or game.inning >= 6) and (game.winning_pitcher not in game.home_pitchers):
            game.winning_pitcher = game.home_pitcher_up
        elif (game.home_score == game.away_score) or game.inning < 5:
            # Tied, or too early: nobody is in line for the win
            game.winning_pitcher = None

        # Walk off: home team takes the lead in the bottom of the final inning
        if (game.inning == game.innings) and (game.top_bot == "Bot") and (game.home_score > game.away_score):
            game.winning_pitcher = game.home_pitcher_up
            break

    return game

In [None]:
# Calculate batter fantasy points
def calculate_batter(batter, game):
    """Recompute a batter's fantasy points from their counting stats.

    Args:
        batter: Object with ``B1``, ``B2``, ``B3``, ``HR``, ``RBI``, ``R``,
            ``BB``, ``HBP`` and ``SB`` attributes; ``FP`` is overwritten.
        game: Passed through untouched.

    Returns:
        Tuple ``(batter, game)``.
    """
    # (stat attribute, points per unit), in the order the terms are summed
    scoring = (
        ("B1", 3),
        ("B2", 5),
        ("B3", 8),
        ("HR", 10),
        ("RBI", 2),
        ("R", 2),
        ("BB", 2),
        ("HBP", 2),
        ("SB", 5),
    )
    batter.FP = sum(getattr(batter, stat) * points for stat, points in scoring)

    return batter, game

In [None]:
# Calculate pitcher fantasy points
def calculate_pitcher(pitcher, game):
    """Recompute a pitcher's derived totals, bonuses and fantasy points.

    Derives hits, total bases and batters reached (game totals and the
    ``*_inning`` counterparts) from the per-event counters, sets the win flag
    from ``game.winning_pitcher``, flags the 27-out bonuses, and overwrites
    ``FP``.

    Args:
        pitcher: Object carrying the event counters (``B1`` .. ``HBP`` and
            their ``_inning`` versions), ``OUT``, ``SO``, ``ER`` and the
            existing ``CG``/``CGSO``/``NH`` flags.
        game: Game state; only ``winning_pitcher`` is read.

    Returns:
        Tuple ``(pitcher, game)``.
    """
    # Same derivation for the game totals ("") and the current-inning totals ("_inning")
    for suffix in ("", "_inning"):
        singles = getattr(pitcher, "B1" + suffix)
        doubles = getattr(pitcher, "B2" + suffix)
        triples = getattr(pitcher, "B3" + suffix)
        homers = getattr(pitcher, "HR" + suffix)

        hits = singles + doubles + triples + homers
        setattr(pitcher, "H" + suffix, hits)
        setattr(pitcher, "TB" + suffix, singles * 1 + doubles * 2 + triples * 3 + homers * 4)
        setattr(
            pitcher,
            "reached" + suffix,
            hits + getattr(pitcher, "BB" + suffix) + getattr(pitcher, "HBP" + suffix),
        )

    # Only the game's current winning pitcher carries a win
    pitcher.W = 1 if game.winning_pitcher == pitcher else 0

    # 27 outs by one pitcher: complete game, plus shutout / no-hitter bonuses when they apply
    if pitcher.OUT == 27:
        pitcher.CG = 1
        if pitcher.ER == 0:
            pitcher.CGSO = 1
        if pitcher.H == 0:
            pitcher.NH = 1

    # (stat attribute, points per unit), in the order the terms are summed
    scoring = (
        ("OUT", 0.75),
        ("SO", 2),
        ("W", 4),
        ("ER", -2),
        ("H", -0.6),
        ("BB", -0.6),
        ("HBP", -0.6),
        ("CG", 2.5),
        ("CGSO", 2.5),
        ("NH", 5),
    )
    pitcher.FP = sum(getattr(pitcher, stat) * points for stat, points in scoring)

    return pitcher, game

In [None]:
def _find_starter(pitchers, side):
    """Return the first pitcher with ``Leverage == 1``; raise ValueError if none exists."""
    starter = next((pitcher for pitcher in pitchers if pitcher.Leverage == 1), None)
    if starter is None:
        # A bare next() would leak StopIteration, which callers driving this
        # through map()/generators could mistake for normal exhaustion.
        raise ValueError(f"No starting pitcher (Leverage == 1) found for the {side} team")
    return starter


# Simulate a whole game
def sim_game(game_template, predict_pulls, predict_leverage, predict_binary, predict_outs, predict_safe, predict_all, opener_list, park_object, innings=9, debug=False):
    """Simulate one game from a template and return the finished game object.

    The template is deep-copied first, so the simulation works on its own
    copy. Each half inning is played by ``sim_inning``; once the game ends,
    ``calculate_batter`` / ``calculate_pitcher`` are run for every player on
    both teams.

    Args:
        game_template: Game object to copy. It must provide the pitcher and
            batter lists, ``inning``, ``innings``, ``top_bot`` and both scores.
        predict_pulls, predict_leverage, predict_binary, predict_outs,
        predict_safe, predict_all, opener_list, park_object, debug:
            Forwarded unchanged to ``sim_inning``.
        innings: Forwarded unchanged to ``sim_inning``.
            NOTE(review): the loop bound below is ``game.innings`` taken from
            the template, not this argument -- confirm the two always agree.

    Returns:
        The simulated copy of the game.

    Raises:
        ValueError: If either team has no pitcher with ``Leverage == 1``.
    """
    # Copy game template
    game = deepcopy(game_template)

    # Determine starters
    game.home_starter = _find_starter(game.home_pitchers, "home")
    game.away_starter = _find_starter(game.away_pitchers, "away")

    # Pitcher up is the starter
    game.home_pitcher_up = game.home_starter
    game.away_pitcher_up = game.away_starter

    # First pitcher is the home starter
    game.pitching = game.home_pitcher_up

    # Loop over every half inning until the game is decided
    while game.inning <= game.innings:
        # Simulate the half inning
        game = sim_inning(game, predict_pulls, predict_leverage, predict_binary, predict_outs, predict_safe, predict_all, opener_list, park_object, innings, debug)

        # End-of-game checks only apply in the last scheduled inning
        if game.inning == game.innings:
            if game.top_bot == "Top":
                # Home team already leads after the top half: skip the bottom
                if game.home_score > game.away_score:
                    break
            elif game.top_bot == "Bot":
                # A decided game ends here
                if game.away_score != game.home_score:
                    break
                # Still tied after the bottom half: schedule an extra inning
                game.innings += 1

        # Advance half inning
        if game.top_bot == "Top":
            game.top_bot = "Bot"
        else:
            game.top_bot = "Top"
            game.inning += 1

    # Calculate stats
    for batter in game.home_batters:
        batter, game = calculate_batter(batter, game)
    for batter in game.away_batters:
        batter, game = calculate_batter(batter, game)
    for pitcher in game.home_pitchers:
        pitcher, game = calculate_pitcher(pitcher, game)
    for pitcher in game.away_pitchers:
        pitcher, game = calculate_pitcher(pitcher, game)

    return game

In [None]:
def sim_game_batch(game_template, predict_pulls, predict_leverage, predict_binary, predict_outs, predict_safe, predict_all, opener_list, park_object, innings=9, debug=False, batch_size=50): 
    """Run ``sim_game`` ``batch_size`` times on the same template.

    Every argument other than ``batch_size`` is passed straight through to
    ``sim_game``.

    Returns:
        A list of the simulated games, in the order they were run.
    """
    return [
        sim_game(game_template, predict_pulls, predict_leverage, predict_binary, predict_outs, predict_safe, predict_all, opener_list, park_object, innings, debug)
        for _ in range(batch_size)
    ]

In [None]:
# Retrieve all values for attribute across all players across all simulations
def create_players_dataframe(game_list, attribute='FP', player='batter'):
    """Collect one attribute for every player across all simulated games.

    Args:
        game_list: Simulated games. Each must expose ``away_batters`` /
            ``home_batters`` or ``away_pitchers`` / ``home_pitchers``.
        attribute: Name of the player attribute to read (default ``'FP'``).
        player: ``'batter'`` or ``'pitcher'``; selects which rosters are read.

    Returns:
        A DataFrame with one row per player (away roster first, then home).
        It has a leading ``fullName`` column followed by one
        ``f'{attribute}{i}'`` column per game. An empty ``game_list`` yields
        an empty frame that has only the ``fullName`` column.

    Raises:
        ValueError: If ``player`` is not ``'batter'`` or ``'pitcher'``.
    """
    roster_fields = {
        'batter': ('away_batters', 'home_batters'),
        'pitcher': ('away_pitchers', 'home_pitchers'),
    }
    if player not in roster_fields:
        raise ValueError(f"player must be 'batter' or 'pitcher', got {player!r}")
    away_field, home_field = roster_fields[player]

    columns = {}
    players = []  # stays empty when game_list is empty
    for i, game in enumerate(game_list):
        players = getattr(game, away_field) + getattr(game, home_field)
        columns[f'{attribute}{i}'] = [getattr(player_obj, attribute) for player_obj in players]

    # Names are taken from the last game's roster.
    # NOTE(review): assumes every simulation lists players in the same order -- confirm.
    name_list = [player_obj.fullName for player_obj in players]

    # fullName goes first, followed by the per-game columns in game order
    return pd.DataFrame({'fullName': name_list, **columns})

In [None]:
# Create dataframe with scores
def extract_scores(game_list):
    """Build a DataFrame of final scores, one row per simulated game.

    Returns:
        A DataFrame with columns ``away_score`` and ``home_score``, in the
        same order as ``game_list``.
    """
    score_columns = ('away_score', 'home_score')
    return pd.DataFrame(
        {column: [getattr(game, column) for game in game_list] for column in score_columns}
    )