# B02. Simulations
- Simulate matchups

In [37]:
%run "U1. Imports.ipynb"
%run "U2. Utilities.ipynb"
%run "U3. Classes.ipynb"
# %run "D3. Simulation Functions.ipynb"

baseball_path = r'C:\Users\james\Documents\MLB\Database'

db_path = r'C:\Users\james\Documents\MLB\Database\MLBDB.db'
engine = create_engine(f'sqlite:///{db_path}')

In [38]:
def scale_inputs(away_batter_df, away_pitcher_df, home_batter_df, home_pitcher_df, batter_stats_scaler, batter_stats_fg_scaler, pitcher_stats_scaler, pitcher_stats_fg_scaler):
    ### Standardize stats
    # Away batters
    away_batter_df[batter_stats_l] = batter_stats_scaler.fit_transform(away_batter_df[batter_stats_l])
    away_batter_df[batter_stats_r] = batter_stats_scaler.fit_transform(away_batter_df[batter_stats_r])
    away_batter_df[batter_stats_fg] = batter_stats_fg_scaler.fit_transform(away_batter_df[batter_stats_fg])
    # Away pitchers
    away_pitcher_df[pitcher_stats_l] = pitcher_stats_scaler.fit_transform(away_pitcher_df[pitcher_stats_l])
    away_pitcher_df[pitcher_stats_r] = pitcher_stats_scaler.fit_transform(away_pitcher_df[pitcher_stats_r])
    away_pitcher_df[pitcher_stats_fg2] = pitcher_stats_fg_scaler.fit_transform(away_pitcher_df[pitcher_stats_fg2])

    # Home batters
    home_batter_df[batter_stats_l] = batter_stats_scaler.fit_transform(home_batter_df[batter_stats_l])
    home_batter_df[batter_stats_r] = batter_stats_scaler.fit_transform(home_batter_df[batter_stats_r])
    home_batter_df[batter_stats_fg] = batter_stats_fg_scaler.fit_transform(home_batter_df[batter_stats_fg])
    # Home pitchers
    home_pitcher_df[pitcher_stats_l] = pitcher_stats_scaler.fit_transform(home_pitcher_df[pitcher_stats_l])
    home_pitcher_df[pitcher_stats_r] = pitcher_stats_scaler.fit_transform(home_pitcher_df[pitcher_stats_r])
    home_pitcher_df[pitcher_stats_fg2] = pitcher_stats_fg_scaler.fit_transform(home_pitcher_df[pitcher_stats_fg2])
    
    return away_batter_df, away_pitcher_df, home_batter_df, home_pitcher_df

In [39]:
# Create team-position objects
def create_matchup(matchup, matchup_path, batter_stats_scaler, batter_stats_fg_scaler, pitcher_stats_scaler, pitcher_stats_fg_scaler):
    ### Read in data
    # Batters
    away_batter_df = pd.read_excel(os.path.join(matchup_path, matchup), sheet_name='AwayBatters', engine='openpyxl')
    home_batter_df = pd.read_excel(os.path.join(matchup_path, matchup), sheet_name='HomeBatters', engine='openpyxl')
    
    # Pitchers
    away_pitcher_df = pd.read_excel(os.path.join(matchup_path, matchup), sheet_name='AwayPitchers', engine='openpyxl')
    home_pitcher_df = pd.read_excel(os.path.join(matchup_path, matchup), sheet_name='HomePitchers', engine='openpyxl')

    ### Scale inputs
    away_batter_df, away_pitcher_df, home_batter_df, home_pitcher_df = scale_inputs(away_batter_df, away_pitcher_df, home_batter_df, home_pitcher_df, batter_stats_scaler, batter_stats_fg_scaler, pitcher_stats_scaler, pitcher_stats_fg_scaler)
    
    
    # Column names
    batter_columns = away_batter_df.columns.tolist()
    pitcher_columns = away_pitcher_df.columns.tolist()
   
    
    ### Create player objects
    # Lists of player objects, by Away/Home status and position group
    AwayBatters = []
    HomeBatters = []
    AwayPitchers = []
    HomePitchers = []
    
    # Away Batters
    for _, row in away_batter_df.iterrows():
        batter_data = {attr: row[attr] for attr in batter_columns}
        AwayBatters.append(Batter(**batter_data))
    
    # Home Batters
    for _, row in home_batter_df.iterrows():
        batter_data = {attr: row[attr] for attr in batter_columns}
        HomeBatters.append(Batter(**batter_data))

    # Away Pitchers
    for _, row in away_pitcher_df.iterrows():
        pitcher_data = {attr: row[attr] for attr in pitcher_columns}
        AwayPitchers.append(Pitcher(**pitcher_data))
    
    # Home Pitchers
    for _, row in home_pitcher_df.iterrows():
        pitcher_data = {attr: row[attr] for attr in pitcher_columns}
        HomePitchers.append(Pitcher(**pitcher_data))
    

    return AwayBatters, HomeBatters, AwayPitchers, HomePitchers 

In [40]:
# Calculate odds of being pulled
def pull_odds(game, model_pulls):
    # Determine batting and fielding team scores for use as inputs
    if game.top_bot == "Top":        
        batter_score = game.away_score
        pitcher_score = game.home_score
    else:
        batter_score = game.home_score
        pitcher_score = game.away_score
    
    # Pull model inputs (pull_inputs3)
    X_list = [
        game.pitching.B1, game.pitching.B2, game.pitching.B3, game.pitching.HR, game.pitching.BB, game.pitching.HBP, 
        game.pitching.SO, game.pitching.ER, game.pitching.faced, pitcher_score, batter_score, game.pitching.IP_start
            ]
    
    # Reshape to match what models expect
    model_inputs = pd.Series(X_list).values.reshape(1,-1)

    # Predict and make into a list
    pull_list = model_pulls.predict_proba(model_inputs).tolist()
    
    # Choose the probability that pull = 1
    odds = pull_list[0][1] 
    
    del X_list, model_inputs, pull_list
    
    return odds

In [41]:
matchup = "WAS@LAA 04112023 0938PM ET.xlsx"
matchup_path = r"C:\Users\james\Documents\MLB\Database\B01. Matchups\Matchups 85143"

In [42]:
AwayBatters, HomeBatters, AwayPitchers, HomePitchers = create_matchup(matchup, matchup_path, batter_stats_scaler, batter_stats_fg_scaler, pitcher_stats_scaler, pitcher_stats_fg_scaler)

In [43]:
game = Scoreboard(AwayBatters, HomePitchers, HomeBatters, AwayPitchers, 9)

In [44]:
def choose_pa_matchup(game, model_pulls, opener_list):    
    ### Top of the inning
    if game.top_bot == "Top":
        # Shuffle list of pitchers so that when a pitcher is decided based on being the first in the list with a given leverage, that pitcher will vary
        random.shuffle(game.home_pitchers)
       
        ### Determine leverage
        # If the starter is still in the game
        if game.home_starter_pulled == False:
            # Pitcher is the starter
            game.home_pitcher_up = next(pitcher for pitcher in game.home_pitchers if pitcher.Leverage == 1)
            game.pitching = game.home_pitcher_up
            # Roll to see if they'll be pulled
            pull_prob = pull_odds(game, model_pulls)
            pull_roll = random.random()
            if pull_roll < pull_prob:
                game.home_starter_pulled = True 
    
            # If they're an opener and it's the third inning,
            if game.inning >= 3 and game.home_starter.Name in opener_list:
                # Pull them
                game.home_starter_pulled = True
            
        # If the home starter has been pulled
        if game.home_starter_pulled == True:
            # Set leverage to 2 (low) by default
            game.home_leverage = 2
            # Set leverage to 3 (medium) if it's late
            if (game.inning > game.innings - 3):
                game.home_leverage = 3
            # Set leverage to 4 (high) if it's a save situation
            if (4 > (game.home_score - game.away_score) >= 0) and game.inning == game.innings and game.home_starter_pulled == True:
                game.home_leverage = 4
        
        # Pick pitcher
        try:
            # Try to use method above
            game.home_pitcher_up = next(pitcher for pitcher in game.home_pitchers if pitcher.Leverage == game.home_leverage)
        except:
            # Might not have a pitcher at every leverage. In that case, choose low leverage
            game.home_pitcher_up = next(pitcher for pitcher in game.home_pitchers if pitcher.Leverage == 2)    
    
        # Determine matchup
        game.pitching = game.home_pitcher_up
        game.ab = next(batter for batter in game.away_batters if batter.batting_order == game.away_order)
      
    
    
    ### Bottom of the inning
    elif game.top_bot == "Bot":
        # Shuffle list of pitchers so that when a pitcher is decided based on being the first in the list with a given leverage, that pitcher will vary
        random.shuffle(game.away_pitchers)
       
        ### Determine leverage
        # If the starter is still in the game
        if game.away_starter_pulled == False:
            # Pitcher is the starter
            game.away_pitcher_up = next(pitcher for pitcher in game.away_pitchers if pitcher.Leverage == 1)
            game.pitching = game.away_pitcher_up
            # Roll to see if they'll be pulled
            pull_prob = pull_odds(game, model_pulls)
            pull_roll = random.random()
            if pull_roll < pull_prob:
                game.away_starter_pulled = True 
    
            # If they're an opener and it's the third inning,
            if game.inning >= 3 and game.away_starter.Name in opener_list:
                # Pull them
                game.away_starter_pulled = True
            
        # If the home starter has been pulled
        if game.away_starter_pulled == True:
            # Set leverage to 2 (low) by default
            game.away_leverage = 2
            # Set leverage to 3 (medium) if it's late
            if (game.inning > game.innings - 3):
                game.away_leverage = 3
            # Set leverage to 4 (high) if it's a save situation
            if (4 > (game.away_score - game.home_score) >= 0) and game.inning == game.innings and game.away_starter_pulled == True:
                game.away_leverage = 4
        
        # Pick pitcher
        try:
            # Try to use method above
            game.away_pitcher_up = next(pitcher for pitcher in game.away_pitchers if pitcher.Leverage == game.away_leverage)
        except:
            # Might not have a pitcher at every leverage. In that case, choose low leverage
            game.away_pitcher_up = next(pitcher for pitcher in game.away_pitchers if pitcher.Leverage == 2)    
    
        # Determine matchup
        game.pitching = game.away_pitcher_up
        game.ab = next(batter for batter in game.home_batters if batter.batting_order == game.home_order)
    
    return game

In [45]:
game = Scoreboard(AwayBatters, HomeBatters, AwayPitchers, HomePitchers, 9)
opener_list = []
game = choose_pa_matchup(game, model_pulls, opener_list)

In [66]:
# Create PA event probability
def probabilities(game, model_binary, model_outs, model_safe):
    
    # Batter attributes
    # Set batter attributes specific to pitcher hand
    for stat in batter_stats + ['imp_b']:
        if game.pitching.pitchHand == "Left":
            attr_name = f'{stat}_l'
        else:
            attr_name = f'{stat}_r'

        # Get the new attribute value
        new_value = getattr(game.ab, attr_name)

        # Set the attribute to the new value
        setattr(game.ab, stat, new_value)
        
    # Pitcher attributes
    # Set pitcher attributes specific to batter hand
    for stat in pitcher_stats + ['imp_p']:
        if game.ab.batSide == "Left" or (game.ab.batSide == "Switch" and game.pitching.pitchHand == "Right"):
            attr_name = f'{stat}_l'
        else:
            attr_name = f'{stat}_r'

        # Get the new attribute value
        new_value = getattr(game.pitching, attr_name)

        # Set the attribute to the new value
        setattr(game.pitching, stat, new_value)
        
    
    
    # Make compatible with model
    if game.ab.batSide == "Left" or (game.ab.batSide == "Switch" and game.pitching.pitchHand == "Right"):
        game.ab.b_L = 1
    else:
        game.ab.b_L = 0
    
    if game.pitching.pitchHand == "Left":
        game.pitching.p_L = 1
    else:
        game.pitching.p_L = 0
        
    # On base dummies
    if game.on_1b is not None:
        game.onFirst = 1
    else:
        game.onFirst = 0
    if game.on_2b is not None:
        game.onSecond = 1
    else:
        game.onSecond = 0
    if game.on_3b is not None:
        game.onThird = 1
    else: 
        game.onThird = 0
        
    # Top of the inning dummy
    if game.top_bot == "Top":
        game.top = 1
    else:
        game.top = 0

    # SCore differentials
    if game.top == 1:
        game.score_diff = game.away_score - game.home_score
    else:
        game.score_diff = game.home_score - game.away_score
    
    # Year dummies
    game.year_2015 = 0
    game.year_2016 = 0
    game.year_2017 = 0
    game.year_2018 = 0
    game.year_2019 = 0
    game.year_2020 = 0
    game.year_2021 = 0
    game.year_2022 = 0
    game.year_2023 = 1

    
    
    ### Inputs
    # Batters
    batter_inputs =  [getattr(game.ab, stat) for stat in batter_stats]
    
    # Pitchers
    pitcher_inputs = [getattr(game.pitching, stat) for stat in pitcher_stats]
    
    # Venues
    for num in venues:  # Iterate through your venue values
        venue_attribute_name = f'venue_{num}'  # Create the attribute name
        if getattr(game.ab.venue_id, 'venue_id', None) == num:
            setattr(game, venue_attribute_name, 1)
        else:
            setattr(game, venue_attribute_name, 0)
    venue_inputs = [getattr(game, venue) for venue in venues]

    # Years 
    year_inputs = [getattr(game, year) for year in years]
    
    
    
    # Other 
    other_inputs = [game.pitching.p_L, game.ab.b_L, 
                    game.ab.x_vect, game.ab.y_vect, game.ab.temperature, 
                    game.onFirst, game.onSecond, game.onThird, game.inning, game.top, game.score_diff]
    
    imp_inputs = [game.ab.imp_b, game.pitching.imp_p]
    
    ### Create inputs 
    inputs = batter_inputs + pitcher_inputs + venue_inputs + year_inputs + other_inputs + imp_inputs
    model_inputs = pd.Series(inputs).values.reshape(1,-1)
    
    print(len(batter_inputs))
    print(len(pitcher_inputs))
    print(len(venue_inputs))
    print(len(year_inputs))
    print(len(other_inputs))
    print(len(imp_inputs))
    
    print(len(inputs))
    print(len(pa_inputs2))
    
    ### Run models
    # Out or save
    binary_list = model_binary.predict_proba(model_inputs).tolist()
    # Out types
    outs_list = model_outs.predict_proba(model_inputs).tolist()
    # Safe types
    safe_list = model_safe.predict_proba(model_inputs).tolist()
    
    # Odds of safe/out
    is_safe = binary_list[0][0]
    is_out = 1 - is_safe
    
    # Probabilities 
    # Given that it's an out
    fo_pred = outs_list[0][0]
    go_pred = outs_list[0][1]
    lo_pred = outs_list[0][2]
    po_pred = outs_list[0][3]
    so_pred = outs_list[0][4]
    
    # Given that it's safe
    b1_pred = safe_list[0][0]
    b2_pred = safe_list[0][1]
    b3_pred = safe_list[0][2]
    bb_pred = safe_list[0][3]
    hbp_pred = safe_list[0][4]
    hr_pred = safe_list[0][5]
       
    # Overall probabilities
    b1 = is_safe * b1_pred
    b2 = is_safe * b2_pred
    b3 = is_safe * b3_pred
    bb = is_safe * bb_pred
    fo = is_out * fo_pred
    go = is_out * go_pred
    hbp = is_safe * hbp_pred
    hr = is_safe * hr_pred
    lo = is_out * lo_pred
    po = is_out * po_pred
    so = is_out * so_pred 
           
    # Add stats together so they represent the end point of a range with a probability of being selected equal to their rate
    bb = hbp + bb 
    b1 = bb + b1
    b2 = b1 + b2
    b3 = b2 + b3
    hr = b3 + hr
    so = hr + so
    lo = so + lo
    po = lo + po
    go = po + go
    fo = go + fo
    # Set flyout to be 1. This model almost guarantees this. It's accurate to like, 10 decimal places, but why risk it?
    fo = 1
    
    del model_inputs
    
    return hbp, bb, b1, b2, b3, hr, so, lo, po, go, fo, game 

In [67]:
len(pa_inputs2)

155

In [68]:
hbp, bb, b1, b2, b3, hr, so, lo, po, go, fo, game  = probabilities(game, model_binary, model_outs, model_safe)

46
46
41
9
11
2
155
155


ValueError: X has 155 features, but LogisticRegression is expecting 157 features as input.