<a href="https://colab.research.google.com/github/brian-feldman-3/baseball_lineups/blob/main/OptimalBaseballLineup.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

The purpose of this tool is to find the optimal batting lineup for a baseball team based on each player's current statistics.

In [32]:
import numpy as np
from numpy import random
import pandas as pd
# Set pandas' chained-assignment check to None (no warning, no error) for the whole
# notebook; this affects every cell that assigns into a DataFrame after this line runs.
pd.options.mode.chained_assignment = None


In [46]:
#Fetch one team's batting page from Baseball Reference (single team, for testing)

url = 'https://www.baseball-reference.com/teams/BOS/2025-batting.shtml'

# pd.read_html parses the page into a list with one DataFrame per HTML table;
# the first table in that list is the one the rest of the notebook works with
current_red_sox = pd.read_html(io=url)
current_red_sox_df = current_red_sox[0]
# Keep only the columns needed to model a plate appearance. .copy() makes this an
# independent frame, so the assignments below never write into a slice of the
# scraped table.
stat_columns = ['PA', 'H', '2B', '3B', 'HR', 'BB']
current_red_sox_df = current_red_sox_df[['Player'] + stat_columns].copy()

#Delete rows with player named "Player" and "Team Totals"
is_player_row = ~current_red_sox_df['Player'].isin(['Player', 'Team Totals'])
current_red_sox_df = current_red_sox_df.loc[is_player_row].copy()

# NOTE(review): Baseball Reference appears to suffix player names with handedness
# markers ('*' / '#'); they are stripped so player_dict can be keyed by the plain
# name. Confirm against the scraped table - this is a no-op if no markers exist.
current_red_sox_df['Player'] = current_red_sox_df['Player'].str.rstrip('*#').str.strip()

#Make all plate possibilities integers and create a column for singles
current_red_sox_df[stat_columns] = current_red_sox_df[stat_columns].astype(int)
extra_base_hits = current_red_sox_df[['2B', '3B', 'HR']].sum(axis=1)
current_red_sox_df['1B'] = current_red_sox_df['H'] - extra_base_hits

#Remove players below a certain plate appearance threshold
plate_appearance_threshold = 1
# Rates are computed as count / PA, so a player with 0 PA can never be kept,
# even if the threshold above is lowered to 0.
minimum_plate_appearances = max(plate_appearance_threshold, 1)
has_enough_pa = current_red_sox_df['PA'] >= minimum_plate_appearances
current_red_sox_df = current_red_sox_df.loc[has_enough_pa].copy()

#Everything that is not a hit or a walk (sac flies and so on) is counted as an out,
#so the six outcomes add up to the player's plate appearances
non_out_types = ['1B', '2B', '3B', 'HR', 'BB']
outcome_types = non_out_types + ['OUT']
current_red_sox_df['OUT'] = current_red_sox_df['PA'] - current_red_sox_df[non_out_types].sum(axis=1)

#Replace each outcome count with its frequency per 1000 plate appearances.
#Rounding happens AFTER scaling and before the int cast, so a float product that
#lands just below a whole number is not truncated down by one.
per_thousand = current_red_sox_df[outcome_types].div(current_red_sox_df['PA'], axis=0) * 1000
current_red_sox_df[outcome_types] = per_thousand.round().astype(int)

#Create player dictionary: player name -> list of roughly 1000 plate appearance
#outcomes (the six counts are rounded independently, so the length can be off by a few).
#Walks ('BB') are included so that they can actually be drawn in the simulation.
player_dict = dict()
outcome_order = ['HR', '3B', '2B', '1B', 'BB', 'OUT']
outcome_counts = current_red_sox_df[outcome_order].to_numpy()
for player, counts in zip(current_red_sox_df['Player'], outcome_counts):
  outcomes = []
  for outcome, count in zip(outcome_order, counts):
    outcomes.extend([outcome] * int(count))
  #Shuffle in place (np.random.shuffle returns None, so its result is not assigned).
  #Sampling is already random; this only avoids leaving the list grouped by outcome.
  np.random.shuffle(outcomes)
  player_dict[player] = outcomes

#TODO: We need to create a way to ignore traded players because they are included in the team table even after they are traded
#traded_players


In [3]:
#Initialize 0 outs in the first inning
out_count = 0
batter_number = 0
lineup = ['Jarren Duran', 'Rafael Devers']

#NOTE: work in progress - each pass of this loop draws a single plate appearance;
#out_count is initialised above but not updated yet, and batter_outcome is not scored.
for inning_count in range(1,10):
  #lineup is a list and player_dict is a dict, so both are indexed with [] (not called)
  batter_up = lineup[batter_number]
  batter_outcome = np.random.choice(player_dict[batter_up])
  #Wrap back to the top of the order after the last hitter instead of running off the list
  batter_number = (batter_number + 1) % len(lineup)

1
2
3
4
5
6
7
8
9
