#### Import libraries

In [557]:
%matplotlib inline
import numpy as np
import pandas as pd
from pathlib import Path

# Random Baseball Game Generator

This notebook randomly generates baseball games based on the baserunning speed and slugging average of the players in each lineup.

There are seven possible outcomes per at-bat:
  1. Single
  2. Double
  3. Triple
  4. Home run
  5. Fly out (including sacrifice flies)
  6. Ground out (including double plays)
  7. Strike out

These outcomes are determined using a beta distribution where `beta = 3` and `mean = player's slugging pct / 4`; the sampled value is then multiplied by 4 and rounded so that 0: out, 1: single, 2: double, 3: triple, and 4: home run. Outs are randomly divided into strikeouts, fly outs, and ground outs. 14% of singles are converted into home runs (to adjust for the distribution underpredicting HRs).

Speed is defined as the probability a runner advances to third on a single, scores from second on a single, scores from first on a double, scores on a sacrifice fly, or advances to third on a ground out.

Home field advantage is included by giving the home team a 2% boost to slugging pct and the away team a 2% penalty.

Possible improvements:
  1. Include other kinds of outs (baserunning errors)
  2. Include base stealing (based on speed)
  3. Take into account pitcher attributes
  4. Toggle on/off the print statements to allow for simulation analysis

In [658]:
def adj_prob(p):
    """Return the boosted probability used for bases-loaded plays.

    The input probability `p` is increased by `p * (1 - p)`, which is
    algebraically the same as `1 - (1 - p) ** 2`; 0 and 1 map to themselves.
    """
    boost = p * (1 - p)
    return p + boost

In [492]:
def generate_at_bat_result(slug):
    """Randomly draw the outcome of one at-bat.

    Parameters
    ----------
    slug : float
        Slugging value of the batter (total bases per at-bat, so 0 to 4).

    Returns
    -------
    int
        0 for an out, 1 single, 2 double, 3 triple, 4 home run.
    """
    # The beta shape parameter below must be strictly positive and finite.
    # At the boundaries the distribution degenerates, so answer directly:
    # a slugging value of 0 can never produce a hit, and 4 is always a homer.
    if slug <= 0:
        return 0
    if slug >= 4:
        return 4
    # With beta = 3, this alpha gives the beta distribution a mean of slug / 4.
    alpha = 3 * slug / (4 - slug)
    result = round(np.random.beta(alpha, 3) * 4)
    # Turn 14% of singles into home runs
    if result == 1 and np.random.binomial(1, 0.14):
        result = 4
    return result

In [686]:
def _score_runner(team, batter, runner, origin):
    """Announce that `runner` scores from `origin`, credit the run to the
    runner and the RBI to `batter`, and return 1 (the run to add to the score)."""
    print(f'{team[runner].name} scores from {origin}')
    team[batter].rbi += 1
    team[runner].run += 1
    return 1


def generate_inning(current_batter, runs, team, inning, away_runs=10000):
    """Simulate one half-inning, printing the play-by-play as it goes.

    Parameters
    ----------
    current_batter : int
        Index into `team` of the first batter of this half-inning.
    runs : int
        Runs the batting team has scored so far in the game.
    team : list
        Batting order. Each entry must expose `name`, `slug`, `speed` and the
        counters `hit`, `double`, `triple`, `hr`, `rbi`, `run`, `sf`, `ko`,
        `ab`, which are updated in place.
    inning : int
        Inning number; only used for the walk-off check.
    away_runs : int, optional
        Visiting team's score, passed when the home team bats. The default
        of 10000 is a sentinel meaning "the away team is batting".

    Returns
    -------
    tuple
        `(next_batter_index, runs)` after the half-inning.
    """
    outs = 0
    # bases[0], bases[1], bases[2] hold the lineup index of the runner on
    # first, second and third (None when empty). Index 0 is a valid runner,
    # so emptiness must be tested with `is None`, never by truthiness.
    bases = [None] * 3
    # Multiply slugger input by 0.98 and 1.02 for the away and home team respectively
    hfa = 0.98 if away_runs == 10000 else 1.02
    while outs < 3:
        print([team[b].name if b is not None else None for b in bases])
        batter = team[current_batter]
        result = generate_at_bat_result(batter.slug * hfa)
        if result != 0:
            batter.hit += 1
        if result == 1:
            print(f'{batter.name} hits a single')
            if bases[2] is not None:
                runs += _score_runner(team, current_batter, bases[2], 'third')
                bases[2] = None
            if bases[1] is not None:
                # Runner on second scores with probability equal to his speed
                if np.random.binomial(1, team[bases[1]].speed):
                    runs += _score_runner(team, current_batter, bases[1], 'second')
                else:
                    print(f'{team[bases[1]].name} advances to third')
                    bases[2] = bases[1]
                bases[1] = None
            if bases[0] is not None:
                # Runner on first may only try for third when third is free
                if bases[2] is None and np.random.binomial(1, team[bases[0]].speed):
                    print(f'{team[bases[0]].name} advances to third')
                    bases[2] = bases[0]
                else:
                    print(f'{team[bases[0]].name} advances to second')
                    bases[1] = bases[0]
            bases[0] = current_batter
        elif result == 2:
            batter.double += 1
            print(f'{batter.name} hits a double')
            if bases[2] is not None:
                runs += _score_runner(team, current_batter, bases[2], 'third')
                bases[2] = None
            if bases[1] is not None:
                # Second base is overwritten by the batter below
                runs += _score_runner(team, current_batter, bases[1], 'second')
            if bases[0] is not None:
                if np.random.binomial(1, team[bases[0]].speed):
                    runs += _score_runner(team, current_batter, bases[0], 'first')
                else:
                    print(f'{team[bases[0]].name} advances to third')
                    bases[2] = bases[0]
            bases[1] = current_batter
            bases[0] = None
        elif result == 3:
            batter.triple += 1
            print(f'{batter.name} hits a triple')
            # Every runner scores, lead runner first
            for base, origin in ((2, 'third'), (1, 'second'), (0, 'first')):
                if bases[base] is not None:
                    runs += _score_runner(team, current_batter, bases[base], origin)
            bases = [None, None, current_batter]
        elif result == 4:
            print(f'{batter.name} hits a home run!')
            batter.hr += 1
            # The batter drives himself in: credit his own run and RBI so that
            # individual run totals add up to the team score.
            batter.rbi += 1
            batter.run += 1
            runs += 1
            for base, origin in ((2, 'third'), (1, 'second'), (0, 'first')):
                if bases[base] is not None:
                    runs += _score_runner(team, current_batter, bases[base], origin)
            bases = [None] * 3
        else:
            # 0: strikeout, 1: fly out, 2: ground out
            out_type = np.random.choice(3, p=[.287, .341, .372])
            if out_type == 1:
                print(f'{batter.name} flies out')
                outs += 1
                # Sacrifice fly
                if outs < 3 and bases[2] is not None:
                    if np.random.binomial(1, team[bases[2]].speed):
                        batter.sf += 1
                        runs += _score_runner(team, current_batter, bases[2], 'third')
                        bases[2] = None
            elif out_type == 2:
                # Special case when bases are loaded
                if all(b is not None for b in bases):
                    # NOTE(review): with two outs a run scored here still
                    # counts even if the batter is then retired for the third
                    # out; confirm whether that is the intended model.
                    if np.random.binomial(1, adj_prob(team[bases[2]].speed)):
                        runs += _score_runner(team, current_batter, bases[2], 'third')
                    else:
                        print(f'{team[bases[2]].name} out at home!')
                        outs += 1
                    bases[2] = None
                    if outs < 3:
                        if np.random.binomial(1, adj_prob(batter.speed)):
                            print(f"{batter.name} reaches first on fielder's choice")
                            new_first = current_batter
                        else:
                            print(f'{batter.name} grounds out')
                            outs += 1
                            new_first = None
                        if outs < 3:
                            print(f'{team[bases[1]].name} advances to third')
                            bases[2] = bases[1]
                            print(f'{team[bases[0]].name} advances to second')
                            bases[1] = bases[0]
                            bases[0] = new_first
                else:
                    print(f'{batter.name} grounds out')
                    outs += 1
                    # Runner scores from third
                    if outs < 3 and bases[2] is not None:
                        if np.random.binomial(1, team[bases[2]].speed):
                            runs += _score_runner(team, current_batter, bases[2], 'third')
                            bases[2] = None
                    # Runner advances from second (always, when a runner on
                    # first is behind him; otherwise with probability = speed)
                    if outs < 3 and bases[1] is not None and bases[2] is None:
                        if np.random.binomial(1, team[bases[1]].speed) or bases[0] is not None:
                            print(f'{team[bases[1]].name} advances to third')
                            bases[2] = bases[1]
                            bases[1] = None
                    # Double play
                    if bases[0] is not None:
                        if outs < 2 and np.random.binomial(1, 0.322):
                            print(f'{batter.name} hits into a double play')
                            outs += 1
                            bases[0] = None
                        elif outs < 3:
                            print(f'{team[bases[0]].name} advances to second')
                            bases[1] = bases[0]
                            bases[0] = None
            else:
                print(f'{batter.name} strikes out')
                batter.ko += 1
                outs += 1
        batter.ab += 1
        # Move to the next spot in the order, wrapping back to the top
        current_batter = (current_batter + 1) % len(team)
        # The sentinel default keeps this false while the away team bats
        if inning >= 9 and runs > away_runs:
            print('\nWALK-OFF WIN FOR THE HOME TEAM')
            break
    return current_batter, runs

In [646]:
def generate_game(away_team, home_team, game=None):
    """Simulate a complete game between two nine-player lineups.

    Plays at least nine innings and keeps going while the score is tied.
    The bottom of the ninth is skipped when the home team already leads.
    Play-by-play and score lines are printed along the way.

    Parameters
    ----------
    away_team, home_team : list
        Batting orders; both must contain exactly nine entries.
    game : optional
        Label printed as a header when truthy.

    Returns
    -------
    tuple
        `(away_runs, home_runs, innings_played)`.
    """
    assert len(away_team) == len(home_team) == 9
    away_runs = home_runs = 0
    away_batter = home_batter = 0
    if game:
        print(f'Game {game}\n')
    inning = 0
    # Regulation covers the first nine innings; afterwards only a tie continues.
    while inning < 9 or home_runs == away_runs:
        if inning == 9:
            # Reached only once: regulation just ended with the score level
            print('Score is tied, going to extra innings\n')
        inning += 1
        away_batter, away_runs = generate_inning(
            away_batter, away_runs, away_team, inning)
        print(f'\nTop of inning {inning} is over, score is {away_runs}-{home_runs}\n')
        home_must_bat = inning != 9 or home_runs <= away_runs
        if home_must_bat:
            home_batter, home_runs = generate_inning(
                home_batter, home_runs, home_team, inning, away_runs)
            print(f'\nBottom of inning {inning} is over, score is {away_runs}-{home_runs}\n')
    print(f'GAME OVER, final score is {away_runs}-{home_runs}\n\n')
    return away_runs, home_runs, inning

### Input player attributes

In [572]:
class Player:
    """A batter: fixed attributes plus counters updated during simulation.

    Parameters
    ----------
    name : str
        Display name used in the play-by-play.
    slug : float, optional
        Slugging value that drives at-bat outcomes (default 0.435).
    speed : float, optional
        Probability used for baserunning decisions (default 0.5).
    """

    def __init__(self, name, slug=0.435, speed=0.5):
        self.name = name
        self.slug = slug
        self.speed = speed
        # Counting stats; every one starts at zero
        self.hit = 0     # all hits (singles, doubles, triples, home runs)
        self.ko = 0      # strikeouts
        self.double = 0
        self.triple = 0
        self.hr = 0
        self.ab = 0      # at-bats
        self.rbi = 0
        self.run = 0
        self.sf = 0      # sacrifice flies

    def obs_ba(self):
        """Observed batting average (hits / at-bats) rounded to 3 places.

        Returns 0.0 for a player with no at-bats instead of dividing by zero.
        """
        if self.ab == 0:
            return 0.0
        return round(self.hit / self.ab, 3)

    def obs_slug(self):
        """Observed slugging (total bases / at-bats) rounded to 3 places.

        Returns 0.0 for a player with no at-bats instead of dividing by zero.
        """
        if self.ab == 0:
            return 0.0
        singles = self.hit - self.double - self.triple - self.hr
        total_bases = singles + self.double * 2 + self.triple * 3 + self.hr * 4
        return round(total_bases / self.ab, 3)

In [574]:
# Each Player is (name, slugging value, speed); a lineup is in batting order.
home_team = [
    Player('F. Lindor', .415, 0.7),
    Player('C. Hernandez', .408, 0.8),
    Player('J. Ramirez', .607, 0.6),
    Player('F. Reyes', .450, 0.3),
    Player('C. Santana', .350, 0.4),
    Player('T. Naquin', .383, 0.7),
    Player('D. DeShields', .318, 0.7),
    Player('D. Santana', .286, 0.7),
    Player('R. Perez', .216, 0.2)
]

# No speed is given for the away lineup, so every player uses the default (0.5).
away_team = [
    Player('L. Voit', .610),
    Player('D. LeMahieu', .590),
    Player('A. Hicks', .414),
    Player('G. Sanchez', .365),
    Player('G. Urshela', .490),
    Player('G. Torres', .368),  # name spelling corrected (was 'G. Toerres')
    Player('C. Frazier', .511),
    Player('A. Judge', .554),
    Player('B. Gardner', .392)
]

# Generic lineup: slugging falls from 0.515 to 0.355 in steps of 0.02, default speed.
average_team = [Player(f'Player {i+1}', 0.515-i/50) for i in range(9)]

### Generate a random game

In [624]:
# Play one game; the displayed tuple is (away_runs, home_runs, innings played).
generate_game(away_team, home_team)

[None, None, None]
L. Voit flies out
[None, None, None]
D. LeMahieu hits a single
['D. LeMahieu', None, None]
A. Hicks strikes out
['D. LeMahieu', None, None]
G. Sanchez hits a single
D. LeMahieu advances to second
['G. Sanchez', 'D. LeMahieu', None]
G. Urshela flies out

Top of inning 1 is over, score is 0-0

[None, None, None]
F. Lindor strikes out
[None, None, None]
C. Hernandez hits a single
['C. Hernandez', None, None]
J. Ramirez hits a double
C. Hernandez scores from first
[None, 'J. Ramirez', None]
F. Reyes grounds out
J. Ramirez advances to third
[None, None, 'J. Ramirez']
C. Santana flies out

Bottom of inning 1 is over, score is 0-1

[None, None, None]
G. Toerres hits a single
['G. Toerres', None, None]
C. Frazier hits a single
G. Toerres advances to third
['C. Frazier', None, 'G. Toerres']
A. Judge grounds out
A. Judge hits into a double play
[None, None, 'G. Toerres']
B. Gardner strikes out

Top of inning 2 is over, score is 0-1

[None, None, None]
T. Naquin hits a single
[

(9, 5, 9)

In [603]:
# Print each home player's observed batting average (hits / at-bats, 3 decimals)
# from the stats accumulated on the Player objects so far.
for player in home_team:
    print(player.name, player.obs_ba())

F. Lindor 0.344
C. Hernandez 0.27
J. Ramirez 0.306
F. Reyes 0.306
C. Santana 0.356
T. Naquin 0.268
D. DeShields 0.2
D. Santana 0.226
R. Perez 0.154


### Generate 10,000 random games and get win probability for the home team

In [457]:
def unique_score(row):
    """Format a game's final score as 'winner-loser'.

    `row` must expose `home_win`, `home_runs` and `away_runs`; the home
    score is listed first only when `home_win` is truthy.
    """
    if row.home_win:
        first, second = row.home_runs, row.away_runs
    else:
        first, second = row.away_runs, row.home_runs
    return f'{first}-{second}'

In [661]:
# Drop any capture left over from a previous run before re-simulating.
# On a fresh kernel the name does not exist yet, so tolerate that case
# instead of failing Restart & Run All with a NameError.
try:
    del play_by_play
except NameError:
    pass

In [662]:
%%capture play_by_play
games = pd.DataFrame([
    generate_game(average_team, average_team, game=i+1) for i in range(10000)],
    columns=['away_runs', 'home_runs', 'innings'])
games['home_win'] = games.away_runs < games.home_runs
games['total_runs'] = games.away_runs + games.home_runs
games['unique_score'] = games.apply(unique_score, axis=1)

In [663]:
# Save the captured play-by-play text to disk; the displayed value is the
# number of characters written, as returned by Path.write_text.
Path('play_by_play.txt').write_text(play_by_play.stdout)

45624441

In [664]:
# Share of simulated games by number of innings played.
games.innings.value_counts() / len(games)

9     0.9005
10    0.0461
11    0.0239
12    0.0131
13    0.0074
14    0.0041
15    0.0016
16    0.0013
17    0.0007
18    0.0006
19    0.0003
20    0.0002
23    0.0001
21    0.0001
Name: innings, dtype: float64

In [665]:
# Fraction of simulated games in which the home side outscored the away side.
games.home_win.mean()

0.5441

In [666]:
# Ten most frequent final scores, written winner-loser.
games.unique_score.value_counts().head(10)

4-3    558
5-4    524
3-2    488
6-5    431
4-2    399
5-3    393
6-4    323
6-3    295
5-2    294
7-6    278
Name: unique_score, dtype: int64

In [667]:
# Mean over the average lineup of each player's sacrifice flies per at-bat.
# Divides by p.ab, so this requires every player to have batted at least once.
np.mean([p.sf / p.ab for p in average_team])

0.005422129213389724