In [1]:
import sys
import random
import numpy as np
import pandas as pd

sys.path.insert(0, '../../pipelines/tasks/')

from mlb.sim.state import Inning
from mlb.sim.models import EventVariableFactory, EventCodes, PlayerStats, all_event_codes

In [2]:
# --- Player-level record: player id 'correca01', season 2022 -----------------
df_players = pd.read_csv('../../data/mlb/player_stats.csv', index_col=None)
# Compare seasons as strings so the filter does not depend on the CSV dtype.
df_players['season'] = df_players['season'].astype(str)
is_correa_2022 = (df_players['player'] == 'correca01') & (df_players['season'] == '2022')
# The rest of the notebook refers to strikeouts as 'K' rather than 'SO'.
df_players = df_players[is_correa_2022].rename(columns={'SO': 'K'})

# Counting stats pulled out of each filtered frame.
columns = ['PA', 'AB', 'SH', 'SF', 'K', 'BB', 'HBP', '1B', '2B', '3B', 'HR', 'R', 'G']

# First (expected: only) matching row as a plain {stat: value} dict.
correa = df_players[columns].to_dict(orient='records')[0]

# Hand-entered stat line (not read from the CSV). Unlike `correa`, it carries
# no 'PA', 'R' or 'G' entries.
ichiro = {
    'AB': 704,   # at-bats
    'SH': 2,     # sacrifice bunts
    'SF': 3,     # sacrifice flies
    'K': 63,     # strikeouts
    'BB': 49,    # walks
    'HBP': 4,    # hit by pitch
    '1B': 225,   # singles
    '2B': 24,    # doubles
    '3B': 5,     # triples
    'HR': 8,     # home runs
}

# --- Team-level record: team id 17, season 2022 -------------------------------
df_teams = pd.read_csv('../../data/mlb/season_stats.csv', index_col=None)
df_teams['season'] = df_teams['season'].astype(str)
is_twins_2022 = (df_teams['season'] == '2022') & (df_teams['team'] == 17)
df_teams = df_teams[is_twins_2022].rename(columns={'SO': 'K'})

# Team rows additionally expose the real runs-per-game figure for comparison.
team_columns = [*columns, 'R/G']
twins = df_teams[team_columns].to_dict(orient='records')[0]

In [3]:
# Display the `twins` record (season 2022, team id 17) that was extracted from
# season_stats.csv, so the inputs to the simulation are visible.
twins

{'PA': 6113,
 'AB': 5476,
 'SH': 10,
 'SF': 46,
 'K': 1353,
 'BB': 518,
 'HBP': 62,
 '1B': 891.0,
 '2B': 269,
 '3B': 18,
 'HR': 178,
 'R': 696,
 'G': 162,
 'R/G': 4.3}

In [5]:
def play_inning(event_variables):
    """Simulate one half-inning and return what the inning reported per event.

    Events are drawn at random from the discrete distribution described by
    ``event_variables`` and fed to a fresh ``Inning`` until it reports that
    it is over.

    Parameters
    ----------
    event_variables : iterable
        Non-empty iterable of objects exposing ``probability`` and
        ``event_code`` attributes. It is materialized once, so a one-shot
        iterator is accepted. The probabilities are assumed not to change
        while the inning is being played.

    Returns
    -------
    list
        The values returned by ``Inning.execute`` for each generated event,
        in order. In this notebook's sample output each entry is a dict with
        'bases', 'runs', 'outs', 'event' and 'desc' keys.

    Raises
    ------
    ValueError
        If ``event_variables`` is empty, or if a draw matches no event
        (only possible when the probabilities are not valid numbers).
    """
    def get_prob_ranges(events):
        """Pair each event variable with its cumulative-probability upper bound."""
        upper_bounds = []
        running_total = 0
        for ev in events:
            running_total += ev.probability
            upper_bounds.append(running_total)

        # Pin the final bound to exactly 1 so float rounding in the running
        # sum can never leave a draw without a matching event.
        upper_bounds[-1] = 1

        return list(zip(upper_bounds, events))

    def generate_event(baseball_events):
        """Draw one event code according to the cumulative bounds."""
        rv = random.random()

        for upper_bound, ev in baseball_events:
            if rv <= upper_bound:
                return ev.event_code

        raise ValueError('No Event Code was generated!')

    events = list(event_variables)
    if not events:
        raise ValueError('event_variables must contain at least one event')

    # The cumulative table is the same for every draw, so build it once per
    # inning instead of once per generated event.
    baseball_events = get_prob_ranges(events)

    inning = Inning()

    history = []
    while not inning.is_over():
        history.append(inning.execute(generate_event(baseball_events)))

    return history

# Turn the `ichiro` stat line into event variables, then show one sample
# half-inning as the cell output.
event_factory = EventVariableFactory()
ichiro_likelihoods = PlayerStats(ichiro).likelihoods()
event_variables = event_factory.create(ichiro_likelihoods)
play_inning(event_variables)

[{'bases': [0, 0, 0],
  'runs': 0,
  'outs': 1,
  'event': <EventCodes.LineDriveInfieldFly: 15>,
  'desc': 'LineDriveInfieldFly'},
 {'bases': [0, 0, 0],
  'runs': 0,
  'outs': 2,
  'event': <EventCodes.LongFly: 16>,
  'desc': 'LongFly'},
 {'bases': [0, 0, 0],
  'runs': 0,
  'outs': 3,
  'event': <EventCodes.LongFly: 16>,
  'desc': 'LongFly'}]

In [6]:
# Scale factor used below to turn a runs-per-half-inning average into runs per
# game (about 8.91 innings).
# NOTE(review): 26.72 is presumably the average number of outs a team makes per
# game, divided by 3 outs per inning -- confirm the source of this figure.
avg_innings_per_game = 26.72 / 3

def run_simulation(data, iterations=25000):
    """Return the mean number of runs per simulated half-inning.

    ``data`` is handed to ``PlayerStats`` and its ``likelihoods()`` are turned
    into event variables by ``EventVariableFactory``. ``play_inning`` is then
    called ``iterations`` times with those same event variables, and the
    'runs' value of the last entry of each history is averaged.

    Raises ``ZeroDivisionError`` when ``iterations`` is 0.
    """
    factory = EventVariableFactory()
    event_variables = factory.create(PlayerStats(data).likelihoods())

    # The last history entry carries the run total at the end of the inning.
    total_runs = sum(
        play_inning(event_variables)[-1]['runs']
        for _ in range(iterations)
    )

    return total_runs / iterations

In [7]:
# Every simulated plate appearance uses the `ichiro` stat line.
ichiro_runs = run_simulation(ichiro)
ichiro_runs_per_game = ichiro_runs * avg_innings_per_game

print(ichiro_runs)
print(ichiro_runs_per_game, 'runs per game')
# 162 games per season, matching the 'G' value shown in the `twins` record.
print(ichiro_runs_per_game * 162, 'runs per season')

0.79388
7.070824533333333 runs per game
1145.4735744 runs per season


In [8]:
# Simulate with the team totals and compare against the real R/G and R values
# stored in the `twins` record.
twins_runs = run_simulation(twins)
twins_runs_per_game = twins_runs * avg_innings_per_game

print(twins_runs)
print(twins_runs_per_game, 'runs per game', 'vs', twins['R/G'])
print(twins_runs_per_game * 162, 'runs per season', 'vs', twins['R/G'] * 162, 'w/ real', twins['R'])

0.48164
4.289806933333333 runs per game vs 4.3
694.9487231999999 runs per season vs 696.6 w/ real 696


In [9]:
# Every simulated plate appearance uses the `correa` stat line.
correa_runs = run_simulation(correa)
correa_runs_per_game = correa_runs * avg_innings_per_game

print(correa_runs)
print(correa_runs_per_game, 'runs per game')
# 162 games per season, matching the 'G' value shown in the `twins` record.
print(correa_runs_per_game * 162, 'runs per season')

0.69804
6.2172095999999994 runs per game
1007.1879551999999 runs per season


In [10]:
def remove_player_from_team(team, player):
    """Return a copy of ``team`` with each of ``player``'s stats subtracted.

    Keys found only in ``team`` are carried over unchanged. A key present in
    ``player`` but missing from ``team`` raises ``KeyError``. Neither input
    dict is modified.
    """
    remainder = team.copy()
    remainder.update({
        stat: remainder[stat] - amount
        for stat, amount in player.items()
    })
    return remainder

In [11]:
# Subtract the `correa` stat line from the team totals and re-run the simulation.
twins_without_correa = remove_player_from_team(twins, correa)
twins_minus_correa_runs = run_simulation(twins_without_correa)
twins_minus_correa_runs_per_game = twins_minus_correa_runs * avg_innings_per_game

print(twins_minus_correa_runs)
print(twins_minus_correa_runs_per_game, 'runs per game')
# 162 games per season, matching the 'G' value shown in the `twins` record.
print(twins_minus_correa_runs_per_game * 162, 'runs per season')

0.47328
4.2153472 runs per game
682.8862464 runs per season
