In [1]:
import sys
import random
import numpy as np
import pandas as pd
import seaborn as sns

sys.path.insert(0, '../../pipelines/tasks/')

from mlb.sim.state import Inning
from mlb.sim.models import EventVariableFactory, EventCodes, PlayerStats, all_event_codes

In [2]:
## Stat columns selected from both the player frame and the team frame below
columns = [
    'PA', 'AB', 'SH', 'SF',
    'K', 'BB', 'HBP',
    '1B', '2B', '3B', 'HR',
    'R', 'G',
]

In [3]:
## Player rows narrowed to player id 'correca01' in season '2022';
## the 'SO' column is exposed as 'K' to match `columns`.
df_players = (
    pd.read_csv('../../data/mlb/player_stats.csv', index_col=None)
    .assign(season=lambda frame: frame.season.astype(str))
    .loc[lambda frame: (frame.player == 'correca01') & (frame.season == '2022')]
    .rename(columns={'SO': 'K'})
)

## First matching row as a plain {stat: value} dict
correa = df_players[columns].to_dict('records')[0]

## Hand-entered batting line (not read from the CSV files). Has no 'PA' key.
## NOTE(review): totals look like Ichiro Suzuki's 2004 season -- confirm the source.
ichiro = {
    'AB': 704,   ## At-bats
    'SH': 2,     ## Sacrifice bunts
    'SF': 3,     ## Sacrifice flies
    'K': 63,     ## Strikeouts
    'BB': 49,    ## Walks
    'HBP': 4,    ## Hit by pitch
    '1B': 225,   ## Singles
    '2B': 24,    ## Doubles
    '3B': 5,     ## Triples
    'HR': 8,     ## Home runs
}

## Team rows for season '2022', with the 'SO' column exposed as 'K'
df_teams = (
    pd.read_csv('../../data/mlb/season_stats.csv', index_col=None)
    .assign(season=lambda frame: frame.season.astype(str))
    .loc[lambda frame: frame.season == '2022']
    .rename(columns={'SO': 'K'})
)

## Column-wise mean over those team rows, flattened to a {stat: value} dict
avg_team = df_teams[columns].mean().to_frame().T.to_dict('records')[0]
## Runs per game derived from the averaged totals
avg_team.update({'R/G': avg_team['R'] / avg_team['G']})

## Row for team id 17 as a {stat: value} dict. Unlike avg_team, 'R/G' is read
## straight from the team frame rather than computed here.
## NOTE(review): team id 17 is presumably the Twins -- confirm against the data.
twins_columns = [*columns, 'R/G']
twins = df_teams.loc[df_teams.team == 17, twins_columns].to_dict('records')[0]

In [4]:
## Scale factor from per-half-inning runs to per-game runs.
## NOTE(review): 26.72 is presumably the average outs a team makes per game
## (divided by 3 outs per inning) -- confirm the source of this figure.
avg_innings_per_game = 26.72 / 3

def generate_event(event_variables_with_ranges):
    """Draw one event code at random.

    A single uniform draw in [0, 1) is compared against each ``(bound, variable)``
    pair in order; the ``event_code`` of the first pair whose bound is at least
    the draw is returned.

    Raises:
        ValueError: if no pair has a bound >= the draw (including empty input).
    """
    draw = random.random()
    not_found = object()

    # Lazily scan the pairs so only the first match's event_code is read.
    chosen = next(
        (
            variable.event_code
            for bound, variable in event_variables_with_ranges
            if draw <= bound
        ),
        not_found,
    )

    if chosen is not_found:
        raise ValueError('No Event Code was generated!')

    return chosen

def play_inning(event_variables_with_ranges):
    """Simulate one inning and return the list of per-event results.

    A fresh ``Inning`` is fed randomly generated event codes until it reports
    that it is over. Each value returned by ``Inning.execute`` is collected in
    order.
    """
    current = Inning()
    results = []

    while True:
        if current.is_over():
            return results

        event_code = generate_event(event_variables_with_ranges)
        results.append(current.execute(event_code))

def run_simulation(data, iterations=50000, seed=None):
    """Estimate the average runs scored per simulated inning.

    Event likelihoods are derived from ``data`` via ``PlayerStats`` and turned
    into ranged event variables; every simulated event is drawn from that one
    distribution.

    Args:
        data: stat mapping accepted by ``PlayerStats``.
        iterations: number of innings to simulate.
        seed: optional seed for the global ``random`` module. When given, the
            generator is re-seeded before simulating so the result is
            reproducible; ``None`` (the default) leaves the generator state
            untouched, matching the previous behaviour.

    Returns:
        Total runs divided by ``iterations``.
    """
    if seed is not None:
        # Note: this re-seeds the module-level generator shared by all callers.
        random.seed(seed)

    event_variables = EventVariableFactory().create_with_ranges(
        PlayerStats(data).likelihoods()
    )

    # The last history entry's 'runs' value is taken as the inning's run total.
    runs = sum(
        play_inning(event_variables)[-1]['runs']
        for _ in range(iterations)
    )

    return runs / iterations

In [5]:
## Simulation driven entirely by the ichiro batting line
ichiro_runs = run_simulation(ichiro)
ichiro_runs_per_game = ichiro_runs * avg_innings_per_game
runs_per_season = ichiro_runs_per_game * 162

print(ichiro_runs)
print(ichiro_runs_per_game, 'runs per game')
print(runs_per_season, 'runs per season')

0.79828
7.110013866666666 runs per game
1151.8222463999998 runs per season


In [6]:
## Simulation driven by the averaged team line, compared with its R/G
avg_team_runs = run_simulation(avg_team)
avg_team_runs_per_game = avg_team_runs * avg_innings_per_game

print(avg_team_runs)
print(avg_team_runs_per_game, 'runs per game', 'vs', avg_team['R/G'])
print(avg_team_runs_per_game * 162, 'runs per season')

0.47128
4.197533866666666 runs per game vs 4.283333333333333
680.0004864 runs per season


In [7]:
## Simulation driven by the twins line, compared with the R/G value stored in it
twins_runs = run_simulation(twins)
twins_runs_per_game = twins_runs * avg_innings_per_game
twins_runs_per_season = twins_runs_per_game * 162

print(twins_runs)
print(twins_runs_per_game, 'runs per game', 'vs', twins['R/G'])
print(twins_runs_per_season, 'runs per season', 'vs', twins['R/G'] * 162)

0.4843
4.313498666666667 runs per game vs 4.3
698.786784 runs per season vs 696.6


In [8]:
## Simulation driven entirely by the correa batting line
correa_runs = run_simulation(correa)
correa_runs_per_game = correa_runs * avg_innings_per_game

print(correa_runs)
print(correa_runs_per_game, 'runs per game')
print(correa_runs_per_game * 162, 'runs per season')

0.66902
5.958738133333332 runs per game
965.3155775999999 runs per season


In [9]:
def remove_player_from_team(team, player):
    """Return a copy of ``team`` with ``player``'s totals subtracted.

    Every key in ``player`` must also exist in ``team`` (otherwise ``KeyError``
    is raised). Keys that appear only in ``team`` are carried over unchanged,
    and ``team`` itself is not modified.
    """
    remainder = team.copy()
    remainder.update({
        stat: remainder[stat] - amount
        for stat, amount in player.items()
    })
    return remainder

In [10]:
## Simulation driven by the twins line with the correa totals subtracted
twins_minus_correa = remove_player_from_team(twins, correa)
twins_minus_correa_runs = run_simulation(twins_minus_correa)
twins_minus_correa_runs_per_game = twins_minus_correa_runs * avg_innings_per_game
twins_minus_correa_runs_per_season = twins_minus_correa_runs_per_game * 162

print(twins_minus_correa_runs)
print(twins_minus_correa_runs_per_game, 'runs per game')
print(twins_minus_correa_runs_per_season, 'runs per season')

0.4628
4.122005333333333 runs per game
667.764864 runs per season


## Wins Created

In [11]:
## 82-80, 696 Runs, 684 Runs Allowed
## NOTE(review): presumably the Twins' 2022 season line; 82-80 may be the
## expected (Pythagorean) record rather than the actual one -- confirm.
## Runs allowed is held fixed for both win estimates printed below.

runs_allowed = 684

def pythagorean(runs_scored, runs_allowed, exp=2, ndigits=3):
    """Pythagorean expectation: estimated winning percentage from runs.

    Computes ``r / (r + 1)`` where ``r = (runs_scored / runs_allowed) ** exp``.

    Args:
        runs_scored: runs scored.
        runs_allowed: runs allowed; must be non-zero (``ZeroDivisionError``
            otherwise).
        exp: exponent applied to the run ratio.
        ndigits: decimal places to round the result to. Defaults to 3, the
            previously hard-coded precision. Pass ``None`` to get the
            unrounded value, e.g. before multiplying by a game count, where
            3-decimal rounding quantises the result.

    Returns:
        The winning percentage as a float in [0, 1].
    """
    rate = (runs_scored / runs_allowed) ** exp
    win_pct = rate / (rate + 1)

    if ndigits is None:
        return win_pct

    return round(win_pct, ndigits)

## Expected wins over 162 games for each scenario, and the difference between them
wins_without_correa = pythagorean(twins_minus_correa_runs_per_season, runs_allowed) * 162
wins_with_correa = pythagorean(twins_runs_per_season, runs_allowed) * 162

print('wins w/out correa:', wins_without_correa)
print('wins w correa:', wins_with_correa)
print('wins created:', wins_with_correa - wins_without_correa)

wins w/out correa: 79.056
wins w correa: 82.782
wins created: 3.725999999999999
