In [1]:
import sys
import math
import random
import numpy as np
import pandas as pd
import seaborn as sns

sys.path.insert(0, '../../pipelines/tasks/mlb/')

from common.sim.state import Inning
from common.sim.models import EventVariableFactory, EventCodes, all_event_codes, BattersFactory
from common.sim.engines import InningSimulator

In [2]:
## Stat columns selected from both the player-level and team-level aggregate frames.
columns = [
    'PA', 'AB', 'SH', 'SF',
    'K', 'BB', 'HBP',
    '1B', '2B', '3B', 'HR',
    'R', 'G',
]

In [3]:
## Player-level aggregates narrowed to the single row for player id 'correca01' in 2022.
## 'season' is cast to str so it can be compared against the '2022' literal, and the
## CSV's 'SO' column is exposed as 'K' to match the names listed in `columns`.
df_players = (
    pd.read_csv('../../data/mlb/batters/player_aggregates.csv', index_col=None)
    .assign(season=lambda frame: frame['season'].astype(str))
    .loc[lambda frame: (frame['player'] == 'correca01') & (frame['season'] == '2022')]
    .rename(columns={'SO': 'K'})
)

## First (expected: only) matching record as a plain {stat: value} dict.
## Raises IndexError when the filter above matches no rows.
correa = df_players[columns].to_dict('records')[0]

## Hand-entered batting line for Ichiro (used under the notebook's "Ichiro - 2004" heading).
## Unlike the CSV-derived dicts, it carries no 'PA', 'R' or 'G' entries.
ichiro = {
    'AB': 704, ## At-bats (not plate appearances; 'PA' is a separate stat in `columns`)
    'SH': 2, ## Sacrifice bunts
    'SF': 3, ## Sacrifice flies
    'K': 63, ## Strikeouts
    'BB': 49, ## Walks
    'HBP': 4, ## Hit by pitch
    '1B': 225, ## Singles
    '2B': 24, ## Doubles
    '3B': 5, ## Triples
    'HR': 8 ## Home runs
}

## Team-level season aggregates, restricted to 2022, with 'SO' exposed as 'K'.
df_teams = (
    pd.read_csv('../../data/mlb/batters/season_aggregates.csv', index_col=None)
    .assign(season=lambda frame: frame['season'].astype(str))
    .loc[lambda frame: frame['season'] == '2022']
    .rename(columns={'SO': 'K'})
)

## League-average team: the mean of every stat over the 2022 rows, floored to whole counts.
avg_team = {
    stat: math.floor(mean_value)
    for stat, mean_value in df_teams[columns].mean().items()
}

## Runs per game is derived from the floored R and G totals, not read from the CSV.
avg_team['R/G'] = avg_team['R'] / avg_team['G']

## The Twins row additionally keeps the 'R/G' column that the CSV provides.
twins_columns = columns + ['R/G']
## NOTE(review): team id 17 is presumably the Twins (going by the variable name) - confirm.
twins = df_teams.loc[df_teams.team == 17, twins_columns].to_dict('records')[0]

In [4]:
## Factor used below to scale simulated runs-per-inning up to runs-per-game.
## NOTE(review): 26.72 is presumably the average number of outs a team makes per game
## (divided by 3 outs per inning) - confirm the source of this figure.
avg_innings_per_game = 26.72 / 3

def run_simulation(batters, iterations=50000):
    """Return the average number of runs scored per simulated inning.

    An ``InningSimulator`` is built once from ``batters`` and ``play()`` is called
    ``iterations`` times. For each inning played, the ``runs`` value of the last
    entry in ``inning.history`` is added to the total.

    Parameters
    ----------
    batters
        Passed straight through to ``InningSimulator``.
    iterations : int
        Number of innings to simulate.

    Returns
    -------
    float
        Total runs divided by ``iterations``.

    Raises
    ------
    ZeroDivisionError
        If ``iterations`` is 0.
    """
    simulator = InningSimulator(batters)

    total_runs = sum(
        simulator.play().history[-1].runs
        for _ in range(iterations)
    )

    return total_runs / iterations

#### Ichiro - 2004

In [5]:
## Single-entry lineup: the hand-entered Ichiro line, given weight 1.
batters = BattersFactory().create_batters([('ichiro', ichiro, 1)])

## Mean runs per simulated inning, then scaled to a game and to a 162-game season.
ichiro_runs = run_simulation(batters)
ichiro_runs_per_game = ichiro_runs * avg_innings_per_game
runs_per_season = ichiro_runs_per_game * 162

print(ichiro_runs)
print(ichiro_runs_per_game, 'runs per game')
print(runs_per_season, 'runs per season')

0.80284
7.150628266666667 runs per game
1158.4017792 runs per season


#### Average Team - 2022

In [6]:
## Single-entry lineup: the floored league-average 2022 team line, given weight 1.
batters = BattersFactory().create_batters([('avg_team', avg_team, 1)])

avg_team_runs = run_simulation(batters)
avg_team_runs_per_game = avg_team_runs * avg_innings_per_game

print(avg_team_runs)
## Simulated runs per game shown next to the R/G derived from the averaged totals.
print(avg_team_runs_per_game, 'runs per game', 'vs', avg_team['R/G'])
print(avg_team_runs_per_game * 162, 'runs per season')

0.46834
4.171348266666667 runs per game vs 4.277777777777778
675.7584191999999 runs per season


#### Correa - 2022

In [7]:
## Single-entry lineup: Correa's 2022 line, given weight 1.
batters = BattersFactory().create_batters([('correa', correa, 1)])

correa_runs = run_simulation(batters)
correa_runs_per_game = correa_runs * avg_innings_per_game

print(correa_runs)
print(correa_runs_per_game, 'runs per game')
print(correa_runs_per_game * 162, 'runs per season')

0.68458
6.097325866666666 runs per game
987.7667903999999 runs per season


#### Correa on Average Team - 2022

In [8]:
## Share of an average team's plate appearances taken by Correa (~9 %); despite the
## "hits_prob" names, both weights are PA shares and together they sum to 1.
correa_hits_prob = correa['PA'] / avg_team['PA']
avg_team_hits_prob = 1 - correa_hits_prob

## Two-entry lineup: Correa weighted by his PA share, the average team by the remainder.
batters = BattersFactory().create_batters([
    ('correa', correa, correa_hits_prob),
    ('avg_team', avg_team, avg_team_hits_prob),
])

correa_avg_team_runs = run_simulation(batters)
correa_avg_team_runs_per_game = correa_avg_team_runs * avg_innings_per_game

print(correa_avg_team_runs)
print(correa_avg_team_runs_per_game, 'runs per game')
print(correa_avg_team_runs_per_game * 162, 'runs per season')

0.4796
4.2716373333333335 runs per game
692.005248 runs per season


#### Twins - 2022

In [9]:
## Single-entry lineup: the 2022 Twins team line, given weight 1.
batters = BattersFactory().create_batters([('twins', twins, 1)])

twins_runs = run_simulation(batters)
twins_runs_per_game = twins_runs * avg_innings_per_game
## Kept as a named value: the wins calculation further down reads it.
twins_runs_per_season = twins_runs_per_game * 162

print(twins_runs)
## Simulated figures are printed beside the recorded R/G for comparison.
print(twins_runs_per_game, 'runs per game', 'vs', twins['R/G'])
print(twins_runs_per_season, 'runs per season', 'vs', twins['R/G'] * 162)

0.48448
4.315101866666667 runs per game vs 4.3
699.0465024 runs per season vs 696.6


#### Twins minus Correa - 2022

In [10]:
def remove_player_from_team(team, player):
    """Return a copy of ``team`` with ``player``'s stats subtracted from it.

    Every key present in ``player`` is subtracted from the same key in the
    copy. Keys that appear only in ``team`` are carried over unchanged, and
    ``team`` itself is not modified.

    Raises
    ------
    KeyError
        If ``player`` has a key that ``team`` lacks.
    """
    remaining = team.copy()
    remaining.update({
        stat: team[stat] - amount
        for stat, amount in player.items()
    })
    return remaining

In [11]:
## Single-entry lineup: the Twins team line with Correa's stats subtracted, given weight 1.
batters = BattersFactory().create_batters([
    ('twins_minus_correa', remove_player_from_team(twins, correa), 1),
])

twins_minus_correa_runs = run_simulation(batters)
twins_minus_correa_runs_per_game = twins_minus_correa_runs * avg_innings_per_game
## Kept as a named value: the wins calculation further down reads it.
twins_minus_correa_runs_per_season = twins_minus_correa_runs_per_game * 162

print(twins_minus_correa_runs)
print(twins_minus_correa_runs_per_game, 'runs per game')
print(twins_minus_correa_runs_per_season, 'runs per season')

0.46374
4.1303776 runs per game
669.1211712 runs per season


#### Wins Created by Correa for 2022 Twins

In [12]:
## 2022 Twins reference line: 82-80, 696 Runs, 684 Runs Allowed
## NOTE(review): confirm whether 82-80 is the actual or the Pythagorean record.

## Runs allowed is held at this value for both scenarios compared below.
runs_allowed = 684

def pythagorean(runs_scored, runs_allowed, exp = 2, ndigits = 3):
    """Estimate a winning percentage with the Pythagorean expectation.

    The value is ``rate / (rate + 1)`` where
    ``rate = (runs_scored / runs_allowed) ** exp``, which is algebraically the
    same as ``RS**exp / (RS**exp + RA**exp)``.

    Parameters
    ----------
    runs_scored : number
        Runs scored over the period being evaluated.
    runs_allowed : number
        Runs allowed over the same period. Must be non-zero.
    exp : number, default 2
        Exponent applied to the runs ratio.
    ndigits : int or None, default 3
        Decimal places to round the result to. The default matches the
        previous hard-coded rounding. Pass ``None`` for the unrounded value,
        which avoids amplifying rounding error when the percentage is
        multiplied by a number of games.

    Returns
    -------
    float
        Expected winning percentage between 0 and 1.

    Raises
    ------
    ZeroDivisionError
        If ``runs_allowed`` is 0.
    """
    rate = (runs_scored / runs_allowed) ** exp
    win_pct = rate / (rate + 1)

    if ndigits is None:
        return win_pct
    return round(win_pct, ndigits)

## Expected wins over 162 games for each scenario, computed once and reused below.
wins_without_correa = pythagorean(twins_minus_correa_runs_per_season, runs_allowed) * 162
wins_with_correa = pythagorean(twins_runs_per_season, runs_allowed) * 162

print('wins w/out correa:', wins_without_correa)
print('wins w correa:', wins_with_correa)

## Difference between the two expected win totals.
print('wins created:', wins_with_correa - wins_without_correa)

wins w/out correa: 79.218
wins w correa: 82.782
wins created: 3.563999999999993
