# Setting Up Simulation

## Loading Probs

In [1]:
from sim.load_data import load_hitters, load_pitchers
import pandas as pd
from sim import simulate_game

rename_map = {
    "out_rate_pred": "out",
    "so_rate_pred": "so",
    "bb_rate_pred": "walk",
    "hbp_rate_pred": "hbp",
    "roe_rate_pred": "roe",
    "single_rate_pred": "single",
    "double_rate_pred": "double",
    "triple_rate_pred": "triple",
    "hr_rate_pred": "hr",
}

hitter_probs = pd.read_csv("data/hitter_probs.csv").rename(columns=rename_map)
pitcher_probs = pd.read_csv("data/pitcher_probs.csv").rename(columns=rename_map)

# Load players
hitters = load_hitters("data/hitter_probs.csv")
pitchers = load_pitchers("data/pitcher_probs.csv")

print(f"{len(hitters)} hitters loaded, {len(pitchers)} pitchers loaded.")
print("Example hitter objects:")
for h in hitters[:3]:
    print(f"  {h.name} ({h.hand}) â€” outcomes: {list(h.probs.keys())[:5]}...")

55 hitters loaded, 60 pitchers loaded.
Example hitter objects:
  Shohei Ohtani (L) â€” outcomes: ['out', 'so', 'bb', 'hbp', 'roe']...
  George Springer (R) â€” outcomes: ['out', 'so', 'bb', 'hbp', 'roe']...
  George Springer (R) â€” outcomes: ['out', 'so', 'bb', 'hbp', 'roe']...


## Checking for Missing Players

In [2]:
# Load hitters
hitters = load_hitters("data/hitter_probs.csv")

# Deduplicate hitters by player name (keep first occurrence)
unique_hitters = {}
for p in hitters:
    if p.name not in unique_hitters:
        unique_hitters[p.name] = p
hitters = list(unique_hitters.values())

desired_names = [
    "Shohei Ohtani", "Mookie Betts", "Will Smith",
    "Freddie Freeman", "Tommy Edman", "Teoscar HernÃ¡ndez",
    "Max Muncy", "Enrique HernÃ¡ndez", "Andy Pages"
]

found_names = [p.name for p in hitters if p.name in desired_names]

print("Found:", len(found_names), "of", len(desired_names))
print("Missing names:", [n for n in desired_names if n not in [p.name for p in hitters]])


Found: 9 of 9
Missing names: []


In [3]:
import pandas as pd

df = pd.read_csv("data/hitter_probs.csv")

# Look for names containing part of the missing names
print(df[df["full_name"].str.contains("Bichette", case=False, na=False)]["full_name"])
print(df[df["full_name"].str.contains("e Hern", case=False, na=False)]["full_name"])


12    Bo Bichette
16    Bo Bichette
Name: full_name, dtype: object
24    Enrique HernÃ¡ndez
42    Enrique HernÃ¡ndez
Name: full_name, dtype: object


## Setting Lineups

In [4]:
# Load hitters
hitters = load_hitters("data/hitter_probs.csv")

# Deduplicate hitters by player name (keep first occurrence)
unique_hitters = {}
for p in hitters:
    if p.name not in unique_hitters:
        unique_hitters[p.name] = p
hitters = list(unique_hitters.values())

# Blue Jays Game 1 lineup
bluejays_lineup = [p for p in hitters if p.name in [
    "George Springer", "Davis Schneider", "Vladimir Guerrero Jr.",
    "Bo Bichette", "Alejandro Kirk", "Ernie Clement", "Daulton Varsho",
    "Isiah Kiner-Falefa", "AndrÃ©s GimÃ©nez"
]]

# Dodgers Game 1 lineup
dodgers_lineup = [p for p in hitters if p.name in [
    "Shohei Ohtani", "Mookie Betts", "Will Smith",
    "Freddie Freeman", "Tommy Edman", "Teoscar HernÃ¡ndez",
    "Max Muncy", "Enrique HernÃ¡ndez", "Andy Pages"
]]

# Optional sanity check
print("Blue Jays lineup:")
for i, p in enumerate(bluejays_lineup, start=1):
    print(f"{i}. {p.name} ({p.hand})")

print("\nDodgers lineup:")
for i, p in enumerate(dodgers_lineup, start=1):
    print(f"{i}. {p.name} ({p.hand})")


Blue Jays lineup:
1. George Springer (R)
2. Vladimir Guerrero Jr. (R)
3. Daulton Varsho (L)
4. Alejandro Kirk (R)
5. Davis Schneider (R)
6. Bo Bichette (R)
7. Ernie Clement (R)
8. AndrÃ©s GimÃ©nez (L)
9. Isiah Kiner-Falefa (R)

Dodgers lineup:
1. Shohei Ohtani (L)
2. Max Muncy (L)
3. Will Smith (R)
4. Freddie Freeman (L)
5. Mookie Betts (R)
6. Andy Pages (R)
7. Enrique HernÃ¡ndez (R)
8. Teoscar HernÃ¡ndez (R)
9. Tommy Edman (S)


## Setting Pitching Staffs

In [5]:
# --- Deduplicate pitchers ---
unique_pitchers = {p.name: p for p in pitchers}.values()


# --- BLUE JAYS bullpen (relievers only) ---
bluejays_bullpen = [
    p for p in unique_pitchers if p.name in [
        "Jeff Hoffman", "Seranthony DomÃ­nguez", "Louis Varland",
        "Brendon Little", "Chris Bassitt", "Mason Fluharty",
        "Braydon Fisher", "Eric Lauer"
    ]
]

# --- DODGERS bullpen (relievers only) ---
dodgers_bullpen = [
    p for p in unique_pitchers if p.name in [
        "Roki Sasaki", "Blake Treinen", "Anthony Banda", "Emmet Sheehan",
        "Jack Dreyer", "Edgardo Henriquez", "Justin Wrobleski",
        "Will Klein", "Clayton Kershaw"
    ]
]

# --- STARTING PITCHERS ---
team_bluejays_starting_pitcher = next(p for p in unique_pitchers if p.name == "Trey Yesavage")
team_dodgers_starting_pitcher   = next(p for p in unique_pitchers if p.name == "Blake Snell")

# Insert starters at the top of each bullpen
bluejays_bullpen.insert(0, team_bluejays_starting_pitcher)
dodgers_bullpen.insert(0, team_dodgers_starting_pitcher)

# --- BUILD TEAMS ---
from sim import Team

team_bluejays = Team(name="Blue Jays", lineup=bluejays_lineup, bullpen=bluejays_bullpen)
team_dodgers  = Team(name="Dodgers", lineup=dodgers_lineup, bullpen=dodgers_bullpen)

# --- Build reserves (team filtering now works!) ---
team_bluejays.reserves = [
    p for p in unique_pitchers
    if p.team == "TOR"
    and p.name not in [team_bluejays.starting_pitcher.name] + [bp.name for bp in team_bluejays.bullpen]
]

team_dodgers.reserves = [
    p for p in unique_pitchers
    if p.team == "LAD"
    and p.name not in [team_dodgers.starting_pitcher.name] + [bp.name for bp in team_dodgers.bullpen]
]


# --- VERIFY ---
print(f"{team_bluejays.name} starter:", team_bluejays.get_pitcher().name)
print(f"{team_dodgers.name} starter:", team_dodgers.get_pitcher().name)
print(f"{team_bluejays.name} bullpen:", [p.name for p in team_bluejays.bullpen])
print(f"{team_dodgers.name} bullpen:", [p.name for p in team_dodgers.bullpen])
print(f"{team_bluejays.name} reserves:", [p.name for p in team_bluejays.reserves])
print(f"{team_dodgers.name} reserves:", [p.name for p in team_dodgers.reserves])


Blue Jays starter: Trey Yesavage
Dodgers starter: Blake Snell
Blue Jays bullpen: ['Trey Yesavage', 'Louis Varland', 'Brendon Little', 'Mason Fluharty', 'Jeff Hoffman', 'Chris Bassitt', 'Eric Lauer', 'Seranthony DomÃ­nguez', 'Braydon Fisher']
Dodgers bullpen: ['Blake Snell', 'Justin Wrobleski', 'Jack Dreyer', 'Clayton Kershaw', 'Emmet Sheehan', 'Anthony Banda', 'Blake Treinen', 'Roki Sasaki']
Blue Jays reserves: ['Shane Bieber', 'Kevin Gausman', 'Yariel RodrÃ­guez', 'Isiah Kiner-Falefa', 'Max Scherzer', 'Tyler Heineman']
Dodgers reserves: ['Alex Vesia', 'Yoshinobu Yamamoto', 'Tyler Glasnow', 'Shohei Ohtani', 'Ben Casparius', 'Miguel Rojas', 'Enrique HernÃ¡ndez']


# Running Simulations

In [None]:
# from importlib import reload
# import sim.simulate
# reload(sim.simulate)
# from sim import run_simulations


## Individual Sim

In [6]:
import copy
from sim import simulate_game
import pandas as pd

# Deepcopy ensures fresh bullpens, lineups, and fatigue every time
t1 = copy.deepcopy(team_bluejays)
t2 = copy.deepcopy(team_dodgers)

result = simulate_game(
    team1=t1,
    team2=t2,
    hitter_probs=hitter_probs,
    pitcher_probs=pitcher_probs,
    verbose=True
)
print(result)




Top 1: Blue Jays batting vs Dodgers
End of Top 1: {'Blue Jays': 0, 'Dodgers': 0}

Bottom 1: Dodgers batting vs Blue Jays
End of 1: {'Blue Jays': 0, 'Dodgers': 0}

Top 2: Blue Jays batting vs Dodgers
End of Top 2: {'Blue Jays': 0, 'Dodgers': 0}

Bottom 2: Dodgers batting vs Blue Jays
End of 2: {'Blue Jays': 0, 'Dodgers': 0}

Top 3: Blue Jays batting vs Dodgers
End of Top 3: {'Blue Jays': 0, 'Dodgers': 0}

Bottom 3: Dodgers batting vs Blue Jays
End of 3: {'Blue Jays': 0, 'Dodgers': 2}

Top 4: Blue Jays batting vs Dodgers
End of Top 4: {'Blue Jays': 0, 'Dodgers': 2}

Bottom 4: Dodgers batting vs Blue Jays
End of 4: {'Blue Jays': 0, 'Dodgers': 2}

Top 5: Blue Jays batting vs Dodgers
ðŸ§® Manager selects Justin Wrobleski (Avg DRA-: 68.1)
End of Top 5: {'Blue Jays': 0, 'Dodgers': 2}

Bottom 5: Dodgers batting vs Blue Jays
ðŸ§® Manager selects Louis Varland (Avg DRA-: 65.7)
End of 5: {'Blue Jays': 0, 'Dodgers': 2}

Top 6: Blue Jays batting vs Dodgers
ðŸ§® Manager selects Emmet Sheehan (Avg D

## Multi-Sim

In [None]:
# import sys
# import importlib

# # Remove any cached reference
# sys.modules.pop('sim.game', None)

# # Re-import the module
# import sim.game
# importlib.reload(sim.game)

# from sim.game import simulate_game


### Run

In [7]:
import copy
import pandas as pd
from sim import simulate_game

n_sims = 1000
all_results = []
hitting_boxscores = {}
pitching_boxscores = {}

for i in range(n_sims):
    t1 = copy.deepcopy(team_bluejays)
    t2 = copy.deepcopy(team_dodgers)

    # Unpack all three return values
    score, hit_df, pit_df, log_df = simulate_game(
        team1=t1,
        team2=t2,
        hitter_probs=hitter_probs,
        pitcher_probs=pitcher_probs,
        verbose=False
    )


    game_id = f"Game_{i+1}"
    all_results.append({
        "game_id": game_id,
        "Blue Jays": score["Blue Jays"],
        "Dodgers": score["Dodgers"],
        "winner": "Blue Jays" if score["Blue Jays"] > score["Dodgers"] else "Dodgers"
    })

    # Save full boxscores for both hitting and pitching
    hitting_boxscores[game_id] = hit_df
    pitching_boxscores[game_id] = pit_df

# Convert summary results into a DataFrame
results_df = pd.DataFrame(all_results)

# Display win percentages
display(results_df["winner"].value_counts(normalize=True))

print(f"âœ… Completed {n_sims} simulations.")


: 

: 

### Blowout Games

In [None]:
# Find Blue Jays blowout wins
results_df[(results_df["Blue Jays"] - results_df["Dodgers"]) >= 10.head()


### Individual Game Finders

In [None]:
# Biggest Jays Blowout Win
game_id = "Game_4"

print(f"ðŸ“Š Hitting Boxscore â€“ {game_id}")
display(hitting_boxscores[game_id])

print(f"\nðŸŽ¯ Pitching Boxscore â€“ {game_id}")
display(pitching_boxscores[game_id])



### Visualize Results

### Write Results to CSV

In [None]:
# results_df.to_csv(f"data/sim_results_{n_sims}.csv", index=False)
# # save each boxscore separately if you want to inspect later
# import pickle
# with open("data/sim_boxscores.pkl", "wb") as f:
#     pickle.dump(boxscores, f)
