In [1]:
from datetime import date
import pandas as pd
from pybaseball import playerid_lookup, statcast_batter

def get_player_id(last, first):
    """Look up a player by name and return their MLBAM id as an int.

    Args:
        last: Last name passed through to ``playerid_lookup``.
        first: First name passed through to ``playerid_lookup``.

    Returns:
        The ``key_mlbam`` value of the first row of the lookup result,
        converted to ``int``.

    Raises:
        ValueError: If the lookup result is empty.
    """
    matches = playerid_lookup(last, first)
    if not matches.empty:
        # Several rows may match; the first row is the one that is used.
        top_match = matches.iloc[0]
        return int(top_match["key_mlbam"])
    raise ValueError("Player not found.")

def batter_statcast_by_park(mlbam_id: int, years=(2021, 2022)) -> pd.DataFrame:
    """Summarise a batter's Statcast expected stats by year and venue.

    Pulls pitch-level rows from ``statcast_batter`` for each requested year,
    keeps only the rows that carry both expected wOBA and expected SLG, and
    averages the expected stats per (year, venue).

    Args:
        mlbam_id: MLBAM player id handed to ``statcast_batter``.
        years: Iterable of seasons to pull (a single ``int`` is also accepted).

    Returns:
        DataFrame with columns ``year``, ``venue``, ``PA``, ``xwOBA_mean``,
        ``xSLG_mean`` and ``xBA_mean``, sorted by year ascending and then by
        ``PA`` descending. Despite its name, ``PA`` is the number of retained
        rows in the split (rows without xwOBA/xSLG are dropped first), not
        plate appearances. Means are rounded to 3 decimals. ``xBA_mean`` is
        NaN when the source data has no expected-BA column. An empty frame
        with these columns is returned when no data comes back at all.

    Raises:
        KeyError: If non-empty data lacks ``game_date``, the venue column, or
            the expected wOBA / SLG columns.
    """
    # Column names for expected stats (as used by Baseball Savant exports).
    xwoba_col = "estimated_woba_using_speedangle"
    xslg_col = "estimated_slg_using_speedangle"
    xba_col = "estimated_ba_using_speedangle"
    out_cols = ["year", "venue", "PA", "xwOBA_mean", "xSLG_mean", "xBA_mean"]

    if isinstance(years, int):
        years = (years,)

    # One pull per calendar year (Jan 1 - Dec 31). Which game types come back
    # is decided by statcast_batter, not by this window.
    pulls = [statcast_batter(f"{y}-01-01", f"{y}-12-31", mlbam_id) for y in years]
    pulls = [p for p in pulls if p is not None and not p.empty]
    if not pulls:
        # Nothing requested or nothing returned: give callers a stable schema
        # instead of failing inside pd.concat / column lookups.
        return pd.DataFrame(columns=out_cols)
    data = pd.concat(pulls, ignore_index=True)

    # Prefer venue name if present; otherwise fall back to home_team as a
    # proxy for the park (a proxy only: it cannot tell apart a team's
    # temporary or neutral-site home games).
    venue_col = "venue_name" if "venue_name" in data.columns else "home_team"

    missing = [
        c for c in ("game_date", venue_col, xwoba_col, xslg_col)
        if c not in data.columns
    ]
    if missing:
        raise KeyError(f"Statcast data is missing required column(s): {missing}")
    has_xba = xba_col in data.columns

    # Keep only rows where the expected stats exist, and derive the grouping
    # keys on a fresh frame (avoids assigning into a slice of `data`).
    batted = data.dropna(subset=[xwoba_col, xslg_col]).assign(
        year=lambda d: pd.to_datetime(d["game_date"]).dt.year,
        venue=lambda d: d[venue_col],
    )

    agg_spec = {
        "PA": ("game_date", "count"),
        "xwOBA_mean": (xwoba_col, "mean"),
        "xSLG_mean": (xslg_col, "mean"),
    }
    if has_xba:
        agg_spec["xBA_mean"] = (xba_col, "mean")

    agg = batted.groupby(["year", "venue"]).agg(**agg_spec).reset_index()
    if not has_xba:
        # Report "unknown" rather than passing a row count off as an average.
        agg["xBA_mean"] = float("nan")

    agg = agg[out_cols].sort_values(["year", "PA"], ascending=[True, False])

    # Round the averages for readability.
    mean_cols = ["xwOBA_mean", "xSLG_mean", "xBA_mean"]
    agg[mean_cols] = agg[mean_cols].round(3)
    return agg

# ---- Example: Mike Trout 2021–2022 ----
# Resolve the MLBAM id from the player's name, aggregate expected stats per
# year and venue for the two seasons, then print the first 12 summary rows.
mlbam = get_player_id("trout", "mike")
by_park = batter_statcast_by_park(mlbam, years=(2021, 2022))
print(by_park.head(12))


Gathering player lookup table. This may take a moment.
Gathering Player Data
Gathering Player Data
    year venue   PA  xwOBA_mean  xSLG_mean  xBA_mean
3   2021   LAA   30       0.630      1.016     0.466
5   2021   TEX   11       0.432      0.565     0.408
4   2021   SEA    9       0.511      0.727     0.450
6   2021   TOR    8       0.775      1.246     0.601
2   2021    KC    7       0.678      1.048     0.568
0   2021   BOS    6       0.264      0.344     0.272
1   2021   HOU    4       0.513      0.839     0.383
13  2022   LAA  143       0.566      0.912     0.408
22  2022   TEX   30       0.597      0.895     0.475
18  2022   OAK   18       0.347      0.515     0.291
7   2022   BAL   13       0.581      0.971     0.397
16  2022   MIN   10       0.630      1.032     0.459
