In [1]:
# requirements: pip install MLB-StatsAPI pandas requests

import json
import unicodedata
from urllib.parse import urlencode
from urllib.request import urlopen, URLError

import pandas as pd
import requests

# Optional: MLB StatsAPI (pip install MLB-StatsAPI). Used if available.
try:
    import statsapi  # type: ignore
except Exception:
    statsapi = None


# ================================
# Schedule / probables utilities
# ================================
def opposing_probables_by_team(date_str: str, teams: list[str]) -> pd.DataFrame:
    """
    For each team name (e.g., 'Yankees', 'Mets'), find the game(s) on date_str (MM/DD/YYYY)
    and return the opponent and the opponent's probable starting pitcher.

    A team is matched when its name is a case-insensitive substring of the
    schedule's home or away team name, so a nickname is enough.

    Handles doubleheaders by returning one row per game.

    Returns a DataFrame with the columns team, opponent, opposing_probable,
    game_datetime, game_pk and doubleheader, sorted by team then game_datetime.
    A team without a game gets a single row whose opposing_probable is
    "No game found". An empty `teams` yields an empty frame with the same
    columns (no schedule lookup is performed).

    Raises RuntimeError when a lookup is needed but statsapi is not installed.
    """
    columns = ["team", "opponent", "opposing_probable", "game_datetime", "game_pk", "doubleheader"]

    # Nothing to look up: return a well-formed empty frame instead of letting
    # sort_values() fail on a frame that has no columns.
    if not teams:
        return pd.DataFrame(columns=columns)

    if statsapi is None:
        raise RuntimeError("statsapi is required for schedule lookup. Install with: pip install MLB-StatsAPI")

    games = statsapi.schedule(date=date_str)
    rows = []
    for team in teams:
        needle = team.lower()
        found_game = False

        for g in games:  # a team may appear more than once (doubleheader)
            # `or ""` keeps the match None-safe if a name is missing or null.
            home_name = (g.get("home_name") or "").lower()
            away_name = (g.get("away_name") or "").lower()

            if needle in home_name:
                # Our team is at home, so the opponent is the away side.
                opponent = g.get("away_name")
                opp_sp = g.get("away_probable_pitcher") or "TBD"
            elif needle in away_name:
                opponent = g.get("home_name")
                opp_sp = g.get("home_probable_pitcher") or "TBD"
            else:
                continue

            found_game = True
            rows.append({
                "team": team,
                "opponent": opponent,
                "opposing_probable": opp_sp,
                "game_datetime": g.get("game_datetime"),
                "game_pk": g.get("game_id") or g.get("game_pk"),
                "doubleheader": g.get("doubleheader"),  # flag passed through from statsapi
            })

        if not found_game:
            rows.append({
                "team": team,
                "opponent": None,
                "opposing_probable": "No game found",
                "game_datetime": None,
                "game_pk": None,
                "doubleheader": None,
            })

    df = pd.DataFrame(rows, columns=columns)
    return df.sort_values(["team", "game_datetime"]).reset_index(drop=True)


def opposing_probables_for_players(date_str: str, player_to_team: dict[str, str]) -> pd.DataFrame:
    """
    Convenience wrapper: give it {player_name: team_name}, get a table per player (handles DHs).

    The schedule is looked up once per distinct team via
    opposing_probables_by_team, then fanned out to every player on that team.

    Returns a DataFrame with the columns player, team, opponent,
    opposing_probable and game_datetime, sorted by team, player and
    game_datetime. A player whose team has no row in the schedule table gets
    a single "No game found" row. An empty mapping yields an empty frame with
    the same columns and performs no lookup.
    """
    columns = ["player", "team", "opponent", "opposing_probable", "game_datetime"]

    # Guard the empty case: sorting a column-less frame would raise KeyError.
    if not player_to_team:
        return pd.DataFrame(columns=columns)

    teams = sorted(set(player_to_team.values()))
    team_df = opposing_probables_by_team(date_str, teams)

    # Group once so each player is a dictionary lookup rather than a full
    # scan of team_df.
    game_fields = ["opponent", "opposing_probable", "game_datetime"]
    games_by_team = {
        team: grp[game_fields].to_dict("records")
        for team, grp in team_df.groupby("team", sort=False)
    }
    no_game = {"opponent": None, "opposing_probable": "No game found", "game_datetime": None}

    out_rows = []
    for player, team in player_to_team.items():
        games = games_by_team.get(team) or [no_game]
        out_rows.extend({"player": player, "team": team, **game} for game in games)

    out = pd.DataFrame(out_rows, columns=columns)
    return out.sort_values(["team", "player", "game_datetime"]).reset_index(drop=True)


# ================================
# Pitcher ID / IP helpers
# ================================
def _strip_accents(s: str) -> str:
    return unicodedata.normalize("NFKD", s).encode("ascii", "ignore").decode("ascii")


def _requests_json(url: str, params: dict) -> dict:
    """GET *url* with *params* and return the decoded JSON object.

    This is a best-effort helper: a transport error, a non-2xx status, a
    body that is not valid JSON, or a JSON document whose top level is not
    an object all yield an empty dict, so callers can chain `.get(...)`
    without further checks. Other exceptions (e.g. a programming error in
    the arguments) are deliberately not swallowed.
    """
    try:
        r = requests.get(url, params=params, timeout=10)
        r.raise_for_status()
        data = r.json()
    except (requests.RequestException, ValueError):
        # RequestException covers connection/timeout/HTTP-status failures;
        # ValueError covers JSON decoding failures.
        return {}
    # Honour the declared return type even if the endpoint returns a list.
    return data if isinstance(data, dict) else {}


def fetch_person_id(name: str) -> int | None:
    """Robust name → MLB personId. Tries statsapi, then raw HTTP search endpoints (with accent fallbacks)."""
    # 1) Try MLB-StatsAPI if available
    if statsapi is not None:
        try:
            people = statsapi.lookup_player(name) or []
            if people:
                # prefer exact full name match (case-insensitive)
                exact = [p for p in people if str(p.get("fullName", "")).lower() == name.lower()]
                # else prefer pitchers; else first result
                pool = exact or [p for p in people if p.get("primaryPosition", {}).get("abbreviation") == "P"] or people
                pid = pool[0].get("id")
                if pid:
                    return int(pid)
        except Exception:
            pass

    # 2) MLB public endpoints via requests
    attempts = [
        ("https://statsapi.mlb.com/api/v1/people",        {"search": name,                 "sportId": 1}, "people"),
        ("https://statsapi.mlb.com/api/v1/people/search", {"names":  name},                               "searchPeople"),
        ("https://statsapi.mlb.com/api/v1/people",        {"search": _strip_accents(name), "sportId": 1}, "people"),
        ("https://statsapi.mlb.com/api/v1/people/search", {"names":  _strip_accents(name)},               "searchPeople"),
    ]
    for url, params, root in attempts:
        data = _requests_json(url, params)
        arr = data.get(root, []) if isinstance(data, dict) else []
        if not arr:
            continue
        first = arr[0]
        pid = first.get("id") or first.get("personId")
        if pid:
            return int(pid)
    return None


def fetch_season_outs(person_id: int, year: int) -> int:
    """Return the pitcher's regular-season (gameType=R) outs for *year*.

    The value is read from the first split of the first stats group in the
    response, using the "outs" field and falling back to "outsPitched".
    A falsy person_id, a failed request, or a response without usable data
    all yield 0.
    """
    if not person_id:
        return 0

    endpoint = f"https://statsapi.mlb.com/api/v1/people/{person_id}/stats"
    query = {"stats": "season", "group": "pitching", "season": year, "gameType": "R"}
    try:
        response = requests.get(endpoint, params=query, timeout=10)
        response.raise_for_status()
        payload = response.json()

        stat_groups = payload.get("stats", []) or []
        if not stat_groups:
            return 0

        split_list = stat_groups[0].get("splits", [])
        if not split_list:
            return 0

        stat_line = split_list[0].get("stat", {})
        for field in ("outs", "outsPitched"):
            value = stat_line.get(field)
            if value:
                return int(value)
        return 0
    except Exception:
        # Best effort: any transport or parsing problem counts as "no data".
        return 0


def outs_to_baseball_ip(outs: int) -> str:
    """Format an out count in baseball innings notation.

    The digit after the point is the number of leftover outs (0-2), not a
    decimal fraction: 523 outs -> '174.1' (174 innings plus 1 out).
    """
    full_innings, extra_outs = divmod(outs, 3)
    return f"{full_innings}.{extra_outs}"


def attach_pitcher_ip(pairs_df: pd.DataFrame, year_curr=2025, year_prev=2024) -> pd.DataFrame:
    """Add person_id, outs_YYYY and ip_YYYY (baseball notation) to a pairs table.

    Each distinct value of pairs_df["opposing_probable"] is resolved once via
    fetch_person_id, and its outs for both seasons are fetched via
    fetch_season_outs. The result is left-merged back on "opposing_probable",
    so every input row is kept.

    Placeholder values ("TBD", "No game found", blank, or a stringified
    None/NaN) are not looked up; they get person_id None and 0 outs.
    An empty pairs_df returns an empty frame that still carries the new
    columns.
    """
    # Values that stand for "no pitcher known" and must never hit the network.
    placeholders = {"", "tbd", "no game found", "none", "nan"}

    outs_curr_col, outs_prev_col = f"outs_{year_curr}", f"outs_{year_prev}"
    ip_curr_col, ip_prev_col = f"ip_{year_curr}", f"ip_{year_prev}"
    # dict.fromkeys de-duplicates (order-preserving) in case both years are equal.
    columns = list(dict.fromkeys(
        ["opposing_probable", "person_id", outs_curr_col, outs_prev_col, ip_curr_col, ip_prev_col]
    ))

    # Distinct names, in first-seen order.
    names = list(dict.fromkeys(pairs_df["opposing_probable"].astype(str)))

    rows = []
    for n in names:
        pid = None if n.strip().casefold() in placeholders else fetch_person_id(n)
        o_curr = fetch_season_outs(pid, year_curr)
        o_prev = fetch_season_outs(pid, year_prev)
        rows.append({
            "opposing_probable": n,
            "person_id": pid,
            outs_curr_col: o_curr,
            outs_prev_col: o_prev,
            ip_curr_col: outs_to_baseball_ip(o_curr),
            ip_prev_col: outs_to_baseball_ip(o_prev),
        })

    # Explicit columns keep the merge key present even when `rows` is empty.
    ip_df = pd.DataFrame(rows, columns=columns)
    return pairs_df.merge(ip_df, on="opposing_probable", how="left")


def filter_experience(df_with_ip: pd.DataFrame, min_total_ip=150, year_curr=2025, year_prev=2024):
    """Split rows by the pitcher's combined innings over the two seasons.

    Works on a copy of df_with_ip whose outs_<year_curr> and outs_<year_prev>
    columns have missing values replaced by 0 and are cast to int. Combined
    innings are (outs_curr + outs_prev) / 3.

    Returns a tuple (qualified, dropped): rows with combined innings
    >= min_total_ip, and rows below it.
    """
    curr_col = f"outs_{year_curr}"
    prev_col = f"outs_{year_prev}"

    frame = df_with_ip.copy()
    for col in (curr_col, prev_col):
        frame[col] = frame[col].fillna(0).astype(int)

    # Three outs per inning.
    combined_ip = frame[curr_col].add(frame[prev_col]) / 3.0

    meets_threshold = combined_ip >= min_total_ip
    below_threshold = combined_ip < min_total_ip
    return frame[meets_threshold].copy(), frame[below_threshold].copy()


# ================================
# Players → Teams mapping
# (kept as provided)
# ================================
player_to_team = {
    # Maps a hitter's full name to the team name used for the schedule lookup.
    # The team value is matched as a case-insensitive substring of the
    # schedule's home/away team names, so the club nickname is sufficient.
    # NOTE(review): this mapping is hand-maintained. A few entries (e.g.
    # Nathaniel Lowe, Josh Naylor, Gavin Sheets) may not match the club the
    # player was on for the run date -- confirm against current rosters.

    # Yankees
    "Aaron Judge": "Yankees",

    # Mets
    "Jeff McNeil": "Mets",
    "Pete Alonso": "Mets",
    "Francisco Lindor": "Mets",
    "Brandon Nimmo": "Mets",
    "Juan Soto": "Mets",

    # Braves
    "Ozzie Albies": "Braves",
    "Matt Olson": "Braves",
    "Marcell Ozuna": "Braves",
    "Austin Riley": "Braves",

    # Rangers
    "Marcus Semien": "Rangers",
    "Nathaniel Lowe": "Rangers",
    "Josh Smith": "Rangers",

    # Phillies
    "Nick Castellanos": "Phillies",
    "Trea Turner": "Phillies",
    "Kyle Schwarber": "Phillies",

    # Brewers
    "William Contreras": "Brewers",
    "Brice Turang": "Brewers",

    # Dodgers
    "Mookie Betts": "Dodgers",
    "Andy Pages": "Dodgers",

    # Red Sox
    "Jarren Duran": "Red Sox",

    # Blue Jays
    "Bo Bichette": "Blue Jays",
    "Ernie Clement": "Blue Jays",

    # Astros
    "Christian Walker": "Astros",

    # Cubs
    "Nico Hoerner": "Cubs",
    "Dansby Swanson": "Cubs",
    "Kyle Tucker": "Cubs",

    # Padres
    "Xander Bogaerts": "Padres",
    "Jake Cronenworth": "Padres",
    "Manny Machado": "Padres",
    "Josh Naylor": "Padres",

    # Reds
    "Spencer Steer": "Reds",

    # Royals (this entry was previously listed under the Reds heading)
    "Jonathan India": "Royals",

    # Athletics
    "Brent Rooker": "Athletics",
    "Lawrence Butler": "Athletics",

    # Mariners
    "Cal Raleigh": "Mariners",

    # Angels
    "Taylor Ward": "Angels",
    "Nolan Schanuel": "Angels",

    # Guardians
    "Steven Kwan": "Guardians",

    # Pirates
    "Bryan Reynolds": "Pirates",

    # Cardinals
    "Masyn Winn": "Cardinals",
    "Brendan Donovan": "Cardinals",
    "Alec Burleson": "Cardinals",

    # Marlins
    "Otto Lopez": "Marlins",

    # Nationals
    "CJ Abrams": "Nationals",

    # Orioles
    "Gunnar Henderson": "Orioles",

    # White Sox
    "Gavin Sheets": "White Sox",

    # Tigers
    "Gleyber Torres": "Tigers",

    # Giants
    "Willy Adames": "Giants",
    "Rafael Devers": "Giants"

}


# ================================
# Main
# ================================
if __name__ == "__main__":
    # Slate date in the MM/DD/YYYY form expected by the schedule lookup.
    date_str = "07/06/2025"

    # 1) One row per (player, game): opponent and its probable starter.
    df = opposing_probables_for_players(date_str, player_to_team)
    print(df.to_string(index=False))

    # 2) Resolve each probable starter to an MLB id and pull 2025/2024 innings.
    df_ip = attach_pitcher_ip(df, year_curr=2025, year_prev=2024)

    # Print one line per pitcher so the resolved ids can be eyeballed.
    cols_lookup = ["opposing_probable", "person_id", "ip_2025", "ip_2024"]
    print("\nPitcher IP lookup:")
    print(df_ip[cols_lookup].drop_duplicates().sort_values("opposing_probable").to_string(index=False))

    # 3) Split on combined experience: IP_2025 + IP_2024 >= 150.
    qualified, dropped = filter_experience(df_ip, min_total_ip=150, year_curr=2025, year_prev=2024)

    show_cols = ["player", "team", "opponent", "opposing_probable", "ip_2025", "ip_2024", "game_datetime"]

    print("\nQualified hitter–pitcher pairs (SP experience OK):")
    print("(none)" if qualified.empty else qualified[show_cols].to_string(index=False))

    print("\nDropped (SP lacks combined experience):")
    print("(none)" if dropped.empty else dropped[show_cols].to_string(index=False))

           player      team              opponent    opposing_probable        game_datetime
   Nolan Schanuel    Angels     Toronto Blue Jays        Kevin Gausman 2025-07-06T17:37:00Z
      Taylor Ward    Angels     Toronto Blue Jays        Kevin Gausman 2025-07-06T17:37:00Z
 Christian Walker    Astros   Los Angeles Dodgers        Emmet Sheehan 2025-07-06T20:10:00Z
     Brent Rooker Athletics  San Francisco Giants      Hayden Birdsong 2025-07-07T02:05:00Z
  Lawrence Butler Athletics  San Francisco Giants      Hayden Birdsong 2025-07-07T02:05:00Z
      Bo Bichette Blue Jays    Los Angeles Angels       Tyler Anderson 2025-07-06T17:37:00Z
    Ernie Clement Blue Jays    Los Angeles Angels       Tyler Anderson 2025-07-06T17:37:00Z
     Austin Riley    Braves     Baltimore Orioles        Trevor Rogers 2025-07-06T15:35:00Z
    Marcell Ozuna    Braves     Baltimore Orioles        Trevor Rogers 2025-07-06T15:35:00Z
       Matt Olson    Braves     Baltimore Orioles        Trevor Rogers 2025-07-0