In [1]:
import pandas as pd
from bs4 import BeautifulSoup
import requests
import statsapi
from concurrent.futures import ThreadPoolExecutor
from pybaseball import team_batting
import sys
import datetime

In [7]:
def parse_pitcher_stats(raw_data, name):
    lines = [line.strip() for line in raw_data.split('\n') if line.strip()]
    relevant_lines = lines[2:]
    data = {}
    print(relevant_lines)
    for line in relevant_lines:
        key, value = line.split(':')
        data[key.strip()] = value.strip()
    specific_fields = {
        "Name": name,
        "GP": data.get("gamesPlayed"),
        "AB": data.get("atBats"),
        "AVG": data.get("avg"),
        "S%": data.get("strikePercentage"),
        "P/I": data.get("pitchesPerInning"),
        "K": data.get("strikeOuts"),
        "K/9": data.get("strikeoutsPer9Inn")
    }
    return specific_fields

In [8]:
def fetch_pitcher_stats(name, team, opponent):
    try:
        player = statsapi.lookup_player(name)
        if not player:
            raise ValueError(f"Player {name} not found")
        player_id = player[0]['id']
        stats = statsapi.player_stats(player_id, group="[pitching]", type="season")
        pitcher_stats = parse_pitcher_stats(stats, name)
        pitcher_stats["Opponent"] = opponent
        return pitcher_stats
    except Exception as e:
        return {"Name": name, "Team": team, "Opponent": opponent, "Error": str(e)}

def get_team_full_name(abbreviation):
    team_mapping = {
        "SEA": "Seattle Mariners",
        "OAK": "Oakland Athletics",
        "CIN": "Cincinnati Reds",
        "BOS": "Boston Red Sox",
        "COL": "Colorado Rockies",
        "PIT": "Pittsburgh Pirates",
        "TBR": "Tampa Bay Rays",
        "DET": "Detroit Tigers",
        "MIN": "Minnesota Twins",
        "CHC": "Chicago Cubs",
        "ATL": "Atlanta Braves",
        "MIL": "Milwaukee Brewers",
        "CHW": "Chicago White Sox",
        "LAA": "Los Angeles Angels",
        "STL": "St. Louis Cardinals",
        "WSN": "Washington Nationals",
        "LAD": "Los Angeles Dodgers",
        "PHI": "Philadelphia Phillies",
        "BAL": "Baltimore Orioles",
        "SFG": "San Francisco Giants",
        "MIA": "Miami Marlins",
        "TEX": "Texas Rangers",
        "NYM": "New York Mets",
        "ARI": "Arizona Diamondbacks",
        "CLE": "Cleveland Guardians",
        "TOR": "Toronto Blue Jays",
        "NYY": "New York Yankees",
        "SDP": "San Diego Padres",
        "KCR": "Kansas City Royals",
        "HOU": "Houston Astros"
    }
    return team_mapping.get(abbreviation, "Unknown")

In [9]:
def get_pitcher_data(pitcher_div):
    player_info = pitcher_div.find('div', class_='player-info')
    if not player_info:
        return 'Unknown',"N/A","N/A"
    
    name_tag = player_info.find('h3').find('a')
    name = name_tag.get_text(strip=True) if name_tag else 'Unknown'
    
    probable_stats = pitcher_div.find('p', class_='probable-stats')
    if probable_stats:
        table = probable_stats.find('table', class_='pitcher-stats')
        if table:
            rows = table.find_all('tr')
            if len(rows) > 1:
                data_row = rows[1].find_all('td')
                if len(data_row) >= 2:
                    pa = data_row[0].get_text(strip=True)
                    k_percentage = data_row[1].get_text(strip=True)
                    return name, pa, k_percentage
    return name, 0, 0 

In [10]:
def getOppData(date):
    date_obj = datetime.datetime.strptime(date, "%m/%d/%Y")
    converted_date = date_obj.strftime("%Y-%m-%d")
    url = f"https://baseballsavant.mlb.com/probable-pitchers?date={converted_date}"
    response = requests.get(url)

    data = []

    if response.status_code == 200:
        soup = BeautifulSoup(response.content, 'html.parser')
        blocks = soup.find_all('div', class_='mod')

        for block in blocks:
            cols = block.find_all('div', class_='col')
            for col in cols:
                name, pa, k_percentage = get_pitcher_data(col)
                data.append({"Pitcher": name, "PA": pa, "K%": k_percentage})
    else:
        print(f"Failed to retrieve the webpage. Status code: {response.status_code}")

    df = pd.DataFrame(data)
    return df

In [11]:
def main(date):
    # Fetch the schedule for the given date
    sched = statsapi.schedule(start_date=date, end_date=date)
    pitcher_tasks = []

    # Collect tasks for fetching pitcher stats
    for game in sched:
        away_team, home_team = game['away_name'], game['home_name']
        if game['away_probable_pitcher']:
            pitcher_tasks.append((game['away_probable_pitcher'], away_team, home_team))
        if game['home_probable_pitcher']:
            pitcher_tasks.append((game['home_probable_pitcher'], home_team, away_team))

    # Fetch pitcher stats using ThreadPoolExecutor
    with ThreadPoolExecutor() as executor:
        results = [future.result() for future in 
                   [executor.submit(fetch_pitcher_stats, pitcher, team, opponent) 
                    for pitcher, team, opponent in pitcher_tasks]]

    # Prepare team batting dataframe
    df = team_batting(2024)
    df['SO/AB'] = (100 * df['SO'] / df['AB'])
    df = df[['Team', 'SO/AB']].sort_values(by='SO/AB', ascending=False)
    df['Team'] = df['Team'].apply(get_team_full_name)

    # Merge pitcher stats with team batting data
    main_df = pd.DataFrame(results)
    merged = pd.merge(main_df, df, left_on='Opponent', right_on='Team', how='left')
    
    # Get opponent data and merge it
    opp_df = getOppData(date)
    pitchers = pd.merge(merged, opp_df, left_on='Name', right_on='Pitcher', how='left')
    pitchers.drop(columns=['Team'], inplace=True)
    
    # Calculate additional metrics
    pitchers['AB/GP'] = pitchers['AB'].astype(float) / pitchers['GP'].astype(float)
    pitchers['K/AB'] = 100 * (pitchers['K'].astype(float) / pitchers['AB'].astype(float))
    pitchers = pitchers.sort_values(by=['SO/AB'], ascending=False)
    
    # Select and style the final columns
    pitchers = pitchers[['Name', 'GP', 'AB', 'K', 'AVG', 'S%', 'P/I', 'K/9', 'AB/GP', 'K/AB', 'PA', 'K%', 'SO/AB', 'Opponent']]
    
    styled_pitchers = pitchers.style.background_gradient(cmap='YlGnBu', subset=['SO/AB', "AB/GP", "K/AB", 'K%', 'PA'])
    styled_pitchers = (styled_pitchers
                       .format({'SO/AB': '{:.2f}', 'AB/GP': '{:.1f}', 'K/AB': '{:.2f}'})
                       .set_properties(**{'text-align': 'center'})
                       .set_table_styles(
                           [{'selector': 'th', 'props': [('font-size', '14px'), ('background-color', '#f4f4f4')]},
                            {'selector': 'td', 'props': [('padding', '6px'), ('border', '1px solid #ddd')]}]))
    return styled_pitchers

['gamesPlayed: 10', 'gamesStarted: 8', 'groundOuts: 55', 'airOuts: 56', 'runs: 18', 'doubles: 8', 'triples: 1', 'homeRuns: 4', 'strikeOuts: 36', 'baseOnBalls: 11', 'intentionalWalks: 0', 'hits: 46', 'hitByPitch: 1', 'avg: .238', 'atBats: 193', 'obp: .283', 'slg: .352', 'ops: .635', 'caughtStealing: 0', 'stolenBases: 1', 'stolenBasePercentage: 1.000', 'groundIntoDoublePlay: 6', 'numberOfPitches: 751', 'era: 2.84', 'inningsPitched: 50.2', 'wins: 5', 'losses: 2', 'saves: 0', 'saveOpportunities: 0', 'holds: 1', 'blownSaves: 0', 'earnedRuns: 16', 'whip: 1.13', 'battersFaced: 205', 'outs: 152', 'gamesPitched: 10', 'completeGames: 0', 'shutouts: 0', 'strikes: 507', 'strikePercentage: .680', 'hitBatsmen: 1', 'balks: 0', 'wildPitches: 0', 'pickoffs: 0', 'totalBases: 68', 'groundOutsToAirouts: 0.98', 'winPercentage: .714', 'pitchesPerInning: 14.82', 'gamesFinished: 0', 'strikeoutWalkRatio: 3.27', 'strikeoutsPer9Inn: 6.39', 'walksPer9Inn: 1.95', 'hitsPer9Inn: 8.17', 'runsScoredPer9: 3.20', 'homeR

Unnamed: 0,Name,GP,AB,K,AVG,S%,P/I,K/9,AB/GP,K/AB,PA,K%,SO/AB,Opponent
18,Framber Valdez,7,154,34,0.279,0.63,14.86,7.34,22.0,22.08,154,23.4,31.07,Seattle Mariners
8,Lance Lynn,11,221,53,0.244,0.61,17.23,8.32,20.1,23.98,55,32.7,28.86,Cincinnati Reds
1,Cole Irvin,10,193,36,0.238,0.68,14.82,6.39,19.3,18.65,47,19.1,28.29,Boston Red Sox
6,Xzavion Curry,3,53,8,0.283,0.67,16.82,5.68,17.7,15.09,8,12.5,27.69,Colorado Rockies
13,Robert Gasser,4,84,13,0.25,0.71,14.0,5.09,21.0,15.48,0,0.0,26.59,Chicago Cubs
12,Justin Steele,6,121,31,0.223,0.68,14.94,8.63,20.2,25.62,77,31.2,26.02,Milwaukee Brewers
2,Alec Marsh,9,186,43,0.21,0.65,15.62,7.74,20.7,23.12,16,25.0,25.95,Minnesota Twins
4,Chris Bassitt,11,231,56,0.264,0.64,17.45,8.35,21.0,24.24,73,26.0,25.82,Chicago White Sox
9,Nick Lodolo,7,148,46,0.196,0.66,15.67,10.26,21.1,31.08,16,31.3,25.66,St. Louis Cardinals
10,Mitchell Parker,8,167,38,0.246,0.69,15.16,7.71,20.9,22.75,0,0.0,25.47,Atlanta Braves


In [12]:
main("5/27/2024") 

['gamesPlayed: 11', 'gamesStarted: 11', 'groundOuts: 52', 'airOuts: 73', 'runs: 25', 'doubles: 13', 'triples: 1', 'homeRuns: 7', 'strikeOuts: 73', 'baseOnBalls: 9', 'intentionalWalks: 0', 'hits: 54', 'hitByPitch: 1', 'avg: .214', 'atBats: 252', 'obp: .244', 'slg: .357', 'ops: .601', 'caughtStealing: 0', 'stolenBases: 7', 'stolenBasePercentage: 1.000', 'groundIntoDoublePlay: 3', 'numberOfPitches: 1024', 'era: 2.96', 'inningsPitched: 67.0', 'wins: 4', 'losses: 3', 'saves: 0', 'saveOpportunities: 0', 'holds: 0', 'blownSaves: 0', 'earnedRuns: 22', 'whip: 0.94', 'battersFaced: 262', 'outs: 201', 'gamesPitched: 11', 'completeGames: 0', 'shutouts: 0', 'strikes: 702', 'strikePercentage: .690', 'hitBatsmen: 1', 'balks: 0', 'wildPitches: 2', 'pickoffs: 0', 'totalBases: 90', 'groundOutsToAirouts: 0.71', 'winPercentage: .571', 'pitchesPerInning: 15.28', 'gamesFinished: 0', 'strikeoutWalkRatio: 8.11', 'strikeoutsPer9Inn: 9.81', 'walksPer9Inn: 1.21', 'hitsPer9Inn: 7.25', 'runsScoredPer9: 3.36', 'hom

Unnamed: 0,Name,GP,AB,K,AVG,S%,P/I,K/9,AB/GP,K/AB,PA,K%,SO/AB,Opponent
18,Framber Valdez,7,154,34,0.279,0.63,14.86,7.34,22.0,22.08,154,23.4,31.07,Seattle Mariners
8,Lance Lynn,11,221,53,0.244,0.61,17.23,8.32,20.1,23.98,55,32.7,28.86,Cincinnati Reds
1,Cole Irvin,10,193,36,0.238,0.68,14.82,6.39,19.3,18.65,47,19.1,28.29,Boston Red Sox
6,Xzavion Curry,3,53,8,0.283,0.67,16.82,5.68,17.7,15.09,8,12.5,27.69,Colorado Rockies
13,Robert Gasser,4,84,13,0.25,0.71,14.0,5.09,21.0,15.48,0,0.0,26.59,Chicago Cubs
12,Justin Steele,6,121,31,0.223,0.68,14.94,8.63,20.2,25.62,77,31.2,26.02,Milwaukee Brewers
2,Alec Marsh,9,186,43,0.21,0.65,15.62,7.74,20.7,23.12,16,25.0,25.95,Minnesota Twins
4,Chris Bassitt,11,231,56,0.264,0.64,17.45,8.35,21.0,24.24,73,26.0,25.82,Chicago White Sox
9,Nick Lodolo,7,148,46,0.196,0.66,15.67,10.26,21.1,31.08,16,31.3,25.66,St. Louis Cardinals
10,Mitchell Parker,8,167,38,0.246,0.69,15.16,7.71,20.9,22.75,0,0.0,25.47,Atlanta Braves
