In [1]:
import pandas as pd
import statsapi
import json
from concurrent.futures import ThreadPoolExecutor
import re
from pybaseball import team_batting

# Function to parse raw pitcher stats and return specific fields as a dictionary
def getKStats(raw_data, name):
    """Extract selected pitching fields from a ``key: value`` text report.

    Blank lines are dropped and the first two remaining lines are skipped
    (presumably the player header and the stat-group title - confirm against
    the report format). Every later line is split on its FIRST colon; lines
    without a colon are ignored rather than aborting the whole parse, and a
    value that itself contains a colon is kept intact.

    Args:
        raw_data: Multi-line report text.
        name: Pitcher name, copied verbatim into the result under "Name".

    Returns:
        dict with keys "Name", "GP", "AB", "AVG", "S%", "P/I", "K" and "K/9".
        Stat values are the stripped strings found in the report, or None
        when the corresponding field is absent.
    """
    lines = [line.strip() for line in raw_data.split('\n') if line.strip()]

    data = {}
    for line in lines[2:]:
        # partition never raises: sep is '' when the line has no colon.
        key, sep, value = line.partition(':')
        if not sep:
            continue
        # A repeated key overwrites the earlier value (last occurrence wins).
        data[key.strip()] = value.strip()

    specific_fields = {
        "Name": name,
        "GP": data.get("gamesPlayed"),
        "AB": data.get("atBats"),
        "AVG": data.get("avg"),
        "S%": data.get("strikePercentage"),
        "P/I": data.get("pitchesPerInning"),
        "K": data.get("strikeOuts"),
        "K/9": data.get("strikeoutsPer9Inn")
    }
    return specific_fields

# Look up one pitcher and summarise his season pitching line as a dictionary
def getPitcherStats(name, team, opponent):
    """Fetch season pitching stats for ``name`` and tag them with the opponent.

    The first match returned by ``statsapi.lookup_player`` is used. Any
    exception raised along the way (including "player not found") is caught
    and reported in the returned dictionary instead of propagating.

    Args:
        name: Pitcher name to look up.
        team: Pitcher's team; only included in the error dictionary.
        opponent: Opposing team name, stored under "Opponent".

    Returns:
        On success, the dictionary built by ``getKStats`` plus "Opponent"
        (no "Team" key). On failure, a dictionary with "Name", "Team",
        "Opponent" and "Error" (the exception message).
    """
    try:
        matches = statsapi.lookup_player(name)
        if not matches:
            raise ValueError(f"Player {name} not found")
        player_id = matches[0]['id']
        season_report = statsapi.player_stats(player_id, group="[pitching]", type="season")
        summary = getKStats(season_report, name)
        summary["Opponent"] = opponent
    except Exception as exc:
        return {"Name": name, "Team": team, "Opponent": opponent, "Error": str(exc)}
    else:
        return summary

# Pull the schedule for the single date of interest
sched = statsapi.schedule(start_date='05/25/2024', end_date='05/25/2024')

# One (pitcher, team, opponent) task per listed probable pitcher
pitcher_tasks = []

for game in sched:
    away_team, home_team = game['away_name'], game['home_name']
    away_pitcher, home_pitcher = game['away_probable_pitcher'], game['home_probable_pitcher']

    # Away starter first, then home; entries with a falsy pitcher value are skipped.
    candidates = [
        (away_pitcher, away_team, home_team),
        (home_pitcher, home_team, away_team),
    ]
    pitcher_tasks += [task for task in candidates if task[0]]

# Worker submitted to the thread pool: one call per (pitcher, team, opponent) task
def fetch_stats(pitcher_name, team_name, opponent_name):
    """Delegate to ``getPitcherStats`` and return its dictionary unchanged."""
    return getPitcherStats(name=pitcher_name, team=team_name, opponent=opponent_name)

with ThreadPoolExecutor() as executor:
    # Submit every task up front so the lookups can overlap
    futures = []
    for task in pitcher_tasks:
        futures.append(executor.submit(fetch_stats, *task))

    # Gather in submission order, so results[i] corresponds to pitcher_tasks[i]
    results = []
    for future in futures:
        results.append(future.result())


In [2]:
# Team batting for 2024, reduced to SO/AB (100 * SO / AB) and ranked highest first
df = (
    team_batting(2024)
    .assign(**{'SO/AB': lambda batting: 100 * batting['SO'] / batting['AB']})
    .loc[:, ['Team', 'SO/AB']]
    .sort_values(by='SO/AB', ascending=False)
)

# Lookup table from team abbreviation to full team name, ordered by abbreviation
_TEAM_NAME_BY_ABBREVIATION = {
    "ARI": "Arizona Diamondbacks",
    "ATL": "Atlanta Braves",
    "BAL": "Baltimore Orioles",
    "BOS": "Boston Red Sox",
    "CHC": "Chicago Cubs",
    "CHW": "Chicago White Sox",
    "CIN": "Cincinnati Reds",
    "CLE": "Cleveland Guardians",
    "COL": "Colorado Rockies",
    "DET": "Detroit Tigers",
    "HOU": "Houston Astros",
    "KCR": "Kansas City Royals",
    "LAA": "Los Angeles Angels",
    "LAD": "Los Angeles Dodgers",
    "MIA": "Miami Marlins",
    "MIL": "Milwaukee Brewers",
    "MIN": "Minnesota Twins",
    "NYM": "New York Mets",
    "NYY": "New York Yankees",
    "OAK": "Oakland Athletics",
    "PHI": "Philadelphia Phillies",
    "PIT": "Pittsburgh Pirates",
    "SDP": "San Diego Padres",
    "SEA": "Seattle Mariners",
    "SFG": "San Francisco Giants",
    "STL": "St. Louis Cardinals",
    "TBR": "Tampa Bay Rays",
    "TEX": "Texas Rangers",
    "TOR": "Toronto Blue Jays",
    "WSN": "Washington Nationals",
}


def map_abbreviation_to_full_name(abbreviation):
    """Return the full team name for ``abbreviation``.

    The lookup is an exact, case-sensitive match; anything not in the table
    yields the string "Unknown".
    """
    try:
        return _TEAM_NAME_BY_ABBREVIATION[abbreviation]
    except KeyError:
        return "Unknown"

# Swap abbreviations for full team names (unmapped values become "Unknown")
df = df.assign(Team=df['Team'].map(map_abbreviation_to_full_name))

In [4]:
# Convert list of dictionaries to DataFrame
main = pd.DataFrame(results)

# Attach each opponent's SO/AB rate. The team frame's key is renamed to
# 'Opponent' so the join uses one shared key column: when a failed lookup
# contributes a 'Team' key to `results`, a left_on/right_on merge would emit
# 'Team_x'/'Team_y' and a later drop of 'Team' would raise KeyError.
opponent_rates = df.rename(columns={'Team': 'Opponent'})
pitchers_table = pd.merge(main, opponent_rates, on='Opponent', how='left')

# Per-pitcher rates; stats arrive as strings (or None for failed lookups,
# which become NaN after the float conversion).
at_bats = pitchers_table['AB'].astype(float)
games = pitchers_table['GP'].astype(float)
strikeouts = pitchers_table['K'].astype(float)
pitchers_table = pitchers_table.assign(**{
    'AB/GP': at_bats / games,
    'K/AB': 100 * (strikeouts / at_bats),
})

# Rank by the opponent's SO/AB rate (highest first) and fix the display columns
pitchers_table = pitchers_table.sort_values(by='SO/AB', ascending=False)
pitchers_table = pitchers_table[['Name', 'GP', 'AB', 'K', 'AVG', 'S%', 'P/I','K/9', 'AB/GP','K/AB', 'SO/AB', 'Opponent']]

# `pitchers` is the styled view that the cell displays
pitchers = (
    pitchers_table.style
    .background_gradient(cmap='YlGnBu', subset=['SO/AB', "AB/GP", "K/AB"])
    .format({'SO/AB': '{:.2f}', 'AB/GP': '{:.1f}','K/AB': '{:.2f}'})
)

pitchers

Unnamed: 0,Name,GP,AB,K,AVG,S%,P/I,K/9,AB/GP,K/AB,SO/AB,Opponent
9,Trevor Williams,9,164,35,0.22,0.62,16.11,6.85,18.2,21.34,31.27,Seattle Mariners
12,Spencer Arrighetti,7,134,36,0.313,0.65,19.07,9.92,19.1,26.87,29.01,Oakland Athletics
18,Walker Buehler,3,53,13,0.264,0.64,17.4,8.78,17.7,24.53,28.78,Cincinnati Reds
16,Colin Rea,9,197,35,0.259,0.64,16.23,6.47,21.9,17.77,28.37,Boston Red Sox
10,Reynaldo López,8,164,44,0.201,0.66,14.68,8.49,20.5,26.83,27.9,Pittsburgh Pirates
22,Aaron Nola,10,240,60,0.208,0.66,14.8,8.31,24.0,25.0,27.85,Colorado Rockies
14,Brady Singer,10,213,61,0.216,0.64,16.61,9.69,21.3,28.64,27.23,Tampa Bay Rays
21,Miles Mikolas,10,214,39,0.294,0.68,15.87,6.62,21.4,18.22,26.65,Chicago Cubs
0,José Berríos,10,216,48,0.227,0.66,15.15,7.16,21.6,22.22,26.44,Detroit Tigers
17,Nick Pivetta,5,97,30,0.206,0.68,14.44,10.13,19.4,30.93,26.13,Milwaukee Brewers
