In [1]:
import requests
import datetime
import statsapi
import pandas as pd

In [2]:
# Using mlb-statsapi to get mlb schedule up to the all-star break
# Create dataframe from list generated from api call
# Creating mapping of mlb team name -> team id

# Pull every scheduled game in the date window and load it into a frame.
sched = statsapi.schedule(start_date='04/01/2024', end_date='07/16/2024')
sched_df = pd.DataFrame(sched)

# One row per distinct away team id, ordered by team name, with the
# columns renamed to the generic 'id' / 'team'.
teams = (
    sched_df[['away_id', 'away_name']]
    .drop_duplicates(subset=['away_id'], keep='first')
    .sort_values(by='away_name', ascending=True)
    .rename(columns={'away_name': 'team', 'away_id': 'id'})
)



In [2]:
# MLB Schedule for current date

def print_schedule(schedule):
    """Print one line per game: date, both teams and their probable pitchers.

    Args:
        schedule (list): Game dicts from the schedule api call. Each dict
            must provide the keys 'game_date', 'away_name', 'home_name',
            'away_probable_pitcher' and 'home_probable_pitcher'.
    """
    for game in schedule:
        # Only single quotes are used inside the replacement fields: reusing
        # the f-string's own double quote is a SyntaxError before Python 3.12.
        print(
            f"Date: {game['game_date']} | "
            f"{game['away_name']} ({game['away_probable_pitcher']}) @ "
            f"{game['home_name']} ({game['home_probable_pitcher']})\n"
        )

# {get_pitcher_stats(games['away_probable_pitcher'])}


# Today's date rendered as MM/DD/YYYY, used as both ends of the schedule window.
today = f"{datetime.date.today():%m/%d/%Y}"
mlb_schedule = statsapi.schedule(start_date=today, end_date=today)

print_schedule(mlb_schedule)

Date: 2024-04-08 | Chicago White Sox (Tanner Banks) @ Cleveland Guardians (Triston McKenzie)

Date: 2024-04-08 | Miami Marlins (Jesús Luzardo) @ New York Yankees (Nestor Cortes)

Date: 2024-04-08 | Detroit Tigers (Reese Olson) @ Pittsburgh Pirates (Mitch Keller)

Date: 2024-04-08 | Milwaukee Brewers (Aaron Ashby) @ Cincinnati Reds (Graham Ashcraft)

Date: 2024-04-08 | Seattle Mariners (Luis Castillo) @ Toronto Blue Jays (José Berríos)

Date: 2024-04-08 | New York Mets (Julio Teheran) @ Atlanta Braves (Charlie Morton)

Date: 2024-04-08 | Los Angeles Dodgers (James Paxton) @ Minnesota Twins (Bailey Ober)

Date: 2024-04-08 | Philadelphia Phillies (Spencer Turnbull) @ St. Louis Cardinals (Miles Mikolas)

Date: 2024-04-08 | Houston Astros (Framber Valdez) @ Texas Rangers (Andrew Heaney)

Date: 2024-04-08 | Arizona Diamondbacks (Zac Gallen) @ Colorado Rockies (Kyle Freeland)

Date: 2024-04-08 | Tampa Bay Rays (Zach Eflin) @ Los Angeles Angels (Tyler Anderson)

Date: 2024-04-08 | Chicago Cubs

In [25]:
import re

# Get pitcher names from MLB schedule
# Store away/home pitcher names into dictionary
# Get pitcher stats and store into another dictionary

def get_pitcher_names(schedule):
    """Collect the probable pitchers listed for every game in a schedule.

    Args:
        schedule (list): Game dicts, each providing the keys
            'away_probable_pitcher' and 'home_probable_pitcher'.

    Returns:
        list: One {"away": ..., "home": ...} dict per game, in schedule order.
    """
    return [
        {"away": game["away_probable_pitcher"], "home": game["home_probable_pitcher"]}
        for game in schedule
    ]

# pitcher = statsapi.player_stats(next(x['id'] for x in statsapi.get('sports_players', {'season':2024})['people'] if x['fullName']=='Tanner Houck'), 'pitching')

def get_pitcher_stats(name, season=2024):
    """Look up a pitcher by full name and return their pitching stats.

    Args:
        name (str): Pitcher full name. It is compared for exact equality
            against the 'fullName' field of each entry in the player list.
        season (int): Season whose player list is searched for the name.
            Defaults to 2024.

    Returns:
        str: The result of statsapi.player_stats for the 'pitching' group.

    Raises:
        ValueError: If no entry in the player list has that full name
            (this includes an empty name).
    """
    # NOTE: the whole player list is requested again on every call.
    people = statsapi.get('sports_players', {'season': season})['people']

    # Give next() a default so a missing name does not leak StopIteration.
    player_id = next((p['id'] for p in people if p['fullName'] == name), None)
    if player_id is None:
        raise ValueError(f"No player named {name!r} found for the {season} season")

    return statsapi.player_stats(player_id, 'pitching')


def store_pitcher_stats(name, pitcher):
    """Parse a block of "key: value" stat text into a dict keyed by pitcher.

    Args:
        name (str): Pitcher name, used as the single key of the result.
        pitcher (str): Stat text to parse. Every "word: rest-of-line"
            occurrence becomes one stat entry; text without that shape is
            ignored.

    Returns:
        dict: {name: {stat_name: stat_value}}, with every value kept as the
        string found in the text.
    """
    # Group 1 is the stat name, group 2 everything up to the end of the line.
    pairs = re.findall(r"(\w+):\s([^\n]+)", pitcher)
    return {name: dict(pairs)}

def print_pitcher_stats(names):
    """Placeholder for printing pitcher stats; not implemented yet.

    Args:
        names: Currently unused.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError()


# Gather stats for every probable pitcher on today's schedule.
pitchers = get_pitcher_names(mlb_schedule)
pitcher_stats = {}
for pitcher in pitchers:
    # The away and home starters go through exactly the same lookup.
    for side in ("away", "home"):
        name = pitcher[side]
        # NOTE(review): a starter that has not been announced yet appears to
        # come back from the schedule call as an empty name - confirm. A blank
        # name cannot match any player, so skip it rather than abort the cell.
        if not name:
            continue
        pitcher_stats.update(store_pitcher_stats(name, get_pitcher_stats(name)))


# One row per pitcher, one column per stat.
pitcher_df = pd.DataFrame(pitcher_stats).transpose()



# print(pitcher)
# print(current_pitcher.keys())


{'Tanner Banks': {'gamesPlayed': '2', 'gamesStarted': '0', 'groundOuts': '1', 'airOuts': '2', 'runs': '0', 'doubles': '1', 'triples': '0', 'homeRuns': '0', 'strikeOuts': '3', 'baseOnBalls': '1', 'intentionalWalks': '0', 'hits': '1', 'hitByPitch': '0', 'avg': '.143', 'atBats': '7', 'obp': '.250', 'slg': '.286', 'ops': '.536', 'caughtStealing': '0', 'stolenBases': '0', 'stolenBasePercentage': '.---', 'groundIntoDoublePlay': '0', 'numberOfPitches': '40', 'era': '0.00', 'inningsPitched': '2.0', 'wins': '0', 'losses': '0', 'saves': '0', 'saveOpportunities': '0', 'holds': '0', 'blownSaves': '0', 'earnedRuns': '0', 'whip': '1.00', 'battersFaced': '8', 'outs': '6', 'gamesPitched': '2', 'completeGames': '0', 'shutouts': '0', 'strikes': '25', 'strikePercentage': '.630', 'hitBatsmen': '0', 'balks': '0', 'wildPitches': '0', 'pickoffs': '0', 'totalBases': '2', 'groundOutsToAirouts': '0.50', 'winPercentage': '.---', 'pitchesPerInning': '20.00', 'gamesFinished': '0', 'strikeoutWalkRatio': '3.00', 'st