In [1]:
# Load IPython's autoreload extension and switch it to mode 2 so edited modules
# are picked up without restarting the kernel.
# NOTE(review): mode-2 semantics come from the IPython docs — confirm for the installed version.
%load_ext autoreload
%autoreload 2

In [2]:
import statsapi
import pandas as pd
import numpy as np
from pathlib import Path
from tqdm import tqdm

In [3]:
# Query window handed to statsapi.schedule, plus the folder the CSV exports go to.
# NOTE(review): the saved stats_games output later in this notebook contains games
# through 2023-10-28 (3007 rows), so it was not produced with this one-month window.
# Re-run from a fresh kernel before relying on the displayed outputs.
start_date = '02/01/2023'  # first day of the window, mm/dd/yyyy
end_date = '03/01/2023'  # last day of the window, mm/dd/yyyy
data_dir = Path('data')  # directory the stats_*.csv files are written to

In [4]:
# Fetch every scheduled game in the configured window; one row per game.
# All keyword arguments are spelled out so the query is fully explicit.
stats_games = pd.DataFrame(
    statsapi.schedule(
        date=None,
        start_date=start_date,
        end_date=end_date,
        team="",
        opponent="",
        sportId=1,
        game_id=None,
    )
)

# Stack the home and away probable starters into a single column of names.
probable_pitchers = pd.concat(
    [stats_games.home_probable_pitcher, stats_games.away_probable_pitcher],
    ignore_index=True,
)

# Drop entries without a usable name: NaN as well as blank strings.
# NOTE(review): statsapi appears to report an unannounced probable pitcher as an
# empty string rather than NaN — confirm against the installed version. A blank
# name must not reach statsapi.lookup_player in the lookup loop.
has_name = probable_pitchers.notna() & (probable_pitchers.astype(str).str.strip() != "")

# Unique names in order of first appearance, as a NumPy object array.
pitchers = probable_pitchers[has_name].unique()

In [5]:
# Peek at the first ten probable-pitcher names.
pitchers[:10]

array(['Oddanier Mosqueda', 'Daniel Lynch IV', 'Nick Martinez',
       'Brandon Bielak', 'Adam Wainwright', 'Mitch Keller', 'Drew Rom',
       'Kenta Maeda', 'Garrett Hill', 'Kolby Allard'], dtype=object)

In [6]:
# Spot-check one pitcher: resolve the name at index 20 to a player id (taking the
# first lookup match) and request that player's season pitching stats.
sample_matches = statsapi.lookup_player(pitchers[20])
pitcher_id = sample_matches[0]['id']
stats = statsapi.player_stat_data(
    pitcher_id,
    group="[pitching]",
    type="season",
    sportId=1,
)
stats

{'id': 593958,
 'first_name': 'Eduardo',
 'last_name': 'Rodriguez',
 'active': True,
 'current_team': 'Arizona Diamondbacks',
 'position': 'P',
 'nickname': 'El Gualo',
 'last_played': None,
 'mlb_debut': '2015-05-28',
 'bat_side': 'Left',
 'pitch_hand': 'Left',
 'stats': [{'type': 'season',
   'group': 'pitching',
   'season': '2023',
   'stats': {'gamesPlayed': 26,
    'gamesStarted': 26,
    'groundOuts': 141,
    'airOuts': 158,
    'runs': 59,
    'doubles': 25,
    'triples': 2,
    'homeRuns': 15,
    'strikeOuts': 143,
    'baseOnBalls': 48,
    'intentionalWalks': 0,
    'hits': 128,
    'hitByPitch': 3,
    'avg': '.227',
    'atBats': 563,
    'obp': '.289',
    'slg': '.359',
    'ops': '.648',
    'caughtStealing': 5,
    'stolenBases': 10,
    'stolenBasePercentage': '.667',
    'groundIntoDoublePlay': 11,
    'numberOfPitches': 2446,
    'era': '3.30',
    'inningsPitched': '152.2',
    'wins': 13,
    'losses': 9,
    'saves': 0,
    'saveOpportunities': 0,
    'holds':

In [None]:
# Repeats the season-pitching request from the spot-check cell for the current pitcher_id.
# NOTE(review): this cell has no execution count and duplicates the earlier call — likely removable.
stats = statsapi.player_stat_data(pitcher_id, group="[pitching]", type="season", sportId=1)

In [22]:
# Resolve every probable-pitcher name to a player id and collect a small set of
# season pitching fields per pitcher.
stats_pitchers = []  # one dict per pitcher whose stats were retrieved
skipped = []  # names that could not be resolved to a pitcher id
for pitcher_name in tqdm(pitchers):
    # lookup_player returns a list of players matching the name; it may be empty
    pitcher_infos = statsapi.lookup_player(pitcher_name)

    pitcher_id = None
    if len(pitcher_infos) == 1:
        # exactly one match: use it as-is
        pitcher_id = pitcher_infos[0]['id']
    else:
        # zero or several matches: take the first whose primary position is pitcher
        # (.get guards against entries that carry no primaryPosition)
        for pitcher in pitcher_infos:
            if pitcher.get('primaryPosition', {}).get('abbreviation') == 'P':
                pitcher_id = pitcher['id']
                break

    if pitcher_id is None:
        # covers both "no match at all" and "no match that is a pitcher", so that
        # unresolved names are reported instead of being dropped silently
        skipped.append(pitcher_name)
        continue

    stats = statsapi.player_stat_data(pitcher_id, group="[pitching]", type="season", sportId=1)

    try:
        season_stats = stats['stats']
        stats_pitchers.append({
            'pitcher_id': pitcher_id,
            'pitcher_name': pitcher_name,
            'current_team': stats['current_team'],
            'position': stats['position'],
            'pitch_hand': stats['pitch_hand'],
            # -1 marks a pitcher with no season stats entry
            'games_started': season_stats[0]['stats']['gamesStarted'] if len(season_stats) > 0 else -1,
        })
    except (KeyError, IndexError, TypeError) as e:
        # a malformed or incomplete stats payload should not abort the whole run
        print(f'on player {pitcher_name}, id: {pitcher_id} had error: {e}')

print(f'skipped {len(skipped)}\n{skipped}')
    
    

  0%|          | 0/482 [00:00<?, ?it/s]

100%|██████████| 482/482 [04:41<00:00,  1.71it/s]

skipped 0
[]





In [19]:
# List the columns available on the schedule frame.
stats_games.columns

Index(['game_id', 'game_datetime', 'game_date', 'game_type', 'status',
       'away_name', 'home_name', 'away_id', 'home_id', 'doubleheader',
       'game_num', 'home_probable_pitcher', 'away_probable_pitcher',
       'home_pitcher_note', 'away_pitcher_note', 'away_score', 'home_score',
       'current_inning', 'inning_state', 'venue_id', 'venue_name',
       'national_broadcasts', 'series_status', 'winning_team', 'losing_team',
       'winning_pitcher', 'losing_pitcher', 'save_pitcher', 'summary',
       'losing_Team'],
      dtype='object')

In [20]:
# Games credited to 'Ryan Weiss' as the winning pitcher, shown with only the
# matchup, probable-starter, score, result and id columns.
summary_cols = [
    'away_name', 'home_name',
    'home_probable_pitcher', 'away_probable_pitcher',
    'away_score', 'home_score',
    'winning_team', 'losing_team',
    'winning_pitcher', 'losing_pitcher',
    'home_id', 'away_id',
    'game_type',
]
a = stats_games.loc[stats_games['winning_pitcher'] == 'Ryan Weiss']
a[summary_cols]

Unnamed: 0,away_name,home_name,home_probable_pitcher,away_probable_pitcher,away_score,home_score,winning_team,losing_team,winning_pitcher,losing_pitcher,home_id,away_id,game_type
1,Texas Rangers,Kansas City Royals,Daniel Lynch IV,Glenn Otto,5,6,Kansas City Royals,Texas Rangers,Ryan Weiss,Marc Church,118,140,S
304,Oakland Athletics,Kansas City Royals,Kris Bubic,Ken Waldichuk,0,2,Kansas City Royals,Oakland Athletics,Ryan Weiss,Ken Waldichuk,118,133,S


In [21]:
# Look up a player by numeric id (656271) rather than by name.
statsapi.lookup_player(656271)

[{'id': 656271,
  'fullName': 'Brock Burke',
  'firstName': 'Brock',
  'lastName': 'Burke',
  'primaryNumber': '46',
  'currentTeam': {'id': 140},
  'primaryPosition': {'code': '1', 'abbreviation': 'P'},
  'useName': 'Brock',
  'boxscoreName': 'Burke',
  'mlbDebutDate': '2019-08-20',
  'nameFirstLast': 'Brock Burke',
  'firstLastName': 'Brock Burke',
  'lastFirstName': 'Burke, Brock',
  'lastInitName': 'Burke, B',
  'initLastName': 'B Burke',
  'fullFMLName': 'Brock Christopher Burke',
  'fullLFMName': 'Burke, Brock Christopher'}]

In [22]:
# Print the roster for team id 118 (the home_id listed for the Kansas City Royals in stats_games).
print(statsapi.roster(118))

#26  2B  Adam Frazier
#48  P   Alec Marsh
#61  P   Angel Zerpa
#69  P   Anthony Veneziano
#14  C   Austin Nola
#7   SS  Bobby Witt Jr.
#51  P   Brady Singer
#43  P   Carlos Hernández
#35  P   Chris Stratton
#55  P   Cole Ragans
#44  LF  Dairon Blanco
#41  P   Daniel Lynch IV
#6   CF  Drew Waters
#34  C   Freddy Fermin
#2   SS  Garrett Hampson
#16  RF  Hunter Renfroe
#59  P   Jake Brentz
#66  P   James McArthur
#46  P   John Schreiber
#24  P   Jordan Lyles
#38  P   Josh Taylor
#28  CF  Kyle Isbel
#1   RF  MJ Melendez
#11  3B  Maikel Garcia
#65  P   Matt Sauer
#19  2B  Michael Massey
#52  P   Michael Wacha
#17  RF  Nelson Velázquez
#63  P   Nick Anderson
#12  SS  Nick Loftin
#32  1B  Nick Pratto
#13  C   Salvador Perez
#67  P   Seth Lugo
#9   1B  Vinnie Pasquantino
#31  P   Will Smith



In [23]:
# Turn the list of per-pitcher dicts built by the lookup loop into a DataFrame.
# NOTE(review): this rebinds stats_pitchers from a list to a DataFrame, so the
# name means different things depending on which cells have run.
stats_pitchers = pd.DataFrame(stats_pitchers)

In [24]:
# Display the schedule frame (pandas truncates the rendering of long frames).
stats_games

Unnamed: 0,game_id,game_datetime,game_date,game_type,status,away_name,home_name,away_id,home_id,doubleheader,...,venue_name,national_broadcasts,series_status,winning_team,losing_team,winning_pitcher,losing_pitcher,save_pitcher,summary,losing_Team
0,719496,2023-02-24T18:05:00Z,2023-02-24,E,Final,Northeastern Huskies,Boston Red Sox,343,111,N,...,JetBlue Park,[],,Boston Red Sox,Northeastern Huskies,Oddanier Mosqueda,James Quinlivan,Brendan Cellucci,2023-02-24 - Northeastern Huskies (3) @ Boston...,
1,718938,2023-02-24T20:05:00Z,2023-02-24,S,Final,Texas Rangers,Kansas City Royals,140,118,N,...,Surprise Stadium,[],KC leads 1-0,Kansas City Royals,Texas Rangers,Ryan Weiss,Marc Church,,2023-02-24 - Texas Rangers (5) @ Kansas City R...,
2,719395,2023-02-24T20:10:00Z,2023-02-24,S,Final,Seattle Mariners,San Diego Padres,136,135,N,...,Peoria Stadium,[],SEA wins Spring,Seattle Mariners,San Diego Padres,Prelander Berroa,Ryan Weathers,Riley O'Brien,2023-02-24 - Seattle Mariners (3) @ San Diego ...,
3,719391,2023-02-25T18:05:00Z,2023-02-25,S,Final,New York Mets,Houston Astros,121,117,N,...,The Ballpark of the Palm Beaches,[],HOU wins Spring,Houston Astros,New York Mets,Matt Ruppenthal,Zach Muckenhirn,Tyler Brown,2023-02-25 - New York Mets (2) @ Houston Astro...,
4,719389,2023-02-25T18:05:00Z,2023-02-25,S,Final,Washington Nationals,St. Louis Cardinals,120,138,N,...,Roger Dean Chevrolet Stadium,[],WSH wins Spring,Washington Nationals,St. Louis Cardinals,Matt Cronin,Kodi Whitley,Gerardo Carrillo,2023-02-25 - Washington Nationals (3) @ St. Lo...,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3002,748547,2023-10-24T00:03:00Z,2023-10-23,L,Final,Texas Rangers,Houston Astros,140,117,N,...,Minute Maid Park,"[FOX, ESPN Radio, FS1-INT]",TEX wins 4-3,Texas Rangers,Houston Astros,Jordan Montgomery,Cristian Javier,,2023-10-23 - Texas Rangers (11) @ Houston Astr...,
3003,748537,2023-10-25T00:07:00Z,2023-10-24,L,Final,Arizona Diamondbacks,Philadelphia Phillies,109,143,N,...,Citizens Bank Park,"[TBS, TBS-INT, ESPN Radio]",AZ wins 4-3,Arizona Diamondbacks,Philadelphia Phillies,Ryan Thompson,Ranger Suárez,Paul Sewald,2023-10-24 - Arizona Diamondbacks (4) @ Philad...,
3004,748542,2023-10-28T00:05:00Z,2023-10-27,W,Final,Arizona Diamondbacks,Texas Rangers,109,140,N,...,Globe Life Field,"[FOX, ESPN Radio, FOX-INT]",TEX leads 1-0,Texas Rangers,Arizona Diamondbacks,José Leclerc,Miguel Castro,,2023-10-27 - Arizona Diamondbacks (5) @ Texas ...,
3005,748541,2023-10-29T00:03:00Z,2023-10-28,W,Final,Arizona Diamondbacks,Texas Rangers,109,140,N,...,Globe Life Field,"[FOX, ESPN Radio, FOX-INT]",Series tied 1-1,Arizona Diamondbacks,Texas Rangers,Merrill Kelly,Jordan Montgomery,,2023-10-28 - Arizona Diamondbacks (9) @ Texas ...,


In [25]:
# Display the per-pitcher stats collected by the lookup loop.
stats_pitchers

In [26]:
# Persist both tables as CSV (index column omitted) under data_dir.
# Create the folder first: to_csv does not create missing parent directories,
# so a fresh checkout without data/ would otherwise fail here.
data_dir.mkdir(parents=True, exist_ok=True)
stats_games.to_csv(data_dir / 'stats_games.csv', index=False)
stats_pitchers.to_csv(data_dir / 'stats_pitchers.csv', index=False)

In [27]:
# Boolean mask over games: True where winning_pitcher is not NaN/None.
pd.notna(stats_games.winning_pitcher)

0       True
1       True
2       True
3       True
4       True
        ... 
3002    True
3003    True
3004    True
3005    True
3006    True
Name: winning_pitcher, Length: 3007, dtype: bool

In [28]:
# Inspect pitcher_infos, i.e. whatever the most recent statsapi.lookup_player
# assignment left behind.
# NOTE(review): this depends on kernel execution order — leftover debugging, likely removable.
pitcher_infos

[{'id': 663738,
  'fullName': 'Daniel Lynch IV',
  'firstName': 'Daniel',
  'lastName': 'Lynch',
  'primaryNumber': '41',
  'currentTeam': {'id': 118},
  'primaryPosition': {'code': '1', 'abbreviation': 'P'},
  'useName': 'Daniel',
  'boxscoreName': 'Lynch IV',
  'mlbDebutDate': '2021-05-03',
  'nameFirstLast': 'Daniel Lynch IV',
  'firstLastName': 'Daniel Lynch IV',
  'lastFirstName': 'Lynch IV, Daniel',
  'lastInitName': 'Lynch IV, D',
  'initLastName': 'D Lynch IV',
  'fullFMLName': 'Daniel A. Lynch IV',
  'fullLFMName': 'Lynch IV, Daniel A.'}]