In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import statsapi
import pandas as pd
import numpy as np
from pathlib import Path
from tqdm import tqdm

In [3]:
start_date = '02/02/2023'  # mm/dd/yyyy
end_date = '03/02/2023'  # mm/dd/yyyy
data_dir = Path('data')  # path/to/save/location

In [22]:
stats_games = pd.DataFrame(statsapi.schedule(date=None, start_date=start_date, end_date=end_date, team="", opponent="", sportId=1, game_id=None))
pitchers = pd.concat([stats_games.home_probable_pitcher, stats_games.away_probable_pitcher], ignore_index=True).unique()
pitchers = pitchers[pd.notna(pitchers)]  # remove NaN values

In [23]:
stats_pitchers = []
skipped = []
try:
    for pitcher_name in tqdm(pitchers):
        pitcher_id = None
        pitcher_infos = statsapi.lookup_player(pitcher_name)  # gets a list of pitchers matching that name, could be none
        if len(pitcher_infos) == 0:
            pass
        elif len(pitcher_infos) == 1: # if there is only one pitcher match use it
            pitcher_id = pitcher_infos[0]['id']
        else:
            for pitcher in pitcher_infos:
                if pitcher['primaryPosition']['abbreviation'] == 'P':  # only take the pitcher that is primarily a pitcher
                    pitcher_id = pitcher['id']
                    break  # if found pitcher then break from for loop

        if pitcher_id is None:  # if we didn't find a pitcher skip to the next one
            skipped.append(pitcher_name)
        else:  # if we did find pitcher then get stats for pitcher
            stats = statsapi.player_stat_data(pitcher_id, group="[pitching]", type="season", sportId=1)
        
            stats_pitchers.append({
                'pitcher_id': pitcher_id,
                'pitcher_name': pitcher_name,
                'current_team': stats['current_team'],
                'position': stats['position'],
                'pitch_hand': stats['pitch_hand'],
                'games_started': stats['stats'][0]['stats']['gamesStarted']        
            })
except:
    pass

print(f'skipped {len(skipped)}\n{skipped}')
    
    

 82%|████████▏ | 141/172 [01:47<00:23,  1.31it/s]

skipped 64
['Oddanier Mosqueda', 'Daniel Lynch IV', 'Brandon Bielak', 'Drew Rom', 'Garrett Hill', 'Kolby Allard', 'Nick Nelson', 'Marcus Stroman', 'Nathan Eovaldi', 'Marco Gonzales', 'Robert Stock', 'Jose Butto', 'Trevor Williams', 'Andrew Bellatti', 'Robbie Erlin', 'Brad Keller', 'Zach Plesac', 'Julio Teheran', 'Janson Junk', 'Connor Thomas', 'Bruce Zimmermann', 'JT Brubaker', 'Forrest Whitley', 'Drew Rucinski', 'Hunter Greene', 'Nabil Crismatt', 'Luis L. Ortiz', 'Tyler Mahle', 'Jeffrey Springs', 'Triston McKenzie', 'Cody Bradford', 'Zach Davies', 'Trevor Rogers', 'Jesse Chavez', 'Domingo German', 'Davis Martin', 'Ryan Feltner', 'Tanner Houck', 'Matthew Boyd', 'Mike Mayers', 'Cal Quantrill', 'Noah Davis', 'Glenn Otto', 'Robbie Ray', 'Denyi Reyes', 'Louie Varland', 'Cooper Criswell', 'Michael Plassmeyer', 'Matt Dermody', 'Ryan Weber', 'Drey Jameson', 'Jose Urquidy', 'Taj Bradley', 'Joe Ryan', 'Miles Mikolas', 'Easton McGee', 'Tommy Henry', 'Robert Gasser', 'Kyle Muller', 'Elvin Rodrigu




In [20]:
stats_games.columns

Index(['game_id', 'game_datetime', 'game_date', 'game_type', 'status',
       'away_name', 'home_name', 'away_id', 'home_id', 'doubleheader',
       'game_num', 'home_probable_pitcher', 'away_probable_pitcher',
       'home_pitcher_note', 'away_pitcher_note', 'away_score', 'home_score',
       'current_inning', 'inning_state', 'venue_id', 'venue_name',
       'national_broadcasts', 'series_status', 'winning_team', 'losing_team',
       'winning_pitcher', 'losing_pitcher', 'save_pitcher', 'summary',
       'losing_Team'],
      dtype='object')

In [21]:
a = stats_games[stats_games.winning_pitcher=='Ryan Weiss']
a[['away_name', 'home_name', 'home_probable_pitcher', 'away_probable_pitcher', 'away_score', 'home_score', 'winning_team', 'losing_team', 'winning_pitcher', 'losing_pitcher', 'home_id', 'away_id', 'game_type']]

Unnamed: 0,away_name,home_name,home_probable_pitcher,away_probable_pitcher,away_score,home_score,winning_team,losing_team,winning_pitcher,losing_pitcher,home_id,away_id,game_type
1,Texas Rangers,Kansas City Royals,Daniel Lynch IV,Glenn Otto,5,6,Kansas City Royals,Texas Rangers,Ryan Weiss,Marc Church,118,140,S


In [30]:
statsapi.lookup_player(656271)

[{'id': 656271,
  'fullName': 'Brock Burke',
  'firstName': 'Brock',
  'lastName': 'Burke',
  'primaryNumber': '46',
  'currentTeam': {'id': 140},
  'primaryPosition': {'code': '1', 'abbreviation': 'P'},
  'useName': 'Brock',
  'boxscoreName': 'Burke',
  'mlbDebutDate': '2019-08-20',
  'nameFirstLast': 'Brock Burke',
  'firstLastName': 'Brock Burke',
  'lastFirstName': 'Burke, Brock',
  'lastInitName': 'Burke, B',
  'initLastName': 'B Burke',
  'fullFMLName': 'Brock Christopher Burke',
  'fullLFMName': 'Burke, Brock Christopher'}]

In [17]:
print(statsapi.roster(118))

#67  P   Alec Marsh
#61  P   Angel Zerpa
#53  P   Austin Cox
#7   SS  Bobby Witt Jr.
#51  P   Brady Singer
#43  P   Carlos Hernandez
#55  P   Cole Ragans
#44  LF  Dairon Blanco
#6   CF  Drew Waters
#65  P   Dylan Coleman
#14  LF  Edward Olivares
#34  C   Freddy Fermin
#66  P   James McArthur
#49  P   Jonathan Heasley
#24  P   Jordan Lyles
#28  CF  Kyle Isbel
#1   RF  MJ Melendez
#11  3B  Maikel Garcia
#27  DH  Matt Beaty
#15  3B  Matt Duffy
#19  2B  Michael Massey
#64  P   Nick Wittgren
#13  C   Salvador Perez
#0   2B  Samad Taylor
#41  P   Tucker Davidson
#23  P   Zack Greinke



In [7]:
stats_pitchers = pd.DataFrame(stats_pitchers)

In [49]:
stats_games

Unnamed: 0,game_id,game_datetime,game_date,game_type,status,away_name,home_name,away_id,home_id,doubleheader,...,venue_name,national_broadcasts,series_status,winning_team,losing_team,winning_pitcher,losing_pitcher,save_pitcher,summary,losing_Team
0,719496,2023-02-24T18:05:00Z,2023-02-24,E,Final,Northeastern Huskies,Boston Red Sox,343,111,N,...,JetBlue Park,[],,Boston Red Sox,Northeastern Huskies,Oddanier Mosqueda,James Quinlivan,Brendan Cellucci,2023-02-24 - Northeastern Huskies (3) @ Boston...,
1,718938,2023-02-24T20:05:00Z,2023-02-24,S,Final,Texas Rangers,Kansas City Royals,140,118,N,...,Surprise Stadium,[],KC leads 1-0,Kansas City Royals,Texas Rangers,Ryan Weiss,Marc Church,,2023-02-24 - Texas Rangers (5) @ Kansas City R...,
2,719395,2023-02-24T20:10:00Z,2023-02-24,S,Final,Seattle Mariners,San Diego Padres,136,135,N,...,Peoria Stadium,[],SEA wins Spring,Seattle Mariners,San Diego Padres,Prelander Berroa,Ryan Weathers,Riley O'Brien,2023-02-24 - Seattle Mariners (3) @ San Diego ...,
3,719391,2023-02-25T18:05:00Z,2023-02-25,S,Final,New York Mets,Houston Astros,121,117,N,...,The Ballpark of the Palm Beaches,[],HOU wins Spring,Houston Astros,New York Mets,Matt Ruppenthal,Zach Muckenhirn,Tyler Brown,2023-02-25 - New York Mets (2) @ Houston Astro...,
4,719389,2023-02-25T18:05:00Z,2023-02-25,S,Final,Washington Nationals,St. Louis Cardinals,120,138,N,...,Roger Dean Chevrolet Stadium,[],WSH wins Spring,Washington Nationals,St. Louis Cardinals,Matt Cronin,Kodi Whitley,Gerardo Carrillo,2023-02-25 - Washington Nationals (3) @ St. Lo...,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
93,719312,2023-03-02T20:05:00Z,2023-03-02,S,Final,Oakland Athletics,Chicago Cubs,133,112,N,...,Sloan Park,[],CHC wins Spring,Chicago Cubs,Oakland Athletics,Marcus Stroman,JP Sears,,2023-03-02 - Oakland Athletics (1) @ Chicago C...,
94,719301,2023-03-02T20:05:00Z,2023-03-02,S,Final,San Francisco Giants,Cleveland Guardians,137,114,N,...,Goodyear Ballpark,[],CLE wins Spring,Cleveland Guardians,San Francisco Giants,Davis Sharpe,Raymond Burgos,Kyle Marman,2023-03-02 - San Francisco Giants (3) @ Clevel...,
95,719303,2023-03-02T20:10:00Z,2023-03-02,S,Final,San Diego Padres,Seattle Mariners,135,136,N,...,Peoria Stadium,[],SEA wins Spring,Seattle Mariners,San Diego Padres,Tommy Milone,Sean Poppen,,2023-03-02 - San Diego Padres (4) @ Seattle Ma...,
96,719305,2023-03-02T20:10:00Z,2023-03-02,S,Final,Chicago White Sox,Colorado Rockies,145,115,N,...,Salt River Fields at Talking Stick,[],CWS wins Spring,Chicago White Sox,Colorado Rockies,Matt Foster,Jeff Criswell,Tanner Banks,2023-03-02 - Chicago White Sox (6) @ Colorado ...,


In [9]:
stats_pitchers

Unnamed: 0,pitcher_id,pitcher_name,current_team,position,pitch_hand,games_started
0,661395,Jhoan Duran,Minnesota Twins,P,Right,0
1,472551,,Colorado Rockies,P,Left,0
2,605130,Scott Barlow,Kansas City Royals,P,Right,0
3,656353,Tucker Davidson,Los Angeles Angels,P,Left,0
4,665625,Elvis Peguero,Milwaukee Brewers,P,Right,0
...,...,...,...,...,...,...
355,663362,Matt Waldron,El Paso Chihuahuas,P,Right,1
356,676265,Cory Abbott,Washington Nationals,P,Right,0
357,519008,T.J. McFarland,New York Mets,P,Left,0
358,607074,Carlos Rodon,New York Yankees,P,Left,1


In [16]:
stats_games.to_csv(data_dir / 'stats_games.csv', index=False)
stats_pitchers.to_csv(data_dir / 'stats_pitchers.csv', index=False)

In [10]:
pd.notna(stats_games.winning_pitcher)

0        True
1        True
2        True
3        True
4        True
        ...  
2947    False
2948    False
2949    False
2950    False
2951    False
Name: winning_pitcher, Length: 2952, dtype: bool

In [11]:
pitcher_infos

[{'id': 547943,
  'fullName': 'Hyun Jin Ryu',
  'firstName': 'Hyun Jin',
  'lastName': 'Ryu',
  'primaryNumber': '99',
  'currentTeam': {'id': 141},
  'primaryPosition': {'code': '1', 'abbreviation': 'P'},
  'useName': 'Hyun Jin',
  'boxscoreName': 'Ryu',
  'nickName': 'Monster',
  'mlbDebutDate': '2013-04-02',
  'nameFirstLast': 'Hyun Jin Ryu',
  'firstLastName': 'Hyun Jin Ryu',
  'lastFirstName': 'Ryu, Hyun Jin',
  'lastInitName': 'Ryu, H',
  'initLastName': 'H Ryu',
  'fullFMLName': 'Hyun Jin Ryu',
  'fullLFMName': 'Ryu, Hyun Jin'}]