In [1]:
pip install python-mlb-statsapi

Note: you may need to restart the kernel to use updated packages.


In [2]:
import mlbstatsapi
import pandas as pd

In [3]:
def append_game_stats(game_cols, stats):
    """Attach game-level identifiers to a stats dict, in place.

    Args:
        game_cols: Indexable holding at least nine values, read positionally
            as gamepk, id, gamenumber, season, officialdate, daynight, time,
            ampm and detailedstate.
        stats: Dict to annotate. It is modified and also returned.

    Returns:
        The same ``stats`` object with the nine game keys set.
    """
    field_names = (
        'gamepk', 'id', 'gamenumber', 'season', 'officialdate',
        'daynight', 'time', 'ampm', 'detailedstate',
    )
    # Index by position (not zip) so a too-short game_cols still raises
    # IndexError after the leading keys have been written.
    for position, field in enumerate(field_names):
        stats[field] = game_cols[position]
    return stats

def parse_pitching_teamstats(data, game_cols):
    """Build a one-row-per-team DataFrame of team pitching stats for one game.

    Args:
        data: Iterable of ``[team_id, team_name, stats]`` entries, where
            ``stats`` is a dict of stat name -> value.
        game_cols: Game-level values forwarded to ``append_game_stats``.

    Returns:
        pandas.DataFrame with one row per entry of ``data``: the stat columns
        followed by ``team_id``, ``team_name`` and the game columns.
    """
    parsed_data = []
    for team in data:
        # Work on a copy: the stats dict belongs to the caller's game object
        # and must not pick up the extra team/game keys added below.
        stats = dict(team[2])
        stats['team_id'] = team[0]
        stats['team_name'] = team[1]
        parsed_data.append(append_game_stats(game_cols, stats))

    return pd.DataFrame(parsed_data)

def parse_input_pitcher(data, game_cols):
    """Build a one-row-per-pitcher DataFrame of pitching stats for one game.

    Args:
        data: Iterable of ``[player_id, player_name, stats]`` entries, where
            ``stats`` is a dict of stat name -> value.
        game_cols: Game-level values forwarded to ``append_game_stats``.

    Returns:
        pandas.DataFrame with one row per entry of ``data``: the stat columns
        followed by ``player_id``, ``player_name`` and the game columns.
        Empty when ``data`` is empty.
    """
    parsed_data = []
    for player in data:
        # Work on a copy: the stats dict belongs to the caller's player object
        # and must not pick up the extra player/game keys added below.
        stats = dict(player[2])
        stats['player_id'] = player[0]
        stats['player_name'] = player[1]
        parsed_data.append(append_game_stats(game_cols, stats))

    return pd.DataFrame(parsed_data)

def parse_batting_teamstats(data, game_cols):
    """Build a one-row-per-team DataFrame of team batting stats for one game.

    Args:
        data: Iterable of ``[team_id, team_name, stats]`` entries, where
            ``stats`` is a dict of stat name -> value.
        game_cols: Game-level values forwarded to ``append_game_stats``.

    Returns:
        pandas.DataFrame with one row per entry of ``data``: the stat columns
        followed by ``team_id``, ``team_name`` and the game columns.
    """
    parsed_data = []
    for team in data:
        # Work on a copy: the stats dict belongs to the caller's game object
        # and must not pick up the extra team/game keys added below.
        stats = dict(team[2])
        stats['team_id'] = team[0]
        stats['team_name'] = team[1]
        parsed_data.append(append_game_stats(game_cols, stats))

    return pd.DataFrame(parsed_data)

def parse_input_batter(data, game_cols):
    """Build a one-row-per-batter DataFrame of batting stats for one game.

    Args:
        data: Iterable of ``[player_id, player_name, stats]`` entries, where
            ``stats`` is a dict of stat name -> value.
        game_cols: Game-level values forwarded to ``append_game_stats``.

    Returns:
        pandas.DataFrame with one row per entry of ``data``: the stat columns
        followed by ``player_id``, ``player_name`` and the game columns.
        Empty when ``data`` is empty.
    """
    parsed_data = []
    for player in data:
        # Work on a copy: the stats dict belongs to the caller's player object
        # and must not pick up the extra player/game keys added below.
        stats = dict(player[2])
        stats['player_id'] = player[0]
        stats['player_name'] = player[1]
        parsed_data.append(append_game_stats(game_cols, stats))

    return pd.DataFrame(parsed_data)

In [4]:
class MLBIngestHistory():
    """Download every MLB game in a date range and flatten it into DataFrames.

    Constructing an instance performs the network calls (the schedule, then
    one request per game) and immediately builds the batting, pitching and
    game-summary frames.

    Attributes:
        begin_date, end_date: The date strings passed to the constructor.
        games: Dict of gamepk -> game object returned by ``mlb.get_game``.
        failed_game_pks: gamepks whose download failed.
        failed_game_errors: Dict of gamepk -> description of the failure.
        df_batting_player, df_batting_team: Result of ``prepare_batting``.
        df_pitching_player, df_pitching_team: Result of ``prepare_pitching``.
        df_game_data: Result of ``prepare_games``.
    """

    # Headers of df_game_data, in the order prepare_games emits the values.
    GAME_COLUMNS = ['GamePK', 'GameID', 'GameNumber', 'VenueName', 'VenueCity', 'VenueState', 'Season', 'OfficialDate',
                    'DayNight', 'Time', 'AMPM', 'AwayTeamID', 'AwayTeamName', 'AwayLeagueName', 'AwayTeamShortName',
                    'AwayProbPitcherID', 'AwayProbPitcherFullName', 'AwayWins', 'AwayLosses', 'AwayWinningPercentage',
                    'HomeTeamID', 'HomeTeamName', 'HomeLeagueName', 'HomeTeamShortName', 'HomeProbPitcherID',
                    'HomeProbPitcherFullName', 'HomeWins', 'HomeLosses', 'HomeWinningPercentage', 'DetailedState',
                    'HomeRuns', 'AwayRuns']

    def __init__(self, begin_date = '2024-03-25', end_date = '2024-06-28'):
        """Fetch the schedule and games for the range, then build all frames.

        Args:
            begin_date: First date of the range, as a ``YYYY-MM-DD`` string.
            end_date: Last date of the range, as a ``YYYY-MM-DD`` string.
        """
        self.begin_date = begin_date
        self.end_date = end_date

        mlb = mlbstatsapi.Mlb()
        schedule = mlb.get_schedule(start_date = self.begin_date, end_date = self.end_date)
        self.games = {}
        self.failed_game_pks = []
        self.failed_game_errors = {}
        # NOTE(review): get_schedule appears to return None when the range has
        # no games -- confirm against the python-mlb-statsapi documentation.
        schedule_dates = schedule.dates if schedule is not None else []
        for schedule_date in schedule_dates:
            print(schedule_date)
            for game in schedule_date.games:
                try:
                    fetched = mlb.get_game(game_id = game.gamepk)
                except Exception as exc:
                    # Best-effort download: remember the failure and carry on.
                    # KeyboardInterrupt / SystemExit are deliberately not caught.
                    self.failed_game_pks.append(game.gamepk)
                    self.failed_game_errors[game.gamepk] = repr(exc)
                    continue
                if fetched is None:
                    # NOTE(review): presumably get_game can return None on a
                    # client error; treat it as a failed download -- confirm.
                    self.failed_game_pks.append(game.gamepk)
                    self.failed_game_errors[game.gamepk] = 'get_game returned None'
                    continue
                self.games[game.gamepk] = fetched

        print(f"failed: {self.failed_game_pks}")

        self.df_batting_player, self.df_batting_team = self.prepare_batting()
        self.df_pitching_player, self.df_pitching_team = self.prepare_pitching()
        self.df_game_data = self.prepare_games()

    @staticmethod
    def _stat_game_cols(game):
        """Return the nine game-level values in the order append_game_stats reads them."""
        return [game.gamepk
            ,game.gamedata.game.id
            ,game.gamedata.game.gamenumber
            ,game.gamedata.game.season
            ,game.gamedata.datetime.officialdate
            ,game.gamedata.datetime.daynight
            ,game.gamedata.datetime.time
            ,game.gamedata.datetime.ampm
            ,game.gamedata.status.detailedstate]

    @staticmethod
    def _team_stat_rows(game, group):
        """Return ``[id, name, stats]`` for the home then the away team for one stat group."""
        teams = game.livedata.boxscore.teams
        return [[side.team.id, side.team.name, side.teamstats[group]]
                for side in (teams.home, teams.away)]

    @staticmethod
    def _player_frame(side, team_type, group, game_cols, parse_players, position_code=None):
        """Build the per-player frame of one stat group for one team of one game.

        Players whose ``stats[group]`` is an empty dict are skipped; when
        ``position_code`` is given only players with that position code are kept.
        """
        players = [side.players[key] for key in side.players]
        if position_code is not None:
            players = [p for p in players if p.position.code == position_code]
        rows = [[p.person.id, p.person.fullname, p.stats[group]]
                for p in players if p.stats[group] != {}]
        frame = parse_players(rows, game_cols)
        frame['team_type'] = team_type
        frame['team_name'] = side.team.name
        return frame

    def prepare_batting(self):
        """Flatten batting box scores of all downloaded games.

        Returns:
            Tuple ``(player_frame, team_frame)``. Both are empty DataFrames
            when no games were downloaded.
        """
        team_frames = []
        batter_frames = []
        for game in self.games.values():
            game_cols = self._stat_game_cols(game)
            teams = game.livedata.boxscore.teams

            team_frames.append(
                parse_batting_teamstats(self._team_stat_rows(game, 'batting'), game_cols))
            batter_frames.append(
                self._player_frame(teams.home, 'home', 'batting', game_cols, parse_input_batter))
            batter_frames.append(
                self._player_frame(teams.away, 'away', 'batting', game_cols, parse_input_batter))

        if not team_frames:
            # pd.concat raises ValueError on an empty list.
            return pd.DataFrame(), pd.DataFrame()
        return pd.concat(batter_frames), pd.concat(team_frames)

    def prepare_pitching(self):
        """Flatten pitching box scores of all downloaded games.

        Only players whose position code is ``'1'`` are included in the
        per-player frame.

        Returns:
            Tuple ``(player_frame, team_frame)``. Both are empty DataFrames
            when no games were downloaded.
        """
        team_frames = []
        pitcher_frames = []
        for game in self.games.values():
            game_cols = self._stat_game_cols(game)
            teams = game.livedata.boxscore.teams

            team_frames.append(
                parse_pitching_teamstats(self._team_stat_rows(game, 'pitching'), game_cols))
            pitcher_frames.append(
                self._player_frame(teams.home, 'home', 'pitching', game_cols,
                                   parse_input_pitcher, position_code='1'))
            pitcher_frames.append(
                self._player_frame(teams.away, 'away', 'pitching', game_cols,
                                   parse_input_pitcher, position_code='1'))

        if not team_frames:
            # pd.concat raises ValueError on an empty list.
            return pd.DataFrame(), pd.DataFrame()
        return pd.concat(pitcher_frames), pd.concat(team_frames)

    @staticmethod
    def _probable_pitcher(pitcher):
        """Return ``(id, fullname)``, or the ``-999`` / ``'unannounced'`` placeholders when unset."""
        if pitcher is None or pitcher == {}:
            return -999, 'unannounced'
        return pitcher.id, pitcher.fullname

    @classmethod
    def _team_summary(cls, team, pitcher):
        """Return the nine per-team values of a game row (identity, probable pitcher, record)."""
        pitcher_id, pitcher_fullname = cls._probable_pitcher(pitcher)
        return [team.id
            ,team.name
            ,team.league.name
            ,team.shortname
            ,pitcher_id
            ,pitcher_fullname
            ,team.record.wins
            ,team.record.losses
            ,team.record.winningpercentage]

    def prepare_games(self):
        """Build one summary row per downloaded game.

        Returns:
            pandas.DataFrame with the columns listed in ``GAME_COLUMNS``;
            it has no rows when no games were downloaded.
        """
        game_data = []
        for game in self.games.values():
            gamedata = game.gamedata
            box_teams = game.livedata.boxscore.teams

            row = [game.gamepk
                ,gamedata.game.id
                ,gamedata.game.gamenumber
                ,gamedata.venue.name
                ,gamedata.venue.location.city
                ,gamedata.venue.location.state
                ,gamedata.game.season
                ,gamedata.datetime.officialdate
                ,gamedata.datetime.daynight
                ,gamedata.datetime.time
                ,gamedata.datetime.ampm]
            row += self._team_summary(gamedata.teams.away, gamedata.probablepitchers.away)
            row += self._team_summary(gamedata.teams.home, gamedata.probablepitchers.home)
            row += [gamedata.status.detailedstate
                ,box_teams.home.teamstats['batting']['runs']
                ,box_teams.away.teamstats['batting']['runs']]
            game_data.append(row)

        return pd.DataFrame(game_data, columns=self.GAME_COLUMNS)
    
class MLBIngestScheduled():
    """Download the games scheduled on one date and summarise them in a DataFrame.

    Constructing an instance performs the network calls (the schedule, then
    one request per game) and immediately builds ``df_game_data``.

    Attributes:
        date: The date string passed to the constructor.
        games: Dict of gamepk -> game object returned by ``mlb.get_game``.
        failed_game_pks: gamepks whose download failed.
        failed_game_errors: Dict of gamepk -> description of the failure.
        df_game_data: Result of ``prepare_games``.
    """

    # Headers of df_game_data, in the order prepare_games emits the values.
    GAME_COLUMNS = ['GamePK', 'GameID', 'GameNumber', 'VenueName', 'VenueCity', 'VenueState', 'Season', 'OfficialDate',
                    'DayNight', 'Time', 'AMPM', 'AwayTeamID', 'AwayTeamName', 'AwayLeagueName', 'AwayTeamShortName',
                    'AwayProbPitcherID', 'AwayProbPitcherFullName', 'AwayWins', 'AwayLosses', 'AwayWinningPercentage',
                    'HomeTeamID', 'HomeTeamName', 'HomeLeagueName', 'HomeTeamShortName', 'HomeProbPitcherID',
                    'HomeProbPitcherFullName', 'HomeWins', 'HomeLosses', 'HomeWinningPercentage', 'DetailedState',
                    'HomeRuns', 'AwayRuns']

    def __init__(self, date = '2024-06-29'):
        """Fetch the schedule and games for ``date``, then build the summary frame.

        Args:
            date: The day to ingest, as a ``YYYY-MM-DD`` string.
        """
        self.date = date

        mlb = mlbstatsapi.Mlb()
        schedule = mlb.get_schedule(start_date = self.date, end_date = self.date)
        self.games = {}
        self.failed_game_pks = []
        self.failed_game_errors = {}
        # NOTE(review): get_schedule appears to return None when the date has
        # no games -- confirm against the python-mlb-statsapi documentation.
        schedule_dates = schedule.dates if schedule is not None else []
        # The loop variable is not called `date`, so the parameter is not shadowed.
        for schedule_date in schedule_dates:
            for game in schedule_date.games:
                try:
                    fetched = mlb.get_game(game_id = game.gamepk)
                except Exception as exc:
                    # Best-effort download: remember the failure and carry on.
                    # KeyboardInterrupt / SystemExit are deliberately not caught.
                    self.failed_game_pks.append(game.gamepk)
                    self.failed_game_errors[game.gamepk] = repr(exc)
                    continue
                if fetched is None:
                    # NOTE(review): presumably get_game can return None on a
                    # client error; treat it as a failed download -- confirm.
                    self.failed_game_pks.append(game.gamepk)
                    self.failed_game_errors[game.gamepk] = 'get_game returned None'
                    continue
                self.games[game.gamepk] = fetched

        print(f"failed: {self.failed_game_pks}")

        self.df_game_data = self.prepare_games()

    @staticmethod
    def _probable_pitcher(pitcher):
        """Return ``(id, fullname)``, or the ``-999`` / ``'unannounced'`` placeholders when unset."""
        if pitcher is None or pitcher == {}:
            return -999, 'unannounced'
        return pitcher.id, pitcher.fullname

    @classmethod
    def _team_summary(cls, team, pitcher):
        """Return the nine per-team values of a game row (identity, probable pitcher, record)."""
        pitcher_id, pitcher_fullname = cls._probable_pitcher(pitcher)
        return [team.id
            ,team.name
            ,team.league.name
            ,team.shortname
            ,pitcher_id
            ,pitcher_fullname
            ,team.record.wins
            ,team.record.losses
            ,team.record.winningpercentage]

    def prepare_games(self):
        """Build one summary row per downloaded game.

        Returns:
            pandas.DataFrame with the columns listed in ``GAME_COLUMNS``;
            it has no rows when no games were downloaded.
        """
        game_data = []
        for game in self.games.values():
            gamedata = game.gamedata
            box_teams = game.livedata.boxscore.teams

            row = [game.gamepk
                ,gamedata.game.id
                ,gamedata.game.gamenumber
                ,gamedata.venue.name
                ,gamedata.venue.location.city
                ,gamedata.venue.location.state
                ,gamedata.game.season
                ,gamedata.datetime.officialdate
                ,gamedata.datetime.daynight
                ,gamedata.datetime.time
                ,gamedata.datetime.ampm]
            row += self._team_summary(gamedata.teams.away, gamedata.probablepitchers.away)
            row += self._team_summary(gamedata.teams.home, gamedata.probablepitchers.home)
            row += [gamedata.status.detailedstate
                ,box_teams.home.teamstats['batting']['runs']
                ,box_teams.away.teamstats['batting']['runs']]
            game_data.append(row)

        return pd.DataFrame(game_data, columns=self.GAME_COLUMNS)

In [5]:
# Season to ingest; interpolated into the begin/end dates of the monthly
# MLBIngestHistory cells below.
year = '2021'

In [6]:
# mlb_apr = MLBIngestHistory(begin_date = f'{year}-04-01', end_date = f'{year}-04-30')

# mlb_apr.df_batting_player.to_csv(f"mlb_individual_batting_history_{mlb_apr.begin_date.replace('-','')}_{mlb_apr.end_date.replace('-','')}.csv")
# mlb_apr.df_batting_team.to_csv(f"mlb_team_batting_history_{mlb_apr.begin_date.replace('-','')}_{mlb_apr.end_date.replace('-','')}.csv")
# mlb_apr.df_pitching_player.to_csv(f"mlb_individual_pitching_history_{mlb_apr.begin_date.replace('-','')}_{mlb_apr.end_date.replace('-','')}.csv")
# mlb_apr.df_pitching_team.to_csv(f"mlb_team_pitching_history_{mlb_apr.begin_date.replace('-','')}_{mlb_apr.end_date.replace('-','')}.csv")
# mlb_apr.df_game_data.to_csv(f"mlb_games_history_{mlb_apr.begin_date.replace('-','')}_{mlb_apr.end_date.replace('-','')}.csv")
# del mlb_apr

In [7]:
# mlb_may = MLBIngestHistory(begin_date = f'{year}-05-01', end_date = f'{year}-05-31')
# mlb_may.df_batting_player.to_csv(f"mlb_individual_batting_history_{mlb_may.begin_date.replace('-','')}_{mlb_may.end_date.replace('-','')}.csv")
# mlb_may.df_batting_team.to_csv(f"mlb_team_batting_history_{mlb_may.begin_date.replace('-','')}_{mlb_may.end_date.replace('-','')}.csv")
# mlb_may.df_pitching_player.to_csv(f"mlb_individual_pitching_history_{mlb_may.begin_date.replace('-','')}_{mlb_may.end_date.replace('-','')}.csv")
# mlb_may.df_pitching_team.to_csv(f"mlb_team_pitching_history_{mlb_may.begin_date.replace('-','')}_{mlb_may.end_date.replace('-','')}.csv")
# mlb_may.df_game_data.to_csv(f"mlb_games_history_{mlb_may.begin_date.replace('-','')}_{mlb_may.end_date.replace('-','')}.csv")
# del mlb_may

In [6]:
# mlb_jun = MLBIngestHistory(begin_date = f'{year}-06-01', end_date = f'{year}-06-30')
# mlb_jun.df_batting_player.to_csv(f"mlb_individual_batting_history_{mlb_jun.begin_date.replace('-','')}_{mlb_jun.end_date.replace('-','')}.csv")
# mlb_jun.df_batting_team.to_csv(f"mlb_team_batting_history_{mlb_jun.begin_date.replace('-','')}_{mlb_jun.end_date.replace('-','')}.csv")
# mlb_jun.df_pitching_player.to_csv(f"mlb_individual_pitching_history_{mlb_jun.begin_date.replace('-','')}_{mlb_jun.end_date.replace('-','')}.csv")
# mlb_jun.df_pitching_team.to_csv(f"mlb_team_pitching_history_{mlb_jun.begin_date.replace('-','')}_{mlb_jun.end_date.replace('-','')}.csv")
# mlb_jun.df_game_data.to_csv(f"mlb_games_history_{mlb_jun.begin_date.replace('-','')}_{mlb_jun.end_date.replace('-','')}.csv")

ScheduleDates(date=2021-06-01, totalgames=15)
ScheduleDates(date=2021-06-02, totalgames=12)
ScheduleDates(date=2021-06-03, totalgames=12)
ScheduleDates(date=2021-06-04, totalgames=15)
ScheduleDates(date=2021-06-05, totalgames=15)
ScheduleDates(date=2021-06-06, totalgames=15)
ScheduleDates(date=2021-06-07, totalgames=3)
ScheduleDates(date=2021-06-08, totalgames=15)
ScheduleDates(date=2021-06-09, totalgames=15)
ScheduleDates(date=2021-06-10, totalgames=10)
ScheduleDates(date=2021-06-11, totalgames=14)
ScheduleDates(date=2021-06-12, totalgames=16)
ScheduleDates(date=2021-06-13, totalgames=15)
ScheduleDates(date=2021-06-14, totalgames=13)
ScheduleDates(date=2021-06-15, totalgames=15)
ScheduleDates(date=2021-06-16, totalgames=15)
ScheduleDates(date=2021-06-17, totalgames=10)
ScheduleDates(date=2021-06-18, totalgames=15)
ScheduleDates(date=2021-06-19, totalgames=16)
ScheduleDates(date=2021-06-20, totalgames=16)
ScheduleDates(date=2021-06-21, totalgames=8)
ScheduleDates(date=2021-06-22, total

In [None]:
# del mlb_jun

In [6]:
# mlb_jul = MLBIngestHistory(begin_date = f'{year}-07-01', end_date = f'{year}-07-31')
# mlb_jul.df_batting_player.to_csv(f"mlb_individual_batting_history_{mlb_jul.begin_date.replace('-','')}_{mlb_jul.end_date.replace('-','')}.csv")
# mlb_jul.df_batting_team.to_csv(f"mlb_team_batting_history_{mlb_jul.begin_date.replace('-','')}_{mlb_jul.end_date.replace('-','')}.csv")
# mlb_jul.df_pitching_player.to_csv(f"mlb_individual_pitching_history_{mlb_jul.begin_date.replace('-','')}_{mlb_jul.end_date.replace('-','')}.csv")
# mlb_jul.df_pitching_team.to_csv(f"mlb_team_pitching_history_{mlb_jul.begin_date.replace('-','')}_{mlb_jul.end_date.replace('-','')}.csv")
# mlb_jul.df_game_data.to_csv(f"mlb_games_history_{mlb_jul.begin_date.replace('-','')}_{mlb_jul.end_date.replace('-','')}.csv")

ScheduleDates(date=2021-07-01, totalgames=13)
ScheduleDates(date=2021-07-02, totalgames=15)
ScheduleDates(date=2021-07-03, totalgames=15)
ScheduleDates(date=2021-07-04, totalgames=16)
ScheduleDates(date=2021-07-05, totalgames=11)
ScheduleDates(date=2021-07-06, totalgames=15)
ScheduleDates(date=2021-07-07, totalgames=17)
ScheduleDates(date=2021-07-08, totalgames=11)
ScheduleDates(date=2021-07-09, totalgames=15)
ScheduleDates(date=2021-07-10, totalgames=16)
ScheduleDates(date=2021-07-11, totalgames=15)
ScheduleDates(date=2021-07-13, totalgames=1)
ScheduleDates(date=2021-07-15, totalgames=1)
ScheduleDates(date=2021-07-16, totalgames=17)
ScheduleDates(date=2021-07-17, totalgames=16)
ScheduleDates(date=2021-07-18, totalgames=18)
ScheduleDates(date=2021-07-19, totalgames=13)
ScheduleDates(date=2021-07-20, totalgames=15)
ScheduleDates(date=2021-07-21, totalgames=15)
ScheduleDates(date=2021-07-22, totalgames=9)
ScheduleDates(date=2021-07-23, totalgames=15)
ScheduleDates(date=2021-07-24, totalg

In [11]:
# del mlb_jul

In [6]:
# mlb_aug = MLBIngestHistory(begin_date = f'{year}-08-01', end_date = f'{year}-08-31')
# mlb_aug.df_batting_player.to_csv(f"mlb_individual_batting_history_{mlb_aug.begin_date.replace('-','')}_{mlb_aug.end_date.replace('-','')}.csv")
# mlb_aug.df_batting_team.to_csv(f"mlb_team_batting_history_{mlb_aug.begin_date.replace('-','')}_{mlb_aug.end_date.replace('-','')}.csv")
# mlb_aug.df_pitching_player.to_csv(f"mlb_individual_pitching_history_{mlb_aug.begin_date.replace('-','')}_{mlb_aug.end_date.replace('-','')}.csv")
# mlb_aug.df_pitching_team.to_csv(f"mlb_team_pitching_history_{mlb_aug.begin_date.replace('-','')}_{mlb_aug.end_date.replace('-','')}.csv")
# mlb_aug.df_game_data.to_csv(f"mlb_games_history_{mlb_aug.begin_date.replace('-','')}_{mlb_aug.end_date.replace('-','')}.csv")
# del mlb_aug

ScheduleDates(date=2021-08-01, totalgames=15)
ScheduleDates(date=2021-08-02, totalgames=8)
ScheduleDates(date=2021-08-03, totalgames=15)
ScheduleDates(date=2021-08-04, totalgames=15)
ScheduleDates(date=2021-08-05, totalgames=12)
ScheduleDates(date=2021-08-06, totalgames=15)
ScheduleDates(date=2021-08-07, totalgames=16)
ScheduleDates(date=2021-08-08, totalgames=15)
ScheduleDates(date=2021-08-09, totalgames=5)
ScheduleDates(date=2021-08-10, totalgames=17)
ScheduleDates(date=2021-08-11, totalgames=16)
ScheduleDates(date=2021-08-12, totalgames=14)
ScheduleDates(date=2021-08-13, totalgames=14)
ScheduleDates(date=2021-08-14, totalgames=16)
ScheduleDates(date=2021-08-15, totalgames=15)
ScheduleDates(date=2021-08-16, totalgames=10)
ScheduleDates(date=2021-08-17, totalgames=16)
ScheduleDates(date=2021-08-18, totalgames=15)


Response ended prematurely


ScheduleDates(date=2021-08-19, totalgames=10)
ScheduleDates(date=2021-08-20, totalgames=15)
ScheduleDates(date=2021-08-21, totalgames=15)
ScheduleDates(date=2021-08-22, totalgames=15)
ScheduleDates(date=2021-08-23, totalgames=7)
ScheduleDates(date=2021-08-24, totalgames=15)
ScheduleDates(date=2021-08-25, totalgames=14)
ScheduleDates(date=2021-08-26, totalgames=12)
ScheduleDates(date=2021-08-27, totalgames=15)
ScheduleDates(date=2021-08-28, totalgames=15)
ScheduleDates(date=2021-08-29, totalgames=14)
ScheduleDates(date=2021-08-30, totalgames=11)
ScheduleDates(date=2021-08-31, totalgames=16)
failed: [632850, 632730]


In [6]:
# Ingest September of `year`, write each resulting frame to its own CSV, then
# drop the ingest object.
mlb_sep = MLBIngestHistory(begin_date = f'{year}-09-01', end_date = f'{year}-09-30')
sep_range = f"{mlb_sep.begin_date.replace('-','')}_{mlb_sep.end_date.replace('-','')}"
sep_outputs = (
    ("mlb_individual_batting_history", mlb_sep.df_batting_player),
    ("mlb_team_batting_history", mlb_sep.df_batting_team),
    ("mlb_individual_pitching_history", mlb_sep.df_pitching_player),
    ("mlb_team_pitching_history", mlb_sep.df_pitching_team),
    ("mlb_games_history", mlb_sep.df_game_data),
)
for sep_stem, sep_frame in sep_outputs:
    sep_frame.to_csv(f"{sep_stem}_{sep_range}.csv")
# Remove the loop temporaries too, so no frame outlives the ingest object.
del mlb_sep, sep_range, sep_outputs, sep_stem, sep_frame

ScheduleDates(date=2021-09-01, totalgames=16)
ScheduleDates(date=2021-09-02, totalgames=8)
ScheduleDates(date=2021-09-03, totalgames=15)
ScheduleDates(date=2021-09-04, totalgames=16)
ScheduleDates(date=2021-09-05, totalgames=15)
ScheduleDates(date=2021-09-06, totalgames=12)
ScheduleDates(date=2021-09-07, totalgames=15)
ScheduleDates(date=2021-09-08, totalgames=15)
ScheduleDates(date=2021-09-09, totalgames=8)
ScheduleDates(date=2021-09-10, totalgames=15)
ScheduleDates(date=2021-09-11, totalgames=16)
ScheduleDates(date=2021-09-12, totalgames=15)
ScheduleDates(date=2021-09-13, totalgames=8)
ScheduleDates(date=2021-09-14, totalgames=16)
ScheduleDates(date=2021-09-15, totalgames=15)
ScheduleDates(date=2021-09-16, totalgames=9)
ScheduleDates(date=2021-09-17, totalgames=15)
ScheduleDates(date=2021-09-18, totalgames=15)
ScheduleDates(date=2021-09-19, totalgames=15)
ScheduleDates(date=2021-09-20, totalgames=12)
ScheduleDates(date=2021-09-21, totalgames=15)
ScheduleDates(date=2021-09-22, totalga

In [7]:
# Ingest October 1 through November 1 of `year`, write each resulting frame to
# its own CSV, then drop the ingest object.
mlb_oct = MLBIngestHistory(begin_date = f'{year}-10-01', end_date = f'{year}-11-01')
oct_range = f"{mlb_oct.begin_date.replace('-','')}_{mlb_oct.end_date.replace('-','')}"
oct_outputs = (
    ("mlb_individual_batting_history", mlb_oct.df_batting_player),
    ("mlb_team_batting_history", mlb_oct.df_batting_team),
    ("mlb_individual_pitching_history", mlb_oct.df_pitching_player),
    ("mlb_team_pitching_history", mlb_oct.df_pitching_team),
    ("mlb_games_history", mlb_oct.df_game_data),
)
for oct_stem, oct_frame in oct_outputs:
    oct_frame.to_csv(f"{oct_stem}_{oct_range}.csv")

# Remove the loop temporaries too, so no frame outlives the ingest object.
del mlb_oct, oct_range, oct_outputs, oct_stem, oct_frame

ScheduleDates(date=2021-10-01, totalgames=15)
ScheduleDates(date=2021-10-02, totalgames=15)
ScheduleDates(date=2021-10-03, totalgames=15)
ScheduleDates(date=2021-10-05, totalgames=1)
ScheduleDates(date=2021-10-06, totalgames=1)
ScheduleDates(date=2021-10-07, totalgames=2)
ScheduleDates(date=2021-10-08, totalgames=4)
ScheduleDates(date=2021-10-09, totalgames=2)
ScheduleDates(date=2021-10-10, totalgames=2)
ScheduleDates(date=2021-10-11, totalgames=4)
ScheduleDates(date=2021-10-12, totalgames=3)
ScheduleDates(date=2021-10-14, totalgames=1)
ScheduleDates(date=2021-10-15, totalgames=1)
ScheduleDates(date=2021-10-16, totalgames=2)
ScheduleDates(date=2021-10-17, totalgames=1)
ScheduleDates(date=2021-10-18, totalgames=1)
ScheduleDates(date=2021-10-19, totalgames=2)
ScheduleDates(date=2021-10-20, totalgames=2)
ScheduleDates(date=2021-10-21, totalgames=1)
ScheduleDates(date=2021-10-22, totalgames=1)
ScheduleDates(date=2021-10-23, totalgames=1)
ScheduleDates(date=2021-10-26, totalgames=1)
Schedul

In [5]:
# Ingest the single day 2024-07-02 and write each resulting frame to its own
# CSV. `mlb` stays bound afterwards, as before.
mlb = MLBIngestHistory(begin_date = '2024-07-02', end_date = '2024-07-02')
day_range = f"{mlb.begin_date.replace('-','')}_{mlb.end_date.replace('-','')}"
day_outputs = (
    ("mlb_individual_batting_history", mlb.df_batting_player),
    ("mlb_team_batting_history", mlb.df_batting_team),
    ("mlb_individual_pitching_history", mlb.df_pitching_player),
    ("mlb_team_pitching_history", mlb.df_pitching_team),
    ("mlb_games_history", mlb.df_game_data),
)
for day_stem, day_frame in day_outputs:
    day_frame.to_csv(f"{day_stem}_{day_range}.csv")
# Only the loop temporaries are removed.
del day_range, day_outputs, day_stem, day_frame

ScheduleDates(date=2024-07-02, totalgames=15)
failed: []


In [6]:
# mlb = MLBIngestHistory(begin_date = '2024-03-25', end_date = '2024-04-30')
# mlb.df_batting_player.to_csv(f"mlb_individual_batting_history_{mlb.begin_date.replace('-','')}_{mlb.end_date.replace('-','')}.csv")
# mlb.df_batting_team.to_csv(f"mlb_team_batting_history_{mlb.begin_date.replace('-','')}_{mlb.end_date.replace('-','')}.csv")
# mlb.df_pitching_player.to_csv(f"mlb_individual_pitching_history_{mlb.begin_date.replace('-','')}_{mlb.end_date.replace('-','')}.csv")
# mlb.df_pitching_team.to_csv(f"mlb_team_pitching_history_{mlb.begin_date.replace('-','')}_{mlb.end_date.replace('-','')}.csv")
# mlb.df_game_data.to_csv(f"mlb_games_history_{mlb.begin_date.replace('-','')}_{mlb.end_date.replace('-','')}.csv")

ScheduleDates(date=2024-03-25, totalgames=14)
ScheduleDates(date=2024-03-26, totalgames=11)
ScheduleDates(date=2024-03-28, totalgames=15)
ScheduleDates(date=2024-03-29, totalgames=10)
ScheduleDates(date=2024-03-30, totalgames=15)
ScheduleDates(date=2024-03-31, totalgames=15)
ScheduleDates(date=2024-04-01, totalgames=14)
ScheduleDates(date=2024-04-02, totalgames=14)
ScheduleDates(date=2024-04-03, totalgames=15)
ScheduleDates(date=2024-04-04, totalgames=6)
ScheduleDates(date=2024-04-05, totalgames=13)
ScheduleDates(date=2024-04-06, totalgames=15)
ScheduleDates(date=2024-04-07, totalgames=15)
ScheduleDates(date=2024-04-08, totalgames=13)
ScheduleDates(date=2024-04-09, totalgames=15)
ScheduleDates(date=2024-04-10, totalgames=14)
ScheduleDates(date=2024-04-11, totalgames=7)
ScheduleDates(date=2024-04-12, totalgames=15)
ScheduleDates(date=2024-04-13, totalgames=17)
ScheduleDates(date=2024-04-14, totalgames=15)
ScheduleDates(date=2024-04-15, totalgames=15)
ScheduleDates(date=2024-04-16, total

In [7]:
# del mlb

In [8]:
# mlb2 = MLBIngestHistory(begin_date = '2024-05-01', end_date = '2024-05-31')
# mlb2.df_batting_player.to_csv(f"mlb_individual_batting_history_{mlb2.begin_date.replace('-','')}_{mlb2.end_date.replace('-','')}.csv")
# mlb2.df_batting_team.to_csv(f"mlb_team_batting_history_{mlb2.begin_date.replace('-','')}_{mlb2.end_date.replace('-','')}.csv")
# mlb2.df_pitching_player.to_csv(f"mlb_individual_pitching_history_{mlb2.begin_date.replace('-','')}_{mlb2.end_date.replace('-','')}.csv")
# mlb2.df_pitching_team.to_csv(f"mlb_team_pitching_history_{mlb2.begin_date.replace('-','')}_{mlb2.end_date.replace('-','')}.csv")
# mlb2.df_game_data.to_csv(f"mlb_games_history_{mlb2.begin_date.replace('-','')}_{mlb2.end_date.replace('-','')}.csv")

ScheduleDates(date=2024-05-01, totalgames=15)
ScheduleDates(date=2024-05-02, totalgames=6)
ScheduleDates(date=2024-05-03, totalgames=15)
ScheduleDates(date=2024-05-04, totalgames=15)
ScheduleDates(date=2024-05-05, totalgames=15)
ScheduleDates(date=2024-05-06, totalgames=10)
ScheduleDates(date=2024-05-07, totalgames=15)
ScheduleDates(date=2024-05-08, totalgames=16)
ScheduleDates(date=2024-05-09, totalgames=7)
ScheduleDates(date=2024-05-10, totalgames=15)
ScheduleDates(date=2024-05-11, totalgames=15)
ScheduleDates(date=2024-05-12, totalgames=15)
ScheduleDates(date=2024-05-13, totalgames=14)
ScheduleDates(date=2024-05-14, totalgames=16)
ScheduleDates(date=2024-05-15, totalgames=15)
ScheduleDates(date=2024-05-16, totalgames=6)
ScheduleDates(date=2024-05-17, totalgames=15)
ScheduleDates(date=2024-05-18, totalgames=15)
ScheduleDates(date=2024-05-19, totalgames=15)
ScheduleDates(date=2024-05-20, totalgames=12)
ScheduleDates(date=2024-05-21, totalgames=15)
ScheduleDates(date=2024-05-22, totalg

In [9]:
# del mlb2

In [10]:
# mlb3 = MLBIngestHistory(begin_date = '2024-06-01', end_date = '2024-06-28')
# mlb3.df_batting_player.to_csv(f"mlb_individual_batting_history_{mlb3.begin_date.replace('-','')}_{mlb3.end_date.replace('-','')}.csv")
# mlb3.df_batting_team.to_csv(f"mlb_team_batting_history_{mlb3.begin_date.replace('-','')}_{mlb3.end_date.replace('-','')}.csv")
# mlb3.df_pitching_player.to_csv(f"mlb_individual_pitching_history_{mlb3.begin_date.replace('-','')}_{mlb3.end_date.replace('-','')}.csv")
# mlb3.df_pitching_team.to_csv(f"mlb_team_pitching_history_{mlb3.begin_date.replace('-','')}_{mlb3.end_date.replace('-','')}.csv")
# mlb3.df_game_data.to_csv(f"mlb_games_history_{mlb3.begin_date.replace('-','')}_{mlb3.end_date.replace('-','')}.csv")

ScheduleDates(date=2024-06-01, totalgames=15)
ScheduleDates(date=2024-06-02, totalgames=15)
ScheduleDates(date=2024-06-03, totalgames=8)
ScheduleDates(date=2024-06-04, totalgames=15)
ScheduleDates(date=2024-06-05, totalgames=15)
ScheduleDates(date=2024-06-06, totalgames=10)
ScheduleDates(date=2024-06-07, totalgames=14)
ScheduleDates(date=2024-06-08, totalgames=15)
ScheduleDates(date=2024-06-09, totalgames=15)
ScheduleDates(date=2024-06-10, totalgames=7)
ScheduleDates(date=2024-06-11, totalgames=15)
ScheduleDates(date=2024-06-12, totalgames=15)
ScheduleDates(date=2024-06-13, totalgames=11)
ScheduleDates(date=2024-06-14, totalgames=15)
ScheduleDates(date=2024-06-15, totalgames=15)
ScheduleDates(date=2024-06-16, totalgames=16)
ScheduleDates(date=2024-06-17, totalgames=9)
ScheduleDates(date=2024-06-18, totalgames=15)
ScheduleDates(date=2024-06-19, totalgames=15)
ScheduleDates(date=2024-06-20, totalgames=9)
ScheduleDates(date=2024-06-21, totalgames=14)
ScheduleDates(date=2024-06-22, totalga

In [6]:
# Ingest the games scheduled for one date and save the per-game summary.
mlbs = MLBIngestScheduled(date = '2024-07-03')
# The ".csv" suffix matches every other export in this notebook; the file was
# previously written with no extension.
mlbs.df_game_data.to_csv(f"mlb_games_scheduled_{mlbs.date.replace('-','')}.csv")

failed: []
