In [None]:
# import sys
# !{sys.executable} -m pip install --upgrade pip
# !{sys.executable} -m pip install pandas pybaseball

In [1]:
import pandas as pd
from pybaseball import team_pitching, team_batting, pitching_stats, batting_stats, playerid_reverse_lookup

# Collecting baseball data

This model will be using the following statistics (for each of home and away teams) in each game:

* Team name
* Team xwOBA differential
* Lineup xwOBA (for the 9 hitters in the starting lineup)
* Team xFIP
* Starting pitcher xFIP
* Win percentage in 1-run games
* Season run differential

To predict the following outcomes:

* Runs scored
* ... and therefore, the game's winner

All numerical data will be from the past 3 seasons.

Data are provided by Retrosheet, Baseball Savant, and FanGraphs (using pybaseball).

### Team code information

Since each data source uses different team codes (for example, the Chicago White Sox are represented by "CHA" in Retrosheet, "CWS" in Baseball Savant, and "CHW" in FanGraphs), I created some helpful dictionaries to convert from one style to the other.

In [2]:
# convert team codes from retrosheet to baseball savant style

# Retrosheet code -> Baseball Savant code.
# One row per five-team group, in the same key order as before; the
# team-metrics cell iterates over this dict, so the order is kept stable.
rs_to_bs = {
    'HOU': 'HOU', 'TEX': 'TEX', 'ANA': 'LAA', 'OAK': 'OAK', 'SEA': 'SEA',
    'KCA': 'KC',  'DET': 'DET', 'CLE': 'CLE', 'MIN': 'MIN', 'CHA': 'CWS',
    'NYA': 'NYY', 'BOS': 'BOS', 'TBA': 'TB',  'TOR': 'TOR', 'BAL': 'BAL',
    'LAN': 'LAD', 'SDN': 'SD',  'COL': 'COL', 'ARI': 'AZ',  'SFN': 'SF',
    'MIL': 'MIL', 'PIT': 'PIT', 'SLN': 'STL', 'CIN': 'CIN', 'CHN': 'CHC',
    'NYN': 'NYM', 'ATL': 'ATL', 'PHI': 'PHI', 'WAS': 'WSH', 'MIA': 'MIA',
}

In [3]:
# convert team codes from fangraphs to baseball savant style

# FanGraphs code -> Baseball Savant code.
# One row per five-team group, in the same key order as before.
fg_to_bs = {
    'HOU': 'HOU', 'TEX': 'TEX', 'LAA': 'LAA', 'OAK': 'OAK', 'SEA': 'SEA',
    'KCR': 'KC',  'DET': 'DET', 'CLE': 'CLE', 'MIN': 'MIN', 'CHW': 'CWS',
    'NYY': 'NYY', 'BOS': 'BOS', 'TBR': 'TB',  'TOR': 'TOR', 'BAL': 'BAL',
    'LAD': 'LAD', 'SDP': 'SD',  'COL': 'COL', 'ARI': 'AZ',  'SFG': 'SF',
    'MIL': 'MIL', 'PIT': 'PIT', 'STL': 'STL', 'CIN': 'CIN', 'CHC': 'CHC',
    'NYM': 'NYM', 'ATL': 'ATL', 'PHI': 'PHI', 'WSN': 'WSH', 'MIA': 'MIA',
}

In [4]:
# convert team codes from retrosheet to fangraphs style

# Retrosheet code -> FanGraphs code.
# One row per five-team group, in the same key order as before.
rs_to_fg = {
    'HOU': 'HOU', 'TEX': 'TEX', 'ANA': 'LAA', 'OAK': 'OAK', 'SEA': 'SEA',
    'KCA': 'KCR', 'DET': 'DET', 'CLE': 'CLE', 'MIN': 'MIN', 'CHA': 'CHW',
    'NYA': 'NYY', 'BOS': 'BOS', 'TBA': 'TBR', 'TOR': 'TOR', 'BAL': 'BAL',
    'LAN': 'LAD', 'SDN': 'SDP', 'COL': 'COL', 'ARI': 'ARI', 'SFN': 'SFG',
    'MIL': 'MIL', 'PIT': 'PIT', 'SLN': 'STL', 'CIN': 'CIN', 'CHN': 'CHC',
    'NYN': 'NYM', 'ATL': 'ATL', 'PHI': 'PHI', 'WAS': 'WSN', 'MIA': 'MIA',
}

### Player code information

Because player names can be entered differently in different data sources (especially regarding the use of accent markings, suffixes, abbreviated names, and hyphenated names), I created a csv file containing each player's name and IDs for each data source.

In [15]:
def fill_player_info(df, data, id_col='player_id', key_type='mlbam'):
    """Look up every player ID in ``df`` and append the result to ``data``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding one player ID per row in column ``id_col``.
    data : dict of list
        Column-oriented accumulator with the keys ``'last_name'``,
        ``'first_name'``, ``'mlbam_id'``, ``'retrosheet_id'`` and
        ``'fangraphs_id'``. It is extended in place.
    id_col : str
        Name of the ID column in ``df``.
    key_type : str
        ID system of ``id_col``, forwarded to ``playerid_reverse_lookup``.

    Returns
    -------
    None
        IDs whose lookup fails are printed and skipped.
    """
    for player_id in df[id_col]:
        try:
            player = playerid_reverse_lookup([player_id], key_type=key_type).iloc[0]

            # Read every field before appending anything, so a failed lookup
            # can never leave the column lists with different lengths.
            record = {
                'last_name': player['name_last'],
                'first_name': player['name_first'],
                'mlbam_id': player['key_mlbam'],
                'retrosheet_id': player['key_retro'],
                'fangraphs_id': player['key_fangraphs'],
            }
        except Exception:
            # Best effort: report the ID that could not be resolved, move on.
            print(player_id)
            continue

        # Write to the accumulator that was passed in (the previous version
        # appended to a notebook-level global and ignored this argument).
        for column, value in record.items():
            data[column].append(value)

In [None]:
### DON'T RUN AGAIN!!!

# Column-oriented accumulator: one (initially empty) list per output column.
player_info = {
    column: []
    for column in ('last_name', 'first_name', 'mlbam_id', 'retrosheet_id', 'fangraphs_id')
}


# First source: IDs are read from the default 'player_id' column as MLBAM keys.
player_xwoba_df = pd.read_csv('./data/baseball/stats/player_xwoba.csv')
fill_player_info(df=player_xwoba_df, data=player_info)

# Second source: IDs are read from 'IDfg' and looked up as FanGraphs keys.
player_xfip_df = pd.read_csv('./data/baseball/stats/player_xfip.csv')
fill_player_info(df=player_xfip_df, data=player_info, id_col='IDfg', key_type='fangraphs')

# Persist the combined lookup table.
player_info_df = pd.DataFrame(player_info)
player_info_df.to_csv('./data/baseball/player_info.csv', index=False)

### Rookie statistics

As shown below, about 30% of all games from 2017 to 2023 have rookies playing in them, which is too many games to discard just because the data are incomplete. Therefore, players who do not have any records for the past 3 seasons will be assigned a "rookie average" value. Rookie average stats are calculated here and saved to a CSV file for future access.

For the purposes of this research, a rookie is defined as any player who has never played prior to a certain season. This is different from MLB's definition of a rookie, outlined here: https://www.mlb.com/glossary/rules/rookie-eligibility

In [50]:
# NOTE(review): games_with_rookies and total_games are not assigned in any cell
# visible in this notebook — presumably the cell that counted them was removed.
# Confirm and restore that computation so this cell runs on a fresh kernel.
print(f'Games with rookies playing: {games_with_rookies}')
print(f'Total games: {total_games}')
print(f'Percentage: {games_with_rookies / total_games * 100}%')

Games with rookies playing: 4619
Total games: 15477
Percentage: 29.844285068165664%


In [20]:
def avg_sample_size(sizes):
    """Return the arithmetic mean of ``sizes``, or 0 when ``sizes`` is empty."""
    count = len(sizes)
    running = 0

    for size in sizes:
        running = running + size

    return running / count if count else 0

In [21]:
def generate_rookie_stats(data, df, stat, sample_size, year_col='Season', name_col='Name', start_year=2015):
    """Append one "rookie average" record per season to ``data``.

    A row counts as a rookie row when ``df`` has no earlier season for the same
    ``name_col`` value. Seasons equal to ``start_year`` are skipped, because
    every player would look like a rookie in the first season of the data.

    For every other season present in ``df`` (ascending), one record is
    appended to ``data``:

    * ``'stat_type'``       -- ``stat``
    * ``'year'``            -- the season
    * ``'value'``           -- ``wavg_sample_size`` of that season's rookie
      ``stat`` values, weighted by ``sample_size``
    * ``'avg_sample_size'`` -- ``avg_sample_size`` of those rookies' sizes

    Changes from the previous version: each season now uses only that season's
    rookies (the per-season reset wrote to unused names, so values accumulated
    across seasons); every record, including the last season's, goes to
    ``data`` rather than a notebook-level global; no all-zero placeholder
    record is emitted for ``start_year``.

    Parameters
    ----------
    data : dict of list
        Accumulator with the four keys listed above; extended in place.
    df : pandas.DataFrame
        Player-season table containing ``stat``, ``sample_size``, ``year_col``
        and ``name_col`` columns.
    stat, sample_size : str
        Column names of the statistic and of its sample size.
    year_col, name_col : str
        Column names of the season and of the player name.
    start_year : int
        First season of the data set; never reported.
    """
    df = df.sort_values(year_col)

    # Earliest season per player name, broadcast back onto every row.
    # This replaces a full-frame filter that was executed once per row.
    debut_year = df.groupby(name_col)[year_col].transform('min')
    rookie_rows = df.loc[df[year_col] == debut_year]

    # df is sorted by season, so unique() yields the seasons in ascending order.
    for season in df[year_col].unique().tolist():
        if season == start_year:
            continue

        season_rookies = rookie_rows.loc[rookie_rows[year_col] == season]
        stats = season_rookies[stat].tolist()
        sizes = season_rookies[sample_size].tolist()

        data['stat_type'].append(stat)
        data['year'].append(season)
        data['value'].append(wavg_sample_size(stats, sizes))
        data['avg_sample_size'].append(avg_sample_size(sizes))

In [25]:
### DON'T RUN AGAIN!!!

# Accumulator handed to generate_rookie_stats() as `data`.
# generate_rookie_stats() calls wavg_sample_size(), which is defined further
# down ("Assigning weights to past data"); that cell must have been run first.
rookie_stats = {column: [] for column in ('stat_type', 'year', 'value', 'avg_sample_size')}


# This table keeps seasons in 'year' and names in 'player_name', hence the overrides.
player_xwoba_df = pd.read_csv('./data/baseball/stats/player_xwoba.csv')
generate_rookie_stats(
    data=rookie_stats,
    df=player_xwoba_df,
    stat='xwoba',
    sample_size='abs',
    year_col='year',
    name_col='player_name',
)

# This table uses the function's default 'Season' / 'Name' columns.
player_xfip_df = pd.read_csv('./data/baseball/stats/player_xfip.csv')
generate_rookie_stats(data=rookie_stats, df=player_xfip_df, stat='xFIP', sample_size='IP')

rookie_stats_df = pd.DataFrame(rookie_stats)
rookie_stats_df.to_csv('./data/baseball/stats/rookie_stats.csv', index=False)

In [25]:
### DON'T RUN AGAIN!!!

# Accumulator for the traditional-stat (OPS / ERA) rookie averages.
# generate_rookie_stats() calls wavg_sample_size(), which is defined further
# down ("Assigning weights to past data"); that cell must have been run first.
rookie_trad_stats = {column: [] for column in ('stat_type', 'year', 'value', 'avg_sample_size')}


# Both tables use the function's default 'Season' / 'Name' columns.
player_trad_batting_df = pd.read_csv('./data/baseball/stats/player_trad_batting.csv')
generate_rookie_stats(data=rookie_trad_stats, df=player_trad_batting_df, stat='OPS', sample_size='PA')

player_trad_pitching_df = pd.read_csv('./data/baseball/stats/player_trad_pitching.csv')
generate_rookie_stats(data=rookie_trad_stats, df=player_trad_pitching_df, stat='ERA', sample_size='IP')

rookie_trad_stats_df = pd.DataFrame(rookie_trad_stats)
rookie_trad_stats_df.to_csv('./data/baseball/stats/rookie_trad_stats.csv', index=False)

## Compiling team metrics

The following code will calculate the win percentage in 1-run games and the season run differential after traversing through game logs from Retrosheet. The compiled data is then saved to a csv file for later use.

In [6]:
### DON'T RUN AGAIN!!!

# One list per output column; one entry is appended per (season, team).
team_metrics = {
    'team': [],
    'year': [],
    'close_games': [],
    'close_win_pct': [],
    'runs_scored': [],
    'runs_allowed': [],
    'run_diff': []
}


for year in range(2015, 2024): # collect data for each individual season from 2015-2023
    gamelog_df = pd.read_csv(f'./data/baseball/gamelogs/gl{year}.txt')

    for team in rs_to_bs:
        # Select this team's games with boolean masks instead of walking the
        # whole game log row by row once per team.
        away_games = gamelog_df.loc[gamelog_df['away_team'] == team]
        home_games = gamelog_df.loc[gamelog_df['home_team'] == team]

        # Runs scored / allowed from this team's point of view, one entry per game.
        rs = pd.concat([away_games['away_score'], home_games['home_score']])
        ra = pd.concat([away_games['home_score'], home_games['away_score']])

        total_rs = rs.sum(skipna=False)
        total_ra = ra.sum(skipna=False)

        # A close game is decided by at most one run; ties count as neither
        # a win nor a loss.
        margin = rs.to_numpy() - ra.to_numpy()
        close_wins = int(((margin > 0) & (margin <= 1)).sum())
        close_losses = int(((margin < 0) & (margin >= -1)).sum())
        close_games = close_wins + close_losses

        # Guard the ratio: a team with no close games in this log gets NaN
        # instead of raising ZeroDivisionError.
        close_win_pct = close_wins / close_games if close_games else float('nan')

        team_metrics['team'].append(rs_to_bs[team])
        team_metrics['year'].append(year)
        team_metrics['close_games'].append(close_games)
        team_metrics['close_win_pct'].append(close_win_pct)
        team_metrics['runs_scored'].append(total_rs)
        team_metrics['runs_allowed'].append(total_ra)
        team_metrics['run_diff'].append(total_rs - total_ra)

In [7]:
# Turn the column lists collected in team_metrics into a table and save it
# (index=False keeps the row index out of the CSV).
team_metrics_df = pd.DataFrame(team_metrics)
team_metrics_df.to_csv('./data/baseball/stats/team_metrics.csv', index=False)

## Compiling batting data

The following code will calculate the team xwOBA differential after traversing through the data for team xwOBA and team xwOBA against from Baseball Savant. The compiled data is then saved to a csv file for later use.

In [21]:
### DON'T RUN AGAIN!!!

team_xwoba_df = pd.read_csv('./data/baseball/stats/team_xwoba.csv')
team_xwoba_against_df = pd.read_csv('./data/baseball/stats/team_xwoba_against.csv')


# Pair each team-season's xwOBA with its xwOBA against. An inner merge on
# (team, season) replaces the nested row-by-row scan of both tables; in both
# files the team is stored in the 'player_name' column.
matched_xwoba = team_xwoba_df[['player_name', 'year', 'xwoba']].merge(
    team_xwoba_against_df[['player_name', 'year', 'xwoba']],
    on=['player_name', 'year'],
    how='inner',
    suffixes=('', '_against'),
)

# Same column-list layout as before, so the saving cell works unchanged.
team_xwoba_diff = {
    'team': matched_xwoba['player_name'].tolist(),
    'year': matched_xwoba['year'].tolist(),
    'xwoba_diff': (matched_xwoba['xwoba'] - matched_xwoba['xwoba_against']).tolist(),
}

In [24]:
# Turn the column lists collected in team_xwoba_diff into a table and save it
# (index=False keeps the row index out of the CSV).
team_xwoba_diff_df = pd.DataFrame(team_xwoba_diff)
team_xwoba_diff_df.to_csv('./data/baseball/stats/team_xwoba_diff.csv', index=False)

## Compiling pitching data

The following code will retrieve the data for team and player xFIP from FanGraphs, using pybaseball. The compiled data is then saved to a csv file for later use.

https://github.com/jldbc/pybaseball

In [12]:
### DON'T RUN AGAIN!!!

# NOTE(review): both requests end at season 2024, while the requests under
# "Bonus: using traditional stats" end at 2023 and pass ind=1 explicitly —
# confirm the intended season range and whether ind=1 is wanted here too.

# Team-level pitching table from pybaseball, written to CSV unchanged.
team_xfip_df = team_pitching(2015, 2024)
team_xfip_df.to_csv('./data/baseball/stats/team_xfip.csv', index=False)

# Player-level pitching table (qual=20), written to CSV unchanged.
player_xfip_df = pitching_stats(2015, 2024, qual=20)
player_xfip_df.to_csv('./data/baseball/stats/player_xfip.csv', index=False)

## Compiling all of the data

The following code will compile all of the data into one big csv file containing information about each game from 2017 to 2023.

In [45]:
# Load the intermediate CSVs for the xwOBA / xFIP feature set.
team_xwoba_diff_df = pd.read_csv('./data/baseball/stats/team_xwoba_diff.csv')
player_xwoba_df = pd.read_csv('./data/baseball/stats/player_xwoba.csv')
team_xfip_df = pd.read_csv('./data/baseball/stats/team_xfip.csv')
player_xfip_df = pd.read_csv('./data/baseball/stats/player_xfip.csv')
team_metrics_df = pd.read_csv('./data/baseball/stats/team_metrics.csv')
# Name / ID cross-reference used to translate Retrosheet player IDs.
player_info_df = pd.read_csv('./data/baseball/player_info.csv')
# Read as a global by get_lineup_stat() and get_starter_stat().
rookie_stats_df = pd.read_csv('./data/baseball/stats/rookie_stats.csv')

In [47]:
# Load the intermediate CSVs for the traditional-stat (OPS / ERA) feature set.
# team_trad.csv and the two player_trad_*.csv files are written by the cells
# under "Bonus: using traditional stats" near the end of this notebook.
team_trad_df = pd.read_csv('./data/baseball/stats/team_trad.csv')
player_trad_batting_df = pd.read_csv('./data/baseball/stats/player_trad_batting.csv')
player_trad_pitching_df = pd.read_csv('./data/baseball/stats/player_trad_pitching.csv')
# NOTE: this rebinds rookie_stats_df (loaded from rookie_stats.csv in the
# previous cell), so get_lineup_stat() / get_starter_stat() will see the
# traditional-stat rookie averages from here on.
rookie_stats_df = pd.read_csv('./data/baseball/stats/rookie_trad_stats.csv')

#### Assigning weights to past data

Not all data should be counted the same; stats from last season should count more than stats from three years ago, and stats from an injury-shortened season shouldn't count as much as a fully healthy season.

In [22]:
# assign weights based on recency (60%, 25%, 15%)
def wavg_recency(stats):
    """Blend three season values given oldest-first: 15% / 25% / 60%."""
    oldest, previous, latest = stats[0], stats[1], stats[2]
    return latest * 0.6 + previous * 0.25 + oldest * 0.15

In [23]:
# assign weights based on playing time (sample size for statistic)
def wavg_sample_size(stats, sizes):
    """Average ``stats`` weighted by the matching entries of ``sizes``.

    Parameters
    ----------
    stats : sequence of numbers
        Values to average.
    sizes : sequence of numbers
        Sample size behind each value; ``sizes[i]`` weights ``stats[i]``.

    Returns
    -------
    number
        ``sum(stats[i] * sizes[i] / sum(sizes))``. Returns 0 when ``stats`` is
        empty. When the sizes add up to 0 there is nothing to weight by, so
        the plain (unweighted) mean of ``stats`` is returned instead of
        dividing by zero.
    """
    total = 0

    for size in sizes:
        total += size

    if total == 0:
        if len(stats) == 0:
            return 0

        # No sample size to weight by: fall back to an unweighted mean.
        plain_sum = 0

        for value in stats:
            plain_sum += value

        return plain_sum / len(stats)

    avg_stat = 0

    for i, value in enumerate(stats):
        avg_stat += value * (sizes[i] / total)

    return avg_stat

In [24]:
# assign weights based on recency and playing time
def wavg_combined(stats, sizes):
    """Mix the recency-weighted and size-weighted averages, 2 parts to 1."""
    recency_weighted = wavg_recency(stats)
    size_weighted = wavg_sample_size(stats, sizes)

    # The recency average counts twice, the playing-time average once.
    return (2 * recency_weighted + size_weighted) / 3

#### Helper functions for compiling data

In [42]:
def get_team_stat(year, team, df, stat, team_col='team', year_col='year'):
    """Recency-weighted value of ``stat`` for ``team`` over ``year - 2`` .. ``year``.

    Takes the first matching row of ``df`` for each of the three seasons
    (oldest first) and passes the three values to ``wavg_recency``. Raises
    ``IndexError`` when a season has no row for the team.
    """
    team_rows = df[team_col] == team

    season_values = [
        df.loc[team_rows & (df[year_col] == season), stat].iloc[0]
        for season in (year - 2, year - 1, year)
    ]

    return wavg_recency(season_values)

In [43]:
def get_lineup_stat(year, lineup, df, stat, sample_size, id_type='mlbam_id', id_col='player_id', year_col='year'):
    """Average a per-player weighted stat over a starting lineup.

    Reads the notebook globals ``rookie_stats_df`` (rookie average and average
    sample size for ``year`` / ``stat``) and ``player_info_df`` (Retrosheet ID
    to ``id_type`` translation).

    For each Retrosheet ID in ``lineup``:

    * no entry in ``player_info_df``: the rookie average is used directly;
    * otherwise the seasons ``year - 2``, ``year - 1`` and ``year`` are looked
      up in ``df``. A season without a row gets the rookie average and rookie
      sample size; when a later season does have a row, its values overwrite
      those earlier placeholders. The three (value, size) pairs then go
      through ``wavg_combined``.

    Returns the sum of the per-player values divided by 9.
    """
    rookie_row = rookie_stats_df.loc[
        (rookie_stats_df['year'] == year) & (rookie_stats_df['stat_type'] == stat)
    ]
    rookie_stat = rookie_row['value'].iloc[0]
    rookie_sample_size = rookie_row['avg_sample_size'].iloc[0]

    lineup_total = 0

    for hitter in lineup:
        id_matches = player_info_df.loc[player_info_df['retrosheet_id'] == hitter, id_type]

        if id_matches.empty: # player doesn't have past records (probably a rookie)
            lineup_total += rookie_stat
            continue

        player_rows = df.loc[df[id_col] == id_matches.iloc[0]]

        stats, sizes, placeholder = [], [], []

        for season in (year - 2, year - 1, year):
            season_row = player_rows.loc[player_rows[year_col] == season, [stat, sample_size]]

            if season_row.empty:
                stats.append(rookie_stat)
                sizes.append(rookie_sample_size)
                placeholder.append(True)
                continue

            stats.append(season_row[stat].iloc[0])
            sizes.append(season_row[sample_size].iloc[0])

            # Replace rookie placeholders from earlier seasons with the values
            # just read for this season.
            for slot in (0, 1):
                if len(placeholder) > slot and placeholder[slot]:
                    stats[slot] = stats[-1]
                    sizes[slot] = sizes[-1]

            placeholder.append(False)

        lineup_total += wavg_combined(stats, sizes)

    return lineup_total / 9

In [44]:
def get_starter_stat(year, pitcher, df, stat, sample_size, id_type='fangraphs_id', id_col='IDfg', year_col='Season'):
    """Weighted stat for one starting pitcher over ``year - 2`` .. ``year``.

    Reads the notebook globals ``player_info_df`` (Retrosheet ID to ``id_type``
    translation) and ``rookie_stats_df`` (rookie average and average sample
    size for ``year`` / ``stat``).

    When ``pitcher`` has no entry in ``player_info_df`` the rookie average is
    returned. Otherwise each of the three seasons is looked up in ``df``; a
    season without a row gets the rookie average and rookie sample size, and
    when a later season does have a row its values overwrite those earlier
    placeholders. The three (value, size) pairs are combined by
    ``wavg_combined``.
    """
    id_matches = player_info_df.loc[player_info_df['retrosheet_id'] == pitcher, id_type]

    rookie_row = rookie_stats_df.loc[
        (rookie_stats_df['year'] == year) & (rookie_stats_df['stat_type'] == stat)
    ]
    rookie_stat = rookie_row['value'].iloc[0]
    rookie_sample_size = rookie_row['avg_sample_size'].iloc[0]

    if id_matches.empty: # player doesn't have past records (probably a rookie)
        return rookie_stat

    player_rows = df.loc[df[id_col] == id_matches.iloc[0]]

    stats, sizes, placeholder = [], [], []

    for season in (year - 2, year - 1, year):
        season_row = player_rows.loc[player_rows[year_col] == season, [stat, sample_size]]

        if season_row.empty:
            stats.append(rookie_stat)
            sizes.append(rookie_sample_size)
            placeholder.append(True)
            continue

        stats.append(season_row[stat].iloc[0])
        sizes.append(season_row[sample_size].iloc[0])

        # Replace rookie placeholders from earlier seasons with the values
        # just read for this season.
        for slot in (0, 1):
            if len(placeholder) > slot and placeholder[slot]:
                stats[slot] = stats[-1]
                sizes[slot] = sizes[-1]

        placeholder.append(False)

    return wavg_combined(stats, sizes)

In [50]:
# enter individual game data into the dictionary
def write_game_data(data, row, year):
    """Append one game's features to ``data``: away side first, then home.

    For each side the following columns receive one value, in this order:
    ``<side>_team``, ``<side>_score``, ``<side>_team_ops``,
    ``<side>_lineup_ops``, ``<side>_team_era``, ``<side>_starter_era``,
    ``<side>_close_win_pct``, ``<side>_run_diff``.

    ``row`` is one game-log record with Retrosheet team codes and player IDs;
    team codes are translated through ``rs_to_bs``. The xwOBA / xFIP variants
    of the features are kept below as commented-out alternatives.
    """
    for side in ('away', 'home'):
        data[f'{side}_team'].append(rs_to_bs[row[f'{side}_team']])
        data[f'{side}_score'].append(row[f'{side}_score'])

        # data[f'{side}_team_xwoba_diff'].append(get_team_stat(year, rs_to_bs[row[f'{side}_team']], team_xwoba_diff_df, 'xwoba_diff'))
        # data[f'{side}_lineup_xwoba'].append(get_lineup_stat(year, [row[f'{side}_{slot}_id'] for slot in range(1, 10)],
        #                                                     player_xwoba_df, 'xwoba', 'abs'))
        data[f'{side}_team_ops'].append(get_team_stat(year, rs_to_bs[row[f'{side}_team']], team_trad_df, 'ops'))
        data[f'{side}_lineup_ops'].append(get_lineup_stat(
            year,
            [row[f'{side}_{slot}_id'] for slot in range(1, 10)],  # batting order 1-9
            player_trad_batting_df, 'OPS', 'PA',
            id_type='fangraphs_id', id_col='IDfg', year_col='Season',
        ))

        # data[f'{side}_team_xfip'].append(get_team_stat(year, rs_to_fg[row[f'{side}_team']], team_xfip_df, 'xFIP', team_col='Team', year_col='Season'))
        # data[f'{side}_starter_xfip'].append(get_starter_stat(year, row[f'{side}_starter_id'], player_xfip_df, 'xFIP', 'IP'))
        data[f'{side}_team_era'].append(get_team_stat(year, rs_to_bs[row[f'{side}_team']], team_trad_df, 'era'))
        data[f'{side}_starter_era'].append(get_starter_stat(year, row[f'{side}_starter_id'], player_trad_pitching_df, 'ERA', 'IP'))

        data[f'{side}_close_win_pct'].append(get_team_stat(year, rs_to_bs[row[f'{side}_team']], team_metrics_df, 'close_win_pct'))
        data[f'{side}_run_diff'].append(get_team_stat(year, rs_to_bs[row[f'{side}_team']], team_metrics_df, 'run_diff'))

#### Compile all game data

Using gamelogs from 2017 to 2023, compile a database of game data with appropriate columns that can be used to make predictions.

In [51]:
# One empty list per output column: all away-team columns first, then the same
# set for the home team. write_game_data() appends one value to every list per
# game. The commented-out entries are the xwOBA / xFIP variants.
train_data = {
    column: []
    for side in ('away', 'home')
    for column in (
        f'{side}_team',
        f'{side}_score',

        # f'{side}_team_xwoba_diff',
        # f'{side}_lineup_xwoba',
        f'{side}_team_ops',
        f'{side}_lineup_ops',

        # f'{side}_team_xfip',
        # f'{side}_starter_xfip',
        f'{side}_team_era',
        f'{side}_starter_era',

        f'{side}_close_win_pct',
        f'{side}_run_diff',
    )
}

In [52]:
# Walk every game in the 2017-2023 game logs (range end is exclusive) and let
# write_game_data() append that game's values to the lists in train_data.
for year in range(2017, 2024):
    gamelog_df = pd.read_csv(f'./data/baseball/gamelogs/gl{year}.txt')
    
    for index, row in gamelog_df.iterrows():
        write_game_data(train_data, row, year)

In [54]:
# Suffix of the output file name (game_data_v<version>.csv).
version = 7

# Turn the column lists in train_data into a table and save it without the row index.
game_data_df = pd.DataFrame(train_data)
game_data_df.to_csv(f'./data/baseball/training/game_data_v{version}.csv', index=False)

## Bonus: using traditional stats

In [5]:
# Column-oriented accumulator for the team-level traditional stats.
team_stats = {column: [] for column in ('team', 'year', 'ops', 'era')}

In [6]:
team_pitching_df = team_pitching(2015, 2023, ind=1)
team_batting_df = team_batting(2015, 2023, ind=1)

# Attach each team-season's OPS to its ERA by joining on (Season, Team).
# The previous version paired the two tables by row position after sorting
# each one separately, which silently mis-assigns OPS whenever the tables do
# not contain exactly the same team-seasons. A team-season with no batting row
# now gets NaN; duplicate (Season, Team) rows raise a MergeError.
team_trad_rows = team_pitching_df[['Season', 'Team', 'ERA']].merge(
    team_batting_df[['Season', 'Team', 'OPS']],
    on=['Season', 'Team'],
    how='left',
    validate='one_to_one',
).sort_values(['Season', 'Team'])

for index, row in team_trad_rows.iterrows():
    # FanGraphs team code -> Baseball Savant team code.
    team_stats['team'].append(fg_to_bs[row['Team']])
    team_stats['year'].append(row['Season'])
    team_stats['ops'].append(row['OPS'])
    team_stats['era'].append(row['ERA'])

In [7]:
# Turn the column lists in team_stats into a table and save it without the row
# index; this file is read back as team_trad_df under "Compiling all of the data".
team_stats_df = pd.DataFrame(team_stats)
team_stats_df.to_csv('./data/baseball/stats/team_trad.csv', index=False)

In [34]:
# Player batting table from pybaseball (2015-2023, qual=50, ind=1), reduced to
# the five columns the lineup features and rookie averages read.
batting_stats_df = batting_stats(2015, 2023, qual=50, ind=1)[['IDfg', 'Season', 'Name', 'OPS', 'PA']]
batting_stats_df.to_csv('./data/baseball/stats/player_trad_batting.csv', index=False)

In [35]:
# Player pitching table from pybaseball (2015-2023, qual=20, ind=1), reduced to
# the five columns the starter features and rookie averages read.
pitching_stats_df = pitching_stats(2015, 2023, qual=20, ind=1)[['IDfg', 'Season', 'Name', 'ERA', 'IP']]
pitching_stats_df.to_csv('./data/baseball/stats/player_trad_pitching.csv', index=False)