In [1]:
import datetime
import functools
import io
import os
import re
import unicodedata
import urllib
import urllib.request
from datetime import date

import numpy as np
import pandas as pd
import requests
import unidecode
from bs4 import BeautifulSoup
# Silences pandas' SettingWithCopyWarning for the whole notebook.
pd.options.mode.chained_assignment = None  # default='warn'


# Root folder of the MLB data tree. Set the MLB_DATA_DIR environment variable to point the notebook at a
# different location (another machine/user); when it is unset the original local path is used unchanged.
baseball_path = os.environ.get("MLB_DATA_DIR", r"C:\Users\james\OneDrive\Documents\MLB\Data")

In [2]:
# Today's date twice: dashed (YYYY-MM-DD) and compact (YYYYMMDD, used in output file names)
todaysdate_dash = date.today().isoformat()
todaysdate = "".join(todaysdate_dash.split("-"))
todaysdate

'20221014'

In [3]:
# Lookup table of team names, team codes, and the team number Fangraphs uses in its URLs
team_map_file = os.path.join(baseball_path, "Utilities", "Team Map.csv")
team_map = pd.read_csv(team_map_file)

In [4]:
def f_remove_accents(old):
    """Return ``old`` with accented letters replaced by their unaccented base letters.

    The string is canonically decomposed (NFD) so every accented letter becomes a base letter followed by
    combining marks; the combining marks are dropped and the rest is recomposed (NFC). Unlike a fixed list of
    lowercase vowels, this also covers capitals and letters such as c-cedilla or y-acute, which would otherwise
    survive here and then be deleted outright by a later "ASCII letters only" filter.

    Parameters
    ----------
    old : str
        Text to clean (for example a player name).

    Returns
    -------
    str
        The same text without diacritics. Characters that have no decomposition are returned unchanged.
    """
    decomposed = unicodedata.normalize("NFD", old)
    without_marks = "".join(ch for ch in decomposed if not unicodedata.combining(ch))
    return unicodedata.normalize("NFC", without_marks)

# Clean names for consistency
def name_clean(df):
    """Normalize player names in ``df`` and derive the name-based merge keys.

    ``df`` is modified in place and also returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``name_first`` and ``name_last`` plus a display name in ``Name`` (or ``Name_x``, which is
        renamed to ``Name``; a ``Name_y`` column is dropped when present).

    Returns
    -------
    pandas.DataFrame
        The same frame with these columns added or overwritten:
        ``Name_Chadwick`` (first + " " + last with inner spaces removed), ``Name`` (``Name_Chadwick`` when
        available), ``Name2`` (unaccented, punctuation-free, generational suffix removed), ``First``/``Last``
        (lower-cased tokens of ``Name2``) and ``First2``/``Last5`` (their 2- and 5-character prefixes).
    """
    # Remove inner spaces so that "first last" always splits into exactly two tokens further down
    df['name_first'] = df['name_first'].str.replace(" ", "", regex=False)
    df['name_last'] = df['name_last'].str.replace(" ", "", regex=False)

    df['Name_Chadwick'] = df['name_first'] + " " + df['name_last']

    # Name_x/Name_y only exist when the frame comes out of a merge; both operations are no-ops otherwise
    df.rename(columns={'Name_x': 'Name'}, inplace=True)
    df.drop(columns=['Name_y'], errors='ignore', inplace=True)

    df['Name'] = np.where(df['Name_Chadwick'].isna(), df['Name'], df['Name_Chadwick'])
    # Missing names are passed through as NaN instead of raising inside the accent helper
    df['Name2'] = df['Name'].map(f_remove_accents, na_action='ignore')  # remove accents
    # regex=True must be explicit: when the pattern is treated as a literal nothing is stripped
    df['Name2'] = df['Name2'].str.replace('[^a-zA-Z0-9 ]', '', regex=True)

    df['Name2'] = np.where(df['Name2'] == "Josh HSmith", "Josh Smith", df['Name2'])

    # Strip a generational suffix only at the END of the name. By this point the suffix is usually glued to
    # the last name ("WittJr"); anchoring keeps names that merely contain "Sr"/"II" elsewhere intact.
    df['Name2'] = df['Name2'].str.replace(r'\s*(?:III|II|Sr|Jr)\s*$', '', regex=True)

    # Separate first and last names
    name_tokens = df['Name2'].str.split(" ")
    df['First'] = name_tokens.str[0].str.lower()
    df['Last'] = name_tokens.str[1].str.lower()

    # Take first 2 letters of first name and first 5 of last as a sort of merge code when there's no actual merge code
    df['First2'] = df['First'].str.slice(0, 2)
    df['Last5'] = df['Last'].str.slice(0, 5)

    return df

In [5]:
# This reads in Chadwick register with player codes.
# NOTE(review): confirm this URL is still served by the register repository before relying on it.
# Only the relevant codes/names are parsed (usecols) instead of loading every register column and discarding most.
chadwick_cols = ['key_mlbam', 'key_bbref_minors', 'key_bbref', 'key_fangraphs', 'name_last', 'name_first', 'name_nick']
chadwick = pd.read_csv(
    "https://raw.githubusercontent.com/chadwickbureau/register/master/data/people.csv",
    usecols=chadwick_cols,
    low_memory=False,
)[chadwick_cols]  # usecols keeps file order; re-select to get the column order listed above

# Keep only those with MLBAM ID (done first so the key columns are only derived for rows that are kept)
chadwick = chadwick.dropna(subset=['key_mlbam'])

# Take first 2 letters of first name and first 5 of last as a sort of merge code when there's no actual merge code
chadwick['First2'] = chadwick['name_first'].str.slice(0, 2).str.lower()
chadwick['Last5'] = chadwick['name_last'].str.slice(0, 5).str.lower()
# Hard-coded correction of the "rodra" prefix to "rodri"
chadwick['Last5'] = chadwick['Last5'].replace("rodra", "rodri")

In [6]:
# Edit missing fangraphs IDs (if all else fails)
# Maps key_mlbam -> the key_fangraphs value to force for that player
fangraphs_id_overrides = {
    668881: 22182,  # Hunter Greene
    665161: 21636,  # Jeremy Pena
    677951: 25764,  # Bobby Witt Jr.
    669701: 26396,  # Josh Smith
    687093: 26031,  # Vaughn Grissom
    # 676551: xxxx,  # Brewer Hicklen
}
for mlbam_id, fangraphs_id in fangraphs_id_overrides.items():
    chadwick.loc[chadwick['key_mlbam'] == mlbam_id, 'key_fangraphs'] = fangraphs_id

In [7]:
# Index of each position group's table among the tables of the Fangraphs depth-chart page
_DEPTH_CHART_TABLES = {
    "Position": 16,  # All position players
    "SP": 17,        # Starting pitchers
    "RP": 18,        # Relief pitchers
}


@functools.lru_cache(maxsize=1)
def _fetch_depth_chart_html(team_num):
    """Download the raw HTML (bytes) of one team's Fangraphs depth-chart page.

    Only the most recently requested page is memoized, so scraping several position groups of the same team
    back-to-back costs a single request. Call ``_fetch_depth_chart_html.cache_clear()`` to force a re-download.
    """
    page_url = f"https://www.fangraphs.com/depthcharts.aspx?position=ALL&teamid={team_num}#RP"
    # The context manager closes the HTTP response once the body has been read
    with urllib.request.urlopen(page_url) as response:
        return response.read()


# This creates a dataframe for one position group
def scrape_roster(team_num, pos):
    """Scrape one position group of a team's Fangraphs depth chart.

    Parameters
    ----------
    team_num : int
        The team number Fangraphs uses in its URLs.
    pos : str
        "Position" (all position players), "SP" (starting pitchers) or "RP" (relief pitchers).

    Returns
    -------
    pandas.DataFrame
        Columns ``Name``, ``Position`` (equal to ``pos``) and ``key_fangraphs`` (the id parsed out of each
        player link, as a string).

    Raises
    ------
    ValueError
        If ``pos`` is not one of the three supported groups. Raised before any network access.
    """
    if pos not in _DEPTH_CHART_TABLES:
        raise ValueError(f"pos must be one of {sorted(_DEPTH_CHART_TABLES)}, got {pos!r}")
    table = _DEPTH_CHART_TABLES[pos]

    html = _fetch_depth_chart_html(team_num)

    # Wrap the bytes in a file-like object: passing literal HTML to read_html is deprecated
    df = pd.read_html(io.BytesIO(html), encoding='utf-8')[table]
    # Drop last row (Totals)
    df = df.iloc[:-1]

    # Drop players listed as unknown
    df = df[df['Name'] != "Unknown"].reset_index(drop=True)

    # Same table again, this time as parsed HTML so the player links can be read
    # NOTE(review): no parser is named, so BeautifulSoup picks whichever is installed — confirm the table
    # numbering above holds for that parser.
    table_tag = BeautifulSoup(html).find_all('table')[table]

    # Pull out the links on player names
    link_list = [anchor.get('href') for anchor in table_tag.find_all('a')]
    url_df = pd.DataFrame(link_list, columns=["URLs"])

    # Links are attached by row position.
    # NOTE(review): this assumes every remaining row has exactly one link, in row order — verify against the page.
    df = df.join(url_df)
    # The id is the text between the first "=" and the following "&" of the link
    df['key_fangraphs'] = df['URLs'].str.split('=').str[1]
    df['key_fangraphs'] = df['key_fangraphs'].str.split('&').str[0]

    # Create Position column
    df['Position'] = pos

    # Keep relevant variables
    df = df[['Name', 'Position', 'key_fangraphs']]

    return df

In [8]:
# Spot-check the scraper on a single team (Fangraphs team number 11, relief pitchers) before scraping every team
df = scrape_roster(11, "RP")
df

Unnamed: 0,Name,Position,key_fangraphs
0,Paul Sewald,RP,13892
1,Andrés Muñoz,RP,20373
2,Erik Swanson,RP,16587
3,Matt Brash,RP,25756
4,Diego Castillo,RP,17496
5,Matthew Boyd,RP,15440
6,Matthew Festa,RP,19330
7,Penn Murfee,RP,25174
8,George Kirby,RP,25436
9,Chris Flexen,RP,13896


In [9]:
# This scrapes all three position groups for one team
def scrape_all(team_num):
    """Scrape position players, relievers and starters for the team with Fangraphs number ``team_num``.

    Returns a 3-tuple of dataframes in the order (position players, relief pitchers, starting pitchers).
    """
    position_players, relievers, starters = [
        scrape_roster(team_num, group) for group in ("Position", "RP", "SP")
    ]
    return position_players, relievers, starters

In [10]:
all_rosters = []

for team_abbrev, fangraphs_num in zip(team_map['BBREFTEAM'], team_map['FANGRAPHSNUM']):
    print(team_abbrev)
    # Scrape every position group using the Fangraphs team number
    batters, relievers, starters = scrape_all(fangraphs_num)
    for roster in (batters, relievers, starters):
        # Tag the rows with the team abbreviation, which is the code Baseball Reference uses
        roster['TeamAbbrev'] = team_abbrev
        all_rosters.append(roster)

# One long frame: per team, position players first, then relievers, then starters
all_rosters_df = pd.concat(all_rosters)
all_rosters_df

ARI
ATL
BAL
BOS
CHC
CHW
CIN
CLE
COL
DET
HOU
KCR
LAA
LAD
MIA
MIL
MIN
NYM
NYY
OAK
PHI
PIT
SDP
SEA
SFG
STL
TBR
TEX
TOR
WSN


Unnamed: 0,Name,Position,key_fangraphs,TeamAbbrev
0,Daulton Varsho,Position,19918,ARI
1,Corbin Carroll,Position,25878,ARI
2,Christian Walker,Position,13419,ARI
3,Josh Rojas,Position,19734,ARI
4,Carson Kelly,Position,13620,ARI
...,...,...,...,...
3,Erick Fedde,SP,17425,WSN
4,Cory Abbott,SP,20277,WSN
5,Paolo Espino,SP,8246,WSN
6,Tommy Romero,SP,23465,WSN


In [12]:
# Split the display name at the first space into first/last, then build the cleaned name columns
all_rosters_df[['name_first', 'name_last']] = all_rosters_df['Name'].str.split(" ", n=1, expand=True)
all_rosters_df = name_clean(all_rosters_df)
# Turn the scraped id into an integer: remove the literal "sa" and use 0 where no id was scraped.
# The result is assigned back explicitly; an inplace fillna on a column selection is not guaranteed to
# write through to the frame, and a leftover NaN would make the int conversion fail.
all_rosters_df['key_fangraphs'] = (
    all_rosters_df['key_fangraphs']
    .str.replace("sa", "", regex=False)
    .fillna(0)  # this isn't a great solution. Patch added in playoffs
    .astype('int')
)

  df['Name2'] = df['Name2'].str.replace('[^a-zA-Z0-9 ]', '')


In [13]:
# Spot-check name cleaning on a player with an accented name (key_fangraphs 21132): Name2/First/Last should be unaccented
all_rosters_df.query('key_fangraphs == 21132')

Unnamed: 0,Name,Position,key_fangraphs,TeamAbbrev,name_first,name_last,Name_Chadwick,Name2,First,Last,First2,Last5
1,Alexis Díaz,RP,21132,CIN,Alexis,Díaz,Alexis Díaz,Alexis Diaz,alexis,diaz,al,diaz


In [14]:
# Spot-check the hard-coded "Josh HSmith" -> "Josh Smith" correction from name_clean (key_fangraphs 26396)
all_rosters_df.query('key_fangraphs == 26396')

Unnamed: 0,Name,Position,key_fangraphs,TeamAbbrev,name_first,name_last,Name_Chadwick,Name2,First,Last,First2,Last5
7,Josh H.Smith,Position,26396,TEX,Josh,H.Smith,Josh H.Smith,Josh Smith,josh,smith,jo,smith


In [15]:
# Merge with the Chadwick register
# First try fangraphs key

all_rosters_df_fixed = all_rosters_df.merge(chadwick, on='key_fangraphs', how='left', suffixes=("", "_cw"))
# Columns that exist on both sides come back with "_cw" on the Chadwick copy; keep the roster's version
all_rosters_df_fixed = all_rosters_df_fixed.drop(
    columns=all_rosters_df_fixed.columns[all_rosters_df_fixed.columns.str.contains('_cw')]
)

# Then try First Last5
all_rosters_df_fixed = all_rosters_df_fixed.merge(chadwick, on=['First2', 'Last5'], how='left', suffixes=("", "_cw"))
# Fill gaps left by the id merge with the name-based match. Results are assigned back explicitly because an
# inplace fillna on a column selection is not guaranteed to write through to the frame.
all_rosters_df_fixed['key_fangraphs'] = all_rosters_df_fixed['key_fangraphs'].fillna(all_rosters_df_fixed['key_fangraphs_cw'])
all_rosters_df_fixed['key_mlbam'] = all_rosters_df_fixed['key_mlbam'].fillna(all_rosters_df_fixed['key_mlbam_cw'])

# If multiple players have same First2 Last5, this will keep the one where the fangraphs keys match
all_rosters_df_fixed['key_fangraphs_cw'] = all_rosters_df_fixed['key_fangraphs_cw'].fillna(all_rosters_df_fixed['key_fangraphs'])
all_rosters_df_fixed = all_rosters_df_fixed[all_rosters_df_fixed['key_fangraphs'] == all_rosters_df_fixed['key_fangraphs_cw']]
# NOTE(review): one row per key_fangraphs survives. A player listed in more than one position group keeps
# only the first-listed group, and all players whose id was filled with 0 upstream collapse into a single
# row — confirm this is intended.
all_rosters_df_fixed = all_rosters_df_fixed.drop_duplicates(subset='key_fangraphs')


all_rosters_df_fixed = all_rosters_df_fixed.drop(
    columns=all_rosters_df_fixed.columns[all_rosters_df_fixed.columns.str.contains('_cw')]
)

In [16]:
# Add daily rosters (the scraped rosters before the Chadwick merge), one file per run date
filename = "Rosters_" + todaysdate + ".csv"
raw_roster_dir = os.path.join(baseball_path, "Rosters", "Raw")
all_rosters_df.to_csv(os.path.join(raw_roster_dir, filename))

# Load in player stats
model_input_dir = os.path.join(baseball_path, "Model Input")
batter_df = pd.read_csv(os.path.join(model_input_dir, "Batters.csv"))
pitcher_df = pd.read_csv(os.path.join(model_input_dir, "Pitchers.csv"))

# Idk why there are duplicates. Investigate (probably already fixed)
# Keeps the first row for each player id
batter_df = batter_df.drop_duplicates(subset='batter')
pitcher_df = pitcher_df.drop_duplicates(subset='pitcher')

batter_df.query('batter == 669701')

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,batter,batterName,batSide_l,so_b_l,b1_b_l,b2_b_l,b3_b_l,hr_b_l,...,hbp_r,hr_r,lo_r,po_r,so_r,woba_r,obp_r,slg_r,batter_fg2,imp
404,404,2206,669701,Josh H. Smith,L,0.121951,0.04878,0.0,0.0,0.02439,...,0.028505,0.009629,0.064451,0.044278,0.204136,0.275115,0.311217,0.250122,,0


In [17]:
# Preview the batter stats table (the display is truncated to the first and last rows/columns)
batter_df

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,batter,batterName,batSide_l,so_b_l,b1_b_l,b2_b_l,b3_b_l,hr_b_l,...,hbp_r,hr_r,lo_r,po_r,so_r,woba_r,obp_r,slg_r,batter_fg2,imp
0,0,1608,543105,Alex Dickerson,L,0.200000,0.160000,0.030000,0.010000,0.020000,...,0.001943,0.025967,0.063389,0.076798,0.241912,0.199610,0.199274,0.221504,,0
1,1,1679,664119,Austin Allen,L,0.492295,0.077923,0.006431,0.000379,0.001947,...,0.056637,0.003833,0.023021,0.023453,0.526056,0.169683,0.197702,0.148216,,0
2,2,1681,656252,Bobby Bradley,L,0.404255,0.042553,0.053191,0.000000,0.053191,...,0.001128,0.002755,0.044829,0.019196,0.478513,0.128506,0.137398,0.145470,,0
3,3,1683,546990,Anthony Alford,R,0.423913,0.065217,0.010870,0.000000,0.032609,...,0.001103,0.003447,0.044052,0.014686,0.385524,0.199973,0.218277,0.225967,,0
4,4,1685,429664,Robinson Cano,L,0.120000,0.120000,0.050000,0.000000,0.060000,...,0.001819,0.009546,0.074085,0.057121,0.238253,0.177052,0.186102,0.199043,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
676,658,2330,661531,Brian Serven,R,0.212766,0.148936,0.042553,0.000000,0.021277,...,0.008339,0.025793,0.062763,0.062401,0.215658,0.252027,0.249902,0.302216,,0
677,664,2335,678662,Ezequiel Tovar,R,0.214509,0.149109,0.027963,0.001001,0.004688,...,0.001367,0.005228,0.069221,0.035094,0.244819,0.228323,0.247102,0.249016,,1
678,671,2342,656248,Sean Bouchard,R,0.242837,0.126206,0.039959,0.001068,0.016862,...,0.001658,0.028448,0.042444,0.023506,0.250431,0.371903,0.401767,0.346832,,0
679,674,2345,660707,Elehuris Montero,R,0.277778,0.097222,0.111111,0.000000,0.041667,...,0.002815,0.030683,0.051413,0.053826,0.314514,0.292941,0.261651,0.401588,,0


In [18]:
# Roster rows scraped from the position-player table
all_batters = all_rosters_df_fixed[all_rosters_df_fixed['Position'] == "Position"]

# Primary match: MLB ID
all_batters1 = pd.merge(all_batters, batter_df, left_on='key_mlbam', right_on='batter', how='inner', suffixes=("", "_1"))
# Fallback match: First2, Last5
all_batters2 = pd.merge(all_batters, batter_df, on=['First2', 'Last5'], how='inner', suffixes=("", "_2"))

# Names of the batters found by each match
batter_merged1_list = all_batters1['Name'].unique()
batter_merged2_list = all_batters2['Name'].unique()

# From the fallback match, keep only players the ID match did not already find
already_matched_by_id = all_batters2['Name'].isin(batter_merged1_list)
all_batters2 = all_batters2.loc[~already_matched_by_id]

# Stack them: ID matches first, then the remaining name matches
all_batters = pd.concat([all_batters1, all_batters2], axis=0)

In [19]:
# NOTE(review): this returns no rows in the recorded output. It filters on Name, but the roster spot-check
# above shows this player's Name as "Josh H.Smith" (only Name2 is "Josh Smith") — the check probably needs Name2.
all_batters.query("Name == 'Josh Smith'")

Unnamed: 0,Name,Position,key_fangraphs,TeamAbbrev,name_first,name_last,Name_Chadwick,Name2,First,Last,...,so_r,woba_r,obp_r,slg_r,batter_fg2,imp,key_mlbam_2,key_fangraphs_2,name_first_2,name_last_2


In [20]:
# Every SP/RP roster row, plus Shohei Ohtani's row, whose Position is then set to "SP"
listed_as_pitcher = all_rosters_df_fixed['Position'] != "Position"
is_ohtani = all_rosters_df_fixed['Name'] == "Shohei Ohtani"
all_pitchers = all_rosters_df_fixed[listed_as_pitcher | is_ohtani]
all_pitchers['Position'] = np.where(all_pitchers['Name'] == "Shohei Ohtani", "SP", all_pitchers['Position'])

# Primary match: MLB ID
all_pitchers1 = pd.merge(all_pitchers, pitcher_df, left_on='key_mlbam', right_on='pitcher', how='inner', suffixes=("", "_1"))
# Fallback match: First2, Last5
all_pitchers2 = pd.merge(all_pitchers, pitcher_df, on=['First2', 'Last5'], how='inner', suffixes=("", "_2"))

# Names of the pitchers found by each match
pitcher_merged1_list = all_pitchers1['Name'].unique()
pitcher_merged2_list = all_pitchers2['Name'].unique()

# From the fallback match, keep only players the ID match did not already find
pitcher_already_matched_by_id = all_pitchers2['Name'].isin(pitcher_merged1_list)
all_pitchers2 = all_pitchers2.loc[~pitcher_already_matched_by_id]

# Stack them: ID matches first, then the remaining name matches
all_pitchers = pd.concat([all_pitchers1, all_pitchers2], axis=0)

all_pitchers[all_pitchers['Name'] == "Shohei Ohtani"]

Unnamed: 0,Name,Position,key_fangraphs,TeamAbbrev,name_first,name_last,Name_Chadwick,Name2,First,Last,...,so_r,woba_r,obp_r,slg_r,pitcher_fg2,imp,key_mlbam_2,key_fangraphs_2,name_first_2,name_last_2
200,Shohei Ohtani,SP,19755,LAA,Shohei,Ohtani,Shohei Ohtani,Shohei Ohtani,shohei,ohtani,...,0.3442,0.245068,0.256977,0.268265,,0,,,,


In [21]:
# Select columns to keep and order them
# Later: drop redundancies (key_mlbam/batter)

# Pieces shared by the batter and pitcher column lists
_NAME_COLUMNS = ['Position', 'Name', 'name_first', 'name_last', 'Name_Chadwick', 'Name2',
                 'First', 'Last', 'First2', 'Last5', 'name_nick']
_KEY_COLUMNS = ['key_fangraphs', 'key_mlbam', 'key_bbref_minors', 'key_bbref']
_TEAM_COLUMNS = ['TeamAbbrev', 'imp']

# Stat prefixes, in output order, of the role-tagged columns (e.g. so_b_l, so_b_long_l)
_ROLE_STATS = ['so', 'b1', 'b2', 'b3', 'hr', 'bb', 'hbp', 'lo', 'po', 'go', 'fo', 'pa', 'ab',
               'woba', 'slg', 'obp']
# Stat prefixes, in output order, of the untagged columns (e.g. b1_l)
_PLAIN_STATS = ['b1', 'b2', 'b3', 'bb', 'fo', 'go', 'hbp', 'hr', 'lo', 'po', 'so',
                'woba', 'obp', 'slg']


def _role_stat_columns(role):
    """Names of the role-tagged stat columns; ``role`` is 'b' (batters) or 'p' (pitchers).

    Order: "_l" columns, then "_long_l", then "_r", then "_long_r"; stats follow _ROLE_STATS within each group.
    """
    return [
        f"{stat}_{role}{window}_{side}"
        for side in ("l", "r")
        for window in ("", "_long")
        for stat in _ROLE_STATS
    ]


def _plain_stat_columns():
    """Names of the untagged stat columns: every "_l" column first, then every "_r" column."""
    return [f"{stat}_{side}" for side in ("l", "r") for stat in _PLAIN_STATS]


batter_keep = (
    _NAME_COLUMNS + ['batterName']
    + _KEY_COLUMNS + ['batter']
    + _TEAM_COLUMNS
    + ['batSide_l', 'batSide_r']
    + _role_stat_columns('b')
    + ['sba_2b', 'sba_3b', 'sb_2b', 'sb_3b']
    + _plain_stat_columns()
)

pitcher_keep = (
    _NAME_COLUMNS + ['pitcherName']
    + _KEY_COLUMNS + ['pitcher']
    + _TEAM_COLUMNS
    + ['pitchHand_l', 'pitchHand_r']
    + _role_stat_columns('p')
    + _plain_stat_columns()
    + ['outs', 'avgFaced', 'starter', 'Leverage']
)

In [22]:
# Stack matched batters and pitchers into one frame; columns that exist on only one side are NaN on the other
all_players = pd.concat([all_batters, all_pitchers], axis=0)

In [23]:
# Write one workbook per team with a "Batters" sheet and a "Pitchers" sheet
for team_name in team_map['BBREFTEAM']:
    print(team_name)
    on_team = all_players['TeamAbbrev'] == team_name

    # No explicit "cut variables" step is needed: the batter_keep / pitcher_keep selection below already
    # discards every column that is not exported (including the old index).
    b = all_players[on_team & (all_players['Position'] == 'Position')].reset_index(drop=True)

    sp = all_players[on_team & (all_players['Position'] == 'SP')].reset_index(drop=True)
    sp['Leverage'] = 0

    rp = all_players[on_team & (all_players['Position'] == 'RP')].reset_index(drop=True)
    # Leverage by row position in the RP list, assigned as a whole column (element-wise chained assignment
    # such as rp['Leverage'][i] = ... is not guaranteed to write through to the frame):
    #   row 0      -> 4  (the top guy should be the closer)
    #   rows 1-5   -> 3  (set up/high leverage)
    #   rows 6-10  -> 2  (low leverage)
    #   the rest   -> 0
    bullpen_rank = np.arange(len(rp))
    rp['Leverage'] = np.select(
        [bullpen_rank == 0, bullpen_rank < 6, bullpen_rank < 11],
        [4, 3, 2],
        default=0,
    )

    p = pd.concat([sp, rp], axis=0)

    # May be in as SP and RP. Keep SP in case they need to start.
    # Starters come first in the concat, so keep='first' is what keeps the SP row.
    p = p.drop_duplicates(subset='pitcher', keep='first')

    b = b.drop_duplicates(subset=['batter', 'Last'])
    p = p.drop_duplicates(subset=['pitcher', 'Last'])

    # This mostly fixes Michael Harris getting Mitch Harris. It's reasonable to keep the most recent, but this isn't a good fix
    b = b.drop_duplicates(subset=['Name'])
    p = p.drop_duplicates(subset=['Name'])

    # Missing handedness defaults to "R"
    b['batSide_l'] = b['batSide_l'].fillna("R")
    p['pitchHand_l'] = p['pitchHand_l'].fillna("R")

    b = b[batter_keep]
    p = p[pitcher_keep]

    # Save them to their own sheet; one writer creates the workbook and closes it after both sheets are written
    file_name = os.path.join(baseball_path, "Rosters", team_name + ".xlsx")
    with pd.ExcelWriter(file_name, engine='openpyxl') as writer:
        b.to_excel(writer, sheet_name="Batters")
        p.to_excel(writer, sheet_name='Pitchers')

print("Done")

ARI
ATL
BAL
BOS
CHC
CHW
CIN
CLE
COL
DET
HOU
KCR
LAA
LAD
MIA
MIL
MIN
NYM
NYY
OAK
PHI
PIT
SDP
SEA
SFG
STL
TBR
TEX
TOR
WSN
Done


In [24]:
# Stamp the notebook output with the date of this run
print(f"Code was last run on: {datetime.date.today()}")

Code was last run on: 2022-10-14
