# 09. Matchups
Source: <br>
1. #02. Contests/C.Salaries (from DK API)
2. #07. Stats <br>

Description: This notebook combines DraftKings salaries, daily lineups, and team stats to create the matchup Excel files used as simulation input. <br>

### Lineups 

In [20]:
# Scrape team lineups
def scrape_lineups(team_map, date=todaysdate_dash, early=False):
    """Download the Baseball Monster lineups for one date and keep one game per team.

    Parameters
    ----------
    team_map : DataFrame
        Team lookup. Its 'SFBBTEAM' column is matched against the feed's
        'team code' column and its 'BBREFTEAM' column is carried through.
    date : str
        Value sent as the feed's ``d`` query parameter; it is also used in the
        name of the CSV written below. Callers in this file pass YYYY-MM-DD.
    early : bool
        When truthy keep each team's first game of the day, otherwise keep
        each team's last game.

    Returns
    -------
    tuple
        ``(lineups, projected)``. ``lineups`` has the columns 'key_mlbam',
        'batting_order_fill' and 'BBREFTEAM'. ``projected`` is the list of
        feed team codes that have at least one row whose ' confirmed' value
        is "N".
    """
    # Read in daily lineups from Baseball Monster
    url = f"https://baseballmonster.com/Lineups.aspx?csv=1&d={date}"
    lineups = pd.read_csv(url)

    # Merge with team map to get proper team code (teams not in the map are dropped)
    lineups = lineups.merge(team_map, left_on='team code', right_on='SFBBTEAM', how='inner')

    # Hard-coded id for a player the feed is expected to leave blank
    lineups[' mlb id'] = np.where(lineups[' player name'] == "Masataka Yoshida", 807799, lineups[' mlb id'])

    # Flag remaining missing ids with a sentinel and report them.
    # Assigning the result back (instead of a chained inplace fillna) keeps
    # this working when pandas Copy-on-Write is enabled.
    lineups[' mlb id'] = lineups[' mlb id'].fillna(999999)
    for player_name in lineups.loc[lineups[' mlb id'] == 999999, ' player name']:
        print("Missing: ", player_name)

    # Determine if it's the first or second game of a double header:
    # a team's first 10 rows are treated as game 1, anything after as game 2
    # (presumably 9 batters + the starting pitcher per game - confirm against the feed).
    lineups['count'] = lineups.groupby('BBREFTEAM').cumcount() + 1
    lineups['game'] = np.where(lineups['count'] <= 10, 1, 2)
    lineups['games'] = lineups.groupby('BBREFTEAM')['game'].transform('max')

    # Early slate keeps the first game, otherwise keep each team's last game
    if early:
        wanted_game = lineups['game'] == 1
    else:
        wanted_game = lineups['game'] == lineups['games']
    lineups = lineups[wanted_game]

    # Teams whose kept lineup is still projected (not confirmed)
    projected = lineups.loc[lineups[' confirmed'] == "N", 'team code'].drop_duplicates().tolist()

    # Keep relevant variables (rename returns a new frame, so no write into a slice)
    lineups = lineups[[' mlb id', ' batting order', 'BBREFTEAM']].rename(
        columns={' mlb id': 'key_mlbam', ' batting order': 'batting_order_fill'}
    )

    # Name the file after the date that was actually scraped, so a run for a
    # past date no longer overwrites today's lineup file.
    lineups.to_csv(os.path.join(baseball_path, "9. Matchups", "A. Lineups", "Lineups " + str(date) + ".csv"))

    return lineups, projected

### DK Salaries

In [22]:
# This reads in the saved salary file and keeps relevant variables. It also creates a list of games.
def clean_dk_salaries(team_map, contestKey):
    """Load one contest's DraftKings salary file and derive the list of games.

    Parameters
    ----------
    team_map : DataFrame
        Team lookup with 'DKTEAM' (matched against 'TeamAbbrev') and
        'BBREFTEAM' columns.
    contestKey
        Contest identifier; the file read is
        "2. Contests/C. Salaries/Salaries <contestKey>.csv" under baseball_path.

    Returns
    -------
    tuple
        ``(dk_salaries_cut, matchups)``. ``dk_salaries_cut`` holds the columns
        'Name', 'First2', 'Last5', 'BBREFTEAM', 'Salary' and 'ID'.
        ``matchups`` is the list of distinct cleaned 'Game Info' strings,
        without 'Postponed' / 'Cancelled', sorted by start time when the time
        can be parsed.
    """
    # Read in DK Salaries
    salary_file = os.path.join(baseball_path, "2. Contests", "C. Salaries", "Salaries " + str(contestKey) + ".csv")
    dk_salaries = pd.read_csv(salary_file, encoding='iso-8859-1')

    # Drop if missing game info (copy so the column assignments below hit a real frame)
    dk_salaries = dk_salaries[dk_salaries['Game Info'].notna()].copy()

    # Give Shohei Ohtani a special ID
    dk_salaries['ID'] = np.where(dk_salaries['Name'] == "Shohei Ohtani", 134045, dk_salaries['ID'])

    # Clean game info.
    # Team codes are only replaced when they are not part of a longer run of
    # capital letters, so codes that were already converted (e.g. "SFG", "KCR")
    # are left alone instead of turning into "SFGG" / "KCRR".
    game_info_fixes = {
        r"(?<![A-Z])CWS(?![A-Z])": "CHW",
        r"(?<![A-Z])KC(?![A-Z])": "KCR",
        r"(?<![A-Z])SD(?![A-Z])": "SDP",
        r"(?<![A-Z])SF(?![A-Z])": "SFG",
        r"(?<![A-Z])TB(?![A-Z])": "TBR",
        r"(?<![A-Z])WAS(?![A-Z])": "WSN",
        "@": "_",
        ":": "",
        "/": "",
    }
    dk_salaries['Game Info'] = dk_salaries['Game Info'].replace(game_info_fixes, regex=True)

    # Merge with team map to get baseball reference names
    dk_salaries = dk_salaries.merge(team_map, left_on='TeamAbbrev', right_on='DKTEAM', how='left')

    # Convert to Baseball Reference team code
    dk_salaries['TeamAbbrev'] = dk_salaries['BBREFTEAM']

    # Clean names (name_clean is defined elsewhere; the 'First2' / 'Last5'
    # columns selected below are presumably added by it - confirm)
    dk_salaries = name_clean(dk_salaries)

    # This is all we need to merge.
    dk_salaries_cut = dk_salaries[['Name', 'First2', 'Last5', 'BBREFTEAM', 'Salary', 'ID']]

    # This is the list of all matchups
    matchups = dk_salaries['Game Info'].unique().tolist()

    print(matchups)

    # Games that will not be played carry a status instead of a matchup string
    matchups = [game for game in matchups if game not in ('Postponed', 'Cancelled')]

    def sort_key(string):
        """Return the start time as a 24-hour HHMM integer.

        Characters 17-20 hold the 12-hour HHMM time. If an AM/PM marker
        follows it is used to convert to 24-hour time; without a marker the
        raw number is used unchanged.
        """
        hour, minute = divmod(int(string[17:21]), 100)
        marker = string[21:23].upper()
        if marker == "PM" and hour != 12:
            hour += 12
        elif marker == "AM" and hour == 12:
            hour = 0
        return hour * 100 + minute

    try:
        # Sort matchups on game time
        matchups.sort(key=sort_key)
    except (ValueError, TypeError):
        # Best effort: an unexpected 'Game Info' layout leaves the list unsorted
        print("Could not parse game times; matchups left unsorted.")

    return dk_salaries_cut, matchups

In [23]:
def fill_ins(df):
    """Fill missing name keys and pitcher defaults in place and return ``df``.

    Missing 'First2' / 'Last5' values are filled from the first two characters
    of 'firstName' and the first five characters of 'lastName' (lower-cased,
    accents removed, anything other than letters, digits and spaces stripped).
    When present, the columns 'outs', 'avgFaced' and 'starter_api' have their
    missing values filled with 9, 15 and 1 respectively.

    Parameters
    ----------
    df : DataFrame
        Must contain 'firstName', 'lastName', 'First2' and 'Last5'.

    Returns
    -------
    DataFrame
        The same object that was passed in.
    """
    def _name_key(names, length):
        # Casting to str before remove_accents mirrors the original order of
        # operations (a missing name therefore becomes the text 'nan').
        key = names.str.slice(0, length).str.lower().astype(str)
        key = key.map(remove_accents)
        # regex=True is required: since pandas 2.0 str.replace treats the
        # pattern as a literal string by default, which strips nothing.
        return key.str.replace('[^a-zA-Z0-9 ]', '', regex=True)

    # Assign the filled columns back instead of using a chained inplace
    # fillna, which is not guaranteed to update df.
    df['First2'] = df['First2'].fillna(_name_key(df['firstName'], 2))
    df['Last5'] = df['Last5'].fillna(_name_key(df['lastName'], 5))

    # These columns may be absent; each one is handled independently so a
    # missing column no longer skips the ones after it.
    for column, default in (('outs', 9), ('avgFaced', 15), ('starter_api', 1)):
        if column in df.columns:
            df[column] = df[column].fillna(default)

    return df

### Matchup

In [24]:
def clean_matchups(df):
    """Add leverage, batting-order and small-sample flags to a team frame.

    The input frame is modified in place (new columns are added); the returned
    frame is the same data without any column whose name starts with 'Unnamed'.

    Parameters
    ----------
    df : DataFrame
        Must contain 'starter', 'Leverage' and 'order', plus either the batter
        columns 'pa_b_long_r' / 'pa_b_long_l' or the pitcher columns
        'pa_p_long_r' / 'pa_p_long_l'.

    Returns
    -------
    DataFrame

    Raises
    ------
    KeyError
        If neither the batter nor the pitcher plate-appearance columns exist.
    """
    # Add RP leverage to starting pitcher leverage (1 if starter)
    df['Leverage'] = np.where(df['starter'] == 1, 1, df['Leverage'])

    # Determine batting order: an 'order' of 100, 200, ..., 900 maps to slots
    # 1-9; every other value is left as NaN.
    order = df['order']
    slots = list(range(1, 10))
    df['batting_order'] = np.select([order == slot * 100 for slot in slots], slots, default=np.nan)

    # Imputed flag: 1 when the long-run sample is under 40 plate appearances.
    # Choose batter vs pitcher columns explicitly rather than through a bare
    # except, so unrelated errors are no longer swallowed.
    if 'pa_b_long_r' in df.columns and 'pa_b_long_l' in df.columns:
        role = 'b'
    else:
        role = 'p'
    for hand in ('r', 'l'):
        df[f'imp_{role}_{hand}'] = np.where(df[f'pa_{role}_long_{hand}'] < 40, 1, 0)

    # Delete unnamed columns
    return df.loc[:, ~df.columns.str.startswith('Unnamed')]

In [25]:
def clean_leverage(df, team="Away"):
    """Make sure a pitcher frame has a starter (a row with Leverage 1).

    If no row has Leverage 1, one row is promoted: its 'Leverage' and
    'starter' are both set to 1. The first row with Leverage 2 is preferred.
    When there is no such row the row with the lowest numeric Leverage is
    used (previously the first row was promoted regardless of its leverage).

    Parameters
    ----------
    df : DataFrame
        Must contain 'Leverage' and 'starter'.
    team : str
        Label used in the printed message.

    Returns
    -------
    DataFrame
        ``df`` itself when nothing had to change, otherwise a modified copy
        (the caller's frame is not written to).
    """
    # Nothing to do when a starter is already present
    if 1 in df['Leverage'].values:
        return df

    # An empty frame has nobody to promote
    if df.empty:
        print("{} has no pitchers to promote to starter.".format(team))
        return df

    print("{} starting pitcher was imputed.".format(team))
    df = df.copy()

    is_two = df['Leverage'].eq(2)
    if is_two.any():
        # idxmax on a boolean mask gives the label of the first True
        promoted = is_two.idxmax()
    else:
        leverage = pd.to_numeric(df['Leverage'], errors='coerce')
        # Fall back to the first row only if no leverage is usable at all
        promoted = leverage.idxmin() if leverage.notna().any() else df.index[0]

    df.loc[promoted, 'Leverage'] = 1
    df.loc[promoted, 'starter'] = 1

    return df

In [26]:
def clean_order(df, team="Away"):
    """Return a copy of a batter frame in which each batting slot 1-9 is used once.

    Steps:
    1. Report every batting-order number that appears more than once.
    2. Sort by 'batting_order' then 'Salary' and, for each repeated number,
       keep it only on the first (lowest-salary) row; the other rows become NaN.
    3. For each slot in 1-9 that is still unused, give it to the
       lowest-salary row that has no batting order.

    Parameters
    ----------
    df : DataFrame
        Must contain numeric 'batting_order' (NaN for "not in the lineup")
        and 'Salary'.
    team : str
        Label used in the printed messages.

    Returns
    -------
    DataFrame
        A sorted copy with a fresh 0..n-1 index; the input is not modified.
    """
    df = df.copy()  # Create a copy of the input DataFrame to avoid modifying the original
    # Float dtype so slots can always be blanked out with NaN
    df['batting_order'] = df['batting_order'].astype(float)

    # Find duplicate values in batting_order column (missing values do not count)
    order = df['batting_order']
    for number in order[order.duplicated(keep=False)].dropna().unique():
        print(f"{team} batter {number} was duplicated.")

    df = df.sort_values(['batting_order', 'Salary'], ascending=True).reset_index(drop=True)

    # Blank every repeat after the first one. A single masked .loc write
    # replaces the row-by-row chained assignment, which compared against an
    # already-blanked neighbour (missing the third of three duplicates) and
    # does not write at all under pandas Copy-on-Write.
    repeats = df['batting_order'].notna() & df['batting_order'].duplicated(keep='first')
    df.loc[repeats, 'batting_order'] = np.nan

    # Find missing numbers in batting_order column (ascending, so the fill is deterministic)
    missing_numbers = sorted(set(range(1, 10)) - set(df['batting_order'].dropna()))

    # Find the row with the lowest Salary for each missing number
    for number in missing_numbers:
        unassigned = df[df['batting_order'].isna()]
        if unassigned.empty:
            # Fewer players than open slots: report instead of raising IndexError
            print(f"{team} number {number} could not be filled in.")
            continue
        lowest_salary_index = unassigned.sort_values('Salary').index[0]
        df.loc[lowest_salary_index, 'batting_order'] = number
        print(f"{team} number {number} was filled in.")

    return df


In [27]:
def create_matchups(team_map, contestKey, date=todaysdate, backtest=False, early=False):
    """Build one matchup workbook per game of a contest.

    For every matchup returned by clean_dk_salaries, the away and home team
    stat workbooks ("7. Stats/C. Teams/Daily<date>/<TEAM><date>.xlsx", sheets
    'Batters' and 'Pitchers') are read, joined with the DraftKings salaries
    and the scraped lineups, cleaned, and written to
    "9. Matchups/B. Matchups/Matchups<contestKey>/<matchup>.xlsx" with the
    sheets 'AwayBatters', 'AwayPitchers', 'HomeBatters' and 'HomePitchers'.

    Parameters
    ----------
    team_map : DataFrame
        Team lookup passed through to clean_dk_salaries and scrape_lineups.
    contestKey
        Contest identifier; selects the salary file and names the output folder.
    date : str
        Date as YYYYMMDD (it is sliced into YYYY-MM-DD below and used in the
        stat folder / file names).
    backtest : bool
        When True the batting order comes from the stats 'order' column and
        games that were not played are skipped (an existing output file for
        them is deleted). When False the scraped lineup is used for batting
        order and starting pitcher.
    early : bool
        Passed to scrape_lineups.

    Returns
    -------
    None
        The result is the set of Excel files written to disk.
    """
    # Read in clean DK Salaries data
    dk_salaries_cut, matchups = clean_dk_salaries(team_map, contestKey)
    
    # Scrape lineups 
    # The lineup scraper takes the date as YYYY-MM-DD
    date_dash = date[:4] + "-" + date[4:6] + "-" + date[6:]
    print(date_dash)
    # NOTE(review): `projected` is not used anywhere below.
    lineups, projected = scrape_lineups(team_map, date=date_dash, early=early)
    
    daily_folder = "Daily" + date
    
    for matchup in matchups:
        print(matchup)
        # Create new folder with daily rosters
        matchup_folder = "Matchups" + str(contestKey)
        # The folder usually exists after the first matchup; the bare except
        # ignores that (and any other) error from mkdir.
        try:
            os.mkdir(os.path.join(baseball_path, "9. Matchups", "B. Matchups", matchup_folder))
        except:
            pass
        
        # Characters 0-2 and 4-6 of the matchup string are used as the away
        # and home team codes (one separator character in between).
        away = matchup[0:3]
        home = matchup[4:7]
        
        away_file = away + date + ".xlsx"
        home_file = home + date + ".xlsx"
        
        # Read in away data
        away_batters = pd.read_excel(os.path.join(baseball_path, "7. Stats", "C. Teams", daily_folder, away_file), sheet_name='Batters')
        away_pitchers = pd.read_excel(os.path.join(baseball_path, "7. Stats", "C. Teams", daily_folder, away_file), sheet_name='Pitchers')
        
        # Read in home data
        home_batters = pd.read_excel(os.path.join(baseball_path, "7. Stats", "C. Teams", daily_folder, home_file), sheet_name='Batters')
        home_pitchers = pd.read_excel(os.path.join(baseball_path, "7. Stats", "C. Teams", daily_folder, home_file), sheet_name='Pitchers')
            
        
        # Disabled: filling name keys via fill_ins (kept for reference)
#         # Fill in missings
#         away_batters = fill_ins(away_batters)
#         away_pitchers = fill_ins(away_pitchers)
#         home_batters = fill_ins(home_batters)
#         home_pitchers = fill_ins(home_pitchers)

        # Create name variable so Sims reads in right objects
        away_batters['Name'] = away_batters['fullName']
        away_pitchers['Name'] = away_pitchers['fullName']
        home_batters['Name'] = home_batters['fullName']
        home_pitchers['Name'] = home_pitchers['fullName']
        
        # Disabled: earlier salary merge on the First2/Last5 name keys (kept for reference)
#         # Merge with DK Salaries
#         away_batters = away_batters.merge(dk_salaries_cut, on=['First2', 'Last5', 'BBREFTEAM'], how='left')
#         away_pitchers = away_pitchers.merge(dk_salaries_cut, on=['First2', 'Last5', 'BBREFTEAM'], how='left')
        
#         home_batters = home_batters.merge(dk_salaries_cut, on=['First2', 'Last5', 'BBREFTEAM'], how='left')
#         home_pitchers = home_pitchers.merge(dk_salaries_cut, on=['First2', 'Last5', 'BBREFTEAM'], how='left')
        
        # Merge with DK Salaries
        # Left join on full name + team: players without a salary row are kept
        away_batters = away_batters.merge(dk_salaries_cut, on=['Name', 'BBREFTEAM'], how='left')
        away_pitchers = away_pitchers.merge(dk_salaries_cut, on=['Name', 'BBREFTEAM'], how='left')
        
        home_batters = home_batters.merge(dk_salaries_cut, on=['Name', 'BBREFTEAM'], how='left')
        home_pitchers = home_pitchers.merge(dk_salaries_cut, on=['Name', 'BBREFTEAM'], how='left')
             
        # Clean
        away_batters = clean_matchups(away_batters)
        away_pitchers = clean_matchups(away_pitchers)
        
        home_batters = clean_matchups(home_batters)
        home_pitchers = clean_matchups(home_pitchers)   
        
        # Since they're getting fixed, we can make imp all 0, eventually get rid of it
        # ('imp' is recomputed near the end of the loop, after the -99 fill)
        away_batters['imp'] = 0
        away_pitchers['imp'] = 0
        home_batters['imp'] = 0
        home_pitchers['imp'] = 0
        
        
        # Add lineups
        # Adds the 'batting_order_fill' column from the scraped lineups
        away_batters = away_batters.merge(lineups, on=['key_mlbam', 'BBREFTEAM'], how='left')
        home_batters = home_batters.merge(lineups, on=['key_mlbam', 'BBREFTEAM'], how='left')
        away_pitchers = away_pitchers.merge(lineups, on=['key_mlbam', 'BBREFTEAM'], how='left')
        home_pitchers = home_pitchers.merge(lineups, on=['key_mlbam', 'BBREFTEAM'], how='left')
        
        # If backtesting, use statsapi order
        # ('order' values 100..900 map to batting slots 1..9; anything else stays NaN)
        if backtest == True:
            away_batters['batting_order'] = np.nan
            home_batters['batting_order'] = np.nan
            for i in range(1, 10):
                away_batters['batting_order'] = np.where(away_batters['order'] == i * 100, i, away_batters['batting_order'])
                home_batters['batting_order'] = np.where(home_batters['order'] == i * 100, i, home_batters['batting_order'])
            
                
        if backtest == False:
            # Fill in batting orders
            # Players absent from the scraped lineup get "-1", which is turned
            # back into NaN after the integer conversion.
            # NOTE(review): a non-numeric lineup value on a batter row (e.g. "SP")
            # would make astype('int') raise - confirm this cannot occur.
            # NOTE(review): column-level fillna(..., inplace=True) is chained
            # assignment and does not update the frame under pandas
            # Copy-on-Write - confirm the pandas version in use.
            away_batters['batting_order_fill'].fillna("-1", inplace=True)
            away_batters['batting_order'] = away_batters['batting_order_fill'].astype('int')
            away_batters['batting_order'] = np.where(away_batters['batting_order'] == -1, np.nan, away_batters['batting_order'])
            away_batters.drop(columns={'batting_order_fill'}, inplace=True)

            home_batters['batting_order_fill'].fillna("-1", inplace=True)
            home_batters['batting_order'] = home_batters['batting_order_fill'].astype('int')
            home_batters['batting_order'] = np.where(home_batters['batting_order'] == -1, np.nan, home_batters['batting_order'])
            home_batters.drop(columns={'batting_order_fill'}, inplace=True)

            # For pitchers the lineup value "SP" marks the starter: starter is
            # set to 1 (0 for everyone else) and Leverage to 1.
            away_pitchers['batting_order'] = away_pitchers['batting_order_fill']
            away_pitchers['starter'] = np.where(away_pitchers['batting_order'] == "SP", 1, 0)
            away_pitchers['Leverage'] = np.where(away_pitchers['batting_order'] == "SP", 1, away_pitchers['Leverage'])
            away_pitchers.drop(columns={'batting_order_fill'}, inplace=True)

            home_pitchers['batting_order'] = home_pitchers['batting_order_fill']
            home_pitchers['starter'] = np.where(home_pitchers['batting_order'] == "SP", 1, 0)
            home_pitchers['Leverage'] = np.where(home_pitchers['batting_order'] == "SP", 1, home_pitchers['Leverage'])
            home_pitchers.drop(columns={'batting_order_fill'}, inplace=True)
        
        
        # Identification / game-context columns kept for batters
        batter_info = ['fullName', 'firstName', 'lastName', 'Salary', 'batting_order', 'starter', 'Leverage', 'batSide', 'pitchHand', 
         'position', 'BBREFTEAM', 
         'ID', 'id', 'key_mlbam', 'key_fangraphs', 'key_bbref_minors', 'key_bbref', 
         'name_first', 'name_last', 'First2', 'Last5', 'Name', 'order',        
         'status', 'venue_id', 'game_date', 'game_type', 'game_num', 'summary', 
         'weather', 'wind', 'missing']
        
        # Batter stat columns; these are also the columns that get missing
        # values filled with the team average further down
        batter_stat = [               
         'b_L', 'batSide_l', 
         'b1_b_l', 'b2_b_l', 'b3_b_l', 'hr_b_l', 'bb_b_l', 'hbp_b_l',               
         'so_b_l', 'lo_b_l', 'po_b_l', 'go_b_l', 'fo_b_l', 
         'woba_b_l', 'estimated_woba_using_speedangle_b_l', 'slg_b_l', 'obp_b_l', 'iso_b_l', 
         'to_left_b_l', 'to_middle_b_l', 'to_right_b_l',                      
         'hard_hit_b_l', 'barrel_b_l', 'totalDistance_b_l', 'launchSpeed_b_l', 
         'maxSpeed_b_l', 'maxSpin_b_l',
         'pa_b_l', 'ab_b_l', 
                       
         'b1_b_long_l', 'b2_b_long_l', 'b3_b_long_l', 'hr_b_long_l', 'bb_b_long_l', 'hbp_b_long_l',              
         'so_b_long_l', 'lo_b_long_l', 'po_b_long_l', 'go_b_long_l', 'fo_b_long_l', 
         'woba_b_long_l', 'estimated_woba_using_speedangle_b_long_l', 'slg_b_long_l', 'obp_b_long_l', 'iso_b_long_l',
         'to_left_b_long_l', 'to_middle_b_long_l', 'to_right_b_long_l',                      
         'hard_hit_b_long_l', 'barrel_b_long_l', 'totalDistance_b_long_l', 'launchSpeed_b_long_l', 
         'maxSpeed_b_long_l', 'maxSpin_b_long_l',                       
         'pa_b_long_l', 'ab_b_long_l', 
                       
                       
         'batSide_r', 
         'b1_b_r', 'b2_b_r', 'b3_b_r', 'hr_b_r', 'bb_b_r', 'hbp_b_r',
         'so_b_r', 'lo_b_r', 'po_b_r', 'go_b_r', 'fo_b_r', 
         'woba_b_r', 'estimated_woba_using_speedangle_b_r', 'slg_b_r', 'obp_b_r', 'iso_b_r', 
         'to_left_b_r', 'to_middle_b_r', 'to_right_b_r',                      
         'hard_hit_b_r', 'barrel_b_r', 'totalDistance_b_r', 'launchSpeed_b_r',
         'maxSpeed_b_r', 'maxSpin_b_r',        
         'pa_b_r', 'ab_b_r', 
                       
         'b1_b_long_r', 'b2_b_long_r', 'b3_b_long_r', 'hr_b_long_r', 'bb_b_long_r', 'hbp_b_long_r', 
         'so_b_long_r', 'lo_b_long_r', 'po_b_long_r', 'go_b_long_r', 'fo_b_long_r', 
         'woba_b_long_r', 'estimated_woba_using_speedangle_b_long_r', 'slg_b_long_r', 'obp_b_long_r','iso_b_long_r', 
         'to_left_b_long_r', 'to_middle_b_long_r', 'to_right_b_long_r',                      
         'hard_hit_b_long_r', 'barrel_b_long_r', 'totalDistance_b_long_r', 'launchSpeed_b_long_r',
         'maxSpeed_b_long_r', 'maxSpin_b_long_r',
         'pa_b_long_r', 'ab_b_long_r', 
                       
         'b1_rate', 'b2_rate', 'b3_rate', 'hr_rate', 'bb_rate', 'hbp_rate', 'so_rate', 'obp', 'slg', 'woba', 
         'sba_imp', 'sbr', 'sba_2b', 'sba_3b', 'sb_2b', 'sb_3b', 
         'imp', 'imp_b_l', 'imp_b_r']
        
        # Identification / game-context columns kept for pitchers
        pitcher_info = ['fullName', 'firstName', 'lastName', 'Salary', 'batting_order', 'starter', 'Leverage', 'batSide', 'pitchHand', 
         'position', 'BBREFTEAM', 
         'ID', 'id', 'key_mlbam', 'key_fangraphs', 'key_bbref_minors', 'key_bbref',  
         'name_first', 'name_last', 'First2', 'Last5', 'Name', 'order',               
         'status', 'venue_id', 'game_date', 'game_type', 'game_num', 'summary', 
         'weather', 'wind', 'missing']
          
        # Pitcher stat columns (also filled with the team average further down)
        pitcher_stat = [
         'p_L', 'pitchHand_l', 
         'b1_p_l', 'b2_p_l', 'b3_p_l', 'hr_p_l', 'bb_p_l', 'hbp_p_l', 
         'so_p_l', 'lo_p_l', 'po_p_l', 'go_p_l', 'fo_p_l', 
         'woba_p_l', 'estimated_woba_using_speedangle_p_l', 'slg_p_l', 'obp_p_l', 'iso_p_l', 
         'to_left_p_l', 'to_middle_p_l', 'to_right_p_l', 
         'hard_hit_p_l', 'barrel_p_l', 'totalDistance_p_l', 'launchSpeed_p_l',
         'maxSpeed_p_l', 'maxSpin_p_l', 
         'pa_p_l', 'ab_p_l',
                        
         'b1_p_long_l', 'b2_p_long_l', 'b3_p_long_l', 'hr_p_long_l', 'bb_p_long_l', 'hbp_p_long_l',                
         'so_p_long_l', 'lo_p_long_l', 'po_p_long_l', 'go_p_long_l', 'fo_p_long_l', 
         'woba_p_long_l', 'estimated_woba_using_speedangle_p_long_l', 'slg_p_long_l', 'obp_p_long_l', 'iso_p_long_l', 
         'to_left_p_long_l', 'to_middle_p_long_l', 'to_right_p_long_l', 
         'hard_hit_p_long_l', 'barrel_p_long_l', 'totalDistance_p_long_l', 'launchSpeed_p_long_l',
         'maxSpeed_p_long_l', 'maxSpin_p_long_l',                        
         'pa_p_long_l', 'ab_p_long_l', 
                        
         'pitchHand_r', 
         'b1_p_r', 'b2_p_r', 'b3_p_r', 'hr_p_r', 'bb_p_r', 'hbp_p_r',               
         'so_p_r', 'lo_p_r', 'po_p_r', 'go_p_r', 'fo_p_r', 
         'woba_p_r', 'estimated_woba_using_speedangle_p_r', 'slg_p_r', 'obp_p_r', 'iso_p_r', 
         'to_left_p_r', 'to_middle_p_r', 'to_right_p_r', 
         'hard_hit_p_r', 'barrel_p_r', 'totalDistance_p_r', 'launchSpeed_p_r',
         'maxSpeed_p_r', 'maxSpin_p_r', 
         'pa_p_r', 'ab_p_r', 
                        
         'b1_p_long_r', 'b2_p_long_r', 'b3_p_long_r', 'hr_p_long_r', 'bb_p_long_r', 'hbp_p_long_r',                
         'so_p_long_r', 'lo_p_long_r', 'po_p_long_r', 'go_p_long_r', 'fo_p_long_r', 
         'woba_p_long_r', 'estimated_woba_using_speedangle_p_long_r', 'slg_p_long_r', 'obp_p_long_r', 'iso_p_long_r', 
         'to_left_p_long_r', 'to_middle_p_long_r', 'to_right_p_long_r', 
         'hard_hit_p_long_r', 'barrel_p_long_r', 'totalDistance_p_long_r', 'launchSpeed_p_long_r',
         'maxSpeed_p_long_r', 'maxSpin_p_long_r',          
         'pa_p_long_r', 'ab_p_long_r', 
                        
         'H9', 'HR9', 'K9', 'BB9',                
         'outs', 'avgFaced', 'starter_api',
         'imp', 'imp_p_l', 'imp_p_r']
        
        batter_list = batter_info + batter_stat
        pitcher_list = pitcher_info + pitcher_stat 
        
        # Clean this up later, pretty sure we don't need
        # (the First2/Last5 name keys are dropped from the output columns)
        batter_list.remove('First2')
        batter_list.remove('Last5')
        pitcher_list.remove('First2')
        pitcher_list.remove('Last5')
            
        # Keep relevant variables
        # Selecting by list raises KeyError if any listed column is absent
        away_batters = away_batters[batter_list]
        home_batters = home_batters[batter_list]
        away_pitchers = away_pitchers[pitcher_list]
        home_pitchers = home_pitchers[pitcher_list]
        
        
        # Make sure each side has a starting pitcher (a row with Leverage 1)
        away_pitchers = clean_leverage(away_pitchers, "Away")
        home_pitchers = clean_leverage(home_pitchers, "Home")
        
        # Resolve duplicated / missing batting slots; clean_order also sorts
        # the frame and resets its index to 0..n-1
        away_batters = clean_order(away_batters, "Away")
        home_batters = clean_order(home_batters, "Home")
        
        
        # Create file named after matchup
        matchup_file = matchup + ".xlsx"
        
        
        # Delete if the game didn't happen
        if backtest == True:
            # No order (game didn't happen)
            # Both teams' 'order' columns summing to 0 is treated as "not played"
            if (away_batters['order'].sum() == 0 and home_batters['order'].sum() == 0):
                # Delete file
                try:
                    file_path = os.path.join(baseball_path, "9. Matchups", "B. Matchups", matchup_folder, matchup_file)
                    if os.path.isfile(file_path):
                        os.remove(file_path)
                    print("Deleted. Has no batters")
                except:
                    pass

                # Skip to next iteration
                continue

            # Postponed (game didn't happen)
            # Only the first away batter's 'summary' text is inspected
            if "Postponed" in away_batters['summary'][0]:
                # Delete file
                try:
                    file_path = os.path.join(baseball_path, "9. Matchups", "B. Matchups", matchup_folder, matchup_file)
                    if os.path.isfile(file_path):
                        os.remove(file_path)
                    print("Deleted. Game was postponed.")
                except:
                    pass

                # Skip to next iteration
                continue 

        
        
        
        
        
        # Fill in missings with team averages
        # When computing or applying the mean raises, the except branch fills
        # the column with "R" instead (presumably the non-numeric handedness
        # columns such as 'batSide_l' - confirm).
        # NOTE(review): these column-level fillna(..., inplace=True) calls are
        # chained assignment; under pandas Copy-on-Write they do not update
        # the frame - confirm the pandas version in use.
        for stat in batter_stat:
            try:
                stat_mean = away_batters[stat].mean()
                away_batters[stat].fillna(stat_mean, inplace=True)
            except:
                away_batters[stat].fillna("R", inplace=True)
                
            try:
                stat_mean = home_batters[stat].mean()
                home_batters[stat].fillna(stat_mean, inplace=True)
            except:
                home_batters[stat].fillna("R", inplace=True)
                
        for stat in pitcher_stat:
            try:
                stat_mean = away_pitchers[stat].mean()
                away_pitchers[stat].fillna(stat_mean, inplace=True)
            except:
                away_pitchers[stat].fillna("R", inplace=True)
                
            try:
                stat_mean = home_pitchers[stat].mean()
                home_pitchers[stat].fillna(stat_mean, inplace=True)
            except:
                home_pitchers[stat].fillna("R", inplace=True)
                        
                    
        
        # Batters are written in batting order, pitchers by ascending Leverage
        away_batters.sort_values('batting_order', inplace=True)
        home_batters.sort_values('batting_order', inplace=True)
        away_pitchers.sort_values('Leverage', inplace=True)
        home_pitchers.sort_values('Leverage', inplace=True)
        
        # Every value that is still missing becomes the sentinel -99
        away_batters.fillna(-99, inplace=True)
        away_pitchers.fillna(-99, inplace=True)
        home_batters.fillna(-99, inplace=True)
        home_pitchers.fillna(-99, inplace=True)
        
        # Maybe print out players that are imputed and starting
        # 'imp' is 1 for rows whose 'name_last' was missing (now -99), else 0
        away_batters['imp'] = np.where(away_batters['name_last'] == -99, 1, 0)
        away_pitchers['imp'] = np.where(away_pitchers['name_last'] == -99, 1, 0)
        home_batters['imp'] = np.where(home_batters['name_last'] == -99, 1, 0)
        home_pitchers['imp'] = np.where(home_pitchers['name_last'] == -99, 1, 0)
        
        away_pitchers.rename(columns={'starter_api':'starts'}, inplace=True)
        home_pitchers.rename(columns={'starter_api':'starts'}, inplace=True)
        
        # Write to Excel
        # The first call creates (or overwrites) the workbook; the three
        # writers after it reopen the file in append mode to add one sheet each.
        away_batters.to_excel(os.path.join(baseball_path, "9. Matchups", "B. Matchups", matchup_folder, matchup_file), sheet_name="AwayBatters", engine='openpyxl')

        with pd.ExcelWriter(os.path.join(baseball_path, "9. Matchups", "B. Matchups", matchup_folder, matchup_file), mode='a', engine='openpyxl') as writer:  
            away_pitchers.to_excel(writer, sheet_name='AwayPitchers')

        with pd.ExcelWriter(os.path.join(baseball_path, "9. Matchups", "B. Matchups", matchup_folder, matchup_file), mode='a', engine='openpyxl') as writer:  
            home_batters.to_excel(writer, sheet_name='HomeBatters')

        with pd.ExcelWriter(os.path.join(baseball_path, "9. Matchups", "B. Matchups", matchup_folder, matchup_file), mode='a', engine='openpyxl') as writer:  
            home_pitchers.to_excel(writer, sheet_name='HomePitchers')

### Run

In [5]:
def run_matchups(team_map, contestKey=None, date=None, backtest=False, early=False, historic=False):
    """Create matchup files for one contest, or for every contest in ``history``.

    Parameters
    ----------
    team_map : DataFrame
        Team lookup passed through to create_matchups.
    contestKey
        Contest to build (single-contest mode only).
    date : str, optional
        Date passed to create_matchups (single-contest mode only). When left
        as None, create_matchups falls back to its own default date instead
        of receiving None.
    backtest, early : bool
        Passed to create_matchups in single-contest mode.
    historic : bool
        When truthy, ignore the arguments above and rebuild every contest
        listed in the module-level ``history`` table (columns 'contestKey'
        and 'date') with backtest=True and early=False.
    """
    if not historic:
        options = {'contestKey': contestKey, 'backtest': backtest, 'early': early}
        # Only forward a date that was actually supplied; passing None along
        # would fail when create_matchups slices the date string.
        if date is not None:
            options['date'] = date
        create_matchups(team_map, **options)
        return

    # Iterate the two columns directly so the loop does not depend on
    # history having a 0..n-1 index.
    for contest, contest_date in zip(history['contestKey'], history['date']):
        print("Contest: {}, Date: {}".format(contest, contest_date))
        create_matchups(team_map, contest, date=contest_date, backtest=True, early=False)