# C02. Projections

### Sims

Merge together game sims for a given stat.

In [3]:
def _read_player_sim(simulation_folder, file_prefix, position_label, imp_prefix, extra_columns, sim_num, stat):
    """Read one simulation CSV and make its stat column sim-specific.

    Args:
        simulation_folder: Folder holding the per-sim CSV files.
        file_prefix: File name prefix, "batters" or "pitchers".
        position_label: Value written to the 'Position' column on sim 0.
        imp_prefix: Letter used in the imputation flag columns
            ('imp_{imp_prefix}_l' / 'imp_{imp_prefix}_r').
        extra_columns: Additional descriptive columns kept on sim 0
            (e.g. ['batting_order'] for batters).
        sim_num: Simulation number; selects the file and names the stat column.
        stat: Name of the stat column to read.

    Returns:
        DataFrame with the stat renamed to '{stat}_{sim_num}'. For sim 0 it
        also carries fullName, Position, imp_l, imp_r, confirmed and any
        extra columns; later sims carry the stat column only.
    """
    stat_column = f'{stat}'
    sim_path = os.path.join(simulation_folder, f"{file_prefix}_{sim_num}.csv")

    if sim_num == 0:
        # First sim: keep the descriptive columns and imputation flags
        imp_l, imp_r = f'imp_{imp_prefix}_l', f'imp_{imp_prefix}_r'
        descriptive = [imp_l, imp_r, 'confirmed', *extra_columns]
        sim_df = pd.read_csv(sim_path, encoding='iso-8859-1', usecols=['fullName', *descriptive, stat_column])
        # Assign position
        sim_df['Position'] = position_label
        # Rearrange
        sim_df = sim_df[['fullName', 'Position', *descriptive, stat_column]]
        # Use position-agnostic names for the imputation flags
        sim_df = sim_df.rename(columns={imp_l: 'imp_l', imp_r: 'imp_r'})
    else:
        # Later sims: only the stat is needed because rows are assumed to be
        # in the same order as in sim 0
        sim_df = pd.read_csv(sim_path, encoding='iso-8859-1', usecols=[stat_column])

    # Rename to be sim-specific
    return sim_df.rename(columns={stat_column: f'{stat}_{sim_num}'})


def create_game_player_sims(date, matchup, position="Both", stat="FP"):
    """Merge every simulation of one game into a single wide dataframe.

    Reads 'batters_{n}.csv' and/or 'pitchers_{n}.csv' for n = 0..max sim from
    the matchup's simulation folder and places each sim's stat side by side
    as '{stat}_{n}' columns.

    Args:
        date: Date used in the "Matchups {date}" folder name.
        matchup: Name of the matchup folder inside the date folder.
        position: "Both", "Batters" or "Pitchers".
        stat: Name of the stat column to collect from each sim.

    Returns:
        DataFrame with one row per player (duplicate fullNames collapsed to
        the last occurrence) and missing values filled with -99.

    Raises:
        ValueError: If position is not one of the supported values.
        FileNotFoundError: If the folder contains no simulation files.
    """
    if position not in ("Both", "Batters", "Pitchers"):
        raise ValueError(f"position must be 'Both', 'Batters' or 'Pitchers', got {position!r}")

    # Identify simulation folder
    simulation_folder = os.path.join(baseball_path, "B02. Simulations", "2. Player Sims", f"Matchups {date}", matchup)

    # Collect sim numbers from the file names, ignoring anything that is not
    # a batters_/pitchers_ sim file (e.g. hidden system files)
    sim_numbers = []
    for file_name in os.listdir(simulation_folder):
        stem, extension = os.path.splitext(file_name)
        prefix, _, suffix = stem.rpartition("_")
        if extension == ".csv" and prefix in ("batters", "pitchers") and suffix.isdigit():
            sim_numbers.append(int(suffix))

    if not sim_numbers:
        raise FileNotFoundError(f"No simulation files found in {simulation_folder}")

    # Identify the number of the last sim
    max_sim = max(sim_numbers)

    # List of single game sim results
    df_list = []

    # Loop over sims
    for sim_num in range(max_sim + 1):
        frames = []
        if position in ("Both", "Batters"):
            frames.append(_read_player_sim(simulation_folder, "batters", "batter", "b", ['batting_order'], sim_num, stat))
        if position in ("Both", "Pitchers"):
            frames.append(_read_player_sim(simulation_folder, "pitchers", "pitcher", "p", [], sim_num, stat))

        # Stack batters on top of pitchers when both were requested
        player_df = pd.concat(frames, axis=0) if len(frames) > 1 else frames[0]

        # Add to list of dataframes
        df_list.append(player_df)

    # Place all sims side by side (relies on every sim sharing the same row index)
    merged_df = pd.concat(df_list, axis=1)

    # Drop duplicates (this is Ohtani)
    merged_df = merged_df.drop_duplicates('fullName', keep='last')

    # Flag missing values with the sentinel -99
    merged_df = merged_df.fillna(-99)

    return merged_df

Merge together all player sims for a given contest.

In [None]:
def create_contest_player_sims(guide, contestKey, position="Both", stat="FP"):
    """Stack the merged player sims of every game in a contest's slate.

    Args:
        guide: Contest guide dataframe; reads guide['date'][0] and the
            'game_id' column.
        contestKey: Not used by this function; kept for the existing interface.
        position: Passed through to create_game_player_sims.
        stat: Passed through to create_game_player_sims.

    Returns:
        DataFrame with the rows of every in-slate game, plus a 'gamePk'
        column holding the game id as it appears in the folder name (string).

    Raises:
        ValueError: If no matchup folder belongs to the slate.
    """
    # Pick date
    date = guide['date'][0]

    # Identify matchup folder
    matchup_folder = os.path.join(baseball_path, "B02. Simulations", "2. Player Sims", f"Matchups {date}")

    # Games in the slate (built once so each membership test is O(1))
    slate_game_ids = set(guide['game_id'].unique())

    # Game simulations
    game_player_sims_list = []

    # Loop over matchups
    for matchup in os.listdir(matchup_folder):
        # Identify gamePk (second space-separated token of the folder name);
        # skip entries that do not follow that naming pattern
        name_parts = matchup.split(" ")
        try:
            game_id = int(name_parts[1])
        except (IndexError, ValueError):
            continue

        # Only keep games that are in the slate
        if game_id not in slate_game_ids:
            continue

        # Merge together all sim dataframes
        game_player_sims = create_game_player_sims(date, matchup, position, stat)
        game_player_sims['gamePk'] = name_parts[1]
        game_player_sims_list.append(game_player_sims)

    if not game_player_sims_list:
        raise ValueError(f"No matchups in {matchup_folder} belong to the slate")

    # Concatenate all together
    return pd.concat(game_player_sims_list, axis=0)

### Projections

##### Me

In [2]:
def read_my_projections(guide, contestKey, stat="FP"):
    """Build my projection per player as the average of the stat across sims.

    Args:
        guide: Contest guide dataframe, passed to create_contest_player_sims.
        contestKey: Contest identifier, passed to create_contest_player_sims.
        stat: Stat whose '{stat}_{sim}' columns are read and averaged.

    Returns:
        The name-cleaned player sims with an added 'Projection_Me' column
        holding the row-wise mean of all '{stat}_*' columns.
    """
    # Read in player sims for the requested stat (previously hard-coded to
    # "FP", which left no matching columns to average for any other stat)
    player_sims = create_contest_player_sims(guide, contestKey, stat=stat)

    # Create universal name variable
    my_projections = name_clean(player_sims, "fullName")

    # Create a new column to average sim score
    my_projections['Projection_Me'] = my_projections.filter(regex=f'^{stat}_').mean(axis=1)

    return my_projections

##### DFF

In [4]:
def read_dff_projections(guide):
    """Read DFF projections for the slate (or the date when no slate is set).

    Args:
        guide: Contest guide dataframe; reads guide['dff_slate'][0] and
            guide['date'][0].

    Returns:
        The result of name_clean on a dataframe with columns 'Name' and
        'Projection_DFF'.

    Raises:
        KeyError: If the file has neither of the two supported column layouts.
    """
    # Identify slate
    dff_slate = guide['dff_slate'][0]

    # Identify date
    date = guide['date'][0]

    projections_folder = os.path.join(baseball_path, "A07. Projections", "1. DFF", "2. Projections")

    # If slate is missing (pd.isna also covers None),
    if pd.isna(dff_slate) or dff_slate == "":
        # Use date, adding dashes to match file names
        date_text = str(date)
        dff_slate = f'{date_text[0:4]}-{date_text[4:6]}-{date_text[6:8]}'
        # Read in projections
        dff_projections = pd.read_csv(os.path.join(projections_folder, "Date", f"DFF_MLB_cheatsheet_{dff_slate}.csv"))
    else:
        # Read in projections
        dff_projections = pd.read_csv(os.path.join(projections_folder, f"DFF Projections {dff_slate}.csv"))

    # The files come in two column layouts; pick by the columns present
    # instead of catching whatever error the wrong layout happens to raise
    if {'first_name', 'last_name'}.issubset(dff_projections.columns):
        dff_projections['Name'] = dff_projections['first_name'] + " " + dff_projections['last_name']
        dff_projections = dff_projections.rename(columns={'ppg_projection': 'Projection_DFF'})
    else:
        dff_projections['Name'] = dff_projections['First Name'] + " " + dff_projections['Last Name']
        dff_projections = dff_projections.rename(columns={'PPG': 'Projection_DFF'})

    # Copy so later column edits do not act on a slice of the raw frame
    dff_projections = dff_projections[['Name', 'Projection_DFF']].copy()
    dff_projections = name_clean(dff_projections, "Name")

    return dff_projections

##### RotoWire

In [5]:
def read_roto_projections(guide):
    """Read the RotoWire projections file for the guide's RotoWire slate.

    Args:
        guide: Contest guide dataframe; reads guide['roto_slate'][0].

    Returns:
        The result of name_clean on a dataframe with columns 'Name',
        'Projection_Roto' (from 'points') and 'rostership'.
    """
    # Locate the projections file for this slate
    slate = guide['roto_slate'][0]
    projections_path = os.path.join(
        baseball_path,
        "A07. Projections",
        "2. RotoWire",
        "2. Projections",
        f"RotoWire Projections {slate}.csv",
    )
    raw = pd.read_csv(projections_path)

    # Full name from its two parts
    raw['Name'] = raw['firstName'] + " " + raw['lastName']

    # Keep only what is needed and label the projection by its source
    trimmed = raw[['Name', 'points', 'rostership']]
    trimmed.rename(columns={'points':'Projection_Roto'}, inplace=True)

    return name_clean(trimmed, "Name")

### Pre

In [6]:
def evaluate_projections_pre(daily, quietly=False):
    """Compare my projections with DFF's and RotoWire's before results exist.

    Adds 'Diff_DFF' and 'Diff_Roto' (mine minus theirs) to `daily` in place
    and leaves it sorted by 'Diff_Roto' descending.

    Args:
        daily: DataFrame with 'Name', 'Projection_Me', 'Projection_DFF' and
            'Projection_Roto' columns. Modified in place.
        quietly: When True, nothing is printed. When False, prints the
            pairwise correlations and the ten largest disagreements in each
            direction for each source.

    Returns:
        The same `daily` object, with the two difference columns added.
    """
    ### Correlations
    # My projections with DFF
    dff_corr = daily['Projection_Me'].corr(daily['Projection_DFF'])
    # My projections with RotoWire
    roto_corr = daily['Projection_Me'].corr(daily['Projection_Roto'])
    # DFF's projections with Rotowire's
    their_corr = daily['Projection_DFF'].corr(daily['Projection_Roto'])

    if not quietly:
        print(f"Correlations: Me and DFF {round(dff_corr, 3)}, Me and Roto {round(roto_corr, 3)}, DFF and Roto {round(their_corr, 3)}")

    ### Outliers
    # (display name, column suffix) for each outside source, DFF first
    for source_name, suffix in (("DFF", "DFF"), ("RotoWire", "Roto")):
        diff_column = f'Diff_{suffix}'
        shown_columns = ['Name', 'Projection_Me', f'Projection_{suffix}']

        daily[diff_column] = daily['Projection_Me'] - daily[f'Projection_{suffix}']

        # The in-place sorts run even when quiet so the returned frame is
        # ordered the same way regardless of the flag
        daily.sort_values(by=diff_column, ascending=True, inplace=True)
        if not quietly:
            print(f"I'm lower than {source_name} on:")
            print(daily[shown_columns].head(10))

        daily.sort_values(by=diff_column, ascending=False, inplace=True)
        if not quietly:
            print(f"I'm higher than {source_name} on:")
            print(daily[shown_columns].head(10))

    return daily

### Post

In [7]:
def evaluate_projections_post(projections_and_results_df):
    """Score each projection source against actual fantasy points.

    For Me, DFF and Roto, adds 'error_{source}' (projection minus 'FPTS')
    and 'MSE_{source}' (that error squared, per row). Then adds 'beat_DFF'
    and 'beat_Roto': 1 where the other source's squared error is strictly
    larger than mine, 0 otherwise, and NaN where the other source's squared
    error is missing.

    Args:
        projections_and_results_df: DataFrame with 'Projection_Me',
            'Projection_DFF', 'Projection_Roto' and 'FPTS'. Modified in place.

    Returns:
        The same dataframe object with the new columns appended.
    """
    frame = projections_and_results_df

    # Per-source error and squared error, in the order Me, DFF, Roto
    for source in ("Me", "DFF", "Roto"):
        miss = frame[f'Projection_{source}'] - frame['FPTS']
        frame[f'error_{source}'] = miss
        frame[f'MSE_{source}'] = miss ** 2

    # Closer on player: did I beat each outside source?
    for rival in ("DFF", "Roto"):
        rival_sq_error = frame[f'MSE_{rival}']
        i_was_closer = (rival_sq_error > frame['MSE_Me']).astype('int')
        # No verdict when the rival has no squared error to compare against
        frame[f'beat_{rival}'] = np.where(rival_sq_error.isna(), np.nan, i_was_closer)

    # May want to add RotoWire's ownership error

    return frame

Run evaluations from just a contestKey.

In [None]:
def run_contest_evaluation(contestKey):
    """Run the post-contest projection evaluation from a contestKey alone.

    Reads the contest guide, gathers my, DFF and RotoWire projections,
    attaches actual results and scores them with evaluate_projections_post.

    Args:
        contestKey: Contest identifier used in the guide and results file names.

    Returns:
        DataFrame of projections, results and evaluation columns, plus
        'date' and 'contestKey'. Names that appear more than once are
        removed entirely.
    """
    # Read in guide
    guide = pd.read_csv(os.path.join(baseball_path, "A09. Contest Guides", f"Contest Guide {contestKey}.csv"))

    # Read in projections
    my_projections = read_my_projections(guide, contestKey)
    try:
        dff_projections = read_dff_projections(guide)
    except Exception:
        # DFF projections are best-effort: carry on without them
        print("No DFF Projections")
        dff_projections = None
    roto_projections = read_roto_projections(guide)

    # Merge projections
    my_columns = my_projections[['Name', 'Position', 'imp_l', 'imp_r', 'Projection_Me']]
    if dff_projections is not None:
        projections_df = pd.merge(my_columns, dff_projections, on='Name', how='outer')
    else:
        # Copy so the new column is not assigned onto a slice of my_projections
        projections_df = my_columns.copy()
        projections_df['Projection_DFF'] = np.nan
    projections_df = pd.merge(projections_df, roto_projections, on='Name', how='outer')

    # Read in results
    player_results_df = pd.read_csv(os.path.join(baseball_path, "A01. DraftKings", "6. Player Results", f"Player Results {contestKey}.csv"), encoding='iso-8859-1')
    results_df = name_clean(player_results_df, "Player")
    results_df = results_df[['Name', 'FPTS']]

    # Merge onto projections
    projections_and_results_df = projections_df.merge(results_df, on='Name', how='left')

    # Run evaluations
    projections_and_results_df = evaluate_projections_post(projections_and_results_df)

    # Add date and contestKey
    projections_and_results_df['date'] = guide['date'][0]
    projections_and_results_df['contestKey'] = contestKey

    # Drop all instances of a duplicate (Luises Garcia); drop_duplicates
    # returns a new frame, so the result has to be kept
    projections_and_results_df = projections_and_results_df.drop_duplicates(subset='Name', keep=False)

    return projections_and_results_df