# Imports

In [6]:
# Read in my projections
def read_player_sims(contestKey):
    """Load my simulated player projections and tidy the player names.

    Reads the "Simulations <id>.csv" file from the player-sims folder, strips
    accents and trailing generational suffixes from ``Name``, derives
    ``lastName`` as the final space-separated token of the cleaned name, and
    renames ``AvgPointsPerGame`` to ``Projection_Me``.

    Returns a DataFrame with the columns
    Name, lastName, TeamAbbrev, Salary, Projection_Me, imp.
    """
    # NOTE(review): the file name is built from `draftGroupId`, which is not a
    # parameter of this function, and `contestKey` is never used. Presumably
    # `draftGroupId` is a notebook-level global -- confirm this is intended.
    sims_file = os.path.join(
        baseball_path, "B02. Simulations", "2. Player Sims", f"Simulations {draftGroupId}.csv"
    )
    sims = pd.read_csv(sims_file, encoding='iso-8859-1')

    # Accents first, then suffixes such as "Jr." / "III" at the end of the name
    cleaned = sims['Name'].apply(remove_accents)
    cleaned = cleaned.str.replace(r'\s+(Jr\.|Sr\.|II|III)$', '', regex=True)
    sims['Name'] = cleaned
    sims['lastName'] = cleaned.str.split(" ").str[-1]

    # A filter on low scores (relief pitchers, mostly) is intentionally disabled:
    # sims = sims[sims['AvgPointsPerGame'] > 4]

    # Same projection column name as the other sources use after merging
    sims = sims.rename(columns={'AvgPointsPerGame': 'Projection_Me'})

    wanted = ['Name', 'lastName', 'TeamAbbrev', 'Salary', 'Projection_Me', 'imp']
    return sims[wanted]

In [None]:
# Identify DraftKings slate name
def pick_slate(Name):
    """Return the slate type implied by a DraftKings contest name.

    The first marker found in ``Name`` wins; a name with no marker maps to "All".
    """
    # Order matters: "(Late Night)" has to be tested before the plain "Night"
    # substring it contains.
    markers = (
        ("(Early)", "Early"),
        ("(Late Night)", "Late Night"),
        ("Night", "Night"),
        ("Afternoon", "Afternoon"),
    )
    return next((label for marker, label in markers if marker in Name), "All")

In [None]:
# For evaluations
def read_dff(contestKey, history):
    """Load the DFF projections that correspond to one contest.

    Parameters
    ----------
    contestKey
        Value looked up in ``history['Contest_Key']``.
    history : pandas.DataFrame
        Contest history with ``Contest_Key``, ``Contest_Date_EST`` and ``Entry``
        columns. Modified in place: ``Contest_Date_EST`` is converted to
        datetime and a ``date`` column (YYYYMMDD string) is added.

    Returns
    -------
    pandas.DataFrame
        The slate-specific DFF projections when the slate file for the contest
        date lists a matching slate type; otherwise the date-level DFF cheat
        sheet (a worse match).

    Raises
    ------
    IndexError
        If ``contestKey`` does not appear in ``history``.
    """
    # Identify index of contest
    history_index = history.loc[history['Contest_Key'] == contestKey].index[0]

    # Convert the "Contest_Date_EST" column to datetime without specifying the format
    history['Contest_Date_EST'] = pd.to_datetime(history['Contest_Date_EST'])

    # Create a new "date" column formatted as YYYYMMDD
    history['date'] = history['Contest_Date_EST'].dt.strftime('%Y%m%d')

    # Extract name and date
    name = history.loc[history_index, 'Entry']
    date = history.loc[history_index, 'date']

    # Use name to pick slate type (All, Afternoon, etc...)
    slate = pick_slate(name)

    dff_dir = os.path.join(baseball_path, "A07. Projections", "1. DFF")

    try:
        # Match projections exactly using slate name and date
        dff_slates = pd.read_csv(os.path.join(dff_dir, "1. Slates", f"DFF Slates {date}.csv"))
        dff_index = dff_slates.loc[dff_slates['Slate Type'] == slate].index[0]
        dff_slate = dff_slates.loc[dff_index, 'URL']

        dff_projections = pd.read_csv(
            os.path.join(dff_dir, "2. Projections", f"DFF Projections {dff_slate}.csv"),
            encoding='iso-8859-1',
        )
    except Exception:
        # Best-effort fallback: any ordinary failure above (missing file, no
        # matching slate row, unreadable CSV) falls back to the date-level
        # file. `Exception` rather than a bare `except` so that Ctrl-C and
        # SystemExit are no longer swallowed here.
        date_dash = date[:4] + "-" + date[4:6] + "-" + date[6:]
        dff_projections = pd.read_csv(
            os.path.join(dff_dir, "2. Projections", "Date", f"DFF_MLB_cheatsheet_{date_dash}.csv"),
            encoding='iso-8859-1',
        )

    return dff_projections

In [None]:
def read_roto(contestKey, history):
    """Load the RotoWire projections for the slate a contest belongs to.

    Looks the contest up in ``history`` by ``Contest_Key``, derives its date
    and slate type, finds the matching row in that day's RotoWire slates file
    and reads the projections file named after that row's ``slateID``.

    ``history`` is modified in place: ``Contest_Date_EST`` becomes datetime and
    a ``date`` column (YYYYMMDD string) is added.
    """
    # Row label of the contest (IndexError if the key is absent)
    contest_row = history.loc[history['Contest_Key'] == contestKey].index[0]

    # Parse the contest timestamp (format inferred) and derive a YYYYMMDD key
    history['Contest_Date_EST'] = pd.to_datetime(history['Contest_Date_EST'])
    history['date'] = history['Contest_Date_EST'].dt.strftime('%Y%m%d')

    entry_name = history.loc[contest_row, 'Entry']
    contest_date = history.loc[contest_row, 'date']

    # Slate type (All, Afternoon, etc...) comes from the contest name
    slate_type = pick_slate(entry_name)

    roto_dir = os.path.join(baseball_path, "A07. Projections", "2. RotoWire")

    # The day's slate listing maps slate names to RotoWire slate ids
    slate_table = pd.read_csv(
        os.path.join(roto_dir, "1. Slates", f"RotoWire Slates {contest_date}.csv")
    )
    matching_rows = slate_table.loc[slate_table['name'] == slate_type]
    slate_id = slate_table.loc[matching_rows.index[0], 'slateID']

    projections_file = os.path.join(
        roto_dir, "2. Projections", f"RotoWire Projections {slate_id}.csv"
    )
    return pd.read_csv(projections_file, encoding='iso-8859-1')

In [None]:
def _merge_with_fallback(base_df, source_df, value_cols, label, quietly):
    """Left-join ``value_cols`` onto ``base_df``: by Name+Team first, then by lastName+Team+Salary.

    Returns the merged frame with each value column present twice
    (``<col>_x`` from the name match, back-filled from the fallback match, and
    ``<col>_y`` from the fallback match) plus the two merge indicators
    ``_merge1`` (name pass) and ``_merge`` (fallback pass).
    """
    # Merge using Name and Team
    merged = pd.merge(
        base_df,
        source_df[['Name', 'TeamAbbrev'] + value_cols],
        on=['Name', 'TeamAbbrev'], how='left', validate='one_to_one', indicator=True,
    )
    # Rename merge indicator (to allow for another)
    merged = merged.rename(columns={'_merge': '_merge1'})

    # Merge using Last Name, Team, and Salary
    merged = pd.merge(
        merged,
        source_df[['lastName', 'TeamAbbrev', 'Salary'] + value_cols],
        on=['lastName', 'TeamAbbrev', 'Salary'], how='left', indicator=True,
    )
    # The fallback keys are not validated as unique, so keep one row per player
    merged.drop_duplicates(['Name', 'TeamAbbrev'], inplace=True)

    # Fill values the name match missed with the fallback match. Assign the
    # result back: calling fillna(inplace=True) on a selected column is a
    # chained assignment and does not update the frame under Copy-on-Write.
    for col in value_cols:
        merged[f'{col}_x'] = merged[f'{col}_x'].fillna(merged[f'{col}_y'])

    # Identify those that didn't merge
    if not quietly:
        print(f"{label} Didn't Merge:")
        unmatched = (merged['_merge1'] == "left_only") & (merged['_merge'] == "left_only")
        print(merged[unmatched]['Name'])

    return merged


def merge_projections(my_df, dff_df, roto_df, quietly=False):
    """Attach RotoWire and DFF projections to my projections.

    Each source is matched twice: exactly on (Name, TeamAbbrev), then on
    (lastName, TeamAbbrev, Salary) for players the first pass missed.

    Parameters
    ----------
    my_df : pandas.DataFrame
        Needs Name, lastName, TeamAbbrev, Salary, imp, Projection_Me.
    dff_df : pandas.DataFrame
        DFF projections, either with ``First Name``/``Last Name``/``Team``/
        ``Salary``/``FP`` columns or the older
        ``first_name``/``last_name``/``team``/``salary``/``ppg_projection``.
    roto_df : pandas.DataFrame
        RotoWire projections with firstName, lastName, teamAbbr, salary,
        rostership, points.
    quietly : bool
        When false, print the names that neither pass could match.

    Returns
    -------
    pandas.DataFrame
        Name, lastName, TeamAbbrev, Salary, imp, ownership, Projection_Me,
        Projection_Roto, Projection_DFF.

    Notes
    -----
    ``dff_df`` and ``roto_df`` are not modified; renamed working copies are
    used, so the same frames can be passed in again.
    """
    ### RotoWire
    roto = roto_df.rename(columns={'teamAbbr': 'TeamAbbrev', 'salary': 'Salary'})
    roto['Name'] = roto['firstName'] + " " + roto['lastName']

    projection_df = _merge_with_fallback(my_df, roto, ['rostership', 'points'], "RotoWire", quietly)

    # Rename column
    projection_df = projection_df.rename(columns={'points_x': 'Projection_Roto', 'rostership_x': 'ownership'})

    # Keep relevant variables (also drops the merge indicators before the next source)
    projection_df = projection_df[['Name', 'lastName', 'TeamAbbrev', 'Salary', 'imp', 'ownership', 'Projection_Me', 'Projection_Roto']]

    ### DFF
    # If it's the old version (merged on date, downloaded csv), rename to match.
    # rename() ignores labels that are absent, so this is a no-op for the new format.
    dff = dff_df.rename(columns={'first_name': 'First Name', 'last_name': 'Last Name', 'team': 'Team', 'salary': 'Salary', 'ppg_projection': 'FP'})

    # Create Name variable, without trailing suffixes
    dff['Name'] = dff['First Name'] + " " + dff['Last Name']
    dff['Name'] = dff['Name'].str.replace(r'\s+(Jr\.|Sr\.|II|III)$', '', regex=True)
    dff = dff.rename(columns={'Team': 'TeamAbbrev', 'Last Name': 'lastName'})

    projection_df = _merge_with_fallback(projection_df, dff, ['FP'], "DFF", quietly)

    # Rename column
    projection_df = projection_df.rename(columns={'FP_x': 'Projection_DFF'})

    # Keep relevant variables
    projection_df = projection_df[['Name', 'lastName', 'TeamAbbrev', 'Salary', 'imp', 'ownership', 'Projection_Me', 'Projection_Roto', 'Projection_DFF']]

    return projection_df

In [8]:
def evaluate_projections_pre(daily, quietly=False):
    """Compare my projections with DFF's and RotoWire's before results are known.

    Computes the pairwise correlations between the three projection columns and
    the per-player differences ``Diff_DFF`` and ``Diff_Roto`` (mine minus
    theirs). Unless ``quietly`` is true, prints the correlations and, for each
    source, the ten players I am lowest and highest on relative to it.

    ``daily`` is modified in place (the two ``Diff_*`` columns are added and the
    rows end up sorted by ``Diff_Roto`` descending) and the same object is
    returned.

    ``quietly=True`` suppresses all output; previously it silenced only the
    correlations.
    """
    show = not quietly

    ### Correlations
    # My projections with DFF
    dff_corr = daily['Projection_Me'].corr(daily['Projection_DFF'])
    # My projections with RotoWire
    roto_corr = daily['Projection_Me'].corr(daily['Projection_Roto'])
    # DFF's projections with Rotowire's
    their_corr = daily['Projection_DFF'].corr(daily['Projection_Roto'])

    if show:
        print(dff_corr)
        print(roto_corr)
        print(their_corr)

    def _report_outliers(diff_col, source, columns):
        # Both sorts always run, even when nothing is printed, so the row order
        # left on `daily` does not depend on `quietly`.
        daily.sort_values(by=diff_col, ascending=True, inplace=True)
        if show:
            print(f"I'm lower than {source} on:")
            print(daily[columns].head(10))

        daily.sort_values(by=diff_col, ascending=False, inplace=True)
        if show:
            print(f"I'm higher than {source} on:")
            print(daily[columns].head(10))

    ### Outliers
    # DFF
    daily['Diff_DFF'] = daily['Projection_Me'] - daily['Projection_DFF']
    _report_outliers('Diff_DFF', "DFF", ['Name', 'Projection_Me', 'Projection_DFF'])

    # RotoWire
    daily['Diff_Roto'] = daily['Projection_Me'] - daily['Projection_Roto']
    _report_outliers('Diff_Roto', "RotoWire", ['Name', 'Projection_Me', 'Projection_Roto', 'ownership'])

    return daily

# After the day

In [9]:
# Read in results
def read_results(contestKey):
    """Load the actual player results for a contest.

    Reads "Player Results <contestKey>.csv", keeps the player, roster position,
    draft percentage and fantasy points columns, drops incomplete rows, and
    normalizes player names (accents and trailing suffixes removed). Pitchers
    with exactly 0 points are treated as missing and dropped.

    Returns a DataFrame with the columns Name, FPTS, %Drafted, Roster Position.
    """
    results_file = os.path.join(
        baseball_path, "A01. DraftKings", "6. Player Results", f"Player Results {contestKey}.csv"
    )
    raw = pd.read_csv(results_file, encoding='iso-8859-1')

    results = (
        raw[['Player', 'Roster Position', '%Drafted', 'FPTS']]
        .dropna()
        .rename(columns={'Player': 'Name'})
    )

    # Remove accents, then suffixes
    names = results['Name'].apply(remove_accents)
    results['Name'] = names.str.replace(r'\s+(Jr\.|Sr\.|II|III)$', '', regex=True)

    # Replace pitcher 0s with missing so the final dropna removes them
    zero_point_pitcher = (results['FPTS'] == 0) & (results['Roster Position'] == "P")
    results['FPTS'] = np.where(zero_point_pitcher, np.nan, results['FPTS'])

    return results[['Name', 'FPTS', '%Drafted', 'Roster Position']].dropna()

In [10]:
def evaluate_projections_post(projection_df, score_df):
    """Score each projection source against the actual fantasy points.

    Inner-joins ``projection_df`` with ``score_df`` on ``Name`` (so only
    projected players with a result remain) and adds, for each of Me, DFF and
    Roto, ``error_<src>`` (projection minus ``FPTS``) and ``MSE_<src>`` (that
    error squared). ``beat_DFF`` / ``beat_Roto`` are 1 where my squared error
    is strictly smaller than the other source's, else 0.
    """
    scored = projection_df.merge(score_df, on=['Name'], how='inner')

    # Per-source error and squared error, added in the order Me, DFF, Roto
    for source in ('Me', 'DFF', 'Roto'):
        miss = scored[f'Projection_{source}'] - scored['FPTS']
        scored[f'error_{source}'] = miss
        scored[f'MSE_{source}'] = miss ** 2

    # Closer on player
    for rival in ('DFF', 'Roto'):
        i_was_closer = scored[f'MSE_{rival}'] > scored['MSE_Me']
        scored[f'beat_{rival}'] = i_was_closer.astype('int')

    # May want to add RotoWire's ownership error

    return scored

In [11]:
# Compare my projections to DFF's and RotoWire's based on actual results
def _print_comparison_summary(comparable_df):
    """Print the MSE ranking of the three sources and how often I beat DFF and RotoWire."""
    # Average squared error per source, ranked best (smallest) first
    mean_sq_errors = {col: comparable_df[col].mean() for col in ('MSE_Me', 'MSE_DFF', 'MSE_Roto')}
    ranked = sorted(mean_sq_errors.items(), key=lambda item: item[1])

    print("Rankings:")
    for rank, (source, value) in enumerate(ranked, start=1):
        print(f"{rank}. {source}: {value}")

    # Separate batters and pitchers
    is_pitcher = comparable_df['Roster Position'].isin(['SP', 'RP', 'P'])
    batter_df = comparable_df[~is_pitcher]
    pitcher_df = comparable_df[is_pitcher]

    def _beat_rates(column):
        # Percentage of players beaten: overall, batters only, pitchers only
        return [frame[column].mean() * 100 for frame in (comparable_df, batter_df, pitcher_df)]

    print("There were {} players we all projected.".format(len(comparable_df)))
    print("I beat DFF on {:2.1f}% of players (Batters: {:2.1f}%, Pitchers: {:2.1f}%).".format(*_beat_rates('beat_DFF')))
    print("I beat RotoWire on {:2.1f}% of players (Batters: {:2.1f}%, Pitchers: {:2.1f}%).".format(*_beat_rates('beat_Roto')))
    print("\n")


# Compare my projections to DFF's and RotoWire's based on actual results
def compare_projections(complete_df, verbose=False):
    """Keep the players that all three sources projected, optionally printing a summary.

    Parameters
    ----------
    complete_df : pandas.DataFrame
        Needs ``Projection_Me``, ``Projection_DFF`` and ``Projection_Roto``.
        With ``verbose=True`` it also needs ``MSE_Me``/``MSE_DFF``/``MSE_Roto``,
        ``beat_DFF``/``beat_Roto`` and ``Roster Position``.
    verbose : bool
        When true, print the sources ranked by mean squared error and the
        percentage of players (overall, batters, pitchers) on which my
        projection beat DFF's and RotoWire's. Off by default, so the default
        call prints nothing, as before.

    Returns
    -------
    pandas.DataFrame
        The rows of ``complete_df`` where none of the three projections is missing.
    """
    # Just look at ones where we all have projections
    projection_cols = ['Projection_Me', 'Projection_DFF', 'Projection_Roto']
    comparable_df = complete_df[complete_df[projection_cols].notna().all(axis=1)]

    # The summary statistics are only worth computing when they are shown
    if verbose:
        _print_comparison_summary(comparable_df)

    return comparable_df