In [1]:
import pandas as pd
import numpy as np
import os
import unidecode
import datetime
from datetime import date
import warnings
warnings.simplefilter(action="ignore")

from plotnine import *

# Root data folder. The loaders below read from its "Player Sims",
# "Online Projections" and "Results" sub-folders. Set the MLB_DATA_DIR
# environment variable to point the notebook at another machine's copy;
# without it the original local path is used.
baseball_path = os.environ.get("MLB_DATA_DIR", r"C:\Users\james\Documents\MLB\Data")

In [3]:
# Slate date in ISO form ("YYYY-MM-DD"). The loader functions take the date in
# this form and strip the dashes themselves where a file name needs "YYYYMMDD".
todaysdate = str(date.today())

# Set custom date (overrides today's date; comment this line out to use today)
todaysdate = "2022-04-07"

# Create name for Daily Fantasy Fuel Files (these keep the dashes)
dff_name = "DFF_MLB_cheatsheet_" + todaysdate + ".csv"

# Results and simulation files use the compact date without dashes, matching
# the names built inside create_score_df and create_my_df. `todaysdate` itself
# is left in ISO form because it is the default passed to those functions.
date_compact = todaysdate.replace("-", "")

result_name = "Results " + date_compact + ".csv"

sims_name = "Player_Sims_" + date_compact + ".csv"

In [4]:
# Clean names for consistency
def name_clean(df):
    """Add normalised name columns used as merge keys between data sources.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a string column 'Name'.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with these columns added:
        'Name_Adjusted' (accents transliterated, punctuation removed),
        'First' / 'Last' (lower-cased first and second space-separated tokens),
        'First2' (first 2 letters of 'First') and 'Last5' (first 5 letters of
        'Last'). 'Last' and 'Last5' are NaN for single-token names.
    """
    # Work on a copy: callers usually pass a filtered/column-subset slice, and
    # adding columns to such a slice in place is what triggers pandas'
    # SettingWithCopyWarning.
    df = df.copy()

    adjusted = df['Name'].apply(unidecode.unidecode)  # remove accents
    # regex=True must be explicit: since pandas 2.0 str.replace treats the
    # pattern as a literal string by default, which would leave punctuation in.
    adjusted = adjusted.str.replace('[^a-zA-Z0-9 ]', '', regex=True)
    df['Name_Adjusted'] = adjusted

    # Separate first and last names (second token only, so suffixes are ignored)
    tokens = adjusted.str.split(" ")
    df['First'] = tokens.str[0].str.lower()
    df['Last'] = tokens.str[1].str.lower()

    # Take first 2 letters of first name and first 5 of last as a sort of merge code
    df['First2'] = df['First'].str.slice(0, 2)
    df['Last5'] = df['Last'].str.slice(0, 5)

    return df

In [5]:
def create_my_df(nn, date=None):
    """Load my simulated projections for one slate.

    Parameters
    ----------
    nn : str
        Name of the sub-folder of "Player Sims" that holds the simulation files.
    date : str, optional
        Slate date as "YYYY-MM-DD"; dashes are stripped to build the file name
        "Player_Sims_YYYYMMDD.csv". Defaults to the current value of the
        notebook-level ``todaysdate``.

    Returns
    -------
    pandas.DataFrame
        'Name' and 'AvgPointsPerGame' for rows with AvgPointsPerGame > 3, plus
        whatever columns ``name_clean`` adds.

    Raises
    ------
    FileNotFoundError
        If the simulation file for that folder/date does not exist.
    """
    # Resolve the default at call time. A `date=todaysdate` default is frozen
    # when the def cell runs, so later edits to the date cell would be ignored.
    if date is None:
        date = todaysdate

    # Remove dashes for other file names
    date = date.replace("-", "")
    sims_name = "Player_Sims_" + date + ".csv"

    # Read in my projections
    my_df = pd.read_csv(os.path.join(baseball_path, "Player Sims", nn, sims_name))

    # .copy() gives name_clean an independent frame rather than a filtered view
    my_df = my_df.loc[my_df['AvgPointsPerGame'] > 3, ['Name', 'AvgPointsPerGame']].copy()

    return name_clean(my_df)

In [6]:
# Read in Daily Fantasy Fuel Projections
def create_dff_df(date=None):
    """Load the Daily Fantasy Fuel (DFF) projections for one slate.

    Parameters
    ----------
    date : str, optional
        Slate date as "YYYY-MM-DD", used verbatim in the file name
        "DFF_MLB_cheatsheet_YYYY-MM-DD.csv". Defaults to the current value of
        the notebook-level ``todaysdate``.

    Returns
    -------
    pandas.DataFrame
        'Name' (first_name + " " + last_name) and 'ppg_projection' for rows with
        ppg_projection > 0, plus whatever columns ``name_clean`` adds.

    Raises
    ------
    FileNotFoundError
        If the cheatsheet file for that date does not exist.
    """
    # Resolve the default at call time. A `date=todaysdate` default is frozen
    # when the def cell runs, so later edits to the date cell would be ignored.
    if date is None:
        date = todaysdate

    # Create name for Daily Fantasy Fuel Files
    dff_name = "DFF_MLB_cheatsheet_" + date + ".csv"

    dff_df = pd.read_csv(os.path.join(baseball_path, "Online Projections", dff_name))
    dff_df['Name'] = dff_df['first_name'] + " " + dff_df['last_name']

    # .copy() gives name_clean an independent frame rather than a column-subset view
    dff_df = name_clean(dff_df[['Name', 'ppg_projection']].copy())

    # Keep only players DFF actually projects to score
    return dff_df[dff_df['ppg_projection'] > 0]

In [7]:
# Load the Daily Fantasy Fuel projections for the 2022-04-27 slate and show
# the resulting frame (including the merge-key columns) as the cell output.
dff_df = create_dff_df("2022-04-27")
dff_df

Unnamed: 0,Name,ppg_projection,Name_Adjusted,First,Last,First2,Last5
0,Shohei Ohtani,22.1,Shohei Ohtani,shohei,ohtani,sh,ohtan
1,Joe Ryan,20.3,Joe Ryan,joe,ryan,jo,ryan
2,Charlie Morton,17.8,Charlie Morton,charlie,morton,ch,morto
3,Jordan Montgomery,16.2,Jordan Montgomery,jordan,montgomery,jo,montg
4,Pablo Lopez,16.0,Pablo Lopez,pablo,lopez,pa,lopez
...,...,...,...,...,...,...,...
158,Sam Long,4.9,Sam Long,sam,long,sa,long
159,Ryan McKenna,4.9,Ryan McKenna,ryan,mckenna,ry,mcken
160,Niko Goodrum,4.8,Niko Goodrum,niko,goodrum,ni,goodr
161,Mickey McDonald,4.4,Mickey McDonald,mickey,mcdonald,mi,mcdon


In [8]:
def create_score_df(date=None):
    """Load the actual fantasy results for one slate.

    Parameters
    ----------
    date : str, optional
        Slate date as "YYYY-MM-DD"; dashes are stripped to build the file name
        "Results YYYYMMDD.csv". Defaults to the current value of the
        notebook-level ``todaysdate``.

    Returns
    -------
    pandas.DataFrame
        'Name' (renamed from 'Player'), 'Roster Position', '%Drafted', 'FPTS'
        and the columns added by ``name_clean``. Rows with any missing value
        are dropped, as are rows with Roster Position "P" and FPTS == 0.

    Raises
    ------
    FileNotFoundError
        If the results file for that date does not exist.
    """
    # Resolve the default at call time. A `date=todaysdate` default is frozen
    # when the def cell runs, so later edits to the date cell would be ignored.
    if date is None:
        date = todaysdate

    date = date.replace("-", "")
    result_name = "Results " + date + ".csv"

    # Read in results
    score_df = pd.read_csv(os.path.join(baseball_path, "Results", result_name))

    # Chained (non in-place) form yields a fresh frame rather than mutating a
    # column-subset slice of the frame that was read.
    score_df = (
        score_df[['Player', 'Roster Position', '%Drafted', 'FPTS']]
        .dropna()
        .rename(columns={'Player': 'Name'})
    )
    score_df = name_clean(score_df)

    # Blank out pitchers with exactly 0 points so the final dropna removes them
    # (presumably pitchers who did not appear -- TODO confirm).
    zero_point_pitcher = (score_df['FPTS'] == 0) & (score_df['Roster Position'] == "P")
    score_df['FPTS'] = score_df['FPTS'].mask(zero_point_pitcher)

    # Also drops rows where name_clean could not derive a last name (NaN 'Last')
    return score_df.dropna()

In [9]:
def pregame(nn='200200200r', date=None):
    """Compare my projections with DFF's before results are known.

    Prints the correlation between the two projection sets and the players
    whose projections differ by more than 3 points in either direction.

    Parameters
    ----------
    nn : str
        Simulation sub-folder passed to ``create_my_df``.
    date : str, optional
        Slate date as "YYYY-MM-DD". Defaults to the current value of the
        notebook-level ``todaysdate``.

    Returns
    -------
    pandas.DataFrame
        Columns 'Name', 'AvgPointsPerGame' and 'diff' (mine minus DFF) for
        players found in both sources.
    """
    # Resolve the default at call time. A `date=todaysdate` default is frozen
    # when the def cell runs, so later edits to the date cell would be ignored.
    if date is None:
        date = todaysdate

    my_df = create_my_df(nn, date)
    dff_df = create_dff_df(date)

    # Players are matched on the First2/Last5 name code; clashing column names
    # from the DFF side get the "_x" suffix so 'Name' stays my version.
    proj_df = my_df.merge(dff_df, on=['First2', 'Last5'], how='inner', suffixes=("", "_x"))

    print("My correlation with DFF was " + str(proj_df['AvgPointsPerGame'].corr(proj_df['ppg_projection'])))

    proj_df['diff'] = proj_df['AvgPointsPerGame'] - proj_df['ppg_projection']

    proj_df = proj_df[['Name', 'AvgPointsPerGame', 'diff']]

    print("Not Even Close List:")
    # |diff| > 3 is the same selection as "diff > 3 or diff < -3"
    print(proj_df[proj_df['diff'].abs() > 3])

    return proj_df

In [10]:
# Pre-game comparison of my projections against DFF, using pregame's default
# simulation folder and date.
# NOTE(review): the recorded run of this cell failed with FileNotFoundError for
# Player_Sims_20220427.csv although the date cell sets "2022-04-07" --
# presumably a date left over from earlier kernel state; confirm by
# re-running from a fresh kernel.
proj_df = pregame()

FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\james\\Documents\\MLB\\Data\\Player Sims\\200200200r\\Player_Sims_20220427.csv'

In [11]:

def postgame(nn='200200200r', date=None):
    """Join my projections, DFF's projections and actual results for one slate.

    Parameters
    ----------
    nn : str
        Simulation sub-folder passed to ``create_my_df``.
    date : str, optional
        Slate date as "YYYY-MM-DD". Defaults to the current value of the
        notebook-level ``todaysdate``.

    Returns
    -------
    pandas.DataFrame
        One row per player found in all three sources, with columns 'Name',
        'Roster Position', 'AvgPointsPerGame', 'ppg_projection', 'FPTS' and,
        for each of "my" and "dff", the signed error (projection minus actual),
        absolute error and squared error.
    """
    # Resolve the default at call time. A `date=todaysdate` default is frozen
    # when the def cell runs, so later edits to the date cell would be ignored.
    if date is None:
        date = todaysdate

    my_df = create_my_df(nn, date)
    dff_df = create_dff_df(date)
    score_df = create_score_df(date)

    # Inner joins on the First2/Last5 name code: only players present in all
    # three sources survive. Unsuffixed columns (e.g. 'Name') come from my_df.
    complete_df = my_df.merge(dff_df, on=['First2', 'Last5'], how='inner', suffixes=("", "_dff"))
    complete_df = complete_df.merge(score_df, on=['First2', 'Last5'], how='inner', suffixes=("", "_dk"))

    # .copy() so the error columns are added to an independent frame rather
    # than to a column-subset view of the merged frame.
    complete_df = complete_df[['Name', 'Roster Position', 'AvgPointsPerGame', 'ppg_projection', 'FPTS']].copy()

    # Calculate errors (projection minus actual)
    complete_df['my_error'] = complete_df['AvgPointsPerGame'] - complete_df['FPTS']
    complete_df['dff_error'] = complete_df['ppg_projection'] - complete_df['FPTS']

    complete_df['my_abs_error'] = complete_df['my_error'].abs()
    complete_df['dff_abs_error'] = complete_df['dff_error'].abs()

    complete_df['my_error_sq'] = complete_df['my_error'] ** 2
    complete_df['dff_error_sq'] = complete_df['dff_error'] ** 2

    # Correlations (optional diagnostics, disabled)
    # print("My correlation with actual scores was " + str(complete_df['AvgPointsPerGame'].corr(complete_df['FPTS'])))
    # print("DFF's correlation with actual scores was " + str(complete_df['ppg_projection'].corr(complete_df['FPTS'])))

    return complete_df

In [12]:
def _print_error_summary(label, df):
    """Print mean absolute error and mean squared error for both sources under a heading."""
    print(label)
    print("my error: " + str(df['my_abs_error'].mean()))
    print("DFF error: " + str(df['dff_abs_error'].mean()))

    print("my MSE: " + str(df['my_error_sq'].mean()))
    print("DFF MSE: " + str(df['dff_error_sq'].mean()))


def evaluations(nn):
    """Evaluate one simulation folder against DFF and actual results over all dates.

    Runs ``postgame`` for every "Player_Sims_YYYYMMDD.csv" file found in the
    "Player Sims"/``nn`` folder, stacks the per-date frames and prints mean
    absolute error and MSE for all players, pitchers (Roster Position == 'P')
    and batters (everything else).

    Parameters
    ----------
    nn : str
        Name of the simulation sub-folder to evaluate.

    Returns
    -------
    pandas.DataFrame
        The concatenated ``postgame`` frames, in date order, with a fresh index.

    Raises
    ------
    ValueError
        If the folder contains no simulation files.
    FileNotFoundError
        If the folder, or a DFF/results file for one of the dates, is missing.
    """
    prefix, suffix = "Player_Sims_", ".csv"
    sims_dir = os.path.join(baseball_path, "Player Sims", nn)

    all_df_list = []
    # sorted(): os.listdir returns entries in arbitrary order; sorting the
    # YYYYMMDD-stamped names makes the row order chronological and reproducible.
    for file in sorted(os.listdir(sims_dir)):
        stamp = file[len(prefix):-len(suffix)]
        # Skip anything that is not a dated simulation file (checkpoint
        # folders, OS metadata files, stray notes); these would otherwise be
        # turned into a nonsense date and crash postgame.
        if not (file.startswith(prefix) and file.endswith(suffix)
                and len(stamp) == 8 and stamp.isdigit()):
            continue

        # postgame expects the dashed ISO form
        date = stamp[0:4] + "-" + stamp[4:6] + "-" + stamp[6:8]
        all_df_list.append(postgame(nn, date))

    if not all_df_list:
        raise ValueError("No Player_Sims_YYYYMMDD.csv files found in " + sims_dir)

    all_df = pd.concat(all_df_list, axis=0, ignore_index=True)

    _print_error_summary("All", all_df)
    _print_error_summary("Pitchers", all_df[all_df["Roster Position"] == 'P'])
    _print_error_summary("Batters", all_df[all_df["Roster Position"] != 'P'])

    return all_df

In [13]:
# Evaluate the "200200200t" simulation folder over every available date; the
# error summary is printed and the stacked per-player frame is shown below.
all_df = evaluations("200200200t")
all_df

All
my error: 5.5819211929421
DFF error: 5.697679324894512
my MSE: 54.40447018132807
DFF MSE: 53.453892405063264
Pitchers
my error: 8.833239738805977
DFF error: 8.348694029850744
my MSE: 118.03971511974812
DFF MSE: 109.74670708955219
Batters
my error: 5.209388755878596
DFF error: 5.3939290294997875
my MSE: 47.1132150964643
DFF MSE: 47.00392475416846


Unnamed: 0,Name,Roster Position,AvgPointsPerGame,ppg_projection,FPTS,my_error,dff_error,my_abs_error,dff_abs_error,my_error_sq,dff_error_sq
0,Shohei Ohtani,P,26.77195,24.1,33.35,-6.57805,-9.25,6.57805,9.25,43.270742,85.5625
1,Aaron Nola,P,22.80690,19.5,18.35,4.45690,1.15,4.45690,1.15,19.863958,1.3225
2,Hunter Greene,P,19.24970,14.2,33.10,-13.85030,-18.90,13.85030,18.90,191.830810,357.2100
3,George Kirby,P,19.22540,16.2,22.50,-3.27460,-6.30,3.27460,6.30,10.723005,39.6900
4,Cole Irvin,P,19.06950,10.8,9.55,9.51950,1.25,9.51950,1.25,90.620880,1.5625
...,...,...,...,...,...,...,...,...,...,...,...
2602,Michael Toglia,OF,4.93400,5.5,5.00,-0.06600,0.50,0.06600,0.50,0.004356,0.2500
2603,Martin Maldonado,C,4.78600,5.8,14.00,-9.21400,-8.20,9.21400,8.20,84.897796,67.2400
2604,Brandon Marsh,OF,4.72200,5.7,2.00,2.72200,3.70,2.72200,3.70,7.409284,13.6900
2605,Geraldo Perdomo,SS,4.59300,5.2,2.00,2.59300,3.20,2.59300,3.20,6.723649,10.2400


In [14]:
# Same evaluation for the "220220220r" simulation folder (returned frame is
# displayed as the cell output).
evaluations("220220220r")

All
my error: 5.584455937499999
DFF error: 5.676984374999998
my MSE: 53.99791172639224
DFF MSE: 52.94432421874997
Pitchers
my error: 8.493325379939215
DFF error: 8.181003039513682
my MSE: 111.21536400874623
DFF MSE: 104.05202887537989
Batters
my error: 5.251116318355975
DFF error: 5.390038314176243
my MSE: 47.441122523712174
DFF MSE: 47.08767676767676


Unnamed: 0,Name,Roster Position,AvgPointsPerGame,ppg_projection,FPTS,my_error,dff_error,my_abs_error,dff_abs_error,my_error_sq,dff_error_sq
0,Corbin Burnes,P,21.83490,21.1,14.35,7.48490,6.75,7.48490,6.75,56.023728,45.5625
1,Drew Smyly,P,19.45315,13.1,20.25,-0.79685,-7.15,0.79685,7.15,0.634970,51.1225
2,David Peterson,P,19.43195,17.5,-10.25,29.68195,27.75,29.68195,27.75,881.018156,770.0625
3,Sonny Gray,P,18.52540,17.8,33.35,-14.82460,-15.55,14.82460,15.55,219.768765,241.8025
4,Patrick Corbin,P,16.49280,12.7,16.50,-0.00720,-3.80,0.00720,3.80,0.000052,14.4400
...,...,...,...,...,...,...,...,...,...,...,...
3195,Bo Naylor,C,4.62200,6.8,0.00,4.62200,6.80,4.62200,6.80,21.362884,46.2400
3196,Austin Hedges,C,4.61300,5.0,2.00,2.61300,3.00,2.61300,3.00,6.827769,9.0000
3197,Gabriel Arias,2B,4.43300,5.5,8.00,-3.56700,-2.50,3.56700,2.50,12.723489,6.2500
3198,Will Benson,OF,3.71500,6.0,0.00,3.71500,6.00,3.71500,6.00,13.801225,36.0000


In [14]:
# Same evaluation for the "200200r" simulation folder (returned frame is
# displayed as the cell output).
evaluations("200200r")


All
my error: 5.693567659492689
DFF error: 5.701825518831666
my MSE: 54.230471484470556
DFF MSE: 53.528488662567234
Pitchers
my error: 8.659279291044777
DFF error: 8.348694029850746
my MSE: 114.82507113329285
DFF MSE: 109.74670708955219
Batters
my error: 5.353031790916875
DFF error: 5.397900599828621
my MSE: 47.27273682042417
DFF MSE: 47.07326906598119


Unnamed: 0,Name,Roster Position,AvgPointsPerGame,ppg_projection,FPTS,my_error,dff_error,my_abs_error,dff_abs_error,my_error_sq,dff_error_sq
0,Shohei Ohtani,P,23.84375,24.1,33.35,-9.50625,-9.25,9.50625,9.25,90.368789,85.5625
1,Aaron Nola,P,19.91090,19.5,18.35,1.56090,1.15,1.56090,1.15,2.436409,1.3225
2,Brandon Woodruff,P,19.33140,19.4,35.80,-16.46860,-16.40,16.46860,16.40,271.214786,268.9600
3,Zac Gallen,P,18.59865,16.2,11.80,6.79865,4.40,6.79865,4.40,46.221642,19.3600
4,Cole Irvin,P,17.55750,10.8,9.55,8.00750,1.25,8.00750,1.25,64.120056,1.5625
...,...,...,...,...,...,...,...,...,...,...,...
2597,Nick Maton,OF,5.09000,5.2,0.00,5.09000,5.20,5.09000,5.20,25.908100,27.0400
2598,Brandon Marsh,OF,5.08300,5.7,2.00,3.08300,3.70,3.08300,3.70,9.504889,13.6900
2599,Sergio Alcantara,SS,5.04400,5.3,2.00,3.04400,3.30,3.04400,3.30,9.265936,10.8900
2600,Geraldo Perdomo,SS,4.77700,5.2,2.00,2.77700,3.20,2.77700,3.20,7.711729,10.2400


In [15]:
# Same evaluation for the "120120120r" simulation folder (returned frame is
# displayed as the cell output).
evaluations("120120120r")

All
my error: 5.67422824750193
DFF error: 5.701825518831665
my MSE: 54.0506574535858
DFF MSE: 53.52848866256721
Pitchers
my error: 8.662270149253727
DFF error: 8.348694029850744
my MSE: 113.96442250970158
DFF MSE: 109.74670708955219
Batters
my error: 5.331128320479866
DFF error: 5.3979005998286205
my MSE: 47.17109916950728
DFF MSE: 47.07326906598117


Unnamed: 0,Name,Roster Position,AvgPointsPerGame,ppg_projection,FPTS,my_error,dff_error,my_abs_error,dff_abs_error,my_error_sq,dff_error_sq
0,Shohei Ohtani,P,25.46420,24.1,33.35,-7.88580,-9.25,7.88580,9.25,62.185842,85.5625
1,Brandon Woodruff,P,21.72065,19.4,35.80,-14.07935,-16.40,14.07935,16.40,198.228096,268.9600
2,Aaron Nola,P,20.60155,19.5,18.35,2.25155,1.15,2.25155,1.15,5.069477,1.3225
3,Zac Gallen,P,19.70095,16.2,11.80,7.90095,4.40,7.90095,4.40,62.425011,19.3600
4,Chris Bassitt,P,18.46000,19.1,29.90,-11.44000,-10.80,11.44000,10.80,130.873600,116.6400
...,...,...,...,...,...,...,...,...,...,...,...
2597,Brandon Marsh,OF,5.14100,5.7,2.00,3.14100,3.70,3.14100,3.70,9.865881,13.6900
2598,Matt Duffy,3B,4.96000,5.8,8.00,-3.04000,-2.20,3.04000,2.20,9.241600,4.8400
2599,Sergio Alcantara,SS,4.94800,5.3,2.00,2.94800,3.30,2.94800,3.30,8.690704,10.8900
2600,Romy Gonzalez,2B,4.71700,5.9,2.00,2.71700,3.90,2.71700,3.90,7.382089,15.2100


In [15]:
# Same evaluation for the "200200200r" simulation folder, which is also the
# default folder used by pregame and postgame.
evaluations("200200200r")

All
my error: 5.677690392006151
DFF error: 5.701825518831664
my MSE: 54.16724182571296
DFF MSE: 53.52848866256721
Pitchers
my error: 8.658500746268668
DFF error: 8.348694029850744
my MSE: 113.70182694861933
DFF MSE: 109.74670708955219
Batters
my error: 5.335420822622108
DFF error: 5.3979005998286205
my MSE: 47.33122262565349
DFF MSE: 47.073269065981194
