In [7]:
import requests
import pandas as pd
import os
import unidecode
import re
import import_ipynb
from Utilities import *
import warnings
import time
# Suppress every warning category for the rest of the session
# (note: this also hides pandas deprecation / FutureWarning notices).
warnings.simplefilter(action="ignore")

# Root data folder; the "Results" and "Lineup Sims" CSV subfolders are read from here.
baseball_path = r"C:\Users\james\Documents\MLB\Data"

In [8]:
# Scrape results, including minimum score needed to cash, from Fantasy Cruncher
def scrape_fc(date, timeout=30):
    """Fetch Fantasy Cruncher's tournament-analyzer contest list for one date.

    Parameters
    ----------
    date : str
        Value sent as the ``periods[]`` form field. This notebook passes
        ``YYYYMMDD`` strings (e.g. ``"20220929"``).
    timeout : float, optional
        Seconds to wait for the server before giving up. Without a timeout
        ``requests`` can block indefinitely on a stalled connection.

    Returns
    -------
    pandas.DataFrame
        The JSON response flattened with ``pd.json_normalize`` and filtered
        to rows whose ``site`` column equals ``"draftkings"``.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.Timeout
        If no response arrives within ``timeout`` seconds.
    """
    url = "https://www.fantasycruncher.com/funcs/tournament-analyzer/get-contests.php"

    # Form fields expected by the endpoint; all sites are requested and the
    # DraftKings rows are selected after the response is parsed.
    payload = {
        "sites[]": [
            "draftkings",
            "draftkings_pickem",
            "draftkings_showdown",
            "fanduel",
            "fanduel_single",
            "fanduel_super",
            "fantasydraft",
            "yahoo",
            "superdraft",
        ],
        "leagues[]": "MLB",
        "periods[]": date,
    }

    response = requests.post(url, data=payload, timeout=timeout)
    # Fail loudly on an HTTP error instead of trying to JSON-decode an error page
    response.raise_for_status()

    contests = pd.json_normalize(response.json())
    return contests.query('site == "draftkings"')

In [9]:
# This imports actual scores from results sheets
def import_score_df(date):
    """Load one slate's results sheet and return each player's actual points.

    Reads ``Results <date>.csv`` from the ``Results`` folder under
    ``baseball_path``, keeps the player-level columns, drops incomplete rows,
    and passes the frame through ``name_clean`` (expected to supply the
    ``Name_Adjusted`` column selected at the end).

    Parameters
    ----------
    date : str
        Date string embedded in the results file name.

    Returns
    -------
    pandas.DataFrame
        Two columns: ``Name_Adjusted`` and ``FPTS``. ``FPTS`` is NaN for any
        row with roster position ``"P"`` and exactly 0 points.
    """
    results_file = os.path.join(baseball_path, "Results", "Results " + date + ".csv")

    # Select the player columns, discard rows with gaps, and rename for name_clean
    players = (
        pd.read_csv(results_file)[['Player', 'Roster Position', '%Drafted', 'FPTS']]
        .dropna()
        .rename(columns={'Player': 'Name'})
    )
    players = name_clean(players)

    # A pitcher slot with exactly zero points is treated as "no score" (NaN)
    zero_point_pitcher = (players['FPTS'] == 0) & (players['Roster Position'] == "P")
    players['FPTS'] = np.where(zero_point_pitcher, np.nan, players['FPTS'])

    return players[['Name_Adjusted', 'FPTS']]

# Spot-check one player's score on the 20220929 results sheet
score_df = import_score_df("20220929")
score_df[score_df['Name_Adjusted'] == "HaSeong Kim"]

Unnamed: 0,Name_Adjusted,FPTS
46,HaSeong Kim,3.0


In [10]:
# This imports my lineups, adds actual scores, identifies missing players, and calculates whether a lineup would have won or lost
def import_lineups(date):
    """Load simulated lineups for a date and grade them against actual results.

    Steps:
      1. Load actual player scores via ``import_score_df(date)``.
      2. Read ``Lineup_Sims_<date>.csv`` from the ``Lineup Sims`` folder under
         ``baseball_path``.
      3. For each roster slot column, strip IDs/punctuation from the player
         name and left-merge the player's actual ``FPTS`` as ``<slot>_pts``.
      4. Count missing scores per lineup and sum the available scores.
      5. Look up the minimum cashing score via ``scrape_fc(date)`` and flag
         each lineup as a win or loss.

    Parameters
    ----------
    date : str
        Date string used in both file names and passed to ``scrape_fc``.

    Returns
    -------
    pandas.DataFrame
        The lineup frame with added columns ``<slot>_pts`` (one per slot),
        ``Missing`` (number of slots without a score), ``Total`` (sum of
        available scores), ``mincash`` and ``Win`` (1/0, or NaN when any slot
        is missing a score).

    Raises
    ------
    KeyError
        If no contest matches the selection used for the min-cash lookup.
    """
    # Import player scores
    score_df = import_score_df(date)

    # Read in lineups
    lineup_name = "Lineup_Sims_" + date + ".csv"
    lineup_df = pd.read_csv(os.path.join(baseball_path, "Lineup Sims", lineup_name))

    # Add actual scores to lineups, one roster slot at a time
    score_list = []
    position_list = ['P', 'P.1', 'C', '1B', '2B', '3B', 'SS', 'OF', 'OF.1', 'OF.2']

    for pos in position_list:
        # Column name for names without IDs
        no_id = pos + "_clean"
        # regex=True is required: pandas >= 2.0 treats the pattern as a literal
        # string by default, which would leave every name untouched.
        # Inside the class, ")-." is parsed as a range, so besides "(", ")",
        # digits, "-", "." and "'" this also strips "*", "+" and ",".
        lineup_df[no_id] = lineup_df[pos].str.replace(r"[(0-9)-.']", '', regex=True)

        # Column name for points
        pos_pts = pos + "_pts"

        # Merge with actual points
        lineup_df = lineup_df.merge(score_df, left_on=no_id, right_on='Name_Adjusted', how='left')
        lineup_df = lineup_df.rename(columns={'FPTS': pos_pts})

        # Add points column name to list
        score_list.append(pos_pts)

        # Remove the helper columns introduced for the merge
        columns = lineup_df.columns
        is_helper = columns.str.startswith('Name_Adjusted') | columns.str.endswith('_clean')
        lineup_df = lineup_df.loc[:, ~is_helper]

    # Create count of missing scores
    lineup_df['Missing'] = lineup_df[score_list].isna().sum(axis=1)

    # Total points scored by the lineup (NaN scores are skipped by sum)
    lineup_df['Total'] = lineup_df[score_list].sum(axis=1)

    # Scrape minimum cash score from FC: a "Double Up" contest with cost 5
    # whose name mentions neither "Early" nor "Night"
    contests = scrape_fc(date)
    contest_names = contests['name']
    is_double_up = (
        ~contest_names.str.contains("Early")
        & ~contest_names.str.contains("Night")
        & contest_names.str.contains("Double Up")
    )
    candidates = contests[is_double_up].query('cost == 5')
    if candidates.empty:
        # Same exception type the bare [0] lookup produced, with a usable message
        raise KeyError("No cost-5 'Double Up' contest (excluding Early/Night) found for " + str(date))
    mincash = candidates['mincash_score'].iloc[0]

    # Create column with the minimum points needed to cash
    lineup_df['mincash'] = mincash

    # Flag winners; lineups with any missing score get NaN instead of 0/1
    # NOTE(review): strict ">" counts a lineup that exactly ties the min-cash
    # score as a loss — confirm that is intended.
    lineup_df['Win'] = np.where(lineup_df['Total'] > lineup_df['mincash'], 1, 0)
    lineup_df['Win'] = np.where(lineup_df['Missing'] == 0, lineup_df['Win'], np.nan)

    return lineup_df

In [11]:
# Success rate for each night
win_list = []

# Lineup files are named "Lineup_Sims_YYYYMMDD.csv"; derive the folder from
# baseball_path so the location is configured in one place only.
lineup_prefix = "Lineup_Sims_"
lineup_dir = os.path.join(baseball_path, "Lineup Sims")

# Loop over Lineup Sims documents (sorted so the nights print in date order)
for filename in sorted(os.listdir(lineup_dir)):
    # Skip anything that is not a lineup CSV so the date slicing below is valid
    if not (filename.startswith(lineup_prefix) and filename.endswith(".csv")):
        continue

    # Extract date
    date_short = filename[len(lineup_prefix):len(lineup_prefix) + 8]
    date = date_short[:4] + "-" + date_short[4:6] + "-" + date_short[6:8]

    # Import lineups (with actual points)
    lineup_df = import_lineups(date_short)

    # Calculate win rate (NaN 'Win' values are ignored by mean)
    win_rate = lineup_df['Win'].mean()
    win_list.append(win_rate)

    # Print out results
    print(date)
    print("Missing: " + str(lineup_df['Missing'].sum()))
    print(win_rate)

    # Seems to be necessary to avoid errors
    time.sleep(0.5)

# Drop nights whose win rate could not be computed, then average the rest
win_list = [rate for rate in win_list if pd.notna(rate)]
np.mean(win_list)

2022-09-14
Missing: 0
0.84
2022-09-15
Missing: 0
0.02
2022-09-16
Missing: 0
0.18
2022-09-17
Missing: 0
0.44
2022-09-18
Missing: 0
0.4
2022-09-19
Missing: 0
0.88
2022-09-20
Missing: 0
0.6
2022-09-21
Missing: 0
0.5
2022-09-22
Missing: 0
0.78
2022-09-23
Missing: 0
0.76
2022-09-24
Missing: 0
0.0
2022-09-25
Missing: 0
0.46
2022-09-26
Missing: 0
0.0
2022-09-27
Missing: 0
0.26
2022-09-28
Missing: 0
0.44
2022-09-29
Missing: 26
0.7916666666666666
2022-09-30
Missing: 0
0.02
2022-10-01
Missing: 0
0.92
2022-10-02
Missing: 0
0.48


0.46166666666666667

In [12]:
# Keep only the nightly win rates whose string form is not 'nan', then display them
win_list = list(filter(lambda rate: str(rate) != 'nan', win_list))
win_list

[0.84,
 0.02,
 0.18,
 0.44,
 0.4,
 0.88,
 0.6,
 0.5,
 0.78,
 0.76,
 0.0,
 0.46,
 0.0,
 0.26,
 0.44,
 0.7916666666666666,
 0.02,
 0.92,
 0.48]

# To do:
Reduced the missing player scores to a single date, 9/29/2022. Not sure whether I have the wrong results file for that date or whether the slate changed.

200: 0.418907470955472 <br>
100: 0.43321981424148615 <br>
50: 0.46 <br>
20: 0.42368421052631583 <br>
12% randomization: 0.39308077887025256 <br>