# Scrape Fantasy Cruncher
This scrapes www.fantasycruncher.com to find double up tournament scores <br>
It then evaluates how my lineups would have performed


In [1]:
import requests
import pandas as pd
import os
import unidecode
import re
import import_ipynb
from Utilities import *
import warnings
import time
warnings.simplefilter(action="ignore")

baseball_path = r"C:\Users\james\Documents\MLB\Data"

In [8]:
# Scrape results, including minimum score needed to cash, from Fantasy Cruncher
def scrape_fc(date, timeout=30):
    """Fetch Fantasy Cruncher's contest list for one date, DraftKings rows only.

    Posts a form to the tournament-analyzer ``get-contests.php`` endpoint
    asking for MLB contests on ``date`` across the listed sites, flattens the
    JSON reply with ``pd.json_normalize`` and keeps the rows whose ``site``
    column equals ``"draftkings"``.

    Parameters
    ----------
    date : str
        Value sent as ``periods[]``; callers in this notebook pass "YYYYMMDD".
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Returns
    -------
    pandas.DataFrame
        One row per DraftKings contest returned by the endpoint.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.Timeout
        If no response arrives within ``timeout`` seconds.
    """
    url = (
        "https://www.fantasycruncher.com/funcs/tournament-analyzer/get-contests.php"
    )

    payload = {
        "sites[]": [
            "draftkings",
            "draftkings_pickem",
            "draftkings_showdown",
            "fanduel",
            "fanduel_single",
            "fanduel_super",
            "fantasydraft",
            "yahoo",
            "superdraft",
        ],
        "leagues[]": "MLB",
        "periods[]": date,
    }

    # Without a timeout requests waits indefinitely on a stalled connection
    response = requests.post(url, data=payload, timeout=timeout)
    # Fail on HTTP errors here rather than with an opaque JSON decode error
    response.raise_for_status()

    contests = pd.json_normalize(response.json())
    contests = contests.query('site == "draftkings"')

    return contests

In [9]:
# Fetch the DraftKings MLB contests for 2023-08-03 and display the table
df = scrape_fc("20230803")
df

Unnamed: 0,id,site,league,slate,site_id,name,period,max_entries,max_entrants,cost,...,mincash_score,startdate,winning_payout,mincash_payout,DateTime,Title,game_cnt,winner_cnt,winner,has_lineups
2,1273639806,draftkings,MLB,90497,147607875,MLB $75K MEGA mini-MAX [150 Entry Max],2023-08-03,150,29726,3.00,...,107.45,1691104500,7500.0,6.0,2023-08-03 19:15:00,,6,1,SeanGeezy,1
3,1273497352,draftkings,MLB,90497,147591495,MLB $10K mini-MAX [150 Entry Max],2023-08-03,150,23781,0.50,...,107.50,1691104500,1000.0,1.0,2023-08-03 19:15:00,,6,1,AltonBurns,1
4,1273506287,draftkings,MLB,90493,147591378,MLB $20K mini-MAX [150 Entry Max] (Early),2023-08-03,150,23781,1.00,...,104.65,1691085900,2000.0,2.0,2023-08-03 14:05:00,(Early),4,2,"MustangMinny,cdodson50",1
5,1273639804,draftkings,MLB,90497,147607873,MLB $200K Relay Throw [$50K to 1st],2023-08-03,150,15686,15.00,...,107.90,1691104500,50000.0,25.0,2023-08-03 19:15:00,,6,1,eppy12588,1
6,1273497368,draftkings,MLB,90497,147591457,MLB $3K Quarter Jukebox [Just $0.25!],2023-08-03,20,14268,0.25,...,105.60,1691104500,150.0,0.5,2023-08-03 19:15:00,,6,1,tbird4955,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2038,1274312115,draftkings,MLB,90501,147665522,MLB Satellite to NFL Best Ball $10M Millionair...,2023-08-03,1,3,3.70,...,88.25,1691113080,10.0,10.0,2023-08-03 21:38:00,(Night),2,1,Jacobrial88,1
2039,1274312176,draftkings,MLB,90501,147664919,MLB $5 3-Player (Winner Takes All) (Night),2023-08-03,1,3,5.00,...,101.50,1691113080,13.5,13.5,2023-08-03 21:38:00,(Night),2,1,PLee11,1
2040,1274322266,draftkings,MLB,90501,147666266,MLB $10 3-Player (Winner Takes All) (Night),2023-08-03,1,3,10.00,...,122.75,1691113080,27.0,27.0,2023-08-03 21:38:00,(Night),2,1,brando3344,1
2041,1274322270,draftkings,MLB,90501,147665901,MLB Satellite to NFL $5 Fantasy Football Milli...,2023-08-03,1,3,1.85,...,122.75,1691113080,5.0,5.0,2023-08-03 21:38:00,(Night),2,1,punchie_ca,1


In [7]:
# List the columns available in the scraped contest table
df.columns

Index(['id', 'site', 'league', 'slate', 'site_id', 'name', 'period',
       'max_entries', 'max_entrants', 'cost', 'prizepool', 'places_paid',
       'total_entrants', 'winning_score', 'mincash_score', 'startdate',
       'winning_payout', 'mincash_payout', 'DateTime', 'Title', 'game_cnt',
       'winner_cnt', 'winner', 'has_lineups'],
      dtype='object')

In [15]:
# This imports actual scores from results sheets
def import_score_df(date):
    """Load the actual fantasy points per player from a scraped results sheet.

    Reads ``Results <date>.csv`` from the "Results Scraped" folder under
    ``baseball_path``, drops rows with missing values in the four columns it
    uses, passes the frame through ``name_clean`` and returns each player's
    cleaned name with their points.

    Parameters
    ----------
    date : str
        Date portion of the results file name (callers pass "YYYYMMDD").

    Returns
    -------
    pandas.DataFrame
        Columns ``Name_Adjusted`` and ``FPTS``. ``FPTS`` is NaN for rows whose
        roster position is "P" and whose score is exactly 0.

    Raises
    ------
    KeyError
        If the sheet has neither of the two supported column layouts.
    """
    result_name = "Results " + date + ".csv"

    # Read in results
    raw_df = pd.read_csv(os.path.join(baseball_path, "Results Scraped", result_name))

    try:
        score_df = raw_df[['Player', 'Roster Position', '%Drafted', 'FPTS']]
    except KeyError:
        # Alternate layout: 'Name'/'Salary' stand in for 'Player'/'%Drafted'.
        # Those columns only influence which rows dropna() removes below;
        # neither is part of the returned frame.
        score_df = raw_df[['Name', 'Roster Position', 'Salary', 'FPTS']].rename(
            columns={'Name': 'Player', 'Salary': '%Drafted'}
        )

    # name_clean is given the player column under the name 'Name'
    score_df = score_df.dropna().rename(columns={'Player': 'Name'})
    # name_clean comes from Utilities; it is expected to add 'Name_Adjusted'
    score_df = name_clean(score_df)

    # A pitcher with exactly 0 points is treated as having no score
    # (presumably he did not pitch - TODO confirm)
    pitcher_without_score = (score_df['FPTS'] == 0) & (score_df['Roster Position'] == "P")
    score_df['FPTS'] = np.where(pitcher_without_score, np.nan, score_df['FPTS'])

    score_df = score_df[['Name_Adjusted', 'FPTS']]

    return score_df

In [16]:
# Load the actual player scores for 2023-04-27 and display them
score_df = import_score_df("20230427")
score_df

Unnamed: 0,Name_Adjusted,FPTS
0,Gerrit Cole,26.80
1,Shane McClanahan,17.05
2,Joey Lucchesi,7.80
3,Aaron Judge,0.00
4,Byron Buxton,18.00
...,...,...
126,Brad Miller,0.00
127,Luis Garcia,7.00
128,Derek Hill,0.00
129,Charlie Culberson,0.00


In [5]:
# This imports my lineups, adds actual scores, identifies missing players, and calculates whether a lineup would have won or lost
def import_lineups(date):
    """Score the simulated lineups for one date against the cash line.

    Reads ``Lineup_Sims_<date>.csv`` from the "Lineup Sims" folder under
    ``baseball_path``, attaches each roster slot's actual points from
    ``import_score_df(date)``, and compares each lineup's total with the
    minimum cashing score of a contest scraped by ``scrape_fc(date)``.

    Parameters
    ----------
    date : str
        Date used for both file names and the scrape (callers pass "YYYYMMDD").

    Returns
    -------
    pandas.DataFrame
        The lineup rows plus one ``<slot>_pts`` column per roster slot and:
        ``Missing`` (number of slots with no score), ``Total`` (sum of the
        available slot scores), ``mincash`` (the scraped minimum cashing
        score) and ``Win`` (1/0, or NaN when any slot score is missing).

    Raises
    ------
    KeyError
        If no scraped contest matches the Double Up / cost == 5 filter.
    """
    # Import player scores
    score_df = import_score_df(date)

    # Read in lineups
    lineup_name = "Lineup_Sims_" + date + ".csv"
    lineup_df = pd.read_csv(os.path.join(baseball_path, "Lineup Sims", lineup_name))

    # Add actual scores to lineups, one roster slot at a time
    score_list = []
    position_list = ['P', 'P.1', 'C', '1B', '2B', '3B', 'SS', 'OF', 'OF.1', 'OF.2']

    for pos in position_list:
        # Column name for names without IDs
        no_id = pos + "_clean"
        # regex=True must be explicit: pandas >= 2.0 treats the pattern as a
        # literal string by default, which would strip nothing and make every
        # merge below miss. Inside the class, ")-." is a character range, so
        # "*", "+" and "," are removed as well.
        lineup_df[no_id] = lineup_df[pos].str.replace('[(0-9)-.\']', '', regex=True)

        # Column name for points
        pos_pts = pos + "_pts"

        # Merge with actual points.
        # NOTE(review): a name that appears more than once in score_df would
        # duplicate lineup rows here - confirm names are unique per date.
        lineup_df = lineup_df.merge(score_df, left_on=no_id, right_on='Name_Adjusted', how='left')
        lineup_df = lineup_df.rename(columns={'FPTS': pos_pts})

        # Add points column name to list
        score_list.append(pos_pts)

        # Remove the helper columns so the next merge starts clean
        lineup_df = lineup_df.loc[:, ~lineup_df.columns.str.startswith('Name_Adjusted')]
        lineup_df = lineup_df.loc[:, ~lineup_df.columns.str.endswith('_clean')]

    # Create count of missing scores
    lineup_df['Missing'] = lineup_df[score_list].isna().sum(axis=1)

    # Total points scored by that lineup (missing scores are skipped by sum)
    lineup_df['Total'] = lineup_df[score_list].sum(axis=1)

    # Scrape minimum cash score from FC: first contest costing 5 whose name
    # contains "Double Up" but neither "Early" nor "Night"
    contests = scrape_fc(date)
    contest_names = contests['name']
    # na=False keeps the masks boolean even if a contest has no name
    is_double_up = (
        contest_names.str.contains("Double Up", na=False)
        & ~contest_names.str.contains("Early", na=False)
        & ~contest_names.str.contains("Night", na=False)
    )
    candidates = contests[is_double_up].query('cost == 5')
    if candidates.empty:
        # Same exception type as the positional lookup raised before, but
        # with a message that says what is actually missing
        raise KeyError("No Double Up contest with cost == 5 found for " + date)
    mincash = candidates['mincash_score'].iloc[0]

    # Create column with the minimum points needed to cash
    lineup_df['mincash'] = mincash

    # A lineup counts as a win when it scores strictly more than mincash.
    # NOTE(review): a total exactly equal to mincash counts as a loss here -
    # confirm whether ties with the cash line should count as wins.
    lineup_df['Win'] = np.where(lineup_df['Total'] > lineup_df['mincash'], 1, 0)
    # Lineups with any missing player score cannot be judged
    lineup_df['Win'] = np.where(lineup_df['Missing'] == 0, lineup_df['Win'], np.nan)

    return lineup_df

In [None]:
# Success rate for each night
win_list = []

# Reuse the configured data root instead of repeating the absolute path
lineup_dir = os.path.join(baseball_path, "Lineup Sims")

# Only files named Lineup_Sims_YYYYMMDD.csv are processed; any other file in
# the folder is skipped instead of being sliced into a meaningless date
lineup_file_pattern = re.compile(r"Lineup_Sims_(\d{4})(\d{2})(\d{2})\.csv")

# os.listdir returns entries in arbitrary order; sort so nights print by date
for filename in sorted(os.listdir(lineup_dir)):
    match = lineup_file_pattern.fullmatch(filename)
    if match is None:
        continue

    # Extract date
    year, month, day = match.groups()
    date = year + "-" + month + "-" + day
    date_short = year + month + day

    # Import lineups (with actual points)
    lineup_df = import_lineups(date_short)

    # Calculate win rate (mean skips lineups whose Win is NaN)
    win_rate = lineup_df['Win'].mean()
    win_list.append(win_rate)

    # Print out results
    print(date)
    print("Missing: " + str(lineup_df['Missing'].sum()))
    print(win_rate)
    print("Average: " + str(lineup_df['Total'].mean()))
    print("Minimum: " + str(lineup_df['mincash'].iloc[0]))

    # Seems to be necessary to avoid errors (presumably the site throttles
    # rapid requests - TODO confirm)
    time.sleep(0.5)

# Nights where every lineup had a missing score have a NaN win rate; drop them
win_list = [rate for rate in win_list if pd.notna(rate)]
np.mean(win_list)

2022-04-07
Missing: 0
0.0
Average: 66.77000000000001
Minimum: 87.2
2022-04-08
Missing: 0
0.4
Average: 89.64000000000003
Minimum: 92.35
2022-04-09
Missing: 0
0.35
Average: 70.39000000000001
Minimum: 73.75
2022-04-10
Missing: 0
0.4
Average: 85.40999999999995
Minimum: 92.35
2022-04-11
Missing: 0
1.0
Average: 94.45699999999997
Minimum: 74.55
2022-04-12
Missing: 0
0.65
Average: 95.90000000000003
Minimum: 92.15
2022-04-13
Missing: 0
0.4
Average: 110.47099999999998
Minimum: 115.7
2022-04-14
Missing: 0
1.0
Average: 95.89999999999996
Minimum: 85.4
2022-04-15
Missing: 0
0.8
Average: 100.49999999999997
Minimum: 95.5
2022-04-16
Missing: 0
0.7
Average: 140.69999999999996
Minimum: 130.7
2022-04-17
Missing: 0
0.25
Average: 66.805
Minimum: 78.7
2022-04-18
Missing: 1
0.8421052631578947
Average: 92.68000000000002
Minimum: 87.8
2022-04-19
Missing: 0
0.6
Average: 115.19999999999996
Minimum: 109.6
2022-04-20
Missing: 0
0.95
Average: 93.86000000000003
Minimum: 82.35
2022-04-21
Missing: 0
1.0
Average: 95.589

0.3553156864868538

In [32]:
# Drop NaN win rates (their string form is 'nan'), then average what is left
win_list = list(filter(lambda rate: str(rate) != 'nan', win_list))
np.mean(win_list)

0.3553156864868538

# To do:
Managed to reduce the missing player scores down to just 9/29/2022. Not sure if I have the wrong results or if the slate changed or something. 

200: 0.418907470955472 <br>
100: 0.43321981424148615 <br>
50: 0.46 <br>
20: 0.42368421052631583 <br>
12% randomization: 0.39308077887025256 <br>
50, trained on only those with significant PAs, removed imp: 0.46166666666666667 <br>