# DraftKings Prop Analysis

In [422]:
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd
import numpy as np
import json
from datetime import datetime, timedelta

In [596]:
def find_between( s, first, last ):
    try:
        start = s.index( first ) + len( first )
        end = s.index( last, start )
        return s[start:end]
    except ValueError:
        return ""

## DraftKings - NBA and NFL 

In [484]:
# Create a dataframe to append results to
bet_offers_df = pd.DataFrame(columns=['League', 'Game', 'StartDate', 'StartTime', 'BetLabel', 'Outcome1_Label', 
                                      'Outcome1_Line', 'Outcome1_Odds', 'Outcome2_Label', 'Outcome2_Line', 'Outcome2_Odds'])

# Create a list of the DraftKings URLs to parse
urls = ["https://sportsbook.draftkings.com/leagues/basketball/88670846", 
        "https://sportsbook.draftkings.com/leagues/football/88670561"]

# Loop over the URLs and parse results
for url in urls:
    print(url)
    # Get the BeautifulSoup data from the DraftKings website
    dk_response = requests.get(url)
    dk_soup = BeautifulSoup(dk_response.text, "html.parser")

    # Narrow the Beautiful Soup extract to just the field of interest "window.__INITIAL_STATE__"

    # Filter out opening and closing <script> tags
    dk_scrape = str(list(list(list(dk_soup.children)[2])[3])[11]).replace("<script>","").replace("</script>", "")
    # Remove leading and trailing whitespace
    dk_scrape = dk_scrape.strip()
    # Split sections
    dk_scrape = dk_scrape.split(";\n")
    # Isolate to json of interest
    dk_scrape = dk_scrape[6].strip()
    # Format as json dictionary
    dk_scrape = dk_scrape.replace("window.__INITIAL_STATE__ = ","")
    dk_scrape = json.loads(dk_scrape)
    
    # Grab the sport ID from the scrape
    sportId = list(dk_scrape['eventGroups'].keys())[0]
    
    # From the full scrape of the page, pull a list of the games to loop over and extract data from
    games = dk_scrape['eventGroups'][sportId]['events'].keys()
    print("Games: ", games)
    
    # Loop over games
    for index, game in enumerate(games):
        print(game)
        # Set the game JSON as variable
        game_details = dk_scrape['eventGroups'][sportId]['events'][game]
        # Get the eventId so I can scrape the actual props
        eventId = game_details['eventId']

        # Web scrape Draft Kings for player props
        props_url = f"https://sportsbook.draftkings.com/event/{eventId}"
        response = requests.get(props_url)
        soup = BeautifulSoup(response.text, "html.parser")
        
        # Clean BeautifulSoup response
        # Filter out opening and closing <script> tags
        scrape = str(list(list(list(soup.children)[2])[3])[11]).replace("<script>","").replace("</script>", "")
        # Remove leading and trailing whitespace
        scrape = scrape.strip()
        # Split sections
        scrape = scrape.split(";\n")
        # Isolate to json of interest
        scrape = scrape[6].strip()
        # Format as json dictionary
        scrape = scrape.replace("window.__INITIAL_STATE__ = ","")
        scrape = json.loads(scrape)
        
        # Parse the scrape results
        eventGroupId = list(scrape['eventGroups'].keys())[0]
        providerEventId = list(scrape['eventGroups'][eventGroupId]['events'].keys())[0]
        providerOfferId = list(scrape['offers'][eventGroupId].keys())[0]
        eventId = scrape['eventGroups'][eventGroupId]['events'][providerEventId]['eventId']
        game_details = scrape['eventGroups'][eventGroupId]['events'][providerEventId]
        game = game_details['name']
        eventGroup = game_details['eventGroupName']
        teamName1 = game_details['teamName1']
        teamName1 = game_details['teamName2']
        startDate = game_details['startDate']
        date, time = startDate.split('T')
        dt = date + ' ' + time[:8]
        dt = datetime.strptime(dt, '%Y-%m-%d %H:%M:%S')
        # Format as right timezone (-5 hours)
        dt = dt - timedelta(hours=5)
        date = dt.strftime('%a, %b %d')
        time = dt.strftime('%I:%M %p')

        # Isolate the bet offers from the beautiful soup scrape
        offers = scrape['offers'][eventGroupId]

        # Loop over offers and add to dataframe
        for index, offer in enumerate(offers):

            # Create a list for parsing the offers 
            offers_parsed = [eventGroup, game, date, time, '', '', '', '', '', '', '']

            # Parse the betting offer
            offer_dict = scrape['offers'][eventGroupId][offer]
            try:
                providerOfferId = offer_dict['providerOfferId']
                providerId = offer_dict['providerId']
                providerEventId = offer_dict['providerEventId']
                bet_label = offer_dict['label']
                isOpen = offer_dict['isOpen']
                outcomes = offer_dict['outcomes']
            except:
                continue

            # Assign to list
            offers_parsed[4] = bet_label    

            # Extract outcomes
            if len(outcomes) == 1:

                # Parse the outcome
                outcome_label = outcomes[0]['label']
                outcome_odds = outcomes[0]['oddsAmerican']
                offers_parsed[5] = outcome_label
                offers_parsed[7] = outcome_odds

                # Append the list to the dataframe 
                bet_offers_df.loc[len(bet_offers_df)] = offers_parsed

            elif len(outcomes) == 2:

                # Parse the outcomes
                for i, x in enumerate(outcomes):
                    if i == 0:
                        outcome_label = outcomes[i]['label']
                        try:
                            outcome_line = outcomes[i]['line']
                        except:
                            outcome_line = ''
                        outcome_odds = outcomes[i]['oddsAmerican']
                        offers_parsed[5] = outcome_label
                        offers_parsed[6] = outcome_line
                        offers_parsed[7] = outcome_odds
                    else:
                        outcome_label = outcomes[i]['label']
                        try:
                            outcome_line = outcomes[i]['line']
                        except:
                            continue
                        outcome_odds = outcomes[i]['oddsAmerican']
                        offers_parsed[8] = outcome_label
                        offers_parsed[9] = outcome_line
                        offers_parsed[10] = outcome_odds

                # Append the list to the dataframe 
                bet_offers_df.loc[len(bet_offers_df)] = offers_parsed

            else:

                continue

bet_offers_df

https://sportsbook.draftkings.com/leagues/basketball/88670846
Games:  dict_keys(['25913984', '25913985', '25913987', '25913998', '25914010'])
25913984
25913985
25913987
25913998
25914010
https://sportsbook.draftkings.com/leagues/football/88670561
Games:  dict_keys(['22932871', '22932873', '22932877', '22932886', '22932889', '22932890', '22932891', '22932894', '22932895', '22932896', '22932897', '22932898', '22932907', '22932908', '22932909', '22932910'])
22932871
22932873
22932877
22932886
22932889
22932890
22932891
22932894
22932895
22932896
22932897
22932898
22932907
22932908
22932909
22932910


Unnamed: 0,League,Game,StartDate,StartTime,BetLabel,Outcome1_Label,Outcome1_Line,Outcome1_Odds,Outcome2_Label,Outcome2_Line,Outcome2_Odds
0,NBA,MEM Grizzlies @ CLE Cavaliers,"Tue, Jan 04",07:11 PM,Total Points Odd/Even - 1st Half,Odd,,-115,,,
1,NBA,MEM Grizzlies @ CLE Cavaliers,"Tue, Jan 04",07:11 PM,Total Points Odd/Even - 2nd Quarter,Odd,,-105,,,
2,NBA,MEM Grizzlies @ CLE Cavaliers,"Tue, Jan 04",07:11 PM,Total Points Odd/Even,Odd,,-120,,,
3,NBA,MEM Grizzlies @ CLE Cavaliers,"Tue, Jan 04",07:11 PM,Desmond Bane Assists + Rebounds,Over,7.5,+105,Under,7.5,-140
4,NBA,MEM Grizzlies @ CLE Cavaliers,"Tue, Jan 04",07:11 PM,Desmond Bane Assists,Over,1.5,-185,Under,1.5,+140
...,...,...,...,...,...,...,...,...,...,...,...
1188,NFL,SEA Seahawks @ ARI Cardinals,"Sun, Jan 09",04:25 PM,Last to Score - 1st Quarter,SEA Seahawks,,+135,,,
1189,NFL,SEA Seahawks @ ARI Cardinals,"Sun, Jan 09",04:25 PM,1st Touchdown Yards,Over,10.5,-110,Under,10.5,-115
1190,NFL,SEA Seahawks @ ARI Cardinals,"Sun, Jan 09",04:25 PM,1st Score Yards,Over,21.5,-115,Under,21.5,-110
1191,NFL,SEA Seahawks @ ARI Cardinals,"Sun, Jan 09",04:25 PM,Largest Lead of the Game,Over,14.5,-115,Under,14.5,-115


In [485]:
# Save the odds file as a csv for analysis
bet_offers_df.to_csv('Data/BetOdds.csv', index=False)

## Fantasy Pros NFL Projections

In [608]:
# Create lookup for the stat columns we are collecting
stats_lookup = {'QB': {1: 'PASS_ATT', 2: 'CMP', 3: 'PASS_YDS', 4: 'PASS_TDS',
             5: 'INTS', 6: 'RUSH_ATT', 7: 'RUSH_YDS', 8: 'RUSH_TDS', 10: 'FPTS'},
    'RB': {1: 'RUSH_ATT', 2: 'RUSH_YDS', 3: 'RUSH_TDS', 4: 'REC', 5: 'REC_YDS',
             6: 'REC_TDS', 8: 'FPTS'}, 
    'WR': {1: 'REC', 2: 'REC_YDS', 3: 'REC_TDS', 4: 'RUSH_ATT', 5: 'RUSH_YDS',
             6: 'RUSH_TDS', 8: 'FPTS'}, 
    'TE': {1: 'REC', 2: 'REC_YDS', 3: 'REC_TDS', 5: 'FPTS'},
    'DST': {1: 'SACK', 2: 'INT', 3: 'FR', 4: 'FF', 5: 'DEF_TD', 6: 'SAFETY',
             7: 'PA', 8: 'YDS_AGN', 9: 'FPTS'},
    'K': {1: 'FG', 2: 'FGA', 3: 'XPT', 4: 'FPTS'}}

# Need to make an additional dictionary for the index of FPTS of each position
position_fpts_index = {'QB': 10, 'RB': 8, 'WR': 8,
                                'TE': 5, 'DST': 9, 'K': 4}

# Create a lookup between a full team name and the abbreviation
team_abrv_lookup = {'New England Patriots': 'NE',
                    'Washington Football Team': 'WAS',
                    'Dallas Cowboys': 'DAL',
                    'Baltimore Ravens': 'BAL',
                    'Buffalo Bills': 'BUF',
                    'Chicago Bears': 'CHI',
                    'Indianapolis Colts': 'IND',
                    'Las Vegas Raiders': 'OAK',
                    'Kansas City Chiefs': 'KC',
                    'Los Angeles Chargers': 'LAC', 
                    'Carolina Panthers': 'CAR',
                    'Denver Broncos': 'DEN',
                    'Atlanta Falcons': 'ATL',
                    'Tennessee Titans': 'TEN',
                    'Minnesota Vikings': 'MIN',
                    'Los Angeles Rams': 'LAR',
                    'Tampa Bay Buccaneers': 'TB',
                    'Green Bay Packers': 'GB',
                    'Seattle Seahawks': 'SEA',
                    'New Orleans Saints': 'NO',
                    'Arizona Cardinals': 'ARI',
                    'Miami Dolphins': 'MIA',
                    'San Francisco 49ers': 'SF',
                    'Cleveland Browns': 'CLE',
                    'Pittsburgh Steelers': 'PIT',
                    'Philadelphia Eagles': 'PHI',
                    'Jacksonville Jaguars': 'JAX',
                    'Detroit Lions': 'DET',
                    'New York Jets': 'NYJ',
                    'Cincinnati Bengals': 'CIN',
                    'Houston Texans': 'HOU',
                    'New York Giants': 'NYG'}

# To add to the dataframe we'll need a consistent format of the lists
projection_format = {'NFL_WEEK': 0, 'PLAYER': 1, 'POSITION': 2, 'TEAM': 3, 'PASS_ATT': 4, 
                     'CMP': 5, 'PASS_YDS': 6, 'PASS_TDS': 7, 'INTS': 8, 'RUSH_ATT': 9,
                     'RUSH_YDS': 10, 'RUSH_TDS': 11, 'REC': 12, 'REC_YDS': 13, 'REC_TDS': 14,
                     'SACK': 15, 'INT': 16, 'FR': 17, 'FF': 18, 'DEF_TD': 19, 'SAFETY': 20,
                     'PA': 21, 'YDS_AGN': 22, 'FG': 23, 'FGA': 24, 'XPT': 25, 'FPTS': 26}

# Create an empty dataframe to append data to (excluding Def and K)
fp_projections = pd.DataFrame(columns = ['LEAGUE', 'PLAYER', 'POSITION', 'TEAM', 'PASS_ATT', 
                                         'CMP', 'PASS_YDS', 'PASS_TDS', 'INTS', 
                                         'RUSH_ATT', 'RUSH_YDS', 'RUSH_TDS', 'REC', 
                                         'REC_YDS', 'REC_TDS', 'SACK', 'INT',
                                         'FR', 'FF', 'DEF_TD', 'SAFETY', 'PA',
                                         'YDS_AGN', 'FG', 'FGA', 'XPT', 'FPTS'])

# A dictionary of the URLs to hit for each position
fantasy_pros_projection_urls = {'QB': 'https://www.fantasypros.com/nfl/projections/qb.php',
                                'RB': 'https://www.fantasypros.com/nfl/projections/rb.php?scoring=HALF',
                                'WR': 'https://www.fantasypros.com/nfl/projections/wr.php?scoring=HALF',
                                'TE': 'https://www.fantasypros.com/nfl/projections/te.php?scoring=HALF',
                                'DST': 'https://www.fantasypros.com/nfl/projections/dst.php',
                                'K': 'https://www.fantasypros.com/nfl/projections/k.php'}

In [609]:
# Web scrape Fantasy Pros for relevant information
response = requests.get(url)
soup = BeautifulSoup(response.text, "html.parser")

In [610]:
# Iterate over dictionary of URLs to add data to dictionary
for position, url in fantasy_pros_projection_urls.items():  
    # Get the stat keys lookup for position
    lookup = stats_lookup[position]
    # Lookup the fpts index 
    fpts_index = position_fpts_index[position]
    # Web scrape Fantasy Pros for relevant information
    response = requests.get(url)
    soup = BeautifulSoup(response.text, "html.parser")
    projections_list = list(list(list(list(list(list(list(list(list(list(list(soup.children)[2])[3])[15])[13])[1])[1])[1])[11])[1])[3])
    nfl_week = int(find_between(str(list(list(list(soup.children)[2])[1])[1]), 'Week ', ' ' + position))
    for num, item in enumerate(projections_list):
        if item != '\n':
            # Create an empty list to update the data for
            player_projections = [0] * 27
            # Add the league name to the list
            player_projections[0] = 'NFL'
            # Split the projection string into a list to parse
            proj_list = list(item)
            # filter out the '\n'
            proj_list = [x for x in proj_list if x != '\n']
            # Extract the player name
            player_name = str(proj_list[0]).split("fp-player-name=")[1]
            start, stop = [m.start() for m in re.finditer('"', player_name)][0:2]
            player_name = player_name[start+1:stop]
            player_projections[1] = player_name
            # Add the position to the list
            player_projections[2] = position
            # Extract the team of the player
            team = list(proj_list[0])[1].strip() if position != 'DST' else team_abrv_lookup[find_between(str(list(proj_list[0])[0]), '>', '<')]
            player_projections[3] = team
            for i, stat in enumerate(proj_list):
                if i in lookup.keys():
                    if i != fpts_index:
                        # For default format of stat results
                        stat_name = lookup[i] # get the stat name from lookup
                        list_index = projection_format[stat_name] # get the list index for this stat
                        result = re.search('<td class="center">(.*)</td>', str(stat))
                        stat_result = float(result.group(1))
                        player_projections[list_index] = stat_result
                    else:
                        # There is a different output for fantasy points
                        stat_name = lookup[i] # get the stat name from lookup
                        list_index = projection_format[stat_name] # get the list index for this stat
                        result = re.search('<td class="center" data-sort-value="(.*)">(.*)</td>', str(stat))
                        stat_result = float(result.group(2))
                        player_projections[list_index] = stat_result
                else:
                    continue
            # Append the list to the dataframe         
            fp_projections.loc[len(fp_projections)] = player_projections

# Remove rows where fpts is 0, since there are not needed
fp_projections = fp_projections[fp_projections['FPTS'] != 0.0]

fp_projections

Unnamed: 0,LEAGUE,PLAYER,POSITION,TEAM,PASS_ATT,CMP,PASS_YDS,PASS_TDS,INTS,RUSH_ATT,...,FR,FF,DEF_TD,SAFETY,PA,YDS_AGN,FG,FGA,XPT,FPTS
0,NFL,Josh Allen,QB,BUF,34.6,22.3,258.7,2.1,0.7,8.7,...,0,0,0,0,0,0,0,0,0,25.6
1,NFL,Kyler Murray,QB,ARI,35.4,23.7,264.6,1.6,0.8,6.8,...,0,0,0,0,0,0,0,0,0,21.0
2,NFL,Justin Herbert,QB,LAC,35.6,23.6,272.2,2.0,0.7,4.0,...,0,0,0,0,0,0,0,0,0,21.0
3,NFL,Patrick Mahomes II,QB,KC,37.3,24.2,275.3,1.9,0.8,4.0,...,0,0,0,0,0,0,0,0,0,20.4
4,NFL,Dak Prescott,QB,DAL,38.0,25.6,274.5,1.9,0.7,3.6,...,0,0,0,0,0,0,0,0,0,19.6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
470,NFL,Chase McLaughlin,K,CLE,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,1.4,1.5,1.8,5.9
471,NFL,Brian Johnson,K,WAS,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,1.5,0.0,1.4,5.8
472,NFL,Lirim Hajrullahu,K,CAR,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,1.4,1.8,1.5,5.6
473,NFL,Matthew Wright,K,JAC,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,1.4,1.6,1.3,5.3


## SportsLine Projections

In [645]:
# Web scrape Fantasy Pros for relevant information
url = 'https://www.sportsline.com/nfl/expert-projections/simulation/'
response = requests.get(url)
soup = BeautifulSoup(response.text, "html.parser")

In [672]:
# Isolate soup to the table of interest
output = str(list(list(list(soup.children)[1])[1])[1])
# Format as json dictionary
output = output.replace("""<script id="__NEXT_DATA__" type="application/json">""","").replace("</script>","")
output = json.loads(output)
projections = output['props']['initialState']['fantasyState']['projectionsPageState']['data']['projections']

In [685]:
projectionFields = projection['projectionFields']
projectionFields

[{'field': 'PLAYER', 'value': 'Josh Allen', 'displayValue': 'Josh Allen'},
 {'field': 'POS', 'value': 'QB', 'displayValue': 'QB'},
 {'field': 'TEAM', 'value': 'BUF', 'displayValue': 'BUF'},
 {'field': 'GAME', 'value': 'NYJ@BUF', 'displayValue': 'NYJ@BUF'},
 {'field': 'FP', 'value': 29.28, 'displayValue': '29.28'},
 {'field': 'FD EXP', 'value': 25.85, 'displayValue': '25.85'},
 {'field': 'DK EXP', 'value': 27.56, 'displayValue': '27.56'},
 {'field': 'PPR', 'value': 29.28, 'displayValue': '29.28'},
 {'field': 'CONS%', 'value': 0.58, 'displayValue': '0.58'},
 {'field': 'FREB%', 'value': 0.6, 'displayValue': '0.6'},
 {'field': 'PASSYD', 'value': 277, 'displayValue': '277'},
 {'field': 'RUSHYD', 'value': 40, 'displayValue': '40'},
 {'field': 'RECYD', 'displayValue': '-'}]

In [709]:
projectionFields = projection['projectionFields']
projectionFields

[{'field': 'PLAYER', 'value': 'C.J. Ham', 'displayValue': 'C.J. Ham'},
 {'field': 'POS', 'value': 'FB', 'displayValue': 'FB'},
 {'field': 'TEAM', 'value': 'MIN', 'displayValue': 'MIN'},
 {'field': 'GAME', 'value': 'CHI@MIN', 'displayValue': 'CHI@MIN'},
 {'field': 'FP', 'value': 1.33, 'displayValue': '1.33'},
 {'field': 'FD EXP', 'value': 1.88, 'displayValue': '1.88'},
 {'field': 'DK EXP', 'value': 2.46, 'displayValue': '2.46'},
 {'field': 'PPR', 'value': 2.43, 'displayValue': '2.43'},
 {'field': 'CONS%', 'value': 0.38, 'displayValue': '0.38'},
 {'field': 'FREB%', 'value': 0.25, 'displayValue': '0.25'},
 {'field': 'PASSYD', 'displayValue': '-'},
 {'field': 'RUSHYD', 'value': 2, 'displayValue': '2'},
 {'field': 'RECYD', 'value': 9, 'displayValue': '9'}]

In [718]:
# Create a dataframe to append results to
SportsLine_Projections = pd.DataFrame(columns=['PLAYER', 'POS', 'TEAM', 'GAME', 'FP', 'PASSYD', 'RUSHYD', 'RECYD'])

# Loop over the projections and parse
for i, projection in enumerate(projections):
    if i < 30:
        # Create an empty list to update the data for
        player_projections = [0] * 8
        # Loop over list of projections to scrape
        projectionFields = projection['projectionFields']
        # Create a counter to add additional fields to correct list position
        counter = 0
        for x in projectionFields:
            field = x['field']
            if field in SportsLine_Projections.columns:
                try:
                    value = x['value']
                    player_projections[counter] = value
                except KeyError:
                    player_projections[counter] = 0
                counter += 1

        # Append the list to the dataframe         
        SportsLine_Projections.loc[len(SportsLine_Projections)] = player_projections
            
SportsLine_Projections

Unnamed: 0,PLAYER,POS,TEAM,GAME,FP,PASSYD,RUSHYD,RECYD
0,Josh Allen,QB,BUF,NYJ@BUF,29.28,277,40,0
1,Justin Herbert,QB,LAC,LAC@LV,26.49,289,17,0
2,Patrick Mahomes,QB,KC,KC@DEN,25.2,292,21,0
3,Kyler Murray,QB,ARI,SEA@ARI,24.78,264,26,0
4,Tom Brady,QB,TB,CAR@TB,24.76,278,4,0
5,Ryan Tannehill,QB,TEN,TEN@HOU,22.91,245,19,0
6,Taysom Hill,QB,NO,NO@ATL,22.29,175,57,0
7,Carson Wentz,QB,IND,IND@JAC,21.26,226,14,0
8,Matthew Stafford,QB,LAR,SF@LAR,20.95,266,4,0
9,Kirk Cousins,QB,MIN,CHI@MIN,20.69,246,7,0


In [635]:
type(output)

bs4.element.Tag