# DraftKings Prop Analysis

In [48]:
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd
import numpy as np
import json
from datetime import datetime, timedelta
import psycopg2

## Helper Functions 

In [27]:
def find_between( s, first, last ):
    try:
        start = s.index( first ) + len( first )
        end = s.index( last, start )
        return s[start:end]
    except ValueError:
        return ""

In [28]:
def american2DecimalOdds(americanBetOdds):
    """
    @betOdds (str) the American odds of the bet. Must be prefaced with a '+' or '-'
    
    Returns the bet odds in decimal format. 
    """
    try:
        if (americanBetOdds[0] == '+'):
            americanBetOdds = int(americanBetOdds[1:])
            decimalOdds = (americanBetOdds/100) + 1
            # Return the decimalOdds in a clean format
            return(round(decimalOdds,2))
        elif americanBetOdds[0] == '-':
            americanBetOdds = int(americanBetOdds[1:])
            decimalOdds = (100/americanBetOdds) + 1
            # Return the decimalOdds in a clean format
            return(round(decimalOdds,2))
        else:
            print("Bet odds must begin with a '+' or '-'")
    except:
        return(None)

In [29]:
def impliedOdds(betOdds):
    """
    @betOdds (str) the American odds of the bet. Must be prefaced with a '+' or '-'
    
    Takes in the betOdds and returns the implied probability of the bet
    """
    try:
        # First need to convert the American odds to decimal odds
        decimalOdds = american2DecimalOdds(betOdds)

        # Use the decimal odds to return the implied probability
        probability = 1/decimalOdds * 100

        # Return the probability rounded to the nearest whole number
        return(round(probability))
    except:
        return(None)

## DraftKings - NBA and NFL 

In [40]:
# Create a dataframe to append results to
bet_offers_df = pd.DataFrame(columns=['League', 'Game', 'StartDate', 'StartTime', 'Player', 'BetLabel', 
                                      'SportsLine_Projection', 'DK_Line', 'Outcome1_Label', 'Outcome1_Odds', 
                                      'Outcome2_Label', 'Outcome2_Odds'])

# Create a list of the DraftKings URLs to parse
urls = ["https://sportsbook.draftkings.com/leagues/basketball/88670846", 
        "https://sportsbook.draftkings.com/leagues/football/88670561"]

# Loop over the URLs and parse results
for url in urls:
    
    # Get the BeautifulSoup data from the DraftKings website
    dk_response = requests.get(url)
    dk_soup = BeautifulSoup(dk_response.text, "html.parser")

    # Narrow the Beautiful Soup extract to just the field of interest "window.__INITIAL_STATE__"

    # Filter out opening and closing <script> tags
    dk_scrape = str(list(list(list(dk_soup.children)[2])[3])[11]).replace("<script>","").replace("</script>", "")
    # Remove leading and trailing whitespace
    dk_scrape = dk_scrape.strip()
    # Split sections
    dk_scrape = dk_scrape.split(";\n")
    # Isolate to json of interest
    dk_scrape = dk_scrape[6].strip()
    # Format as json dictionary
    dk_scrape = dk_scrape.replace("window.__INITIAL_STATE__ = ","")
    dk_scrape = json.loads(dk_scrape)
    
    # Grab the sport ID from the scrape
    sportId = list(dk_scrape['eventGroups'].keys())[0]
    
    # From the full scrape of the page, pull a list of the games to loop over and extract data from
    games = dk_scrape['eventGroups'][sportId]['events'].keys()
    
    # Loop over games
    for index, game in enumerate(games):

        # Set the game JSON as variable
        game_details = dk_scrape['eventGroups'][sportId]['events'][game]
        # Get the eventId so I can scrape the actual props
        eventId = game_details['eventId']

        # Web scrape Draft Kings for player props
        props_url = f"https://sportsbook.draftkings.com/event/{eventId}"
        response = requests.get(props_url)
        soup = BeautifulSoup(response.text, "html.parser")
        
        # Clean BeautifulSoup response
        # Filter out opening and closing <script> tags
        scrape = str(list(list(list(soup.children)[2])[3])[11]).replace("<script>","").replace("</script>", "")
        # Remove leading and trailing whitespace
        scrape = scrape.strip()
        # Split sections
        scrape = scrape.split(";\n")
        # Isolate to json of interest
        scrape = scrape[6].strip()
        # Format as json dictionary
        scrape = scrape.replace("window.__INITIAL_STATE__ = ","")
        scrape = json.loads(scrape)
        
        # Parse the scrape results
        eventGroupId = list(scrape['eventGroups'].keys())[0]
        providerEventId = list(scrape['eventGroups'][eventGroupId]['events'].keys())[0]
        providerOfferId = list(scrape['offers'][eventGroupId].keys())[0]
        eventId = scrape['eventGroups'][eventGroupId]['events'][providerEventId]['eventId']
        game_details = scrape['eventGroups'][eventGroupId]['events'][providerEventId]
        game = game_details['name']
        eventGroup = game_details['eventGroupName']
        teamName1 = game_details['teamName1']
        teamName1 = game_details['teamName2']
        startDate = game_details['startDate']
        date, time = startDate.split('T')
        dt = date + ' ' + time[:8]
        dt = datetime.strptime(dt, '%Y-%m-%d %H:%M:%S')
        # Format as right timezone (-5 hours)
        dt = dt - timedelta(hours=5)
        date = dt.strftime('%a, %b %d')
        time = dt.strftime('%I:%M %p')

        # Isolate the bet offers from the beautiful soup scrape
        offers = scrape['offers'][eventGroupId]

        # Loop over offers and add to dataframe
        for index, offer in enumerate(offers):

            # Create a list for parsing the offers 
            offers_parsed = [eventGroup, game, date, time, '', '', '', '', '', '', '', '']

            # Parse the betting offer
            offer_dict = scrape['offers'][eventGroupId][offer]
            try:
                providerOfferId = offer_dict['providerOfferId']
                providerId = offer_dict['providerId']
                providerEventId = offer_dict['providerEventId']
                bet_label = offer_dict['label']
                isOpen = offer_dict['isOpen']
                outcomes = offer_dict['outcomes']
            except:
                continue

            # Assign to list
            offers_parsed[5] = bet_label    

            # Extract outcomes
            if len(outcomes) == 1:

                # Parse the outcome
                outcome_label = outcomes[0]['label']
                outcome_odds = outcomes[0]['oddsAmerican']
                offers_parsed[8] = outcome_label
                offers_parsed[9] = outcome_odds

                # Append the list to the dataframe 
                bet_offers_df.loc[len(bet_offers_df)] = offers_parsed

            elif len(outcomes) == 2:

                # Parse the outcomes
                for i, x in enumerate(outcomes):
                    if i == 0:
                        outcome_label = outcomes[i]['label']
                        try:
                            outcome_line = outcomes[i]['line']
                        except:
                            outcome_line = ''
                        outcome_odds = outcomes[i]['oddsAmerican']
                        offers_parsed[7] = outcome_line
                        offers_parsed[8] = outcome_label
                        offers_parsed[9] = outcome_odds
                    else:
                        outcome_label = outcomes[i]['label']
                        try:
                            outcome_line = outcomes[i]['line']
                        except:
                            continue
                        outcome_odds = outcomes[i]['oddsAmerican']
                        offers_parsed[10] = outcome_label
                        offers_parsed[11] = outcome_odds

                # Append the list to the dataframe 
                bet_offers_df.loc[len(bet_offers_df)] = offers_parsed

            else:

                continue

# Preview output
bet_offers_df.head()

Unnamed: 0,League,Game,StartDate,StartTime,Player,BetLabel,SportsLine_Projection,DK_Line,Outcome1_Label,Outcome1_Odds,Outcome2_Label,Outcome2_Odds
0,NBA,BOS Celtics @ NY Knicks,"Thu, Jan 06",07:32 PM,,Total Points Odd/Even - 2nd Half,,,Odd,-105,,
1,NBA,BOS Celtics @ NY Knicks,"Thu, Jan 06",07:32 PM,,Total Points Odd/Even - 3rd Quarter,,,Odd,-115,,
2,NBA,BOS Celtics @ NY Knicks,"Thu, Jan 06",07:32 PM,,Total Points Odd/Even,,,Odd,-115,,
3,NBA,BOS Celtics @ NY Knicks,"Thu, Jan 06",07:32 PM,,Marcus Smart Assists,,4.5,Over,-185,Under,135.0
4,NBA,BOS Celtics @ NY Knicks,"Thu, Jan 06",07:32 PM,,Marcus Smart Assists + Rebounds,,7.5,Over,-130,Under,100.0


### Fantasy Pros NBA Projections 

They lock their NBA projections behind a paywall so I can only access the top 10 players. Not worth scraping really. 

In [31]:
# Web scrape Fantasy Pros for relevant information
url = 'https://www.fantasypros.com/nba/projections/daily-overall.php'
response = requests.get(url)
soup = BeautifulSoup(response.text, "html.parser")

projections_list = list(list(list(list(list(list(list(list(list(list(list(soup.children)[2])[3])[15])[13])[1])[1])[1])[11])[1])[3])
projections_list

['\n',
 <tr class="mpb-player-3316"><td class="player-label"><a class="player-name" href="/nba/players/jayson-tatum.php">Jayson Tatum</a> <small>(BOS - SF,PF)</small> <a aria-hidden="true" class="fp-player-link fp-id-3316" fp-player-name="Jayson Tatum" href="#" tabindex="-1"></a></td>
 <td class="tooltip-top" data-tooltip="16th ranked defense vs. SF">at NYK</td><td class="center">26.9</td>
 <td class="center">8.2</td>
 <td class="center">4.4</td>
 <td class="center">0.7</td>
 <td class="center">1.1</td>
 <td class="center">.440</td>
 <td class="center">.845</td>
 <td class="center">3.2</td>
 <td class="center">0</td>
 <td class="center">38.5</td>
 <td class="center">2.6</td>
 </tr>,
 '\n',
 <tr class="mpb-player-3170"><td class="player-label"><a class="player-name" href="/nba/players/jaylen-brown.php">Jaylen Brown</a> <small>(BOS - SG,SF)</small> <a aria-hidden="true" class="fp-player-link fp-id-3170" fp-player-name="Jaylen Brown" href="#" tabindex="-1"></a></td>
 <td class="tooltip-to

## Fantasy Pros NFL Projections

In [32]:
# Create lookup for the stat columns we are collecting
stats_lookup = {'QB': {1: 'PASS_ATT', 2: 'CMP', 3: 'PASS_YDS', 4: 'PASS_TDS',
             5: 'INTS', 6: 'RUSH_ATT', 7: 'RUSH_YDS', 8: 'RUSH_TDS', 10: 'FPTS'},
    'RB': {1: 'RUSH_ATT', 2: 'RUSH_YDS', 3: 'RUSH_TDS', 4: 'REC', 5: 'REC_YDS',
             6: 'REC_TDS', 8: 'FPTS'}, 
    'WR': {1: 'REC', 2: 'REC_YDS', 3: 'REC_TDS', 4: 'RUSH_ATT', 5: 'RUSH_YDS',
             6: 'RUSH_TDS', 8: 'FPTS'}, 
    'TE': {1: 'REC', 2: 'REC_YDS', 3: 'REC_TDS', 5: 'FPTS'},
    'DST': {1: 'SACK', 2: 'INT', 3: 'FR', 4: 'FF', 5: 'DEF_TD', 6: 'SAFETY',
             7: 'PA', 8: 'YDS_AGN', 9: 'FPTS'},
    'K': {1: 'FG', 2: 'FGA', 3: 'XPT', 4: 'FPTS'}}

# Need to make an additional dictionary for the index of FPTS of each position
position_fpts_index = {'QB': 10, 'RB': 8, 'WR': 8,
                                'TE': 5, 'DST': 9, 'K': 4}

# Create a lookup between a full team name and the abbreviation
team_abrv_lookup = {'New England Patriots': 'NE',
                    'Washington Football Team': 'WAS',
                    'Dallas Cowboys': 'DAL',
                    'Baltimore Ravens': 'BAL',
                    'Buffalo Bills': 'BUF',
                    'Chicago Bears': 'CHI',
                    'Indianapolis Colts': 'IND',
                    'Las Vegas Raiders': 'OAK',
                    'Kansas City Chiefs': 'KC',
                    'Los Angeles Chargers': 'LAC', 
                    'Carolina Panthers': 'CAR',
                    'Denver Broncos': 'DEN',
                    'Atlanta Falcons': 'ATL',
                    'Tennessee Titans': 'TEN',
                    'Minnesota Vikings': 'MIN',
                    'Los Angeles Rams': 'LAR',
                    'Tampa Bay Buccaneers': 'TB',
                    'Green Bay Packers': 'GB',
                    'Seattle Seahawks': 'SEA',
                    'New Orleans Saints': 'NO',
                    'Arizona Cardinals': 'ARI',
                    'Miami Dolphins': 'MIA',
                    'San Francisco 49ers': 'SF',
                    'Cleveland Browns': 'CLE',
                    'Pittsburgh Steelers': 'PIT',
                    'Philadelphia Eagles': 'PHI',
                    'Jacksonville Jaguars': 'JAX',
                    'Detroit Lions': 'DET',
                    'New York Jets': 'NYJ',
                    'Cincinnati Bengals': 'CIN',
                    'Houston Texans': 'HOU',
                    'New York Giants': 'NYG'}

# To add to the dataframe we'll need a consistent format of the lists
projection_format = {'NFL_WEEK': 0, 'PLAYER': 1, 'POSITION': 2, 'TEAM': 3, 'PASS_ATT': 4, 
                     'CMP': 5, 'PASS_YDS': 6, 'PASS_TDS': 7, 'INTS': 8, 'RUSH_ATT': 9,
                     'RUSH_YDS': 10, 'RUSH_TDS': 11, 'REC': 12, 'REC_YDS': 13, 'REC_TDS': 14,
                     'SACK': 15, 'INT': 16, 'FR': 17, 'FF': 18, 'DEF_TD': 19, 'SAFETY': 20,
                     'PA': 21, 'YDS_AGN': 22, 'FG': 23, 'FGA': 24, 'XPT': 25, 'FPTS': 26}

# Create an empty dataframe to append data to (excluding Def and K)
fp_projections = pd.DataFrame(columns = ['LEAGUE', 'PLAYER', 'POSITION', 'TEAM', 'PASS_ATT', 
                                         'CMP', 'PASS_YDS', 'PASS_TDS', 'INTS', 
                                         'RUSH_ATT', 'RUSH_YDS', 'RUSH_TDS', 'REC', 
                                         'REC_YDS', 'REC_TDS', 'SACK', 'INT',
                                         'FR', 'FF', 'DEF_TD', 'SAFETY', 'PA',
                                         'YDS_AGN', 'FG', 'FGA', 'XPT', 'FPTS'])

# A dictionary of the URLs to hit for each position
fantasy_pros_projection_urls = {'QB': 'https://www.fantasypros.com/nfl/projections/qb.php',
                                'RB': 'https://www.fantasypros.com/nfl/projections/rb.php?scoring=HALF',
                                'WR': 'https://www.fantasypros.com/nfl/projections/wr.php?scoring=HALF',
                                'TE': 'https://www.fantasypros.com/nfl/projections/te.php?scoring=HALF',
                                'DST': 'https://www.fantasypros.com/nfl/projections/dst.php',
                                'K': 'https://www.fantasypros.com/nfl/projections/k.php'}

# Iterate over dictionary of URLs to add data to dictionary
for position, url in fantasy_pros_projection_urls.items():  
    # Get the stat keys lookup for position
    lookup = stats_lookup[position]
    # Lookup the fpts index 
    fpts_index = position_fpts_index[position]
    # Web scrape Fantasy Pros for relevant information
    response = requests.get(url)
    soup = BeautifulSoup(response.text, "html.parser")
    projections_list = list(list(list(list(list(list(list(list(list(list(list(soup.children)[2])[3])[15])[13])[1])[1])[1])[11])[1])[3])
    nfl_week = int(find_between(str(list(list(list(soup.children)[2])[1])[1]), 'Week ', ' ' + position))
    for num, item in enumerate(projections_list):
        if item != '\n':
            # Create an empty list to update the data for
            player_projections = [0] * 27
            # Add the league name to the list
            player_projections[0] = 'NFL'
            # Split the projection string into a list to parse
            proj_list = list(item)
            # filter out the '\n'
            proj_list = [x for x in proj_list if x != '\n']
            # Extract the player name
            player_name = str(proj_list[0]).split("fp-player-name=")[1]
            start, stop = [m.start() for m in re.finditer('"', player_name)][0:2]
            player_name = player_name[start+1:stop]
            player_projections[1] = player_name
            # Add the position to the list
            player_projections[2] = position
            # Extract the team of the player
            team = list(proj_list[0])[1].strip() if position != 'DST' else team_abrv_lookup[find_between(str(list(proj_list[0])[0]), '>', '<')]
            player_projections[3] = team
            for i, stat in enumerate(proj_list):
                if i in lookup.keys():
                    if i != fpts_index:
                        # For default format of stat results
                        stat_name = lookup[i] # get the stat name from lookup
                        list_index = projection_format[stat_name] # get the list index for this stat
                        result = re.search('<td class="center">(.*)</td>', str(stat))
                        stat_result = float(result.group(1))
                        player_projections[list_index] = stat_result
                    else:
                        # There is a different output for fantasy points
                        stat_name = lookup[i] # get the stat name from lookup
                        list_index = projection_format[stat_name] # get the list index for this stat
                        result = re.search('<td class="center" data-sort-value="(.*)">(.*)</td>', str(stat))
                        stat_result = float(result.group(2))
                        player_projections[list_index] = stat_result
                else:
                    continue
            # Append the list to the dataframe         
            fp_projections.loc[len(fp_projections)] = player_projections

# Remove rows where fpts is 0, since there are not needed
fp_projections = fp_projections[fp_projections['FPTS'] != 0.0]

# Preview output
fp_projections.head()

Unnamed: 0,LEAGUE,PLAYER,POSITION,TEAM,PASS_ATT,CMP,PASS_YDS,PASS_TDS,INTS,RUSH_ATT,...,FR,FF,DEF_TD,SAFETY,PA,YDS_AGN,FG,FGA,XPT,FPTS
0,NFL,Josh Allen,QB,BUF,35.4,22.7,267.4,2.1,0.8,8.2,...,0,0,0,0,0,0,0,0,0,24.6
1,NFL,Kyler Murray,QB,ARI,36.4,24.2,279.2,1.8,0.7,7.6,...,0,0,0,0,0,0,0,0,0,22.3
2,NFL,Patrick Mahomes II,QB,KC,36.6,23.5,276.9,1.9,0.6,3.8,...,0,0,0,0,0,0,0,0,0,20.7
3,NFL,Justin Herbert,QB,LAC,35.8,23.8,269.7,2.0,0.8,3.8,...,0,0,0,0,0,0,0,0,0,20.4
4,NFL,Taysom Hill,QB,NO,30.5,19.1,216.3,1.1,0.9,9.4,...,0,0,0,0,0,0,0,0,0,19.9


## SportsLine Projections

### NFL

In [33]:
# Web scrape Fantasy Pros for relevant information
url = 'https://www.sportsline.com/nfl/expert-projections/simulation/'
response = requests.get(url)
soup = BeautifulSoup(response.text, "html.parser")

# Isolate soup to the table of interest
output = str(list(list(list(soup.children)[1])[1])[1])
# Format as json dictionary
output = output.replace("""<script id="__NEXT_DATA__" type="application/json">""","").replace("</script>","")
output = json.loads(output)
projections = output['props']['initialState']['fantasyState']['projectionsPageState']['data']['projections']

# Create a dataframe to append results to
NFL_SportsLine_Projections = pd.DataFrame(columns=['LEAGUE', 'PLAYER', 'POS', 'TEAM', 'GAME', 'FP', 'PASSYD', 
                                                   'RUSHYD', 'RECYD'])

# Loop over the projections and parse
for i, projection in enumerate(projections):
    # Create an empty list to update the data for
    player_projections = [0] * 9
    # Set LEAGUE columns
    player_projections[0] = 'NFL'
    # Loop over list of projections to scrape
    projectionFields = projection['projectionFields']
    # Create a counter to add additional fields to correct list position
    counter = 1
    for x in projectionFields:
        field = x['field']
        if field in NFL_SportsLine_Projections.columns:
            try:
                value = x['value']
                player_projections[counter] = value
            except KeyError:
                player_projections[counter] = 0
            counter += 1

    # Append the list to the dataframe         
    NFL_SportsLine_Projections.loc[len(NFL_SportsLine_Projections)] = player_projections

# Preview output            
NFL_SportsLine_Projections.head()

Unnamed: 0,LEAGUE,PLAYER,POS,TEAM,GAME,FP,PASSYD,RUSHYD,RECYD
0,NFL,Josh Allen,QB,BUF,NYJ@BUF,28.9,275,39,0
1,NFL,Justin Herbert,QB,LAC,LAC@LV,27.15,289,17,0
2,NFL,Patrick Mahomes,QB,KC,KC@DEN,25.47,299,22,0
3,NFL,Kyler Murray,QB,ARI,SEA@ARI,24.98,268,27,0
4,NFL,Tom Brady,QB,TB,CAR@TB,24.72,278,4,0


### NBA

In [34]:
# Web scrape Fantasy Pros for relevant information
url = 'https://www.sportsline.com/nba/expert-projections/simulation/'
response = requests.get(url)
soup = BeautifulSoup(response.text, "html.parser")

# Isolate soup to the table of interest
output = str(list(list(list(soup.children)[1])[1])[1])
# Format as json dictionary
output = output.replace("""<script id="__NEXT_DATA__" type="application/json">""","").replace("</script>","")
output = json.loads(output)
projections = output['props']['initialState']['fantasyState']['projectionsPageState']['data']['projections']

# Create a dataframe to append results to
NBA_SportsLine_Projections = pd.DataFrame(columns=['LEAGUE', 'PLAYER', 'POS', 'TEAM', 'GAME', 'FP', 'PTS', 'MIN', 'FG',
                                                   'FGA', 'AST', 'TRB', 'DRB', 'ORB', 'BK', 'ST', 'TO', 'FT', 'FTP', 'FGP'])
                                                   
# Loop over the projections and parse
for i, projection in enumerate(projections):
    # Create an empty list to update the data for
    player_projections = [0] * 20
    # Set LEAGUE columns
    player_projections[0] = 'NBA'
    # Loop over list of projections to scrape
    projectionFields = projection['projectionFields']
    # Create a counter to add additional fields to correct list position
    counter = 1
    for x in projectionFields:
        field = x['field']
        if field in NBA_SportsLine_Projections.columns:
            try:
                value = x['value']
                player_projections[counter] = value
            except KeyError:
                player_projections[counter] = 0
            counter += 1

    # Append the list to the dataframe         
    NBA_SportsLine_Projections.loc[len(NBA_SportsLine_Projections)] = player_projections

# Preview output
NBA_SportsLine_Projections.head()

Unnamed: 0,LEAGUE,PLAYER,POS,TEAM,GAME,FP,PTS,MIN,FG,FGA,AST,TRB,DRB,ORB,BK,ST,TO,FT,FTP,FGP
0,NBA,Ja Morant,PG,MEM,DET@MEM,41.9,23.4,34,8.8,17.6,7.5,5.0,4.1,0.9,0.4,1.4,3.2,4.2,0.76,0.5
1,NBA,Devin Booker,SG,PHO,LAC@PHO,39.3,26.0,34,9.4,20.8,4.7,6.3,5.7,0.6,0.3,0.9,3.5,4.6,0.89,0.45
2,NBA,Chris Paul,PG,PHO,LAC@PHO,38.3,16.3,32,6.2,13.1,8.2,6.0,5.6,0.4,0.2,1.9,2.6,2.4,0.87,0.47
3,NBA,Jayson Tatum,SF,BOS,BOS@NY,38.0,23.9,37,8.5,19.7,3.5,7.8,6.8,1.0,0.8,1.0,2.5,4.5,0.84,0.43
4,NBA,Julius Randle,PF,NY,BOS@NY,37.8,19.9,34,7.1,17.0,4.9,10.1,8.7,1.5,0.6,0.9,3.5,3.8,0.77,0.42


In [35]:
# Save the odds file as a csv for analysis
NBA_SportsLine_Projections.to_csv('Data/NBA_SportsLine_Projections.csv', index=False)

## Connect Bet Lines to Projections 

### SportsLine 

In [44]:
# Get a list of the players in the projections tables
nba_players = NBA_SportsLine_Projections['PLAYER'].to_list()
nfl_players = NFL_SportsLine_Projections['PLAYER'].to_list()
SportsLine_players = nba_players + nfl_players

# For every row in bef_offers_df, check if the label contains the name of a player, if so add it to the Player column
for i, row in bet_offers_df.iterrows():
    betLabel = row["BetLabel"]
    player = [x for x in SportsLine_players if x in betLabel]
    if len(player) > 0:
        # Extract the player name from the list
        player = player[0]
        # Update the Player field in the df
        bet_offers_df.loc[i, "Player"] = player
        # Update the BetLabel to remove the player name to isolate the stat
        bet_offers_df.loc[i, "BetLabel"] = betLabel.replace(player, "").strip()
        
# Link the SportsLine projection fields with the bet label tracked
stat_label_map = {
    'Assists': 'AST',
    'Points': 'PTS',
    'Rebounds': 'TRB'
}

stat_label_keys = list(stat_label_map.keys())

# Create a column for the SportsLine Projection
for i, row in bet_offers_df.iterrows():
    # Collet needed variables
    BetLabel = row["BetLabel"]
    bet_parts = BetLabel.split("+")
    bet_parts = [x.strip() for x in bet_parts]
    bet_parts.sort()
    player = row["Player"]

    if bet_parts is not None:

        if len(bet_parts) == 3:

            # Check that the labels are those in the stat_label_map 
            # If not this is likely due to a miss in the Player matching that results in overlap in the BetLabel
            if list(bet_parts) == stat_label_keys:
        
                # Create a variable to store the NBA_SportsLine_Projection
                NBA_SportsLine_Projection = 0
                # Loop over the bet parts and add the projections
                for part in bet_parts:
                    # Use the player name and the statistic to look up the value in the NBA_SportsLine_Projections table
                    projection = NBA_SportsLine_Projections.loc[NBA_SportsLine_Projections.PLAYER == player, 
                                                                stat_label_map[part]].iloc[0]
                    # Add the projection to the total
                    NBA_SportsLine_Projection += projection
                # Set the projection in the bet_offers_df table
                bet_offers_df.loc[i, "SportsLine_Projection"] = NBA_SportsLine_Projection

        elif len(bet_parts) == 2:
            
            # Check that both the labels are in the stat_label_lookup
            # If not this is likely due to a miss in the Player matching that results in overlap in the BetLabel
            if (bet_parts[0] in stat_label_keys) & (bet_parts[1] in stat_label_keys) :
                # Create a variable to store the NBA_SportsLine_Projection
                NBA_SportsLine_Projection = 0
                # Loop over the bet parts and add the projections
                for part in bet_parts:
                    # Use the player name and the statistic to look up the value in the NBA_SportsLine_Projections table
                    projection = NBA_SportsLine_Projections.loc[NBA_SportsLine_Projections.PLAYER == player, 
                                                                stat_label_map[part]].iloc[0]
                    # Add the projection to the total
                    NBA_SportsLine_Projection += projection
                # Set the projection in the bet_offers_df table
                bet_offers_df.loc[i, "SportsLine_Projection"] = NBA_SportsLine_Projection

        elif len(bet_parts) == 1:

            if BetLabel in stat_label_map.keys():
                # Use the player name and the statistic to look up the value in the NBA_SportsLine_Projections table
                NBA_SportsLine_Projection = NBA_SportsLine_Projections.loc[NBA_SportsLine_Projections.PLAYER == player, 
                                                                           stat_label_map[BetLabel]].iloc[0]
                # Set the projection in the bet_offers_df table
                bet_offers_df.loc[i, "SportsLine_Projection"] = NBA_SportsLine_Projection

        else:
            
            continue
    
    else:
        
        continue

# Format columns
bet_offers_df['SportsLine_Projection'] = pd.to_numeric(bet_offers_df['SportsLine_Projection'], errors='coerce')
bet_offers_df['DK_Line'] = pd.to_numeric(bet_offers_df['DK_Line'], errors='coerce')
bet_offers_df['Outcome1_Label'] = bet_offers_df['Outcome1_Label'].astype(str)
bet_offers_df['Outcome2_Label'] = bet_offers_df['Outcome2_Label'].astype(str)

# Add a column for difference between the projection and line
bet_offers_df["Line2ProjDiff"] = bet_offers_df['SportsLine_Projection'] - bet_offers_df['DK_Line']
# Add columns with the implied probabilities of the odds
bet_offers_df['Outcome1_ImpliedProbability'] = bet_offers_df['Outcome1_Odds'].apply(impliedOdds)
bet_offers_df['Outcome2_ImpliedProbability'] = bet_offers_df['Outcome2_Odds'].apply(impliedOdds)
# Add a column for the bet "juice"
bet_offers_df['Bet_Juice'] = bet_offers_df['Outcome1_ImpliedProbability'] + bet_offers_df['Outcome2_ImpliedProbability'] - 100


# Preview output
bet_offers_df.head()

Unnamed: 0,League,Game,StartDate,StartTime,Player,BetLabel,SportsLine_Projection,DK_Line,Outcome1_Label,Outcome1_Odds,Outcome2_Label,Outcome2_Odds,Line2ProjDiff,Outcome1_ImpliedProbability,Outcome2_ImpliedProbability,Bet_Juice
0,NBA,BOS Celtics @ NY Knicks,"Thu, Jan 06",07:32 PM,,Total Points Odd/Even - 2nd Half,,,Odd,-105,,,,51,,
1,NBA,BOS Celtics @ NY Knicks,"Thu, Jan 06",07:32 PM,,Total Points Odd/Even - 3rd Quarter,,,Odd,-115,,,,53,,
2,NBA,BOS Celtics @ NY Knicks,"Thu, Jan 06",07:32 PM,,Total Points Odd/Even,,,Odd,-115,,,,53,,
3,NBA,BOS Celtics @ NY Knicks,"Thu, Jan 06",07:32 PM,Marcus Smart,Assists,4.4,4.5,Over,-185,Under,135.0,-0.1,65,43.0,8.0
4,NBA,BOS Celtics @ NY Knicks,"Thu, Jan 06",07:32 PM,Marcus Smart,Assists + Rebounds,8.1,7.5,Over,-130,Under,100.0,0.6,56,50.0,6.0


In [82]:
# Save the odds file as a csv for analysis
bet_offers_df.to_csv('Data/BetOdds.csv', index=False)

FileNotFoundError: [Errno 2] No such file or directory: 'Data/BetOdds.csv'

## Pull Player Stats

### NBA

In [88]:
url = "https://api-nba-v1.p.rapidapi.com/games/league/standard/2021"

headers = {
    'x-rapidapi-host': "api-nba-v1.p.rapidapi.com",
    'x-rapidapi-key': "40cfc5891cmshe48d38938873f74p12526ejsnb332fefe9905"
    }

response = requests.request("GET", url, headers=headers)

print(response.text)

{"api":{"status":200,"message":"GET games\/league\/standard\/2021","results":1296,"filters":["seasonYear","league","gameId","teamId","date","live"],"games":[{"seasonYear":"2021","league":"standard","gameId":"10796","startTimeUTC":"2021-10-03T19:30:00.000Z","endTimeUTC":"2021-10-03T22:14:00.000Z","arena":"STAPLES Center","city":"Los Angeles","country":"USA","clock":"","gameDuration":"2:28","currentPeriod":"4\/4","halftime":"0","EndOfPeriod":"0","seasonStage":"1","statusShortGame":"3","statusGame":"Finished","vTeam":{"teamId":"4","shortName":"BKN","fullName":"Brooklyn Nets","nickName":"Nets","logo":"https:\/\/upload.wikimedia.org\/wikipedia\/commons\/thumb\/4\/44\/Brooklyn_Nets_newlogo.svg\/130px-Brooklyn_Nets_newlogo.svg.png","score":{"points":"123"}},"hTeam":{"teamId":"17","shortName":"LAL","fullName":"Los Angeles Lakers","nickName":"Lakers","logo":"https:\/\/upload.wikimedia.org\/wikipedia\/commons\/thumb\/3\/3c\/Los_Angeles_Lakers_logo.svg\/220px-Los_Angeles_Lakers_logo.svg.png","sco

In [89]:
# Create a dataframe to append results to
NBA_Games = pd.DataFrame(columns=['seasonYear', 'gameId', 'homeTeamId', 'homeTeamName', 'awayTeamId',
                                  'awayTeamName', 'startTime', 'status', 'homeScore', 'awayScore'])
                   
# Format response as json dictionary
games = json.loads(response.text)
games = games['api']['games']

# Loop over games and extract the results
for i, game in enumerate(games):
    
    # Find the league
    league = game['league']
    
    # Only extract fields if the game is from the 'standard' league
    if league == 'standard':
        # Extract the fields from the JSON
        seasonYear = game['seasonYear']
        gameId = game['gameId']
        startTime = game['startTimeUTC']
        status = game['statusGame']
        awayTeamId = game['vTeam']['teamId']
        homeTeamId = game['hTeam']['teamId']
        awayTeamName = game['vTeam']['fullName']
        homeTeamName = game['hTeam']['fullName']
        # If the game is over, get the score
        if status == 'Finished':
            awayScore = game['vTeam']['score']['points']
            homeScore = game['hTeam']['score']['points']
        else:
            awayScore = 0
            homeScore = 0

        # Add values to list
        game_details = [seasonYear, gameId, homeTeamId, homeTeamName, awayTeamId, awayTeamName, startTime, status, 
                        homeScore, awayScore]

        # Append the list to the dataframe         
        NBA_Games.loc[len(NBA_Games)] = game_details
        
    else:
        
        continue
    
NBA_Games.head()

Unnamed: 0,seasonYear,gameId,homeTeamId,homeTeamName,awayTeamId,awayTeamName,startTime,status,homeScore,awayScore
0,2021,10796,17,Los Angeles Lakers,4,Brooklyn Nets,2021-10-03T19:30:00.000Z,Finished,97,123
1,2021,10797,38,Toronto Raptors,27,Philadelphia 76ers,2021-10-04T23:00:00.000Z,Finished,123,107
2,2021,10798,2,Boston Celtics,26,Orlando Magic,2021-10-04T23:30:00.000Z,Finished,98,97
3,2021,10799,20,Miami Heat,1,Atlanta Hawks,2021-10-04T23:30:00.000Z,Finished,125,99
4,2021,10800,22,Minnesota Timberwolves,23,New Orleans Pelicans,2021-10-05T00:00:00.000Z,Finished,117,114


In [85]:
# Save the odds file as a csv for analysis
NBA_Games.to_csv('PlayerPropAnalysis/Data/NBA_Games.csv', index=False)

In [91]:
url = "https://api-nba-v1.p.rapidapi.com/players/league/standard"

headers = {
    'x-rapidapi-host': "api-nba-v1.p.rapidapi.com",
    'x-rapidapi-key': "40cfc5891cmshe48d38938873f74p12526ejsnb332fefe9905"
    }

response = requests.request("GET", url, headers=headers)

print(response.text)

{"api":{"status":200,"message":"GET players\/league\/standard","results":1853,"filters":["playerId","teamId","league","country","lastName","firstName"],"players":[{"firstName":"Alex","lastName":"Abrines","teamId":null,"yearsPro":"0","collegeName":"","country":"Spain","playerId":"1","dateOfBirth":"1993-08-01","affiliation":"Spain\/Spain","startNba":"2016","heightInMeters":"","weightInKilograms":"","leagues":{"standard":{"jersey":"8","active":"0","pos":""}}},{"firstName":"Quincy","lastName":"Acy","teamId":"28","yearsPro":"6","collegeName":"Baylor","country":"USA","playerId":"2","dateOfBirth":"1990-10-06","affiliation":"Baylor\/USA","startNba":"2012","heightInMeters":"2.01","weightInKilograms":"108.9","leagues":{"standard":{"jersey":"4","active":"1","pos":"F"}}},{"firstName":"Jordan","lastName":"Adams","teamId":"19","yearsPro":"1","collegeName":"UCLA","country":"United States","playerId":"3","dateOfBirth":"1994-07-08","affiliation":"","startNba":"2014","heightInMeters":"1.96","weightInKil

In [108]:
# Create a dataframe to append results to
NBA_Players = pd.DataFrame(columns=['playerId', 'firstName', 'lastName', 'teamId', 'pos', 'yearsPro', 'startNba', 
                                    'college', 'country', 'dateOfBirth', 'height', 'weight'])
                   
# Format response as json dictionary
players = json.loads(response.text)
players = players['api']['players']

# Loop over games and extract the results
for i, player in enumerate(players):

    # Find the leagues played in
    leagues = list(player['leagues'].keys())
    
    # Only extract fields if the game is from the 'standard' league
    if 'standard' in leagues:
        # Extract the fields from the JSON
        playerId = player['playerId']
        firstName = player['firstName']
        lastName = player['lastName']
        teamId = player['teamId']
        pos = player['leagues']['standard']['pos']
        yearsPro = player['yearsPro']
        startNba = player['startNba']
        college = player['collegeName']
        country = player['country']
        dateOfBirth = player['dateOfBirth']
        height = player['heightInMeters']
        weight = player['weightInKilograms']
                           
        # Add values to list
        player_details = [playerId, firstName, lastName, teamId, pos, yearsPro, startNba, 
                                    college, country, dateOfBirth, height, weight]

        # Append the list to the dataframe         
        NBA_Players.loc[len(NBA_Players)] = player_details
        
    else:
        
        continue
    
NBA_Players.head()

Unnamed: 0,playerId,firstName,lastName,teamId,pos,yearsPro,startNba,college,country,dateOfBirth,height,weight
0,1,Alex,Abrines,,,0,2016,,Spain,1993-08-01,,
1,2,Quincy,Acy,28.0,F,6,2012,Baylor,USA,1990-10-06,2.01,108.9
2,3,Jordan,Adams,19.0,G,1,2014,UCLA,United States,1994-07-08,1.96,94.8
3,4,Steven,Adams,19.0,C,8,2013,Pittsburgh,New Zealand,1993-07-20,2.11,120.2
4,5,Arron,Afflalo,26.0,G,0,0,,,,,


In [109]:
# Save the odds file as a csv for analysis
NBA_Players.to_csv('PlayerPropAnalysis/Data/NBA_Players.csv', index=False)

### NFL