In [1]:
import os
import requests
import datetime

import numpy as np
import pandas as pd

from typing import Callable

from bs4 import BeautifulSoup

# Local
import settings.custom

from propscraper import PropScraper
from settings.params import mode, site

In [2]:
# Returns current date as string in desired format for files
def date_path() -> str:
    return '.'.join([
        datetime.datetime.now().strftime("%m%d%y"),
        # (datetime.datetime.now() + datetime.timedelta(days=1)).strftime("%m%d%y"),
        'csv'
    ])

In [3]:
# In case webpage goes down again
def save_directory():
    
    df_data: dict[str, list[str,...]] = {
        'team': list(),
        'name': list(),
        'url': list()
    }

    for team, player_links in directory.items():
        for name,url in player_links.items():
            df_data['team'].append(team)
            df_data['name'].append(name)
            df_data['url'].append(url)
            
    df: pd.DataFrame = pd.DataFrame(df_data)
    df.to_csv('../data/url-directory.csv', index=False)
    
    return None


def load_directory():
    df: pd.DataFrame = pd.read_csv('../data/url-directory.csv')
    
    team_dfs: dict[str, pd.DataFrame] = {
        team: (df
               .loc[df['team']==team]
               .set_index('name')
               .drop(['team'], axis=1)
               .T
               .to_dict()
              )
        for team in df['team'].drop_duplicates()
    }
    
    directory={team: dict() for team in team_dfs}
    
#     Improve this
    for team in team_dfs:
        for name in team_dfs[team]:
            directory[team][name] = team_dfs[team][name]['url']
        
    
    return directory

In [4]:
Props = PropScraper()
directory: dict[str,dict[str,str]] = Props.create_webpage_directory()

In [5]:
def scrape_props(name: str, team: str, site: str, **kwargs):
    try:
        return Props.scrape_player_props(
            name,
            directory[team][name],
            site,
            **kwargs
        )
    
    except KeyError:
        return (0.0, 0.0)

In [6]:
def check_site():
    try:
        assert(len(directory))
    except AssertionError:
        return 'ScoresAndOdds.com is down, or at least the page containing links is empty...'
    
    return 'No Issues'
    

In [7]:
check_site()

'No Issues'

In [8]:
if len(directory):
    save_directory()
else:
    directory = load_directory()
# directory = load_directory()

In [9]:
def scrape_fanduel(**kwargs):
    
    path: str = '../data/current-fanduel.csv'
    if mode == 'single-game':
        path: str = path.replace('.csv', '-sg.csv')
    
    
    columns: dict[str, str] = {
        'Nickname': 'name',
        'Position': 'pos',
        'Team': 'team',
        'Salary': 'salary',
        'Injury Indicator': 'injury',
    }
    
    MIN_SAL: int = 3_500 if kwargs.get('drop_minimums', False) else 0
    
    keep_minimums: tuple[str,...] = tuple()
    drop_minimums: tuple[str,...] = tuple([
        name for name in (pd.read_csv(path, usecols=['Nickname','Salary']).pipe(lambda df_: df_.loc[df_['Salary'] == MIN_SAL]['Nickname'])) if name not in keep_minimums
    ])
    
    df: pd.DataFrame = (pd
                        .read_csv(path, usecols=columns)
                        .rename(columns,axis=1)
                        .pipe(lambda df_: df_.loc[df_['injury']!='O'])
                        .drop('injury', axis=1)
                        .assign(name=lambda df_: df_.name.str.replace('.','',regex=False))
                        # .pipe(lambda df_: df_.loc[(df_['name'].isin(drop_minimums) == False)])
                       )

    # scoresandodds : FanDuel
    name_issues = {
        'Moe Wagner': 'Moritz Wagner',
        'Moritz Wagner': 'Moe Wagner'
    }


    
    df['name'] = df['name'].map(lambda name: name_issues.get(name, name))
    df['input'] = df.loc[:,['name','team']].apply(tuple, axis=1)
    df['output'] = df['input'].apply(lambda x: scrape_props(*x, 'fanduel'))
    
    df['fpts'] = df['output'].map(lambda x: x[0])
    df['e_fpts'] = df['output'].map(lambda x: x[1])
    
    
    for col in ('fpts', 'e_fpts'):
        df[f'{col}/$'] = 1000 * (df[col] / df['salary'])
    
    df['5x'] = 5 * (df['salary'] / 1000)
    df['value'] = df['fpts'] - df['5x']
    
    df = (df
          .loc[df['fpts']>0.0]
          .drop(['input', 'output', '5x'], axis=1)
          .assign(fpts_1k=lambda df_: 1000 * df_.fpts / df_.salary)
          .rename({'fpts_1k': 'fpts-1k'}, axis=1)
          .sort_values('value', ascending=False)
          .set_index('name')
          .round(2)
         )
    
    single_game: bool = 'sg' in path or len(df['team'].drop_duplicates()) == 2
    
    df.to_csv(f'../data/fanduel-props{"-sg" if single_game else ""}.csv')
    
    # Save to optimizer
    df.to_csv('/home/deegs/devel/repos/nba-boxscores-git/nba-boxscores/data/2023-2024/contest-files/fanduel/current/projections.csv',
              # index=False
             )
    
    return None

In [10]:
def scrape_draftkings(**kwargs):

    path: str = '../data/current-draftkings.csv'
    if mode == 'single-game':
        path: str = path.replace('.csv', '-sg.csv')
    
    columns: dict[str, str] = {
        'Name': 'name',
        'Roster Position': 'pos',
        'TeamAbbrev': 'team',
        'Salary': 'salary'
    }
    
    inits_issues = {
        'SAS': 'SA',
        'PHX': 'PHO',
        'GSW': 'GS',
        'NOP': 'NO',
        'NYK': 'NY'
    }
    
    MIN_SAL: int = 1_000 if kwargs.get('drop_minimums', True) else 0
    
    keep_minimums: tuple[str,...] = tuple()
    drop_minimums: tuple[str,...] = tuple([
        name for name in (pd.read_csv(path, usecols=['Name','Salary']).pipe(lambda df_: df_.loc[df_['Salary'] == MIN_SAL]['Name'])) if name not in keep_minimums
    ])
    
    df: pd.DataFrame = (pd
                        .read_csv(path, usecols=columns)
                        .rename(columns,axis=1)
                        .pipe(lambda df_: df_.loc[(df_['pos']!='CPT') ])# For single game contests
                        .assign(
                            name=lambda df_: df_.name.str.replace('.','', regex=False),
                            pos=lambda df_: df_.pos
                            .str.replace('/[GF]/UTIL','', regex=True)
                            .str.replace('C/UTIL','C',regex=False)
                            .str.replace('/[GF]', '', regex=True)
                        )
                        .pipe(lambda df_: df_.loc[(df_['name'].isin(drop_minimums) == False)])
                        # .pipe(lambda df_: df_.loc[(df_['salary'] > 3_000)])
                       )
    
    name_issues: dict[str,str] = {
        'KJ Martin': 'Kenyon Martin',
        'KJ Martin Jr.': 'Kenyon Martin',
        'Guillermo Hernangomez': 'Willy Hernangomez',
    }
    
    
    fix_name: Callable[[str],str] = lambda name: ' '.join(name.split(' ')[:2])
    
    df['name'] = df['name'].map(lambda x: name_issues.get(x, fix_name(x)))
    df['team'] = df['team'].map(lambda x: inits_issues.get(x,x))
    
    df['input'] = tuple(zip(df['name'], df['team']))
    # df['input'] = df.loc[:,['name','team']].apply(tuple, axis=1) # Does the same thing
    df['output'] = df['input'].apply(lambda x: scrape_props(*x, 'draftkings'))
    
    df['fpts'] = df['output'].map(lambda x: x[0])
    df['e_fpts'] = df['output'].map(lambda x: x[1])
    
    
    for col in ('fpts', 'e_fpts'):
        df[f'{col}/$'] = 1000 * (df[col] / df['salary'])
    
    df['5x'] = 5 * (df['salary'] / 1000)
    df['value'] = df['fpts'] - df['5x']
    
    df = (df
          .loc[df['fpts']>0.0]
          .drop(['input', 'output', '5x'], axis=1)
          .assign(fpts_1k=lambda df_: 1000 * df_.fpts / df_.salary)
          .rename({'fpts_1k': 'fpts-1k'}, axis=1)
          .sort_values('value', ascending=False)
          .set_index('name')
          .round(2)
         )
    
    single_game: bool = 'sg' in path or len(df['team'].drop_duplicates()) == 2
    
    if single_game:
        df = (df
              .assign(
                  cpt_pts=lambda df_: df_.fpts * 1.5,
                  cpt_sal=lambda df_: df_.salary * 1.5,
                  cpt_fpts_1k=lambda df_: 1000 * df_.cpt_pts / df_.cpt_sal,
              )
              .assign(
                  cpt_sal=lambda df_: df_.cpt_sal.astype('int')
              )
              .round(2)
             )
    
    df.to_csv(f'../data/draftkings-props{"-sg" if single_game else ""}.csv')
    
    # # Save to optimizer
    df.to_csv('/home/deegs/devel/repos/nba-boxscores-git/nba-boxscores/data/2023-2024/contest-files/draftkings/current/projections.csv',
              # index=False
             )
    
    return None

In [11]:
# Temporary, just figuring out dynamics for now

def ScrapeProps(**kwargs):
    site: str = kwargs.get('site', 'draftkings')
    return scrape_fanduel(**kwargs) if site == 'fanduel' else scrape_draftkings(**kwargs)
    

In [12]:
def player_pool_distribution(df):
    df = (df
          .groupby('team')
          ['team']
          .agg(['count'])
          .set_axis(['num-players'], axis=1)
          .sort_values('num-players', ascending=False)
         )
    
    total_teams: int = len(pd
                           .read_csv(f'../data/current-draftkings{"-sg" if mode == "single-game" else ""}.csv', usecols=['TeamAbbrev'])
                           .rename({'TeamAbbrev': 'Team'}, axis=1)
                           ['Team']
                           .drop_duplicates()
                          )
    
    print(f'{len(df)} teams total...')
    print(f'Missing: {int(100*(1 - (len(df) / total_teams)))}% of teams...\n')
    
    return df

In [13]:
def output_box(msg: str, *args, **kwargs) ->  None:
    tb: str = ''.join(['   ', '-'*len(msg)])
    print(*[tb, f'   {msg}', tb], sep='\n')
    return


def load_slate(site: str, **kwargs):
    verbose: int = kwargs.get('verbose', 1)
    exclude = kwargs.get('exclude', list())
    drop = kwargs.get('drop', list())
    ret: pd.DataFrame = (pd
                         .read_csv(f'../data/{site}-props{"-sg" if mode == "single-game" else ""}.csv')
                         .pipe(lambda df_: df_.loc[df_['name'].isin(drop) == False])
                         .pipe(lambda df_: df_.loc[df_['team'].isin(exclude) == False])
                         .sort_values(by=kwargs.get('sort', 'fpts'), ascending=False)
                         .set_index('name')
                        )
    
    if verbose:
        msg = f'{len(ret)} total players'.upper()
        output_box(msg)
        print(player_pool_distribution(ret))
    
    return ret

def team_players(*args, **kwargs):
    df: pd.DataFrame = load_slate(site=site_)
    return {
        team: tuple(df
                    .loc[(df['team'] == team) & (df['value'] >= 0.0)]
                    .index
                   )
        for team in df['team'].drop_duplicates()
    }


def pos_value_players(site: str, *args, **kwargs) -> tuple[str,...]:
    ret: tuple[str,...] = tuple(load_slate(site=site, verbose=0, **kwargs)
                                .pipe(lambda df_: df_.loc[df_['value'] >= kwargs.get('value', 0.0)])
                                .sort_values('value', ascending=False)
                                .index
                               )
    
    msg = f'{len(ret)} total players'.upper()
    output_box(msg)
    return ret
# team_players()

In [14]:
import time
def output_times(func, **kwargs) -> None:
    """Wrapper function to print performance time in Xm Ys format"""
    start = time.perf_counter()
    func(**kwargs)
    stop = time.perf_counter()
    
    elapsed: float = (stop - start)/60.0
    
    elapsed_str: str = str(elapsed)
    minutes: int = int( elapsed_str.split('.')[0] )
    
    decimals: float = float( f'0.{elapsed_str.split(".")[1]}' )
    seconds: int = int(decimals * 60.0)
    
    performance_time: str = f'{minutes}m {seconds}s.'
    
    print(f"{func.__name__} performance time for {site.capitalize().replace('duel','Duel').replace('kings', 'Kings')}: {performance_time}\n")
    
    return None

In [61]:
# ScrapeProps(site=site)
last_update = pd.read_csv(f'../data/{site}-props{"-sg" if mode == "single-game" else ""}.csv').set_index('name')
output_times(ScrapeProps, site=site, drop_minimums=False)

ScrapeProps performance time for FanDuel: 0m 41s.



In [62]:
df = load_slate(
    site,
    sort='value',
    drop=['Tyrese Maxey', 'Anthony Davis'], # Late additions to injury report
    # exclude=['WAS', 'NY', 'CHI', 'TOR', 'OKC', 'UTA'] # Games that have already started
).drop('fpts-1k', axis=1)

   ----------------
   67 TOTAL PLAYERS
   ----------------
10 teams total...
Missing: -150% of teams...

      num-players
team             
UTA            10
NY              9
ATL             8
IND             8
LAL             8
BOS             7
CHI             6
GS              6
TOR             4
PHI             1


In [63]:
updated_players = list(set(df.index).difference(set(last_update.index)))
output = ['The following players have been added:']
output += [f'    > {name_}' for name_ in updated_players]
if not updated_players:
    output = ['No players updated since last scrape.']
print(*output, sep='\n')

The following players have been added:
    > Christian Wood
    > Ochai Agbaji
    > Jalen Smith
    > Obi Toppin
    > Jarred Vanderbilt
    > Saddiq Bey


In [64]:
pd.set_option('display.max_rows', 100)

In [65]:
# df.sort_values('fpts', ascending=False)
# df.sort_values('fpts/$', ascending=False)
df.sort_values('value', ascending=False)
# df.sort_values('salary')
# df.sort_index()

Unnamed: 0_level_0,pos,salary,team,fpts,e_fpts,fpts/$,e_fpts/$,value
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Jalen Brunson,PG,9600,NY,55.45,27.72,5.78,2.89,7.45
Donte DiVincenzo,SG/SF,6100,NY,37.15,19.6,6.09,3.21,6.65
Joel Embiid,C,12300,PHI,67.35,34.12,5.48,2.77,5.85
Scottie Barnes,PF/SF,9300,TOR,51.25,25.99,5.51,2.79,4.75
DeMar DeRozan,SF,8100,CHI,44.65,22.26,5.51,2.75,4.15
Coby White,PG,7700,CHI,41.35,21.55,5.37,2.8,2.85
Josh Hart,SF/PF,6000,NY,32.45,16.63,5.41,2.77,2.45
LeBron James,SF/PF,10300,LAL,52.95,28.6,5.14,2.78,1.45
Nikola Vucevic,C,7900,CHI,40.55,21.49,5.13,2.72,1.05
Draymond Green,PF/C,6800,GS,34.75,17.35,5.11,2.55,0.75


In [36]:
def flatten(nestedSeq) -> list[list[str,...], ...]:
    return [element for inner in nestedSeq for element in inner]

def get_top_names(df: pd.DataFrame, n=2, by='value') -> pd.DataFrame:
    """
    Returns only the top n players from each team by provided parameter, defaults to value
    """
    df = df.sort_values(by, ascending=False)
    top_names = {team: list(df.loc[df['team'] == team].index)[:n] for team in df['team'].drop_duplicates()}
    
    return df.loc[df.index.isin(flatten(list(top_names.values())))]

top_df = get_top_names(df, n=3)
# top_df

In [37]:
pos_dfs = dict()
for pos in ('PG', 'SG', 'SF', 'PF', 'C'):
    df[pos] = df['pos'].map(lambda pos_: int(pos in pos_))
    pos_dfs[pos] = df.loc[df[pos] == 1].drop(pos, axis=1).sort_values('value', ascending=False)
    df = df.drop(pos, axis=1)

In [38]:
pos_dfs['PG']

Unnamed: 0_level_0,pos,salary,team,fpts,e_fpts,fpts/$,e_fpts/$,value
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Jalen Brunson,PG,9600,NY,53.95,29.08,5.62,3.03,5.95
Coby White,PG,7700,CHI,41.35,22.06,5.37,2.86,2.85
Trae Young,PG,10200,ATL,51.25,26.82,5.02,2.63,0.25
D'Angelo Russell,PG/SG,7800,LAL,39.25,20.46,5.03,2.62,0.25
Alex Caruso,PG/SG,6200,CHI,29.65,15.25,4.78,2.46,-1.35
Stephen Curry,PG,9100,GS,44.15,23.41,4.85,2.57,-1.35
Dejounte Murray,SG/PG,8900,ATL,42.35,21.0,4.76,2.36,-2.15
Jordan Clarkson,PG/SG,7000,UTA,30.25,14.58,4.32,2.08,-4.75
Ayo Dosunmu,PG/SG,5700,CHI,23.45,13.27,4.11,2.33,-5.05
Collin Sexton,SG/PG,7600,UTA,32.75,17.09,4.31,2.25,-5.25


In [39]:
pos_dfs['SG']

Unnamed: 0_level_0,pos,salary,team,fpts,e_fpts,fpts/$,e_fpts/$,value
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Donte DiVincenzo,SG/SF,6100,NY,37.15,19.49,6.09,3.2,6.65
Gary Trent,SG,5100,TOR,25.75,12.45,5.05,2.44,0.25
D'Angelo Russell,PG/SG,7800,LAL,39.25,20.46,5.03,2.62,0.25
Austin Reaves,SF/SG,6500,LAL,31.95,16.31,4.92,2.51,-0.55
Quentin Grimes,SG/SF,4600,NY,21.75,11.09,4.73,2.41,-1.25
Alex Caruso,PG/SG,6200,CHI,29.65,15.25,4.78,2.46,-1.35
Klay Thompson,SG,6400,GS,30.45,15.21,4.76,2.38,-1.55
Dejounte Murray,SG/PG,8900,ATL,42.35,21.0,4.76,2.36,-2.15
Brandin Podziemski,SG/SF,5400,GS,24.35,11.68,4.51,2.16,-2.65
Jaylen Brown,SF/SG,8500,BOS,38.35,20.75,4.51,2.44,-4.15


In [40]:
pos_dfs['SF']

Unnamed: 0_level_0,pos,salary,team,fpts,e_fpts,fpts/$,e_fpts/$,value
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Donte DiVincenzo,SG/SF,6100,NY,37.15,19.49,6.09,3.2,6.65
Scottie Barnes,PF/SF,9300,TOR,50.25,26.29,5.4,2.83,3.75
DeMar DeRozan,SF,8100,CHI,43.15,22.75,5.33,2.81,2.65
Josh Hart,SF/PF,6000,NY,32.45,16.72,5.41,2.79,2.45
LeBron James,SF/PF,10300,LAL,51.75,28.11,5.02,2.73,0.25
Jonathan Kuminga,SF/PF,6900,GS,34.05,17.05,4.93,2.47,-0.45
Austin Reaves,SF/SG,6500,LAL,31.95,16.31,4.92,2.51,-0.55
Quentin Grimes,SG/SF,4600,NY,21.75,11.09,4.73,2.41,-1.25
Jayson Tatum,PF/SF,10000,BOS,48.45,25.86,4.85,2.59,-1.55
Brandin Podziemski,SG/SF,5400,GS,24.35,11.68,4.51,2.16,-2.65


In [41]:
pos_dfs['PF']

Unnamed: 0_level_0,pos,salary,team,fpts,e_fpts,fpts/$,e_fpts/$,value
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Scottie Barnes,PF/SF,9300,TOR,50.25,26.29,5.4,2.83,3.75
Josh Hart,SF/PF,6000,NY,32.45,16.72,5.41,2.79,2.45
Draymond Green,PF/C,6800,GS,35.75,17.25,5.26,2.54,1.75
LeBron James,SF/PF,10300,LAL,51.75,28.11,5.02,2.73,0.25
Jonathan Kuminga,SF/PF,6900,GS,34.05,17.05,4.93,2.47,-0.45
Jayson Tatum,PF/SF,10000,BOS,48.45,25.86,4.85,2.59,-1.55
Precious Achiuwa,PF/C,5900,NY,26.95,14.67,4.57,2.49,-2.55
Kristaps Porzingis,PF/C,8200,BOS,38.25,19.32,4.66,2.36,-2.75
Jalen Johnson,SF/PF,8000,ATL,36.95,18.74,4.62,2.34,-3.05
Pascal Siakam,PF/C,8300,IND,38.05,19.21,4.58,2.31,-3.45


In [42]:
pos_dfs['C']

Unnamed: 0_level_0,pos,salary,team,fpts,e_fpts,fpts/$,e_fpts/$,value
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Joel Embiid,C,12300,PHI,67.35,33.78,5.48,2.75,5.85
Draymond Green,PF/C,6800,GS,35.75,17.25,5.26,2.54,1.75
Nikola Vucevic,C,7900,CHI,40.55,21.81,5.13,2.76,1.05
Precious Achiuwa,PF/C,5900,NY,26.95,14.67,4.57,2.49,-2.55
Kristaps Porzingis,PF/C,8200,BOS,38.25,19.32,4.66,2.36,-2.75
Pascal Siakam,PF/C,8300,IND,38.05,19.21,4.58,2.31,-3.45
Clint Capela,C,6500,ATL,28.65,14.98,4.41,2.3,-3.85
Myles Turner,C,6900,IND,30.35,15.33,4.4,2.22,-4.15
Kelly Olynyk,C/PF,5100,UTA,19.15,9.04,3.75,1.77,-6.35
John Collins,PF/C,6400,UTA,25.05,12.84,3.91,2.01,-6.95


In [43]:
# team_dfs = {team: df.loc[df['team'] == team] for team in df['team'].drop_duplicates()}
dict(sorted({team: df.loc[df['team'] == team].sort_values('value', ascending=False) for team in df['team'].drop_duplicates()}.items(), key=lambda item: item[0]))

{'ATL':                      pos  salary team   fpts  e_fpts  fpts/$  e_fpts/$  value
 name                                                                         
 Trae Young            PG   10200  ATL  51.25   26.82    5.02      2.63   0.25
 Dejounte Murray    SG/PG    8900  ATL  42.35   21.00    4.76      2.36  -2.15
 Jalen Johnson      SF/PF    8000  ATL  36.95   18.74    4.62      2.34  -3.05
 Clint Capela           C    6500  ATL  28.65   14.98    4.41      2.30  -3.85
 Bogdan Bogdanovic  SF/SG    6800  ATL  27.25   13.42    4.01      1.97  -6.75
 Onyeka Okongwu      C/PF    5700  ATL  17.85    9.78    3.13      1.72 -10.65,
 'BOS':                       pos  salary team   fpts  e_fpts  fpts/$  e_fpts/$  value
 name                                                                          
 Jayson Tatum        PF/SF   10000  BOS  48.45   25.86    4.85      2.59  -1.55
 Kristaps Porzingis   PF/C    8200  BOS  38.25   19.32    4.66      2.36  -2.75
 Jaylen Brown        SF/SG    850