In [1]:
import os
import requests
import datetime

import numpy as np
import pandas as pd

import customsettings

from typing import Callable

from bs4 import BeautifulSoup

from propscraper import PropScraper
from params import mode, site

In [2]:
# Returns current date as string in desired format for files
def date_path() -> str:
    """Build a file name from today's date: ``MMDDYY`` followed by ``.csv``."""
    today: datetime.datetime = datetime.datetime.now()
    # To stamp tomorrow's date instead, add datetime.timedelta(days=1) to `today`.
    stamp: str = today.strftime("%m%d%y")
    return f'{stamp}.csv'

In [3]:
# In case webpage goes down again
def save_directory(links=None, path: str = '../data/url-directory.csv'):
    """Persist a team -> {player name -> url} mapping as a flat CSV.

    Parameters
    ----------
    links : dict[str, dict[str, str]] | None
        Mapping to save. When omitted, the module-level ``directory`` is used,
        which is what a bare ``save_directory()`` call has always done.
    path : str
        Destination CSV file. Defaults to the notebook's usual backup location.

    The file has one row per player with the columns ``team``, ``name`` and
    ``url`` (no index column), which is the layout ``load_directory`` reads.

    Returns
    -------
    None
    """
    if links is None:
        links = directory

    # One (team, name, url) record per player, in the mapping's own order.
    rows: list[tuple[str, str, str]] = [
        (team, name, url)
        for team, player_links in links.items()
        for name, url in player_links.items()
    ]

    # Passing `columns` explicitly keeps the header intact even when `rows` is empty.
    pd.DataFrame(rows, columns=['team', 'name', 'url']).to_csv(path, index=False)

    return None


def load_directory(path: str = '../data/url-directory.csv'):
    """Rebuild the team -> {player name -> url} mapping from a saved CSV.

    Parameters
    ----------
    path : str
        CSV file with ``team``, ``name`` and ``url`` columns, as written by
        ``save_directory``. Defaults to the notebook's usual backup location.

    Returns
    -------
    dict[str, dict[str, str]]
        Teams appear in the order they first occur in the file. If a player
        name is repeated within a team, the last row wins.
    """
    saved: pd.DataFrame = pd.read_csv(path)

    # sort=False keeps teams in file order rather than alphabetical order.
    return {
        team: dict(zip(rows['name'], rows['url']))
        for team, rows in saved.groupby('team', sort=False)
    }

In [4]:
# Shared scraper instance used by every scrape_* helper below.
Props = PropScraper()
# team -> {player name -> page url}; an empty mapping is treated as "site down"
# by check_site and by the save/load fallback further down.
# NOTE(review): create_webpage_directory presumably hits the network — confirm in propscraper.
directory: dict[str,dict[str,str]] = Props.create_webpage_directory()

In [5]:
def scrape_props(name: str, team: str, site: str, **kwargs):
    """Fetch one player's projection pair via the shared ``Props`` scraper.

    The player's page url is looked up in the module-level ``directory`` under
    ``team`` then ``name``. Extra keyword arguments are forwarded untouched to
    ``Props.scrape_player_props``.

    Returns whatever the scraper returns, or ``(0.0, 0.0)`` when a ``KeyError``
    is raised anywhere in the lookup or the scrape (for example, the player or
    team is missing from the directory).
    """
    fallback = (0.0, 0.0)

    try:
        player_url = directory[team][name]
        projection = Props.scrape_player_props(name, player_url, site, **kwargs)
    except KeyError:
        return fallback

    return projection

In [6]:
def check_site():
    """Report whether the scraped link directory has any entries.

    Returns
    -------
    str
        ``'No Issues'`` when the module-level ``directory`` is non-empty,
        otherwise a message saying the source page is down or empty.
    """
    # Explicit test rather than `assert`: assertions are removed when Python
    # runs with -O, which would make this check always report success.
    if len(directory) == 0:
        return 'ScoresAndOdds.com is down, or at least the page containing links is empty...'

    return 'No Issues'
    

In [7]:
# Display the directory health message ('No Issues' when links were found).
check_site()

'No Issues'

In [8]:
# A non-empty scrape refreshes the on-disk backup; an empty scrape falls back
# to the last backup so the rest of the notebook still has urls to work with.
if len(directory):
    save_directory()
else:
    directory = load_directory()
# directory = load_directory()

In [9]:
def scrape_fanduel(**kwargs):
    """Build FanDuel projections for the current slate and write them to CSV.

    Steps:
      1. Read the FanDuel salary export (``../data/current-fanduel.csv``, or the
         ``-sg`` variant when the module-level ``mode`` is ``'single-game'``).
      2. Drop players whose injury indicator is ``'O'`` and strip periods from
         player names.
      3. Call ``scrape_props(name, team, 'fanduel')`` for every player; the two
         elements of the returned tuple become ``fpts`` and ``e_fpts``.
      4. Derive ``fpts/$``, ``e_fpts/$`` and ``fpts-1k`` (points per $1000 of
         salary) and ``value`` (``fpts`` minus 5 points per $1000 of salary).
      5. Keep players with ``fpts > 0``, sort by ``value`` descending, index by
         name, round to 2 decimals.
      6. Write the result to ``../data/fanduel-props[-sg].csv`` and to the
         optimizer's ``projections.csv``.

    Keyword arguments are accepted so this function can be called the same way
    as ``scrape_draftkings``; none of them are consulted here, and no
    minimum-salary filtering is applied.

    Returns
    -------
    None
    """
    path: str = '../data/current-fanduel.csv'
    if mode == 'single-game':
        path = path.replace('.csv', '-sg.csv')

    # FanDuel export header -> column name used throughout the notebook.
    columns: dict[str, str] = {
        'Nickname': 'name',
        'Position': 'pos',
        'Team': 'team',
        'Salary': 'salary',
        'Injury Indicator': 'injury',
    }

    df: pd.DataFrame = (pd
                        .read_csv(path, usecols=list(columns))
                        .rename(columns=columns)
                        # 'O' rows are removed before any scraping is attempted
                        .pipe(lambda df_: df_.loc[df_['injury'] != 'O'])
                        .drop('injury', axis=1)
                        .assign(name=lambda df_: df_['name'].str.replace('.', '', regex=False))
                       )

    # scoresandodds : FanDuel
    # The pair maps each spelling to the other one.
    name_issues = {
        'Moe Wagner': 'Moritz Wagner',
        'Moritz Wagner': 'Moe Wagner'
    }

    df['name'] = df['name'].map(lambda name: name_issues.get(name, name))
    df['input'] = df.loc[:, ['name', 'team']].apply(tuple, axis=1)
    # scrape_props yields (0.0, 0.0) for players it cannot find, which the
    # fpts > 0 filter below then removes.
    df['output'] = df['input'].apply(lambda x: scrape_props(*x, 'fanduel'))

    df['fpts'] = df['output'].map(lambda x: x[0])
    df['e_fpts'] = df['output'].map(lambda x: x[1])

    for col in ('fpts', 'e_fpts'):
        df[f'{col}/$'] = 1000 * (df[col] / df['salary'])

    # Break-even line: 5 points per $1000 of salary.
    df['5x'] = 5 * (df['salary'] / 1000)
    df['value'] = df['fpts'] - df['5x']

    df = (df
          .loc[df['fpts'] > 0.0]
          .drop(['input', 'output', '5x'], axis=1)
          .assign(fpts_1k=lambda df_: 1000 * df_.fpts / df_.salary)
          .rename({'fpts_1k': 'fpts-1k'}, axis=1)
          .sort_values('value', ascending=False)
          .set_index('name')
          .round(2)
         )

    # A slate with exactly two teams is also treated as single-game.
    single_game: bool = 'sg' in path or len(df['team'].drop_duplicates()) == 2

    df.to_csv(f'../data/fanduel-props{"-sg" if single_game else ""}.csv')

    # Save to optimizer
    df.to_csv('/home/deegs/devel/repos/nba-boxscores-git/nba-boxscores/data/2023-2024/contest-files/fanduel/current/projections.csv',
              # index=False
             )

    return None

In [10]:
def scrape_draftkings(**kwargs):
    """Build DraftKings projections for the current slate and write them to CSV.

    Steps:
      1. Read the DraftKings salary export (``../data/current-draftkings.csv``,
         or the ``-sg`` variant when the module-level ``mode`` is
         ``'single-game'``) and drop ``CPT`` rows.
      2. Strip periods from names and reduce the roster-position string to the
         base positions (``/G``, ``/F`` and ``/UTIL`` slots are removed).
      3. Drop every player that has a row at the minimum salary.
      4. Normalise names (first two words, plus known aliases) and team
         abbreviations, then call ``scrape_props(name, team, 'draftkings')``;
         the two elements of the returned tuple become ``fpts`` and ``e_fpts``.
      5. Derive ``fpts/$``, ``e_fpts/$`` and ``fpts-1k`` (points per $1000 of
         salary) and ``value`` (``fpts`` minus 5 points per $1000 of salary).
      6. Keep players with ``fpts > 0``, sort by ``value`` descending, index by
         name, round to 2 decimals; for single-game slates add captain columns
         (1.5x points and salary).
      7. Write the result to ``../data/draftkings-props[-sg].csv`` and to the
         optimizer's ``projections.csv``.

    Keyword arguments
    -----------------
    drop_minimums : bool, default True
        When true the minimum salary is taken to be 1000; otherwise 0, which
        effectively disables the filter unless a row has a salary of 0.

    Returns
    -------
    None
    """
    path: str = '../data/current-draftkings.csv'
    if mode == 'single-game':
        path = path.replace('.csv', '-sg.csv')

    # DraftKings export header -> column name used throughout the notebook.
    columns: dict[str, str] = {
        'Name': 'name',
        'Roster Position': 'pos',
        'TeamAbbrev': 'team',
        'Salary': 'salary'
    }

    # DraftKings abbreviation -> abbreviation used as the key in `directory`.
    inits_issues = {
        'SAS': 'SA',
        'PHX': 'PHO',
        'GSW': 'GS',
        'NOP': 'NO',
        'NYK': 'NY'
    }

    MIN_SAL: int = 1_000 if kwargs.get('drop_minimums', True) else 0

    # Names listed here survive the minimum-salary filter.
    keep_minimums: tuple[str, ...] = tuple()

    def _drop_minimum_salary(df_: pd.DataFrame) -> pd.DataFrame:
        """Remove every player that has a row at MIN_SAL (unless kept explicitly)."""
        # Built from the already-cleaned names so the comparison below matches;
        # names taken straight from the file still contain periods and would
        # never equal the period-stripped 'name' column.
        minimum_names = set(df_.loc[df_['salary'] == MIN_SAL, 'name']).difference(keep_minimums)
        return df_.loc[~df_['name'].isin(minimum_names)]

    df: pd.DataFrame = (pd
                        .read_csv(path, usecols=list(columns))
                        .rename(columns=columns)
                        .pipe(lambda df_: df_.loc[(df_['pos'] != 'CPT')])  # For single game contests
                        .assign(
                            name=lambda df_: df_['name'].str.replace('.', '', regex=False),
                            pos=lambda df_: df_['pos']
                            .str.replace('/[GF]/UTIL', '', regex=True)
                            .str.replace('C/UTIL', 'C', regex=False)
                            .str.replace('/[GF]', '', regex=True)
                        )
                        .pipe(_drop_minimum_salary)
                        # .pipe(lambda df_: df_.loc[(df_['salary'] > 3_000)])
                       )

    # Periods are stripped before this lookup, so the 'KJ Martin Jr.' key can
    # only be reached through the two-word fallback in _canonical_name.
    name_issues: dict[str, str] = {
        'KJ Martin': 'Kenyon Martin',
        'KJ Martin Jr.': 'Kenyon Martin',
        'Guillermo Hernangomez': 'Willy Hernangomez',
    }

    # Keep only the first two words, which removes suffixes such as "Jr" / "III".
    fix_name: Callable[[str], str] = lambda name: ' '.join(name.split(' ')[:2])

    def _canonical_name(raw: str) -> str:
        """Resolve an alias by full name first, then by the two-word form."""
        short = fix_name(raw)
        return name_issues.get(raw, name_issues.get(short, short))

    df['name'] = df['name'].map(_canonical_name)
    df['team'] = df['team'].map(lambda x: inits_issues.get(x, x))

    df['input'] = tuple(zip(df['name'], df['team']))
    # scrape_props yields (0.0, 0.0) for players it cannot find, which the
    # fpts > 0 filter below then removes.
    df['output'] = df['input'].apply(lambda x: scrape_props(*x, 'draftkings'))

    df['fpts'] = df['output'].map(lambda x: x[0])
    df['e_fpts'] = df['output'].map(lambda x: x[1])

    for col in ('fpts', 'e_fpts'):
        df[f'{col}/$'] = 1000 * (df[col] / df['salary'])

    # Break-even line: 5 points per $1000 of salary.
    df['5x'] = 5 * (df['salary'] / 1000)
    df['value'] = df['fpts'] - df['5x']

    df = (df
          .loc[df['fpts'] > 0.0]
          .drop(['input', 'output', '5x'], axis=1)
          .assign(fpts_1k=lambda df_: 1000 * df_.fpts / df_.salary)
          .rename({'fpts_1k': 'fpts-1k'}, axis=1)
          .sort_values('value', ascending=False)
          .set_index('name')
          .round(2)
         )

    # A slate with exactly two teams is also treated as single-game.
    single_game: bool = 'sg' in path or len(df['team'].drop_duplicates()) == 2

    if single_game:
        # Captain slot: 1.5x points at 1.5x salary.
        df = (df
              .assign(
                  cpt_pts=lambda df_: df_.fpts * 1.5,
                  cpt_sal=lambda df_: df_.salary * 1.5,
                  cpt_fpts_1k=lambda df_: 1000 * df_.cpt_pts / df_.cpt_sal,
              )
              .assign(
                  cpt_sal=lambda df_: df_.cpt_sal.astype('int')
              )
              .round(2)
             )

    df.to_csv(f'../data/draftkings-props{"-sg" if single_game else ""}.csv')
    # Save to optimizer
    df.to_csv('/home/deegs/devel/repos/nba-boxscores-git/nba-boxscores/data/2023-2024/contest-files/draftkings/current/projections.csv',
              # index=False
             )

    return None

In [11]:
# Temporary, just figuring out dynamics for now

def ScrapeProps(**kwargs):
    """Dispatch to the scraper for the requested site.

    ``site='fanduel'`` runs ``scrape_fanduel``; any other value, or no ``site``
    keyword at all, runs ``scrape_draftkings``. All keyword arguments, including
    ``site``, are forwarded unchanged.
    """
    target: str = kwargs.get('site', 'draftkings')
    if target == 'fanduel':
        return scrape_fanduel(**kwargs)
    return scrape_draftkings(**kwargs)
    

In [12]:
def player_pool_distribution(df):
    """Count projected players per team and report slate coverage.

    Prints how many teams appear in ``df`` and what percentage of the slate's
    teams are absent, then returns a frame indexed by team with a single
    ``num-players`` column, largest pool first.

    NOTE(review): the slate's team list is always read from the DraftKings
    salary export, whichever site ``df`` was built for.
    """
    per_team = df.groupby('team')['team'].agg(['count'])
    per_team = per_team.set_axis(['num-players'], axis=1)
    per_team = per_team.sort_values('num-players', ascending=False)

    slate_file: str = f'../data/current-draftkings{"-sg" if mode == "single-game" else ""}.csv'
    slate_teams = (pd
                   .read_csv(slate_file, usecols=['TeamAbbrev'])
                   .rename({'TeamAbbrev': 'Team'}, axis=1)
                   ['Team']
                   .drop_duplicates()
                  )
    total_teams: int = len(slate_teams)

    print(f'{len(per_team)} teams total...')
    print(f'Missing: {int(100*(1 - (len(per_team) / total_teams)))}% of teams...\n')

    return per_team

In [13]:
def output_box(msg: str, *args, **kwargs) ->  None:
    """Print ``msg`` indented by three spaces between two dashed rules of the same width."""
    rule: str = '   ' + '-' * len(msg)
    print('\n'.join((rule, f'   {msg}', rule)))
    return


def load_slate(site: str, **kwargs):
    """Load the saved projections for ``site`` as a frame indexed by player name.

    Keyword arguments
    -----------------
    verbose : int, default 1
        When truthy, print a player-count banner and the per-team distribution.
    exclude : list, default []
        Team abbreviations to remove.
    drop : list, default []
        Player names to remove.
    sort : str, default 'fpts'
        Column to sort by, descending.

    The file read is ``../data/{site}-props.csv``, or the ``-sg`` variant when
    the module-level ``mode`` is ``'single-game'``.
    """
    verbose: int = kwargs.get('verbose', 1)
    excluded_teams = kwargs.get('exclude', list())
    dropped_names = kwargs.get('drop', list())
    sort_column = kwargs.get('sort', 'fpts')

    suffix: str = "-sg" if mode == "single-game" else ""
    slate: pd.DataFrame = pd.read_csv(f'../data/{site}-props{suffix}.csv')

    # Players are filtered first, then teams.
    slate = slate.loc[~slate['name'].isin(dropped_names)]
    slate = slate.loc[~slate['team'].isin(excluded_teams)]

    ret: pd.DataFrame = slate.sort_values(by=sort_column, ascending=False).set_index('name')

    if verbose:
        output_box(f'{len(ret)} total players'.upper())
        print(player_pool_distribution(ret))

    return ret

def team_players(*args, **kwargs):
    """Group the non-negative-value players of the saved slate by team.

    Keyword arguments
    -----------------
    site : str, optional
        Site whose saved projections are loaded; defaults to the module-level
        ``site`` imported from ``params``.

    Returns
    -------
    dict[str, tuple[str, ...]]
        Team abbreviation -> names of that team's players whose ``value`` is
        at least 0.0, in the order ``load_slate`` returns them.
    """
    # A local name other than `site` is required here: assigning to `site`
    # would make it function-local and hide the module-level default.
    slate_site: str = kwargs['site'] if 'site' in kwargs else site
    df: pd.DataFrame = load_slate(site=slate_site)
    return {
        team: tuple(df
                    .loc[(df['team'] == team) & (df['value'] >= 0.0)]
                    .index
                   )
        for team in df['team'].drop_duplicates()
    }


def pos_value_players(site: str, *args, **kwargs) -> tuple[str,...]:
    """Names of players whose ``value`` meets a threshold, best value first.

    The slate is loaded quietly through ``load_slate`` (all keyword arguments
    are forwarded to it). The threshold is the ``value`` keyword, defaulting to
    0.0. A banner with the number of qualifying players is printed before the
    names are returned.
    """
    slate = load_slate(site=site, verbose=0, **kwargs)
    threshold = kwargs.get('value', 0.0)

    qualifying = slate.loc[slate['value'] >= threshold].sort_values('value', ascending=False)
    ret: tuple[str,...] = tuple(qualifying.index)

    output_box(f'{len(ret)} total players'.upper())
    return ret
# team_players()

In [14]:
import time
def output_times(func, **kwargs) -> None:
    """Run ``func(**kwargs)`` and print how long it took in ``Xm Ys`` format.

    Parameters
    ----------
    func : callable
        Function to time; its return value is discarded.
    **kwargs
        Forwarded to ``func``. When a ``site`` keyword is present it is also
        used to label the printed line; otherwise the module-level ``site`` is.

    Returns
    -------
    None
    """
    start = time.perf_counter()
    func(**kwargs)
    stop = time.perf_counter()

    # Whole seconds split arithmetically. Parsing str(float) is unreliable
    # because very small durations are rendered in scientific notation.
    minutes, seconds = divmod(int(stop - start), 60)

    performance_time: str = f'{minutes}m {seconds}s.'

    label: str = kwargs['site'] if 'site' in kwargs else site

    print(f"{func.__name__} performance time for {label.capitalize().replace('duel','Duel').replace('kings', 'Kings')}: {performance_time}\n")

    return None

In [17]:
# ScrapeProps(site=site)
# Snapshot the previously saved projections before re-scraping, so the next
# cells can report which players are new. This read requires the props file
# from an earlier run to exist already.
last_update = pd.read_csv(f'../data/{site}-props{"-sg" if mode == "single-game" else ""}.csv').set_index('name')
# NOTE(review): no function defined in this notebook reads `mute_touchdowns`.
output_times(ScrapeProps, site=site, drop_minimums=False, mute_touchdowns=False)

In [18]:
# Reload the freshly written projections ordered by value; 'fpts-1k' is dropped
# because it is computed from the same formula as 'fpts/$'.
df = load_slate(
    site,
    sort='value',
    # drop=['Aaron Nesmith'], # Late additions to injury report
    # exclude=['WAS', 'NY', 'CHI', 'TOR', 'OKC', 'UTA'] # Games that have already started
).drop('fpts-1k', axis=1)

   ----------------
   63 TOTAL PLAYERS
   ----------------
13 teams total...
Missing: 7% of teams...

      num-players
team             
DEN             7
BOS             6
NO              6
PHI             6
PHO             6
LAL             5
ORL             5
SA              5
BKN             4
CHA             4
MIA             4
ATL             3
POR             2


In [19]:
# Players present in the new slate but absent from the pre-scrape snapshot.
updated_players = list(set(df.index) - set(last_update.index))
if updated_players:
    output = [
        'The following players have been added:',
        *[f'    > {name_}' for name_ in updated_players],
    ]
else:
    output = ['No players updated since last scrape.']
print('\n'.join(output))

No players updated since last scrape.


In [20]:
# Full player pool ordered by projected points; the commented lines are
# alternative orderings.
df.sort_values('fpts', ascending=False)
# df.sort_values('fpts/$', ascending=False)
# df.sort_values('salary')

Unnamed: 0_level_0,pos,salary,team,fpts,e_fpts,fpts/$,e_fpts/$,value
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Joel Embiid,C,11500,PHI,64.38,32.09,5.6,2.79,6.88
Nikola Jokic,C,11300,DEN,60.88,30.06,5.39,2.66,4.38
Anthony Davis,PF/C,10200,LAL,57.12,29.26,5.6,2.87,6.12
LaMelo Ball,PG,8800,CHA,51.62,27.5,5.87,3.12,7.62
Trae Young,PG,9700,ATL,51.62,25.43,5.32,2.62,3.12
Bam Adebayo,C,8700,MIA,51.13,25.16,5.88,2.89,7.63
Jayson Tatum,SF/PF,9700,BOS,50.12,26.05,5.17,2.69,1.62
LeBron James,SF/PF,9500,LAL,49.88,26.46,5.25,2.79,2.38
Paolo Banchero,PF,9000,ORL,46.88,23.67,5.21,2.63,1.88
Devin Booker,PG/SG,9100,PHO,45.62,22.76,5.01,2.5,0.12


In [19]:
def flatten(nestedSeq) -> list:
    """Concatenate the elements of each inner iterable into one flat list.

    Only one level of nesting is removed; order is preserved.
    """
    return [item for group in nestedSeq for item in group]

def get_top_names(df: pd.DataFrame, n=2, by='value') -> pd.DataFrame:
    """
    Returns only the top n players from each team by provided parameter, defaults to value

    Parameters
    ----------
    df : pd.DataFrame
        Player frame with a ``team`` column and a ``by`` column.
    n : int
        Number of rows to keep per team.
    by : str
        Column used for ranking, highest first.

    Returns
    -------
    pd.DataFrame
        Rows of ``df`` sorted by ``by`` descending, limited to the first ``n``
        rows of each team.
    """
    ranked = df.sort_values(by, ascending=False)
    # Selecting rows per group, not by index label, so a name shared by
    # players on different teams cannot pull in extra rows.
    return ranked.groupby('team', sort=False).head(n)

# Three highest-value players per team.
top_df = get_top_names(df, n=3)
# top_df

In [20]:
# One frame per position, best value first. A player is eligible for a position
# when its label occurs in the player's 'pos' string, so multi-position players
# appear in several frames.
pos_dfs = dict()
for pos in ('PG', 'SG', 'SF', 'PF', 'C'):
    eligible = df['pos'].map(lambda label: pos in label)
    pos_dfs[pos] = df.loc[eligible].sort_values('value', ascending=False)

In [21]:
# PG-eligible players, best value first.
pos_dfs['PG']

Unnamed: 0_level_0,pos,salary,team,fpts,e_fpts,fpts/$,e_fpts/$,value
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
LaMelo Ball,PG,8800,CHA,51.62,27.5,5.87,3.12,7.62
Tre Jones,PG,5800,SA,33.38,15.79,5.76,2.72,4.38
Tyler Herro,PG/SG,7500,MIA,41.38,20.82,5.52,2.78,3.88
Trae Young,PG,9700,ATL,51.62,25.43,5.32,2.62,3.12
Devin Vassell,PG/SG,6700,SA,36.62,18.73,5.47,2.8,3.12
D'Angelo Russell,PG,6200,LAL,32.12,17.51,5.18,2.82,1.12
Jamal Murray,PG,8100,DEN,41.38,20.69,5.11,2.55,0.88
Tyrese Maxey,PG,8900,PHI,45.38,23.72,5.1,2.67,0.88
Anfernee Simons,PG/SG,7800,POR,39.38,21.45,5.05,2.75,0.38
CJ McCollum,PG,7600,NO,38.12,18.78,5.02,2.47,0.12


In [22]:
# SG-eligible players, best value first.
pos_dfs['SG']

Unnamed: 0_level_0,pos,salary,team,fpts,e_fpts,fpts/$,e_fpts/$,value
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Terry Rozier,SG,7200,CHA,41.12,21.97,5.71,3.05,5.12
Tyler Herro,PG/SG,7500,MIA,41.38,20.82,5.52,2.78,3.88
Devin Vassell,PG/SG,6700,SA,36.62,18.73,5.47,2.8,3.12
Anfernee Simons,PG/SG,7800,POR,39.38,21.45,5.05,2.75,0.38
Mikal Bridges,SG/SF,7700,BKN,38.88,19.97,5.05,2.59,0.38
Devin Booker,PG/SG,9100,PHO,45.62,22.76,5.01,2.5,0.12
Jaylen Brown,SG/SF,8100,BOS,40.38,21.06,4.99,2.6,-0.12
Austin Reaves,PG/SG,6100,LAL,29.88,15.91,4.9,2.61,-0.62
Julian Champagnie,SG/SF,3500,SA,16.88,8.71,4.82,2.49,-0.62
Jrue Holiday,PG/SG,6500,BOS,30.38,15.49,4.67,2.38,-2.12


In [23]:
# SF-eligible players, best value first.
pos_dfs['SF']

Unnamed: 0_level_0,pos,salary,team,fpts,e_fpts,fpts/$,e_fpts/$,value
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Jimmy Butler,SF,7600,MIA,41.38,21.21,5.44,2.79,3.38
LeBron James,SF/PF,9500,LAL,49.88,26.46,5.25,2.79,2.38
Jayson Tatum,SF/PF,9700,BOS,50.12,26.05,5.17,2.69,1.62
Mikal Bridges,SG/SF,7700,BKN,38.88,19.97,5.05,2.59,0.38
Jaylen Brown,SG/SF,8100,BOS,40.38,21.06,4.99,2.6,-0.12
Julian Champagnie,SG/SF,3500,SA,16.88,8.71,4.82,2.49,-0.62
Taurean Prince,SF,4000,LAL,17.88,9.33,4.47,2.33,-2.12
Keldon Johnson,SF/PF,6400,SA,29.88,16.56,4.67,2.59,-2.12
Jerami Grant,SF/PF,6800,POR,31.38,16.05,4.61,2.36,-2.62
Michael Porter,SF/PF,6400,DEN,29.12,14.94,4.55,2.33,-2.88


In [24]:
# PF-eligible players, best value first.
pos_dfs['PF']

Unnamed: 0_level_0,pos,salary,team,fpts,e_fpts,fpts/$,e_fpts/$,value
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Anthony Davis,PF/C,10200,LAL,57.12,29.26,5.6,2.87,6.12
Jeremy Sochan,PF,5200,SA,30.88,15.95,5.94,3.07,4.88
Miles Bridges,PF/C,7100,CHA,39.12,20.96,5.51,2.95,3.62
LeBron James,SF/PF,9500,LAL,49.88,26.46,5.25,2.79,2.38
Paolo Banchero,PF,9000,ORL,46.88,23.67,5.21,2.63,1.88
Zion Williamson,PF,7500,NO,39.13,20.68,5.22,2.76,1.63
Jayson Tatum,SF/PF,9700,BOS,50.12,26.05,5.17,2.69,1.62
Keldon Johnson,SF/PF,6400,SA,29.88,16.56,4.67,2.59,-2.12
Kristaps Porzingis,PF/C,7800,BOS,36.38,18.01,4.66,2.31,-2.62
Jerami Grant,SF/PF,6800,POR,31.38,16.05,4.61,2.36,-2.62


In [25]:
# C-eligible players, best value first.
pos_dfs['C']

Unnamed: 0_level_0,pos,salary,team,fpts,e_fpts,fpts/$,e_fpts/$,value
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Bam Adebayo,C,8700,MIA,51.13,25.16,5.88,2.89,7.63
Joel Embiid,C,11500,PHI,64.38,32.09,5.6,2.79,6.88
Anthony Davis,PF/C,10200,LAL,57.12,29.26,5.6,2.87,6.12
Nikola Jokic,C,11300,DEN,60.88,30.06,5.39,2.66,4.38
Miles Bridges,PF/C,7100,CHA,39.12,20.96,5.51,2.95,3.62
Nic Claxton,C,7000,BKN,36.63,17.43,5.23,2.49,1.63
Nick Richards,C,5200,CHA,27.37,13.97,5.26,2.69,1.37
Kristaps Porzingis,PF/C,7800,BOS,36.38,18.01,4.66,2.31,-2.62
Jusuf Nurkic,C,7000,PHO,32.12,15.68,4.59,2.24,-2.88
Larry Nance,C,4400,NO,17.88,8.92,4.06,2.03,-4.12


In [26]:
# team_dfs = {team: df.loc[df['team'] == team] for team in df['team'].drop_duplicates()}
# Per-team frames keyed in alphabetical order, each ordered by value, best first.
{
    team: df.loc[df['team'] == team].sort_values('value', ascending=False)
    for team in sorted(df['team'].drop_duplicates())
}

{'ATL':                    pos  salary team   fpts  e_fpts  fpts/$  e_fpts/$  value
 name                                                                       
 Trae Young          PG    9700  ATL  51.62   25.43    5.32      2.62   3.12
 Dejounte Murray  PG/SG    7500  ATL  34.12   18.23    4.55      2.43  -3.38
 Jalen Johnson       PF    7100  ATL  30.37   16.42    4.28      2.31  -5.13,
 'BKN':                      pos  salary team   fpts  e_fpts  fpts/$  e_fpts/$  value
 name                                                                         
 Nic Claxton            C    7000  BKN  36.63   17.43    5.23      2.49   1.63
 Mikal Bridges      SG/SF    7700  BKN  38.88   19.97    5.05      2.59   0.38
 Cameron Johnson    SF/PF    5900  BKN  24.88   12.84    4.22      2.18  -4.62
 Spencer Dinwiddie  PG/SG    6100  BKN  25.62   12.68    4.20      2.08  -4.88,
 'BOS':                       pos  salary team   fpts  e_fpts  fpts/$  e_fpts/$  value
 name                                 