In [1]:
import os
import requests
import datetime

import numpy as np
import pandas as pd

from bs4 import BeautifulSoup

from typing import Callable

import customsettings
from propscraper import PropScraper
from params import mode, site

In [2]:
# Returns current date as string in desired format for files
def date_path() -> str:
    """Return today's file name in ``MMDDYY.csv`` form, based on the local clock."""
    stamp: str = datetime.datetime.now().strftime("%m%d%y")
    return f'{stamp}.csv'

In [3]:
def implied_probability(ml: str) -> float:
    """Convert American (moneyline) odds into an implied win probability.

    Positive odds (underdog):  100 / (odds + 100)
    Negative odds (favorite):  |odds| / (|odds| + 100)

    Args:
        ml: Moneyline odds such as ``'+150'``, ``'-110'`` or ``'100'``.
            Plain integers are accepted as well.

    Returns:
        The implied probability as a float in the open interval (0, 1).

    Raises:
        ValueError: if ``ml`` cannot be parsed as an integer or is zero.
    """
    # int() already understands an explicit leading '+' or '-' sign.
    odds: int = int(str(ml).strip())

    if odds == 0:
        raise ValueError('Moneyline odds cannot be zero.')

    if odds > 0:
        return 100 / (odds + 100)

    stake: int = -odds
    return stake / (stake + 100)

In [4]:
# In case webpage goes down again
def save_directory():
    """Flatten the module-level ``directory`` into rows and cache it as a CSV.

    Each (team, player name, url) triple found in ``directory`` becomes one row of
    ``../data/url-directory.csv``. Returns None.
    """
    rows = [
        (team, name, url)
        for team, player_links in directory.items()
        for name, url in player_links.items()
    ]

    flattened: pd.DataFrame = pd.DataFrame(rows, columns=['team', 'name', 'url'])
    flattened.to_csv('../data/url-directory.csv', index=False)

    return None


def load_directory(path: str = '../data/url-directory.csv'):
    """Rebuild the nested team -> {player name -> url} mapping from a cached CSV.

    Args:
        path: CSV with ``team``, ``name`` and ``url`` columns. Defaults to the file
            written by ``save_directory``.

    Returns:
        dict[str, dict[str, str]]: teams in order of first appearance, each mapping
        player names to URLs. If a name repeats within a team, the last row wins.
    """
    # Rows missing any of the three fields cannot form a usable entry, so skip them.
    df: pd.DataFrame = pd.read_csv(path).dropna(subset=['team', 'name', 'url'])

    directory: dict[str, dict[str, str]] = {}

    # Single pass over the rows instead of filtering the frame once per team.
    for team, name, url in zip(df['team'], df['name'], df['url']):
        directory.setdefault(team, {})[name] = url

    return directory

In [5]:
# Shared scraper instance; scrape_props() calls Props.scrape_player_props on it.
Props = PropScraper()
# Nested lookup of team -> {player name -> url}; iterated by save_directory()
# and indexed as directory[team][name] by scrape_props().
directory: dict[str,dict[str,str]] = Props.create_webpage_directory()

In [6]:
def scrape_props(name: str, team: str, site: str, **kwargs):
    """Look up a player's page in ``directory`` and scrape their props.

    Args:
        name: Player name, used as the inner key of ``directory``.
        team: Team abbreviation, used as the outer key of ``directory``.
        site: Site identifier forwarded to ``Props.scrape_player_props``.
        **kwargs: Forwarded unchanged to ``Props.scrape_player_props``.

    Returns:
        Whatever ``Props.scrape_player_props`` returns, or ``(0.0, 0.0)`` when a
        ``KeyError`` is raised (e.g. the team or player is not in ``directory``).
    """
    try:
        scraper = Props.scrape_player_props
        player_url = directory[team][name]
        result = scraper(name, player_url, site, **kwargs)
    except KeyError:
        result = (0.0, 0.0)

    return result

In [7]:
def check_site():
    """Report whether the scraped link ``directory`` contains any entries.

    Returns:
        str: ``'No Issues'`` when ``directory`` is non-empty, otherwise a message
        saying the source page appears to be down or empty.
    """
    # An explicit truthiness test replaces the previous assert: assertions are
    # removed when Python runs with -O, which would make this check always pass.
    if not directory:
        return 'ScoresAndOdds.com is down, or at least the page containing links is empty...'

    return 'No Issues'
    

In [8]:
# Show whether the freshly scraped link directory came back populated.
check_site()

'No Issues'

In [9]:
# An empty scrape falls back to the cached CSV; a populated one refreshes the cache.
if not len(directory):
    directory = load_directory()
else:
    save_directory()

In [10]:
def scrape_fanduel(**kwargs):
    """Build prop-based FanDuel projections from the current contest CSV and save them.

    Reads ``../data/current-fanduel.csv`` (``-sg`` suffixed when the module-level
    ``mode`` is ``'single-game'``), removes players whose injury indicator is
    ``'O'``, normalizes names and team abbreviations, looks every remaining player
    up through ``scrape_props`` and writes the players with a positive projection
    to ``../data/fanduel-props[-sg].csv``.

    Keyword Args:
        drop_minimums (bool): when true, players listed at ``MIN_SAL`` are removed
            before scraping. Defaults to False. Other keyword arguments are
            accepted and ignored.

    Returns:
        None. The projections are only written to disk.
    """
    single_game_mode: bool = mode == 'single-game'

    path: str = '../data/current-fanduel.csv'
    if single_game_mode:
        path = path.replace('.csv', '-sg.csv')

    # Contest-file column -> internal column name.
    columns: dict[str, str] = {
        'Nickname': 'name',
        'Position': 'pos',
        'Team': 'team',
        'Salary': 'salary',
        'Injury Indicator': 'injury',
    }

    # Contest-file team abbreviation -> abbreviation used as a key of `directory`.
    inits_issues = {
        'JAC': 'JAX'
    }

    # scoresandodds : FanDuel
    name_issues = {
        'Gabriel Davis': 'Gabe Davis',
    }

    MIN_SAL: int = 200
    keep_minimums: tuple[str, ...] = tuple()  # raw names exempt from the minimum-salary drop

    raw: pd.DataFrame = (pd
                         .read_csv(path, usecols=list(columns))
                         .rename(columns, axis=1)
                        )

    # Only filter when explicitly requested, and reuse the frame already in memory
    # instead of reading the CSV a second time. Names are matched before dots are
    # stripped so the exemption list can use the spelling found in the file.
    if kwargs.get('drop_minimums', False):
        minimum_names = set(raw.loc[raw['salary'] == MIN_SAL, 'name']) - set(keep_minimums)
        raw = raw.loc[~raw['name'].isin(minimum_names)]

    df: pd.DataFrame = (raw
                        .loc[raw['injury'] != 'O']
                        .drop('injury', axis=1)
                        .assign(
                            name=lambda df_: (df_['name']
                                              .str.replace('.', '', regex=False)
                                              .map(lambda name: name_issues.get(name, name))),
                            team=lambda df_: df_['team'].map(lambda team: inits_issues.get(team, team)),
                        )
                       )

    # One lookup per player; scrape_props yields a pair, (0.0, 0.0) when the
    # player cannot be found in `directory`.
    projections = [
        scrape_props(name, team, 'fanduel')
        for name, team in zip(df['name'], df['team'])
    ]

    df = df.assign(
        fpts=pd.Series([pair[0] for pair in projections], index=df.index, dtype=float),
        e_fpts=pd.Series([pair[1] for pair in projections], index=df.index, dtype=float),
    )

    # 'value' is the projection minus 5 points per $1000 of salary.
    # 'fpts-1k' duplicates 'fpts/$' and is kept so the output schema is unchanged.
    df = df.assign(**{
        'fpts/$': 1000 * (df['fpts'] / df['salary']),
        'e_fpts/$': 1000 * (df['e_fpts'] / df['salary']),
        'value': df['fpts'] - 5 * (df['salary'] / 1000),
        'fpts-1k': 1000 * df['fpts'] / df['salary'],
    })

    df = (df
          .loc[df['fpts'] > 0.0]
          .sort_values('value', ascending=False)
          .set_index('name')
          .round(2)
         )

    # NOTE(review): a two-team slate is written to the "-sg" file even when `mode`
    # is not 'single-game', whereas load_slate picks its file from `mode` alone.
    single_game: bool = single_game_mode or len(df['team'].drop_duplicates()) == 2

    df.to_csv(f'../data/fanduel-props{"-sg" if single_game else ""}.csv')

    return None

In [11]:
def scrape_draftkings(**kwargs):
    """Build prop-based DraftKings projections from the current contest CSV and save them.

    Reads ``../data/current-draftkings.csv`` (``-sg`` suffixed, and using the
    ``'Roster Position'`` column, when the module-level ``mode`` is
    ``'single-game'``), removes captain rows and minimum-salary players,
    normalizes names and team abbreviations, looks every remaining player up
    through ``scrape_props`` and writes the players with a positive projection to
    ``../data/draftkings-props[-sg].csv``. Single-game output additionally carries
    captain columns at 1.5x points and salary.

    Keyword Args:
        drop_minimums (bool): when true (the default) players listed at a salary of
            200 are removed; when false the comparison salary is 0 instead.
            Other keyword arguments are accepted and ignored.

    Returns:
        None. The projections are only written to disk.
    """
    single_game_mode: bool = mode == 'single-game'

    path: str = '../data/current-draftkings.csv'
    dk_pos_column: str = 'Position'
    if single_game_mode:
        path = path.replace('.csv', '-sg.csv')
        dk_pos_column = 'Roster Position'

    # Contest-file column -> internal column name.
    columns: dict[str, str] = {
        'Name': 'name',
        dk_pos_column: 'pos',
        'TeamAbbrev': 'team',
        'Salary': 'salary'
    }

    # Contest-file team abbreviation -> abbreviation used as a key of `directory`.
    inits_issues = {
        'JAC': 'JAX'
    }

    name_issues: dict[str,str] = {
        'Gabriel Davis': 'Gabe Davis',
        'Chigoziem Okonkwo': 'Chig Okonkwo',
        'Chig Okonkwo': 'Chigoziem Okonkwo',
        'Devon Achane': 'DeVon Achane',
        "De'Von Achane": 'Devon Achane',
        'Josh Palmer': 'Joshua Palmer',
        'Joshua Palmer': 'Josh Palmer',
    }

    def fix_name(name: str) -> str:
        # Keep only the first two space-separated tokens of a name.
        return ' '.join(name.split(' ')[:2])

    MIN_SAL: int = 200 if kwargs.get('drop_minimums', True) else 0
    keep_minimums: tuple[str, ...] = tuple()  # raw names exempt from the minimum-salary drop

    raw: pd.DataFrame = (pd
                         .read_csv(path, usecols=list(columns))
                         .rename(columns, axis=1)
                        )

    # Collect AND apply the minimum-salary names on the raw spelling. Previously
    # the names were compared after dots had been stripped, so a name containing
    # a dot (e.g. "A.J. Brown") never matched and was never dropped.
    minimum_names = set(raw.loc[raw['salary'] == MIN_SAL, 'name']) - set(keep_minimums)

    df: pd.DataFrame = (raw
                        .loc[~raw['name'].isin(minimum_names)]
                        .pipe(lambda df_: df_.loc[df_['pos'] != 'CPT'])  # For single game contests
                        .assign(
                            name=lambda df_: (df_['name']
                                              .str.replace('.', '', regex=False)
                                              .map(lambda x: name_issues.get(x, fix_name(x)))),
                            team=lambda df_: df_['team'].map(lambda x: inits_issues.get(x, x)),
                        )
                       )

    # One lookup per player; scrape_props yields a pair, (0.0, 0.0) when the
    # player cannot be found in `directory`.
    projections = [
        scrape_props(name, team, 'draftkings')
        for name, team in zip(df['name'], df['team'])
    ]

    df = df.assign(
        fpts=pd.Series([pair[0] for pair in projections], index=df.index, dtype=float),
        e_fpts=pd.Series([pair[1] for pair in projections], index=df.index, dtype=float),
    )

    # 'value' is the projection minus 3 points per $1000 of salary.
    # 'fpts-1k' duplicates 'fpts/$' and is kept so the output schema is unchanged.
    df = df.assign(**{
        'fpts/$': 1000 * (df['fpts'] / df['salary']),
        'e_fpts/$': 1000 * (df['e_fpts'] / df['salary']),
        'value': df['fpts'] - 3 * (df['salary'] / 1000),
        'fpts-1k': 1000 * df['fpts'] / df['salary'],
    })

    df = (df
          .loc[df['fpts'] > 0.0]
          .sort_values('value', ascending=False)
          .set_index('name')
          .round(2)
         )

    # NOTE(review): a two-team slate is written to the "-sg" file even when `mode`
    # is not 'single-game', whereas load_slate picks its file from `mode` alone.
    single_game: bool = single_game_mode or len(df['team'].drop_duplicates()) == 2

    if single_game:
        # Captain columns are derived from the already-rounded flex numbers.
        df = (df
              .assign(
                  cpt_pts=lambda df_: df_['fpts'] * 1.5,
                  cpt_sal=lambda df_: df_['salary'] * 1.5,
                  cpt_fpts_1k=lambda df_: 1000 * df_['cpt_pts'] / df_['cpt_sal'],
              )
              .assign(
                  cpt_sal=lambda df_: df_['cpt_sal'].astype('int')
              )
              .round(2)
             )

    df.to_csv(f'../data/draftkings-props{"-sg" if single_game else ""}.csv')

    return None

In [12]:
# Temporary, just figuring out dynamics for now

def ScrapeProps(**kwargs):
    """Dispatch to the scraper for the requested site.

    ``site='fanduel'`` runs ``scrape_fanduel``; any other value, or no ``site`` at
    all, runs ``scrape_draftkings``. All keyword arguments are forwarded as given.
    """
    if kwargs.get('site', 'draftkings') == 'fanduel':
        return scrape_fanduel(**kwargs)

    return scrape_draftkings(**kwargs)
    

In [13]:
def player_pool_distribution(df):
    """Count players per team and print how many slate teams are represented.

    Args:
        df: Frame with a ``team`` column, one row per player.

    Returns:
        pd.DataFrame indexed by team with a single ``num-players`` column, sorted
        from most to fewest players. The number of teams on the slate is read from
        the DraftKings contest CSV (``-sg`` suffixed in single-game mode).
    """
    per_team = (df
                .groupby('team')['team']
                .count()
                .to_frame('num-players')
                .sort_values('num-players', ascending=False)
               )

    suffix: str = "-sg" if mode == "single-game" else ""
    slate_teams = pd.read_csv(f'../data/current-draftkings{suffix}.csv', usecols=['TeamAbbrev'])['TeamAbbrev']
    total_teams: int = len(slate_teams.drop_duplicates())

    print(f'{len(per_team)} teams total...')
    print(f'Missing: {int(100*(1 - (len(per_team) / total_teams)))}% of teams...\n')

    return per_team

In [14]:
def output_box(msg: str, *args, **kwargs) ->  None:
    """Print ``msg`` indented by three spaces between two dashed rules of its length."""
    rule: str = '   ' + '-' * len(msg)
    print('\n'.join((rule, f'   {msg}', rule)))
    return


def load_slate(site: str, **kwargs):
    """Load the saved props file for ``site`` as a name-indexed frame.

    Keyword Args:
        drop: player names to remove (default: none).
        exclude: team abbreviations to remove (default: none).
        sort: column to sort by, descending (default ``'fpts'``).
        verbose: when truthy (default 1) print a player count banner and the
            per-team distribution.

    Returns:
        pd.DataFrame indexed by ``name``.
    """
    suffix: str = "-sg" if mode == "single-game" else ""
    dropped_names = kwargs.get('drop', list())
    excluded_teams = kwargs.get('exclude', list())

    slate: pd.DataFrame = pd.read_csv(f'../data/{site}-props{suffix}.csv')
    slate = slate.loc[~slate['name'].isin(dropped_names)]
    slate = slate.loc[~slate['team'].isin(excluded_teams)]
    slate = (slate
             .sort_values(by=kwargs.get('sort', 'fpts'), ascending=False)
             .set_index('name')
            )

    if kwargs.get('verbose', 1):
        output_box(f'{len(slate)} total players'.upper())
        print(player_pool_distribution(slate))

    return slate

def team_players(*args, **kwargs):
    """Group the non-negative-value players of the saved slate by team.

    The site is taken from the ``site`` keyword, else the first positional
    argument, else the module-level ``site``. (The body previously referenced an
    undefined ``site_`` name and raised NameError on every call.)

    Returns:
        dict mapping each team to a tuple of player names whose ``value`` is >= 0.
    """
    site_: str = kwargs.get('site', args[0] if args else site)

    df: pd.DataFrame = load_slate(site=site_)

    return {
        team: tuple(df
                    .loc[(df['team'] == team) & (df['value'] >= 0.0)]
                    .index
                   )
        for team in df['team'].drop_duplicates()
    }


def pos_value_players(site: str, *args, **kwargs) -> tuple[str,...]:
    """Return the names of players at or above a value threshold, best value first.

    Args:
        site: Site whose saved props file is loaded through ``load_slate``.

    Keyword Args:
        value: minimum ``value`` a player must have (default 0.0).
        Remaining keyword arguments are forwarded to ``load_slate``. The slate is
        always loaded quietly, so a caller-supplied ``verbose`` is ignored instead
        of raising a duplicate-keyword TypeError.

    Returns:
        tuple[str, ...]: qualifying player names sorted by descending ``value``.
    """
    threshold = kwargs.get('value', 0.0)
    slate_kwargs = {key: val for key, val in kwargs.items() if key != 'verbose'}

    slate: pd.DataFrame = load_slate(site=site, verbose=0, **slate_kwargs)

    ret: tuple[str,...] = tuple(slate
                                .loc[slate['value'] >= threshold]
                                .sort_values('value', ascending=False)
                                .index
                               )

    msg = f'{len(ret)} total players'.upper()
    output_box(msg)
    return ret
# team_players()

In [15]:
import time
def output_times(func, **kwargs) -> None:
    """Call ``func(**kwargs)`` and print how long it took in ``Xm Ys`` format.

    Args:
        func: Callable to time. Its return value is discarded.
        **kwargs: Forwarded to ``func``. A ``site`` entry, when present, is also
            used for the printed label; otherwise the module-level ``site`` is used.

    Returns:
        None.
    """
    start = time.perf_counter()
    func(**kwargs)
    elapsed_seconds: float = time.perf_counter() - start

    # Integer arithmetic instead of splitting str(elapsed) on '.': very short runs
    # print in scientific notation (e.g. '1.5e-08'), which was read as "1m".
    minutes, seconds = divmod(int(elapsed_seconds), 60)

    performance_time: str = f'{minutes}m {seconds}s.'

    site_label: str = (str(kwargs.get('site', site))
                       .capitalize()
                       .replace('duel', 'Duel')
                       .replace('kings', 'Kings')
                      )
    func_name: str = getattr(func, '__name__', repr(func))

    print(f"{func_name} performance time for {site_label}: {performance_time}\n")

    return None

In [33]:
# ScrapeProps(site=site)
# Snapshot the previously saved props file BEFORE re-scraping overwrites it, so a
# later cell can compare the old and new player pools.
last_update = pd.read_csv(f'../data/{site}-props{"-sg" if mode == "single-game" else ""}.csv').set_index('name')
# Re-scrape and print the elapsed time.
# NOTE(review): neither scrape_fanduel nor scrape_draftkings reads `mute_touchdowns`
# or forwards it to scrape_props, so it has no effect here — confirm intent.
output_times(ScrapeProps, site=site, drop_minimums=False, mute_touchdowns=False)

ScrapeProps performance time for DraftKings: 0m 10s.



In [34]:
# Reload the saved slate ranked by 'e_fpts/$' and discard the 'fpts-1k' column.
# Optional load_slate filters: drop=[player names], exclude=[team abbreviations].
df = load_slate(site, sort='e_fpts/$').drop(columns=['fpts-1k'])

   ----------------
   16 TOTAL PLAYERS
   ----------------
2 teams total...
Missing: 0% of teams...

      num-players
team             
PHI             8
TB              8


In [35]:
# Names present in the fresh slate but absent from the pre-scrape snapshot.
updated_players = list(set(df.index).difference(set(last_update.index)))
if updated_players:
    output = ['The following players have been added:']
    output.extend(f'    > {name_}' for name_ in updated_players)
else:
    output = ['No players updated since last scrape.']
print(*output, sep='\n')

No players updated since last scrape.


In [36]:
# Display the slate ordered by projected points, highest first; the commented
# lines below are alternative orderings.
df.sort_values('fpts', ascending=False)
# df.sort_values('fpts/$', ascending=False)
# df.sort_values('salary')

Unnamed: 0_level_0,pos,salary,team,fpts,e_fpts,fpts/$,e_fpts/$,value,cpt_pts,cpt_sal,cpt_fpts_1k
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Jalen Hurts,FLEX,11600,PHI,18.38,8.91,1.58,0.77,-16.42,27.57,17400,1.58
Baker Mayfield,FLEX,9800,TB,15.86,8.04,1.62,0.82,-13.54,23.79,14700,1.62
DeVonta Smith,FLEX,8600,PHI,12.45,6.42,1.45,0.75,-13.35,18.67,12900,1.45
Rachaad White,FLEX,10200,TB,11.8,6.21,1.16,0.61,-18.8,17.7,15300,1.16
Mike Evans,FLEX,10600,TB,11.35,6.33,1.07,0.6,-20.45,17.02,15900,1.07
Dallas Goedert,FLEX,6400,PHI,10.55,5.11,1.65,0.8,-8.65,15.82,9600,1.65
Chris Godwin,FLEX,7800,TB,10.15,5.77,1.3,0.74,-13.25,15.23,11700,1.3
D'Andre Swift,FLEX,7200,PHI,8.7,4.75,1.21,0.66,-12.9,13.05,10800,1.21
Cade Otton,FLEX,5400,TB,4.95,2.69,0.92,0.5,-11.25,7.43,8100,0.92
Trey Palmer,FLEX,2800,TB,4.75,2.34,1.7,0.84,-3.65,7.12,4200,1.7


In [37]:
def convert_to_dict(df: pd.DataFrame) -> dict[str, float]:
    """Map each index label of ``df`` to its ``fpts`` value.

    Uses ``Series.to_dict`` so values are plain Python floats, matching the return
    annotation. If an index label repeats, the last row's value is kept; a
    per-label ``df.loc`` lookup would return a whole Series in that case.
    """
    return df['fpts'].to_dict()
# Preview the name -> fpts mapping for the slate loaded above.
convert_to_dict(df)

{'Trey Palmer': 4.75,
 'Baker Mayfield': 15.86,
 'Dallas Goedert': 10.55,
 'Jalen Hurts': 18.38,
 'DeVonta Smith': 12.45,
 'Chris Godwin': 10.15,
 'Olamide Zaccheaus': 2.85,
 "D'Andre Swift": 8.7,
 'Quez Watkins': 3.35,
 'Rachaad White': 11.8,
 'Mike Evans': 11.35,
 'David Moore': 0.85,
 'Julio Jones': 3.55,
 'Kenneth Gainwell': 4.0,
 'Cade Otton': 4.95,
 'Chase Edmonds': 1.4}

In [21]:
# Split the slate into one sub-frame per team, keyed by the value in the 'team' column.
# NOTE(review): this cell's execution count is lower than that of the cell which
# builds `df`, so the saved outputs may predate the latest scrape — re-run in order.
team_dfs = {team: df.loc[df['team'] == team] for team in df['team'].drop_duplicates()}

In [22]:
# Show the rows whose team is 'PHI'; raises KeyError if 'PHI' is not on the slate.
team_dfs['PHI']

Unnamed: 0_level_0,pos,salary,team,fpts,e_fpts,fpts/$,e_fpts/$,value,cpt_pts,cpt_sal,cpt_fpts_1k
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Dallas Goedert,FLEX,6400,PHI,10.55,5.11,1.65,0.8,-8.65,15.82,9600,1.65
Jalen Hurts,FLEX,11600,PHI,18.38,8.91,1.58,0.77,-16.42,27.57,17400,1.58
DeVonta Smith,FLEX,8600,PHI,12.45,6.42,1.45,0.75,-13.35,18.67,12900,1.45
Olamide Zaccheaus,FLEX,2000,PHI,2.85,1.33,1.42,0.66,-3.15,4.28,3000,1.42
D'Andre Swift,FLEX,7200,PHI,8.7,4.75,1.21,0.66,-12.9,13.05,10800,1.21
Quez Watkins,FLEX,3000,PHI,3.35,1.9,1.12,0.63,-5.65,5.03,4500,1.12
Julio Jones,FLEX,3800,PHI,3.45,1.97,0.91,0.52,-7.95,5.18,5700,0.91
Kenneth Gainwell,FLEX,4200,PHI,4.0,2.11,0.95,0.5,-8.6,6.0,6300,0.95


In [23]:
# Show the rows whose team is 'TB'; raises KeyError if 'TB' is not on the slate.
team_dfs['TB']

Unnamed: 0_level_0,pos,salary,team,fpts,e_fpts,fpts/$,e_fpts/$,value,cpt_pts,cpt_sal,cpt_fpts_1k
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Trey Palmer,FLEX,2800,TB,4.65,2.29,1.66,0.82,-3.75,6.98,4200,1.66
Baker Mayfield,FLEX,9800,TB,15.86,8.05,1.62,0.82,-13.54,23.79,14700,1.62
Chris Godwin,FLEX,7800,TB,10.15,5.77,1.3,0.74,-13.25,15.23,11700,1.3
Rachaad White,FLEX,10200,TB,11.8,6.21,1.16,0.61,-18.8,17.7,15300,1.16
Mike Evans,FLEX,10600,TB,11.35,6.33,1.07,0.6,-20.45,17.02,15900,1.07
David Moore,FLEX,800,TB,0.85,0.47,1.06,0.59,-1.55,1.27,1200,1.06
Cade Otton,FLEX,5400,TB,4.95,2.69,0.92,0.5,-11.25,7.43,8100,0.92
Chase Edmonds,FLEX,4400,TB,1.4,0.74,0.32,0.17,-11.8,2.1,6600,0.32


In [24]:
# def flatten(nestedSeq) -> list[list[str,...], ...]:
#     return [element for inner in nestedSeq for element in inner]

# def get_top_names(df: pd.DataFrame, n=2, by='value') -> pd.DataFrame:
#     """
#     Returns only the top n players from each team by provided parameter, defaults to value
#     """
#     df = df.sort_values(by, ascending=False)
#     top_names = {team: list(df.loc[df['team'] == team].index)[:n] for team in df['team'].drop_duplicates()}
    
#     return df.loc[df.index.isin(flatten(list(top_names.values())))]

In [25]:
# pos_dfs = dict()
# for pos in ('QB', 'RB', 'WR', 'TE', 'FLEX'): 
#     df[pos] = df['pos'].map(lambda pos_: pos_ in ['RB', 'WR', 'TE']) if pos == 'FLEX' else df['pos'].map(lambda pos_: int(pos in pos_))
#     pos_dfs[pos] = df.loc[df[pos] == 1].drop(pos, axis=1).sort_values('fpts', ascending=False)
#     df = df.drop(pos, axis=1)

In [26]:
# pos_dfs['WR']

In [27]:
# pos_dfs['RB']