In [1]:
import os
import requests
import datetime

import numpy as np
import pandas as pd

from bs4 import BeautifulSoup

from typing import Callable

In [2]:
# DFS site used by the cells below; it is passed on as the `site` keyword.
# 'fanduel' selects the FanDuel pipeline; scrape_slate treats any other value as DraftKings.
site_: str = 'fanduel'

In [3]:
def pandas_settings() -> None:
    """Apply this notebook's pandas display options.

    The row, column and width display limits are each set to 250 and
    ``display.memory_usage`` is set to ``False``.
    """
    display_options = {
        'display.max_rows': 250,
        'display.max_columns': 250,
        'display.width': 250,
        'display.memory_usage': False,
    }
    for option_name, setting in display_options.items():
        pd.set_option(option_name, setting)


pandas_settings()

In [4]:
class Conversions:
    """Lookup tables that reconcile team and player naming between data sources.

    Attributes:
        inits_issues: alternative team abbreviations mapped to the abbreviation
            used as a key in ``inits_teams``.
        inits_teams: team abbreviation -> full team name.
        teams_inits: full team name -> team abbreviation (inverse of
            ``inits_teams`` plus accepted alternative spellings).
        name_issues: player name as listed on scoresandodds.com -> FanDuel name.
    """

    def __init__(self):

        self.inits_issues: dict[str, str] = {
            'SAS': 'SA',
            'PHX': 'PHO',
            'GSW': 'GS',
            'NOP': 'NO',
            'NYK': 'NY'
        }

        self.inits_teams: dict[str, str] = {
            'NY': 'New York Knicks',
            'LAL': 'Los Angeles Lakers',
            'MIA': 'Miami Heat',
            'UTA': 'Utah Jazz',
            'PHO': 'Phoenix Suns',
            'LAC': 'Los Angeles Clippers',
            'PHI': 'Philadelphia 76ers',
            'DAL': 'Dallas Mavericks',
            'DEN': 'Denver Nuggets',
            'BOS': 'Boston Celtics',
            'ATL': 'Atlanta Hawks',
            'CLE': 'Cleveland Cavaliers',
            'DET': 'Detroit Pistons',
            'TOR': 'Toronto Raptors',
            'CHA': 'Charlotte Hornets',
            'ORL': 'Orlando Magic',
            'MEM': 'Memphis Grizzlies',
            'SA': 'San Antonio Spurs',
            'MIL': 'Milwaukee Bucks',
            'IND': 'Indiana Pacers',
            'CHI': 'Chicago Bulls',
            'OKC': 'Oklahoma City Thunder',
            'GS': 'Golden State Warriors',
            'HOU': 'Houston Rockets',
            'BKN': 'Brooklyn Nets',
            'POR': 'Portland Trail Blazers',
            'NO': 'New Orleans Pelicans',
            'MIN': 'Minnesota Timberwolves',
            'SAC': 'Sacramento Kings',
            'WAS': 'Washington Wizards'
        }

        # Invert
        self.teams_inits: dict[str, str] = {
            full_name: inits for inits, full_name in self.inits_teams.items()
        }
        # The short "LA Clippers" spelling was previously toggled by hand in the
        # table above; accept it alongside the long form so either resolves.
        self.teams_inits.setdefault('LA Clippers', 'LAC')

        # scoresandodds.com: FanDuel name
        self.name_issues: dict[str, str] = {
            'Lu Dort': 'Luguentz Dort',
            'Moe Wagner': 'Moritz Wagner',
            'KJ Martin': 'Kenyon Martin',
            'Devonte Graham': "Devonte' Graham"
        }

    def team_name(self, team_str: str) -> str:
        """Return the abbreviation for the full team name ``team_str``.

        Raises:
            KeyError: if the name is not in ``teams_inits``.
        """
        return self.teams_inits[team_str]

    def team_initials(self, team_init_str: str) -> str:
        """Return the full team name for the abbreviation ``team_init_str``.

        Raises:
            KeyError: if the abbreviation is not in ``inits_teams``.
        """
        return self.inits_teams[team_init_str]

    def player_name(self, name: str) -> str:
        """Return the FanDuel spelling of ``name``, or ``name`` itself if no fix is listed."""
        return self.name_issues.get(name, name)

    def initials_issue(self, team_inits: str) -> str:
        """Return the abbreviation used in ``inits_teams`` for ``team_inits``.

        Abbreviations without an entry in ``inits_issues`` are returned unchanged.
        """
        return self.inits_issues.get(team_inits, team_inits)

    # Original (misspelled) name, kept so existing callers keep working.
    initals_issue = initials_issue

In [5]:
class PropsScraper:
    """Scrape player prop lines from scoresandodds.com and convert them to fantasy points.

    ``create_webpage_directory`` maps every listed player to the URL of their
    props page; ``scrape_player_props`` turns one such page into a pair of
    fantasy-point totals for the chosen DFS site.
    """

    # Prop categories that count toward a player's total on each site.
    # Steals, blocks are options but noisy, better to use season data for opponents
    SITE_TARGETS: dict[str, tuple[str, ...]] = {
        'fanduel': ('Points', 'Rebounds', 'Assists', 'Steals', 'Blocks'),
        'draftkings': ('Points', 'Rebounds', 'Assists', '3 Pointers'),
    }

    # Fantasy points per unit of each lower-cased category; categories that are
    # not listed for a site score 1.0 per unit.
    SITE_MULTIPLIERS: dict[str, dict[str, float]] = {
        'fanduel': {'assists': 1.5, 'rebounds': 1.2, 'blocks': 3.0, 'steals': 3.0, '3 pointers': 0.0},
        'draftkings': {'assists': 1.5, 'rebounds': 1.25, '3 pointers': 0.5, 'blocks': 2.0, 'steals': 2.0},
    }

    # Result returned whenever a page offers no usable props for today.
    NO_PROPS: tuple[float, float] = (0.0, 0.0)

    # Seconds to wait for the site before raising instead of hanging the kernel.
    REQUEST_TIMEOUT: float = 30.0

    def __init__(self):
        self.convert = Conversions()
        self.directory_url: str = 'https://www.scoresandodds.com/nba/players'

        # Captured once, at construction time, as zero-padded "MM/DD".
        self.current_date_str = datetime.datetime.now().strftime("%m/%d")

    def _fetch_soup(self, url: str) -> 'BeautifulSoup':
        """Download ``url`` and parse the response text with ``html.parser``."""
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        return BeautifulSoup(response.text, 'html.parser')

    def create_webpage_directory(self) -> dict[str, dict[str, str]]:
        """Build ``{team abbreviation: {player name: props URL}}`` from the players page.

        Each ``div.module`` on the page is read as one team: its ``h3`` text is
        converted to an abbreviation and every link in its first
        ``div.module-body`` list becomes a player entry. Player names are cut to
        their first two space-separated words, stripped of periods and passed
        through ``Conversions.player_name``.

        Raises:
            KeyError: if a module heading is not a known team name.
        """
        soup = self._fetch_soup(self.directory_url)

        def clean_name(raw: str) -> str:
            first_two_words = ' '.join(raw.split(' ')[:2]).replace('.', '')
            return self.convert.player_name(first_two_words)

        teams_players_links: dict[str, dict[str, str]] = {}
        for team_html in soup.find_all('div', class_='module'):
            team = self.convert.team_name(team_html.find('h3').get_text())
            player_list = team_html.find_all('div', class_='module-body')[0].find('ul')
            teams_players_links[team] = {
                # The href replaces the path part of the directory URL.
                clean_name(a_tag.get_text()): self.directory_url.replace(
                    '/nba/players',
                    a_tag['href']
                )
                for a_tag in player_list.find_all('a')
            }

        return teams_players_links

    # Implied Probability = 100 / (Odds + 100)
    @staticmethod
    def pos_ml_prob(ml: str) -> float:
        """Implied probability for plus-money odds such as ``'+150'``.

        Only the magnitude is used, so a missing ``+`` no longer drops the
        first digit of the number.
        """
        odds = abs(int(ml))
        return 100 / (odds + 100)

    # Implied Probability = (-1*(Odds)) / (-1(Odds) + 100)
    @staticmethod
    def neg_ml_prob(ml: str) -> float:
        """Implied probability for minus-money odds such as ``'-110'``."""
        stake = abs(int(ml))
        return stake / (stake + 100)

    @classmethod
    def implied_probability(cls, ml: str) -> float:
        """Convert an American moneyline string into an implied probability.

        Accepts signed odds (``'+150'``, ``'-110'``), unsigned positive odds
        (``'150'``) and the text ``'even'``/``'ev'`` (any case, surrounding
        whitespace ignored), which is priced at 0.5.

        Raises:
            ValueError: if ``ml`` is not one of the forms above.
        """
        text = str(ml).strip().lower()
        if text in ('even', 'ev'):
            return 0.5

        odds = int(text)
        # An explicit "+" or a positive number is plus money; everything else is minus money.
        if text.startswith('+') or odds > 0:
            return cls.pos_ml_prob(text)
        return cls.neg_ml_prob(text)

    @classmethod
    def expected_value(cls, val: float, ml: str) -> float:
        """Return ``val`` scaled by the implied probability of the moneyline ``ml``."""
        return cls.implied_probability(ml) * val

    def scrape_player_props(
        self,
        name: str,
        url: str,
        site: str
    ) -> tuple[float, float]:
        """Total a player's props for today into fantasy points.

        Args:
            name: player name; not used by the scrape itself.
            url: the player's props page.
            site: ``'fanduel'`` or ``'draftkings'``; selects categories and multipliers.

        Returns:
            ``(fpts, e_fpts)``. ``fpts`` sums ``multiplier * (line + 0.5)`` over
            the site's target categories; ``e_fpts`` sums the same terms scaled
            by the implied probability of the over. Each term is rounded to two
            decimals before summing. ``(0.0, 0.0)`` is returned when the page
            has no props module, no date, no props table, or a date other than
            ``current_date_str``.

        Raises:
            KeyError: if ``site`` is not a supported site.
            ValueError: if a target row's line or over odds cannot be parsed.
        """
        module = self._fetch_soup(url).find('div', class_="module-body scroll")
        if module is None:
            return self.NO_PROPS

        # Make sure current: the third span's second word holds the date as M/D.
        spans = module.find_all('span')
        if len(spans) < 3:
            return self.NO_PROPS
        try:
            raw_date = spans[2].get_text().split(' ')[1]
        except IndexError:
            return self.NO_PROPS

        date_str = '/'.join(part.zfill(2) for part in raw_date.split('/'))
        if date_str != self.current_date_str:
            return self.NO_PROPS

        table = module.find('table', class_='sticky')
        table_body = table.find('tbody') if table is not None else None
        if table_body is None:
            return self.NO_PROPS

        targets = self.SITE_TARGETS[site]
        multipliers: dict[str, float] = self.SITE_MULTIPLIERS[site]

        # Form: Category Line Over Under
        records: list[tuple[float, float]] = []
        for row in table_body.find_all('tr'):
            cells = [cell.get_text() for cell in row.find_all('td')]
            if not cells or cells[0] not in targets:
                continue

            stat = cells[0].lower()
            # NOTE(review): presumably line + 0.5 is the smallest total that
            # cashes the over on a half-point line - confirm.
            statval = float(cells[1]) + 0.5
            overml = cells[2].lower()

            fpts = multipliers.get(stat, 1.0) * statval
            records.append((fpts, self.expected_value(fpts, overml)))

        df: pd.DataFrame = pd.DataFrame(records, columns=['fpts', 'e_fpts']).round(2)

        return (float(df['fpts'].sum()), float(df['e_fpts'].sum()))

In [6]:
# Shared scraper instance and the team -> player -> props-URL directory used by the
# cells below. Building the directory requests the scoresandodds.com players page.
Props = PropsScraper()
directory: dict[str,dict[str,str]] = Props.create_webpage_directory()

In [7]:
# In case webpage goes down again
def save_directory():
    """Write the global ``directory`` to ``../data/url-directory.csv``.

    The file gets one row per player with the columns ``team``, ``name`` and
    ``url``; the DataFrame index is not written.
    """
    rows: list[tuple[str, str, str]] = [
        (team, player, link)
        for team, roster in directory.items()
        for player, link in roster.items()
    ]

    listing: pd.DataFrame = pd.DataFrame(rows, columns=['team', 'name', 'url'])
    listing.to_csv('../data/url-directory.csv', index=False)


# def load_directory():
#     df: pd.DataFrame = pd.read_csv('../data/url-directory.csv')
    
#     team_dfs: dict[str, pd.DataFrame] = {
#         team: (df
#                .loc[df['team']==team]
#                .set_index('name')
#                .drop(['team'], axis=1)
#                .T
#                .to_dict()
#               )
#         for team in df['team'].drop_duplicates()
#     }
    
#     for team in team_dfs:
        
    
    
        

In [8]:
# Only write the directory to disk when the scrape returned at least one team.
if directory:
    save_directory()

In [9]:
def test_(name: str, team: str, **kwargs):
    """Return ``(fpts, e_fpts)`` for one player, or ``(0.0, 0.0)`` if they are not listed.

    Args:
        name: player name as keyed in the global ``directory``.
        team: team abbreviation as keyed in the global ``directory``.
        **kwargs: ``site`` selects the DFS site (default ``'fanduel'``).

    Only the directory lookup is treated as "player not found". Errors raised
    by the scrape itself, such as the KeyError for an unsupported ``site``,
    propagate instead of being reported as a player with no props.
    """
    url = directory.get(team, {}).get(name)
    if url is None:
        return (0.0, 0.0)

    return Props.scrape_player_props(
        name,
        url,
        kwargs.get('site', 'fanduel')
    )

In [10]:
def check_site():
    """Report whether the scraped link directory has any entries.

    Returns:
        ``'No Issues'`` when the global ``directory`` is non-empty, otherwise a
        message saying the source page is down or empty.
    """
    # A plain truth test is used rather than `assert`, because assert
    # statements are removed when Python runs with -O.
    if not directory:
        return 'ScoresAndOdds.com is down, or at least the page containing links is empty...'

    return 'No Issues'
    

In [11]:
check_site()

'No Issues'

In [12]:
def scrape_fanduel():
    """Project the current FanDuel slate from scraped props and save it to CSV.

    Reads ``../data/current-fanduel.csv``, drops players whose injury indicator
    is ``'O'`` and the names in the ignore list, looks up each remaining
    player's prop totals with ``test_`` and writes the players with a positive
    ``fpts`` to ``../data/fanduel-props.csv``, sorted by ``value`` descending.
    """
    source_csv: str = '../data/current-fanduel.csv'

    renames: dict[str, str] = {
        'Nickname': 'name',
        'Position': 'pos',
        'Team': 'team',
        'Salary': 'salary',
        'Injury Indicator': 'injury',
    }

    skipped: tuple[str, ...] = ('Isaiah Roby', 'Frank Jackson')

    slate: pd.DataFrame = pd.read_csv(source_csv, usecols=renames).rename(columns=renames)
    slate = slate.loc[slate['injury'] != 'O'].drop('injury', axis=1)
    slate = slate.loc[~slate['name'].isin(skipped)]
    slate = slate.assign(name=slate['name'].str.replace('.', '', regex=False))

    # One (fpts, e_fpts) pair per remaining player, in row order.
    projections = [
        test_(player, team)
        for player, team in zip(slate['name'], slate['team'])
    ]
    slate['fpts'] = [pair[0] for pair in projections]
    slate['e_fpts'] = [pair[1] for pair in projections]

    # Fantasy points per $1,000 of salary.
    for metric in ('fpts', 'e_fpts'):
        slate[f'{metric}/$'] = 1000 * (slate[metric] / slate['salary'])

    # `value` is the projection minus 5 fantasy points per $1,000 of salary.
    five_x = 5 * (slate['salary'] / 1000)
    slate['value'] = slate['fpts'] - five_x

    ranked = (
        slate
        .loc[slate['fpts'] > 0.0]
        .sort_values('value', ascending=False)
        .set_index('name')
        .round(2)
    )

    ranked.to_csv('../data/fanduel-props.csv')

In [13]:
def scrape_draftkings():
    """Project the current DraftKings slate from scraped props and save it to CSV.

    Reads ``../data/current-draftkings.csv``, drops ``CPT`` rows and the names
    in the ignore list, normalises names, positions and team abbreviations,
    looks up each player's prop totals with DraftKings scoring and writes the
    players with a positive ``fpts`` to ``../data/draftkings-props.csv``,
    sorted by ``value`` descending.
    """

    path: str = '../data/current-draftkings.csv'

    columns: dict[str, str] = {
        'Name': 'name',
        'Roster Position': 'pos',
        'TeamAbbrev': 'team',
        'Salary': 'salary'
    }

    # Abbreviations in the DraftKings file -> the abbreviations that key `directory`.
    inits_issues = {
        'SAS': 'SA',
        'PHX': 'PHO',
        'GSW': 'GS',
        'NOP': 'NO',
        'NYK': 'NY'
    }

    ignores: tuple[str,...] = ('Isaiah Roby', 'Frank Jackson')

    df: pd.DataFrame = (pd
                        .read_csv(path, usecols=columns)
                        .rename(columns,axis=1)
                        # .pipe(lambda df_: df_.loc[df_['salary']>3000])
                        .pipe(lambda df_: df_.loc[df_['pos']!='CPT'])
                        .pipe(lambda df_: df_.loc[~df_['name'].isin(ignores)])
                        .assign(
                            name=lambda df_: df_.name.str.replace('.','', regex=False),
                            # Remove a trailing "/G/UTIL" or "/F/UTIL", turn "C/UTIL"
                            # into "C", then remove any remaining "/G" or "/F".
                            pos=lambda df_: df_.pos
                            .str.replace('/[GF]/UTIL','', regex=True)
                            .str.replace('C/UTIL','C',regex=False)
                            .str.replace('/[GF]', '', regex=True)
                        )
                       )

    name_issues: dict[str,str] = {
        'KJ Martin': 'Kenyon Martin',
        'Guillermo Hernangomez': 'Willy Hernangomez',
    }

    # Keep the first two words of each name, then apply the known name fixes.
    df['name'] = df['name'].map(lambda x: ' '.join(x.split(' ')[:2]))
    df['name'] = df['name'].map(lambda x: name_issues.get(x,x))

    df['team'] = df['team'].map(lambda x: inits_issues.get(x,x))

    df['input'] = tuple(zip(df['name'], df['team']))
    # The site must be passed explicitly: test_ defaults to 'fanduel', which
    # would score this slate with FanDuel categories and multipliers.
    df['output'] = df['input'].apply(lambda x: test_(*x, site='draftkings'))

    df['fpts'] = df['output'].map(lambda x: x[0])
    df['e_fpts'] = df['output'].map(lambda x: x[1])

    # Fantasy points per $1,000 of salary.
    for col in ('fpts', 'e_fpts'):
        df[f'{col}/$'] = 1000 * (df[col] / df['salary'])

    # `value` is the projection minus 5 fantasy points per $1,000 of salary.
    df['5x'] = 5 * (df['salary'] / 1000)
    df['value'] = df['fpts'] - df['5x']

    df = (df
          .loc[df['fpts']>0.0]
          .drop(['input', 'output', '5x'], axis=1)
          .sort_values('value', ascending=False)
          .set_index('name')
          .round(2)
         )

    df.to_csv('../data/draftkings-props.csv')

    return None

In [14]:
# Temporary, just figuring out dynamics for now

def scrape_slate(**kwargs):
    """Run the scraper for the requested site.

    ``site='fanduel'`` (the default) runs ``scrape_fanduel``; any other value
    runs ``scrape_draftkings``.
    """
    chosen_site: str = kwargs.get('site', 'fanduel')
    if chosen_site == 'fanduel':
        return scrape_fanduel()
    return scrape_draftkings()
    

In [15]:
def load_slate(**kwargs):
    """Load a saved props table, sorted descending and indexed by player name.

    Keyword Args:
        site: selects ``../data/{site}-props.csv`` (default ``'fanduel'``).
        sort: column to sort by, highest first (default ``'fpts'``).
    """
    site_name: str = kwargs.get('site', 'fanduel')
    sort_column = kwargs.get('sort', 'fpts')

    slate = pd.read_csv(f'../data/{site_name}-props.csv')
    ranked = slate.sort_values(by=sort_column, ascending=False)
    return ranked.set_index('name')

In [16]:
scrape_slate(site=site_)

In [17]:
load_slate(
    site=site_, 
    # sort='e_fpts/$'
    sort='value'
    # sort='fpts/$'
)

Unnamed: 0_level_0,pos,salary,team,fpts,e_fpts,fpts/$,e_fpts/$,value
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Andrew Nembhard,SG/PG,4300,IND,36.6,19.71,8.51,4.58,15.1
Buddy Hield,SG/SF,6200,IND,44.7,21.84,7.21,3.52,13.7
Isaiah Jackson,PF,4100,IND,32.4,17.83,7.9,4.35,11.9
Chris Duarte,SG/SF,4100,IND,30.8,16.25,7.51,3.96,10.3
Marvin Bagley,PF/C,5600,DET,37.8,19.11,6.75,3.41,9.8
Jordan Nwora,PF/SF,5200,IND,34.2,16.87,6.58,3.24,8.2
Nick Richards,C,5400,CHA,34.5,19.05,6.39,3.53,7.5
Tim Hardaway,SG/SF,5600,DAL,35.5,19.02,6.34,3.4,7.5
Lauri Markkanen,PF/SF,9000,UTA,52.5,26.17,5.83,2.91,7.5
Josh Okogie,SF/SG,4700,PHO,31.0,15.22,6.6,3.24,7.5


In [18]:
# Selected site's slate, highest raw projection first (load_slate reads the `site` keyword).
load_slate(site=site_, sort='fpts')

Unnamed: 0_level_0,pos,salary,team,fpts,e_fpts,fpts/$,e_fpts/$,value
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Jayson Tatum,PF/SF,10800,BOS,57.0,31.24,5.28,2.89,3.0
Shai Gilgeous-Alexander,SG/PG,10500,OKC,56.2,29.42,5.35,2.8,3.7
Devin Booker,SG,10400,PHO,55.7,28.28,5.36,2.72,3.7
Trae Young,PG,9600,ATL,53.6,26.77,5.58,2.79,5.6
Lauri Markkanen,PF/SF,9000,UTA,52.5,26.17,5.83,2.91,7.5
De'Aaron Fox,PG,9400,SAC,51.3,26.8,5.46,2.85,4.3
Stephen Curry,PG,10000,GS,51.2,26.08,5.12,2.61,1.2
Domantas Sabonis,C/PF,10100,SAC,51.1,28.63,5.06,2.83,0.6
Jimmy Butler,SF/PF,9200,MIA,49.4,24.53,5.37,2.67,3.4
Jaylen Brown,SF/SG,9100,BOS,49.1,25.2,5.4,2.77,3.6


In [19]:
def compare_players(*args, **kwargs):
    """Return the saved props rows for the named players, sorted descending.

    Args:
        *args: player names to keep (matched against the ``name`` index).

    Keyword Args:
        site: selects ``../data/{site}-props.csv`` (default ``'fanduel'``).
        sort: column to sort by, highest first (default ``'value'``).
    """
    site_name = kwargs.get("site", "fanduel")
    order_by = kwargs.get('sort', 'value')

    table = pd.read_csv(f'../data/{site_name}-props.csv').set_index('name')
    table = table.sort_values(order_by, ascending=False)
    return table.loc[table.index.isin(args)]

In [20]:
def positions_included(*args) -> tuple[str,...]:
    """Expand a position filter into the position labels it should match.

    With no argument, or ``None``, every known label is returned. A single
    position such as ``'PG'`` returns each known label that lists it. A label
    that already contains ``'/'`` is returned on its own, unchanged.
    """
    known_positions: tuple[str, ...] = (
        'PF', 'PG/SG', 'SG', 'PF/C', 'SF/SG', 'SG/PG', 'SG/SF',
        'C', 'PF/SF', 'SF/PF', 'PG', 'C/PF', 'SF',
    )

    requested = args[0] if args else None
    if requested is None:
        return known_positions

    if '/' in requested:
        return (requested,)

    return tuple(
        label for label in known_positions
        if requested in label.split('/')
    )

In [21]:
def payup_options(**kwargs):
    """Show the saved props for players inside a salary range and position filter.

    Keyword Args:
        site: selects ``../data/{site}-props.csv`` (default ``'fanduel'``).
        minsal: lowest salary to include (default: the lowest salary in the file).
        maxsal: highest salary to include (default: the highest salary in the file).
        pos: a single position or label string, expanded with
            ``positions_included``, or a collection of labels to match exactly
            (default: every label in the file).
        sort: forwarded to ``compare_players``.

    Returns:
        The matching rows as returned by ``compare_players``.
    """
    #Add df.query
    df: pd.DataFrame = (pd
                        .read_csv(f'../data/{kwargs.get("site", "fanduel")}-props.csv')
                        .set_index('name')
                       )

    min_salary = kwargs.get('minsal', df['salary'].min())
    max_salary = kwargs.get('maxsal', df['salary'].max())

    positions = kwargs.get(
        'pos',
        tuple(df['pos'].drop_duplicates())
    )

    if isinstance(positions, str):
        positions = positions_included(positions)

    # Inclusive on both ends. A stepped range() would skip any salary that is
    # not a multiple of 100 above min_salary and cannot take non-integer bounds.
    in_budget = df['salary'].between(min_salary, max_salary)

    #Add better selector for site
    names: tuple[str,...] = tuple(
        df.loc[in_budget & df['pos'].isin(positions)].index
    )

    return compare_players(*names, **kwargs)

In [22]:
payup_options(
    site=site_,
    minsal=9000,
    # maxsal=4800,
    sort='e_fpts/$',
    # sort='e_fpts',
    # pos='PF'
)

Unnamed: 0_level_0,pos,salary,team,fpts,e_fpts,fpts/$,e_fpts/$,value
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Lauri Markkanen,PF/SF,9000,UTA,52.5,26.17,5.83,2.91,7.5
Jayson Tatum,PF/SF,10800,BOS,57.0,31.24,5.28,2.89,3.0
De'Aaron Fox,PG,9400,SAC,51.3,26.8,5.46,2.85,4.3
Domantas Sabonis,C/PF,10100,SAC,51.1,28.63,5.06,2.83,0.6
Shai Gilgeous-Alexander,SG/PG,10500,OKC,56.2,29.42,5.35,2.8,3.7
Trae Young,PG,9600,ATL,53.6,26.77,5.58,2.79,5.6
Jaylen Brown,SF/SG,9100,BOS,49.1,25.2,5.4,2.77,3.6
Devin Booker,SG,10400,PHO,55.7,28.28,5.36,2.72,3.7
Jimmy Butler,SF/PF,9200,MIA,49.4,24.53,5.37,2.67,3.4
Stephen Curry,PG,10000,GS,51.2,26.08,5.12,2.61,1.2
