### Player Outcomes

- The goal of this notebook is to take in the current contest data and produce the results for the following:
    - `proj-fpts` : median projection fantasy points (FP)
    - `ceiling-fpts`: 75% outcome of FP
    - `floor-fpts`: 25% outcome of FP
    
    - `boom-prob`: likelihood of player achieving *at least* 5x+10 FP (where x is the player's salary in thousands of dollars)
    - `bust-prob`: likelihood of player achieving *at most* 5x FP
    - `neutral-prob`: likelihood of player achieving between (5x, 5x+10) FP

- NOTES: these projections do not go into depth on specific lineups; the results therefore describe each player's outcomes over the whole season rather than outcomes for a specific lineup.
- This may cause issues for players who are missing high FP scoring players for current contest, players who have past outcomes distorted by long durations of team missing high FP scoring players, and for players who have been traded.

In [1]:
from typing import Callable

import numpy as np
import pandas as pd

In [2]:
def pandas_settings() -> None:
    """Widen the pandas display limits used when rendering frames in this notebook.

    Sets ``display.max_rows``, ``display.max_columns`` and ``display.width``
    to 250 and switches ``display.memory_usage`` off.
    """
    display_limit = 250
    for key in ('max_rows', 'max_columns', 'width'):
        pd.set_option(f'display.{key}', display_limit)

    # The memory-usage line is the only option that takes a flag instead of a size.
    pd.set_option('display.memory_usage', False)


pandas_settings()

In [3]:
# Season-log columns used throughout the notebook: identifiers first,
# then fantasy/box-score stats.
# (`list` takes a single type parameter; the `X, ...` form only exists for `tuple`.)
stats_: list[str] = [
    'name',
    'date',
    'team',
    'opp',
    'fpts',
    'mp',
    'fppm',
    'usg',
    'pts',
    'ast',
    'trb',
    'stl',
    'blk',
    'tov',
    'starter',
    'ast_perc',
    '3p',
]

In [5]:

def percentile(n: float) -> Callable[..., float]:
    """Build an aggregation function that returns the n-th percentile of an array.

    Intended to be passed to ``.agg`` on a groupby; ``percentile(50)`` gives the
    same result as ``np.median``.

    Args:
        n: percentile to compute, between 0 and 100 inclusive.

    Returns:
        A function ``f(arr)`` returning ``np.percentile(arr, n)``. Its
        ``__name__`` is ``'percentile_<n>'``, so each percentile gets a
        distinct label when several are aggregated together.

    Raises:
        ValueError: if ``n`` is outside ``[0, 100]``.
    """
    # Fail at construction time instead of deep inside a groupby aggregation.
    if not 0 <= n <= 100:
        raise ValueError(f'percentile must be between 0 and 100, got {n!r}')

    def percentile_(arr):
        return np.percentile(arr, n)

    percentile_.__name__ = f'percentile_{n}'
    return percentile_

In [6]:
class Outcomes:
    """Per-player outcome summaries for the current contest slate.

    Construction reads the season game log and the current slate CSV. The
    instance then exposes percentile projections (``add_projections``),
    boom/bust probabilities (``add_probabilities``) and a combined table
    (``load``).

    Keyword Args:
        mode: ``'fanduel'`` (default) or ``'draftkings'``. Selects the slate
            file ``current-{mode}.csv`` and the column names to read from it.
        stats: extra season-log columns to keep in addition to ``BASE_STATS``.
        data_dir: directory that holds the CSV files (default ``'../data'``).

    Attributes:
        szn: season game log restricted to games with ``mp >= 8.0``.
        current: slate players (injured-out players removed) indexed by name.
        players: tuple of the names in ``current``.
    """

    # Season-log columns that are always loaded.
    BASE_STATS: tuple[str, ...] = (
        'name',
        'date',
        'team',
        'opp',
        'fpts',
        'mp',
        'fppm',
        'usg',
        'pts',
        'ast',
        'trb',
        'stl',
        'blk',
        'tov',
        'starter',
        'ast_perc',
        '3p',
    )

    def __init__(self, **kwargs):
        self.mode: str = kwargs.get('mode', 'fanduel')
        data_dir: str = kwargs.get('data_dir', '../data')

        extra_stats = [
            stat for stat in kwargs.get('stats', [])
            if stat not in self.BASE_STATS
        ]
        stats = [*self.BASE_STATS, *extra_stats]

        self.szn: pd.DataFrame = (pd
                                  .read_csv(f'{data_dir}/season-data-clean.csv')
                                  .pipe(lambda df_: df_.loc[df_['mp'] >= 8.0, stats])
                                 )

        fd_columns: dict[str, str] = {
            'Nickname': 'name',
            'Position': 'pos',
            'Team': 'team',
            'Salary': 'salary',
        }

        dk_columns: dict[str, str] = {
            'Name': 'name',
            'Roster Position': 'pos',
            'TeamAbbrev': 'team',
            'Salary': 'salary',
        }

        columns: dict[str, str] = fd_columns if self.mode == 'fanduel' else dk_columns

        # Only fanduel gives injuries, so the FanDuel slate is read in every mode.
        injured_players: tuple[str, ...] = tuple(
            pd
            .read_csv(f'{data_dir}/current-fanduel.csv', usecols=['Nickname', 'Injury Indicator'])
            .pipe(lambda df_: df_.loc[df_['Injury Indicator'] == 'O', 'Nickname'])
        )

        # Injured players are removed using the raw names; periods are stripped afterwards.
        self.current: pd.DataFrame = (pd
                                      .read_csv(f'{data_dir}/current-{self.mode}.csv', usecols=list(columns))
                                      .rename(columns=columns)
                                      .pipe(lambda df_: df_.loc[~df_['name'].isin(injured_players)])
                                      .assign(name=lambda df_: df_['name'].str.replace('.', '', regex=False))
                                      .sort_values('name')
                                      .set_index('name')
                                     )

        if self.mode == 'draftkings':

            name_issues: dict[str, str] = {
                'KJ Martin': 'Kenyon Martin',
                'Guillermo Hernangomez': 'Willy Hernangomez',
            }

            def fix_name(raw: str) -> str:
                # Keep the first two name tokens, then apply the manual overrides.
                short = ' '.join(raw.split(' ')[:2])
                return name_issues.get(short, short)

            self.current = (self.current
                            .set_axis(self.current.index.map(fix_name), axis=0)
                            .assign(
                                pos=lambda df_: df_['pos']
                                .str.replace('/[GF]/UTIL', '', regex=True)
                                .str.replace('C/UTIL', 'C', regex=False)
                                .str.replace('/[GF]', '', regex=True)
                            )
                           )

            # Re-score every season game; `assign` avoids writing into a filtered slice.
            self.szn = self.szn.assign(fpts=self._draftkings_fpts(self.szn))

        self.players: tuple[str, ...] = tuple(self.current.index)

    @staticmethod
    def _draftkings_fpts(box: pd.DataFrame) -> pd.Series:
        """Score each box-score row with the weights used in draftkings mode.

        Args:
            box: frame with ``pts``, ``ast``, ``trb``, ``stl``, ``blk``,
                ``tov`` and ``3p`` columns.

        Returns:
            Series of fantasy points aligned with ``box``.
        """
        base = (
            1.0 * box['pts']
            + 1.5 * box['ast']
            + 1.25 * box['trb']
            + 2 * box['stl']
            + 2 * box['blk']
            + -0.5 * box['tov']
            + 0.5 * box['3p']
        )

        scored_10 = box['pts'] >= 10.0
        ast_10 = box['ast'] >= 10.0
        trb_10 = box['trb'] >= 10.0

        # NOTE(review): the bonus is only granted when points reach 10, and
        # steals/blocks never count towards it - confirm this matches the
        # site's double-double / triple-double rules.
        bonus = np.select(
            [scored_10 & ast_10 & trb_10, scored_10 & (ast_10 | trb_10)],
            [3.0, 1.5],
            default=0.0,
        )
        return base + bonus

    def add_projections(self, **kwargs) -> pd.DataFrame:
        """Summarise each current player's season distribution of one stat.

        Keyword Args:
            stat: season-log column to summarise (default ``'fpts'``).

        Returns:
            Frame indexed by player name with columns ``floor-{stat}`` (25th
            percentile), ``median-{stat}``, ``ceiling-{stat}`` (75th
            percentile), ``games`` (count) and ``std-{stat}``.
        """
        stat: str = kwargs.get('stat', 'fpts')

        return (self.szn
                .groupby('name')
                [stat]
                .agg([
                    percentile(25),
                    percentile(50),  # same as np.median
                    percentile(75),
                    'count',
                    # The string pins pandas' own std; passing np.std relied on
                    # pandas silently swapping the callable for its own method.
                    'std',
                ])
                .set_axis([
                    f'floor-{stat}',
                    f'median-{stat}',
                    f'ceiling-{stat}',
                    'games',
                    f'std-{stat}',
                ], axis=1)
                # Filtering after the aggregation is faster than slicing szn first.
                .pipe(lambda df_: df_.loc[df_.index.isin(self.players)])
               )

    def add_probabilities(self) -> pd.DataFrame:
        """Compute boom / bust / neutral percentages from season fantasy points.

        With ``x = salary / 1000``, a game is a *boom* when ``fpts >= 5x + 10``
        and a *bust* when ``fpts < 5x``; ``fivex`` is the remaining share.

        Salaries come from ``self.current`` so that they are keyed by the same
        normalised names as the season log (periods stripped, DraftKings name
        overrides applied).

        Returns:
            Frame indexed by player name with ``boom``, ``bust`` and ``fivex``
            columns, each expressed as a percentage of the player's games.
        """
        # Going through a dict means a repeated name resolves to its last row.
        salaries: dict = self.current['salary'].to_dict()

        games = self.szn.loc[self.szn['name'].isin(list(salaries)), ['name', 'fpts']]
        bust_line = 5.0 * (games['name'].map(salaries).astype(float) / 1000)
        boom_line = bust_line + 10

        flags = pd.DataFrame({
            'name': games['name'],
            'boom': (games['fpts'] >= boom_line).astype(float),
            'bust': (games['fpts'] < bust_line).astype(float),
        })

        return (flags
                .groupby('name')
                [['boom', 'bust']]
                .mean()
                .assign(
                    boom=lambda df_: df_['boom'] * 100,
                    bust=lambda df_: df_['bust'] * 100,
                    fivex=lambda df_: 100.0 - df_['boom'] - df_['bust'],
                )
               )

    def load(self, **kwargs) -> pd.DataFrame:
        """Combine slate info, projections and probabilities into one table.

        Rows with any missing value and players with fewer than 5 games are
        dropped.

        Keyword Args:
            stat: stat to project (default ``'fpts'``).
            add_per_dollar: when true, add ``floor/$``, ``med/$`` and
                ``ceiling/$`` columns (projection per $1,000 of salary).
            salary: keep only players whose salary is at least this value.
            sort: column to sort by, descending (default ``median-{stat}``).

        Returns:
            The combined frame, sorted and rounded (1 decimal for ``fpts`` and
            ``mp``, 2 decimals for any other stat).
        """
        stat: str = kwargs.get('stat', 'fpts')

        table: pd.DataFrame = (pd
                               .concat([
                                   self.current,
                                   self.add_projections(stat=stat),
                                   self.add_probabilities(),
                               ], axis=1)
                               .dropna()
                               .assign(
                                   games=lambda df_: df_['games'].astype('uint16'),
                                   salary=lambda df_: df_['salary'].astype('int'),
                               )
                               .pipe(lambda df_: df_.loc[df_['games'] >= 5])
                              )

        if kwargs.get('add_per_dollar', False):
            table = table.assign(**{
                'floor/$': lambda df_: 1_000 * df_[f'floor-{stat}'] / df_['salary'],
                'med/$': lambda df_: 1_000 * df_[f'median-{stat}'] / df_['salary'],
                'ceiling/$': lambda df_: 1_000 * df_[f'ceiling-{stat}'] / df_['salary'],
            })

        if 'salary' in kwargs:
            table = table.loc[table['salary'] >= kwargs['salary']]

        decimals = {'fpts': 1, 'mp': 1}.get(stat, 2)
        return (table
                .sort_values(kwargs.get('sort', f'median-{stat}'), ascending=False)
                .round(decimals)
               )

In [7]:
# Build the outcome data for the DraftKings slate (reads the CSV files under ../data).
outcomes = Outcomes(mode='draftkings')

In [12]:
# %%timeit
# Combined table sorted by the `boom` column, highest first.
outcomes.load(sort='boom')

Unnamed: 0_level_0,pos,salary,team,floor-fpts,median-fpts,ceiling-fpts,games,std-fpts,boom,bust,fivex
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Christian Wood,C,5400,DAL,22.8,31.2,40.2,55,12.8,36.4,40.0,23.6
John Collins,PF/C,4000,ATL,19.2,25.0,34.2,57,10.5,35.1,28.1,36.8
Kyle Lowry,PG,4500,MIA,19.2,28.2,34.9,44,11.5,34.1,31.8,34.1
Shai Gilgeous-Alexander,PG,9200,OKC,43.8,50.1,58.8,56,10.3,30.4,33.9,35.7
Max Strus,SG/SF,3500,MIA,15.1,22.1,28.9,66,9.5,28.8,34.8,36.4
Bol Bol,PF/C,3400,ORL,12.2,20.2,27.5,62,10.7,27.4,37.1,35.5
Cameron Payne,PG,3500,PHX,15.8,22.2,28.0,33,11.7,27.3,33.3,39.4
Onyeka Okongwu,C,3700,ATL,17.5,21.0,28.8,65,9.8,26.2,27.7,46.2
Deandre Ayton,C,7500,PHX,27.0,36.8,46.2,56,12.7,25.0,53.6,21.4
Dejounte Murray,PG/SG,7900,ATL,34.8,39.5,49.0,61,11.4,24.6,45.9,29.5


In [9]:
# (outcomes
#  .load(sort='med/$').pipe(lambda df_: df_.loc[df_['team'].isin(['NYK', 'SAC'])])
# )