### Player Outcomes

- The goal of this notebook is to take in the current contest data and produce the results for the following:
    - `proj-fpts` : median projection fantasy points (FP)
    - `ceiling-fpts`: 75th-percentile outcome of FP
    - `floor-fpts`: 25th-percentile outcome of FP
    
    - `boom-prob`: likelihood of a player achieving *at least* 5x+10 FP
    - `bust-prob`: likelihood of a player achieving *at most* 5x FP
    - `neutral-prob`: likelihood of a player achieving between (5x, 5x+10) FP

- NOTE: these projections do not go into depth; the results are not tailored to specific lineups but describe each player over the whole season.
- This may cause issues for players whose team is missing high-FP-scoring players in the current contest, players whose past outcomes are distorted by long stretches in which their team was missing high-FP-scoring players, and players who have been traded.

In [1]:
from collections.abc import Callable

import numpy as np
import pandas as pd

In [2]:
def pandas_settings() -> None:
    """Apply this notebook's pandas display options.

    Raises the row, column and width display limits to 250 and switches
    off the ``display.memory_usage`` option.
    """
    display_options = {
        'display.max_rows': 250,
        'display.max_columns': 250,
        'display.width': 250,
        'display.memory_usage': False,
    }
    for option_name, option_value in display_options.items():
        pd.set_option(option_name, option_value)


pandas_settings()

In [3]:

def percentile(n: float) -> Callable[..., float]:
    """Build an aggregation function that returns the ``n``-th percentile.

    Meant to be passed to ``.agg`` on a groupby. The returned function is
    named ``percentile_<n>`` so that pandas gives each aggregate its own
    column label when several percentiles are requested together.
    Example: ``percentile(50)`` behaves like ``np.median``.

    Parameters
    ----------
    n : float
        Percentile to compute, between 0 and 100 inclusive.

    Returns
    -------
    Callable
        Function taking an array-like and returning ``np.percentile(arr, n)``.

    Raises
    ------
    ValueError
        If ``n`` is outside ``[0, 100]``. Checked here so a bad value fails
        when the aggregator is built rather than in the middle of ``.agg``.
    """
    if not 0 <= n <= 100:
        raise ValueError('Percentiles must be in the range [0, 100], got %s' % n)

    def percentile_(arr):
        return np.percentile(arr, n)

    percentile_.__name__ = 'percentile_%s' % n
    return percentile_

In [4]:
class Outcomes:
    """Season-long fantasy-point outcomes for the players on the current slate.

    The constructor reads three CSV files from ``data_dir``:

    * ``season-data-clean.csv`` - one row per player per game,
    * ``current-fanduel.csv`` - used only for its injury indicator,
    * ``current-<mode>-sg.csv`` - the slate (name, position, team, salary).

    ``load()`` then returns one row per available player with the 25th /
    50th / 75th percentile of fantasy points and the same figures per
    $1,000 of salary. Boom/bust probabilities are not implemented here.

    Keyword arguments
    -----------------
    mode : str, default ``'fanduel'``
        ``'fanduel'`` uses the FanDuel column names. Any other value uses the
        DraftKings column names, and ``'draftkings'`` additionally cleans the
        names/positions and recomputes ``fpts`` from the box-score columns.
    stats : iterable of str, optional
        Extra season-log columns to keep in addition to ``DEFAULT_STATS``.
    data_dir : str or path-like, default ``'../data'``
        Directory containing the CSV files listed above.
    """

    # Season-log rows with fewer minutes played than this are discarded.
    MIN_MINUTES: float = 8.0
    # Players with fewer qualifying games than this are dropped by `load()`.
    MIN_GAMES: int = 5

    # Season-log columns that are always kept.
    DEFAULT_STATS: tuple[str, ...] = (
        'name', 'date', 'team', 'opp', 'fpts', 'mp', 'fppm', 'usg',
        'pts', 'ast', 'trb', 'stl', 'blk', 'tov', 'starter', 'ast_perc', '3p',
    )

    # Slate-file column -> internal column name, per site.
    FD_COLUMNS: dict[str, str] = {
        'Nickname': 'name',
        'Position': 'pos',
        'Team': 'team',
        'Salary': 'salary',
    }
    DK_COLUMNS: dict[str, str] = {
        'Name': 'name',
        'Roster Position': 'pos',
        'TeamAbbrev': 'team',
        'Salary': 'salary',
    }

    # DraftKings names that differ from the names used in the season log.
    DK_NAME_FIXES: dict[str, str] = {
        'KJ Martin': 'Kenyon Martin',
        'Guillermo Hernangomez': 'Willy Hernangomez',
    }

    def __init__(self, **kwargs):
        mode: str = kwargs.get('mode', 'fanduel')
        data_dir: str = str(kwargs.get('data_dir', '../data')).rstrip('/')

        # Default stats first, then caller extras; dict.fromkeys drops
        # duplicates while preserving order. `stats=None` means "no extras".
        extra_stats = kwargs.get('stats') or ()
        stats: list[str] = list(dict.fromkeys([*self.DEFAULT_STATS, *extra_stats]))

        season = pd.read_csv(f'{data_dir}/season-data-clean.csv')
        # .loc[rows, cols] returns an independent frame, so the DraftKings
        # recomputation below never writes into a view of `season`.
        season = season.loc[season['mp'] >= self.MIN_MINUTES, stats]

        columns: dict[str, str] = self.FD_COLUMNS if mode == 'fanduel' else self.DK_COLUMNS

        # Only the FanDuel export carries injury flags, so it is read for
        # every mode. Players flagged 'O' are removed from the slate.
        injuries = pd.read_csv(f'{data_dir}/current-fanduel.csv',
                               usecols=['Nickname', 'Injury Indicator'])
        injured_players: tuple[str, ...] = tuple(
            injuries.loc[injuries['Injury Indicator'] == 'O', 'Nickname']
        )

        current = (pd
                   .read_csv(f'{data_dir}/current-{mode}-sg.csv', usecols=list(columns))
                   .rename(columns=columns)
                   # Injured names are matched before the '.' stripping below,
                   # i.e. against the raw names as exported.
                   .loc[lambda df_: ~df_['name'].isin(injured_players)]
                   .assign(name=lambda df_: df_['name'].str.replace('.', '', regex=False))
                   .sort_values('name')
                   .set_index('name')
                  )

        if mode == 'draftkings':
            # Keep only the first two words of each name, then patch the
            # names that are known to differ from the season log.
            current.index = current.index.map(lambda x: ' '.join(x.split(' ')[:2]))
            current.index = current.index.map(lambda x: self.DK_NAME_FIXES.get(x, x))

            # Strip the generic G/F/UTIL roster slots from the position string.
            current = current.assign(
                pos=lambda df_: df_['pos']
                .str.replace('/[GF]/UTIL', '', regex=True)
                .str.replace('C/UTIL', 'C', regex=False)
                .str.replace('/[GF]', '', regex=True)
            )

            # Replace the stored fantasy points with DraftKings-style points.
            season = season.assign(fpts=self._draftkings_fpts)

        # Per-game season log restricted to `stats` and MIN_MINUTES.
        self.szn: pd.DataFrame = season
        # Current slate indexed by player name (pos / team / salary).
        self.current: pd.DataFrame = current
        # Names of the players on the slate, in index order.
        self.players: tuple[str, ...] = tuple(self.current.index)

    @staticmethod
    def _draftkings_fpts(box_scores: pd.DataFrame) -> pd.Series:
        """Vectorised DraftKings-style fantasy points for each row of ``box_scores``.

        Reads the ``pts, ast, trb, stl, blk, tov, 3p`` columns. A row with at
        least 10 points earns +3 when assists and rebounds are both >= 10,
        otherwise +1.5 when either one is >= 10.
        """
        # NOTE(review): this bonus rule only considers pts/ast/trb and never
        # stacks the two bonuses - confirm against the site's scoring rules.
        base = (
            1.0 * box_scores['pts']
            + 1.5 * box_scores['ast']
            + 1.25 * box_scores['trb']
            + 2 * box_scores['stl']
            + 2 * box_scores['blk']
            + -0.5 * box_scores['tov']
            + 0.5 * box_scores['3p']
        )

        scored_10 = box_scores['pts'] >= 10.0
        assists_10 = box_scores['ast'] >= 10.0
        boards_10 = box_scores['trb'] >= 10.0

        # np.select takes the first matching condition, mirroring if/elif.
        bonus = np.select(
            [scored_10 & assists_10 & boards_10, scored_10 & (assists_10 | boards_10)],
            [3.0, 1.5],
            default=0.0,
        )
        return base + bonus

    def add_projections(self) -> pd.DataFrame:
        """Return per-player fantasy-point percentiles for the slate.

        The result is indexed by player name with columns ``floor`` (25th
        percentile), ``med`` (50th), ``ceiling`` (75th), ``games`` (row
        count) and ``std``. It does not modify ``self``.
        """
        percs_df: pd.DataFrame = (self.szn
                                  .groupby('name')
                                  ['fpts']
                                  .agg([
                                      percentile(25),
                                      percentile(50),  # same as np.median
                                      percentile(75),
                                      'count',
                                      # The string pins pandas' own sample std;
                                      # passing np.std here is a deprecated alias.
                                      'std',
                                  ])
                                  .set_axis(['floor', 'med', 'ceiling', 'games', 'std'], axis=1)
                                  .pipe(lambda df_: df_.loc[df_.index.isin(self.players)])
                                 )

        return percs_df

    def load(self, **kwargs) -> pd.DataFrame:
        """Return the slate joined with its projections and per-salary values.

        Keyword arguments
        -----------------
        sort : str, default ``'med/$'``
            Column to sort by, descending.

        Rows with any missing value (for example a slate player with no
        season data) and players with fewer than ``MIN_GAMES`` games are
        dropped. Numeric columns are rounded to two decimals.
        """
        sort_key = kwargs.get('sort', 'med/$')

        return (pd
                .concat([
                    self.current,
                    self.add_projections(),
                ], axis=1)
                .dropna()
                # Pre-sort by median so ties in the final (stable) sort
                # keep median order.
                .sort_values('med', ascending=False)
                .assign(
                    games=lambda df_: df_['games'].astype('uint16'),
                    f_per_dollar=lambda df_: 1000 * df_['floor'] / df_['salary'],
                    med_per_dollar=lambda df_: 1000 * df_['med'] / df_['salary'],
                    c_per_dollar=lambda df_: 1000 * df_['ceiling'] / df_['salary'],
                )
                .rename(columns={
                    'med': 'median',
                    'f_per_dollar': 'floor/$',
                    'med_per_dollar': 'med/$',
                    'c_per_dollar': 'ceiling/$',
                })
                .round(2)
                .loc[lambda df_: df_['games'] >= self.MIN_GAMES]
                .sort_values(by=sort_key, ascending=False, kind='mergesort')
               )

In [5]:
# Build the outcomes model for the FanDuel slate; the constructor reads the
# season log and the current-slate CSVs from ../data.
outcomes = Outcomes(mode='fanduel')

In [6]:
# Rank the slate by 75th-percentile fantasy points per $1,000 of salary.
outcomes.load(sort='ceiling/$')

Unnamed: 0_level_0,pos,salary,team,floor,median,ceiling,games,std,floor/$,med/$,ceiling/$
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Gary Trent,SG,8500,TOR,21.7,29.0,33.67,58,10.46,2.55,3.41,3.96
OG Anunoby,SF/SG,10000,TOR,25.6,30.2,38.6,53,10.63,2.56,3.02,3.86
Anthony Davis,PF/C,16500,LAL,40.3,56.5,61.3,41,15.86,2.44,3.42,3.72
Scottie Barnes,PF/SF,11500,TOR,27.02,35.1,41.47,64,10.78,2.35,3.05,3.61
Lonnie Walker,SG/SF,7500,LAL,12.35,18.6,26.9,47,9.37,1.65,2.48,3.59
Precious Achiuwa,PF/C,7500,TOR,14.12,21.35,26.55,42,10.79,1.88,2.85,3.54
Fred VanVleet,PG,13500,TOR,30.85,39.8,47.7,55,13.72,2.29,2.95,3.53
Pascal Siakam,PF,14500,TOR,36.4,42.7,51.1,57,10.99,2.51,2.94,3.52
Chris Boucher,C/PF,8000,TOR,14.85,21.6,26.85,60,9.89,1.86,2.7,3.36
Jakob Poeltl,C,12000,TOR,24.8,30.6,38.8,57,11.83,2.07,2.55,3.23


In [7]:
# (outcomes
#  .load(sort='med/$').pipe(lambda df_: df_.loc[df_['team'].isin(['NYK', 'SAC'])])
# )