#### Note: 

- Slate is defined as that day of daily fantasy competitions, pretty much synonymous with date --> slate is all games played that day
- For the time being, this dataset will not accomodate unusual schedules as it does not include start times for games, so it will just include all games played that day.
    - `slate_size` is going to be a column to refer to how many games were played on a given day, i.e. the number of teams for each day divided by 2
    - This will be useful to analyze patterns in optimal lineups, for example:
        - How many players from a team / game would one want to include in their 9-player lineup
        - Is it better to focus on certain games and ignore others, or is it better to try and have the best players from each team
        - How much of an impact does `total_pts` and `diff_score` have on fpts outcomes

In [1]:
import pandas as pd
import numpy as np

# pandas settings....

from file_manager import FileManager
fm = FileManager()

In [2]:
pd.options.display.max_rows = 200

In [3]:
def categorizing_slates(**kwargs):
    df: pd.DataFrame = fm.load_clean_data()

    ret = (df
           .drop_duplicates(['date', 'team'])
           .groupby('date')
           ['team']
           .agg(['count'])
          )
    
    slate_sizes = {date: ret.loc[date, 'count'] // 2 for date in ret.index}
    
    for date, size in slate_sizes.items():
        df.loc[df['date']==date, 'slate_size'] = size
    
    df['slate_size'] = df['slate_size'].astype('uint8')
    
    # Categorize slates by quartile, excluding slates which include single game as that is different format
    
    df: pd.DataFrame = df.loc[df['slate_size']>1]
    
    slate_descrip = df['slate_size'].describe()
    
    slate_nums_categories = {
        tuple(range(*(int(slate_descrip['min']), int(slate_descrip['25%'])))): 1, # first element will always be 2
        tuple(range(*(int(slate_descrip['25%']), int(slate_descrip['50%'])))): 2,
        tuple(range(*(int(slate_descrip['50%']), int(slate_descrip['75%'])))): 3,
        tuple(range(*(int(slate_descrip['75%']), int(slate_descrip['max'])+1))): 4,
        
    }
    
    for nums, cat in slate_nums_categories.items():
        df.loc[ df['slate_size'].isin(nums), 'slate_cat' ] = cat
    
    df['slate_cat'] = df['slate_cat'].astype('uint8')
    
    # Also be useful to label each matchup to see if some games command disproportionate amounts of fantasy points
    df['matchup'] = tuple(zip(df['team'], df['opp']))
    df['matchup'] = df['matchup'].map(lambda x: tuple(sorted(x)))
    
    # make list of matchups then assign index for each date
    # sort first so index is in order? highest index # was highest/lowest fantasy points?
    # add salaries as well for determining value
    
    
#     fm.save_dataframe(df, 'season-data-slates')
    return df
#     return None

In [None]:
categorizing_slates()

In [5]:
from tqdm.notebook import tqdm
def sorting_matchups(**kwargs):
    
    df: pd.DataFrame = fm.load_clean_data()
    
    # team fantasy points
    tm = (df
          .groupby(['date', 'team'])
          ['fpts']
          .agg([np.sum])
         )
    
    msg = 'Adding team fantasy points\n'
    
    print(msg)
    for idx in tqdm(tm.index):
        date, team = idx
        df.loc[(df['date']==date) & (df['team']==team), 'team_fpts'] = tm.loc[idx,'sum']
    
    
    df['matchup'] = tuple(zip(df['team'], df['opp']))
    df['matchup'] = df['matchup'].map(lambda x: tuple(sorted(x)))
    
    # matchup fantasy points
    mu = (df
          .groupby(['date', 'matchup'])
          ['fpts']
          .agg([np.sum])
          .reset_index()
         )
    
#   Doing it this way because issue with having nested tuple in the index

    matchup_info = {
        date: {
            matchup: (mu
                      .loc[(mu['date']==date)
                           & (mu['matchup']==matchup)
                          ]
                      ['sum']
                      .item()
                     )
            for matchup in mu.loc[mu['date']==date, 'matchup'].drop_duplicates()
        }
        for date in mu['date'].drop_duplicates()
    }
    
    print(msg.replace('team', 'matchup'))
    for date, mu_totals in tqdm(matchup_info.items()):
        for matchup, total in mu_totals.items():
            df.loc[(df['date']==date) & (df['matchup']==matchup), 'mu_fpts'] = total
    
    return df

In [6]:
sorting_matchups()

Adding team fantasy points



  0%|          | 0/1742 [00:00<?, ?it/s]

Adding matchup fantasy points



  0%|          | 0/117 [00:00<?, ?it/s]

Unnamed: 0,date,name,team,opp,fpts,mp,fppm,home,w,w_pts,...,tov_perc,usg,ortg,drtg,bpm,total_pts,pts_diff,team_fpts,matchup,mu_fpts
0,2022-10-18,Matt Ryan,LAL,GS,6.2,17.816667,0.347989,0,GS,123,...,0.200,0.107,60,111,-10.6,232,-14,228.1,"(GS, LAL)",482.2
1,2022-10-18,Kendrick Nunn,LAL,GS,19.6,23.050000,0.850325,0,GS,123,...,0.250,0.198,102,111,-0.3,232,-14,228.1,"(GS, LAL)",482.2
2,2022-10-18,Lonnie Walker,LAL,GS,20.1,29.283333,0.686397,0,GS,123,...,0.202,0.129,78,112,-5.5,232,-14,228.1,"(GS, LAL)",482.2
3,2022-10-18,Patrick Beverley,LAL,GS,25.4,24.666667,1.029730,0,GS,123,...,0.167,0.093,74,97,0.4,232,-14,228.1,"(GS, LAL)",482.2
4,2022-10-18,Russell Westbrook,LAL,GS,35.7,30.683333,1.163498,0,GS,123,...,0.220,0.226,108,107,-0.3,232,-14,228.1,"(GS, LAL)",482.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18357,2023-02-14,Devin Booker,PHO,SAC,46.0,29.233333,1.573546,1,PHO,120,...,0.042,0.366,136,112,8.7,229,11,248.9,"(PHO, SAC)",459.4
18358,2023-02-14,Josh Okogie,PHO,SAC,28.7,39.433333,0.727811,1,PHO,120,...,0.130,0.174,125,122,-0.1,229,11,248.9,"(PHO, SAC)",459.4
18359,2023-02-14,Torrey Craig,PHO,SAC,17.1,30.033333,0.569367,1,PHO,120,...,0.333,0.133,84,117,-4.3,229,11,248.9,"(PHO, SAC)",459.4
18360,2023-02-14,Gary Harris,ORL,TOR,8.7,22.733333,0.382698,0,TOR,123,...,0.000,0.058,195,132,0.9,236,-10,192.8,"(ORL, TOR)",452.3
