#### Note: 

- A slate is one day of daily fantasy competitions, essentially synonymous with a date --> the slate is all games played that day
- For the time being, this dataset will not accommodate unusual schedules, as it does not include start times for games; each slate simply includes all games played that day.
    - `slate_size` is going to be a column to refer to how many games were played on a given day, i.e. the number of teams for each day divided by 2
    - This will be useful to analyze patterns in optimal lineups, for example:
        - How many players from a team / game would one want to include in their 9-player lineup
        - Is it better to focus on certain games and ignore others, or is it better to try and have the best players from each team
        - How much of an impact does `total_pts` and `diff_score` have on fpts outcomes

In [None]:
import pandas as pd
import numpy as np

# pandas settings....

from file_manager import FileManager
# Shared FileManager instance; the code below uses it to load the clean data
# and to save the resulting dataframe.
fm = FileManager()

In [76]:
def categorizing_slates(**kwargs) -> None:
    """Add slate size, slate category and matchup labels to the clean data and save it.

    The clean data is loaded through ``fm.load_clean_data()`` and must contain
    the columns ``date``, ``team`` and ``opp``. Three columns are derived:

    * ``slate_size`` -- number of games on the row's date, computed as the
      number of distinct teams on that date floor-divided by 2.
    * ``slate_cat`` -- 1 to 4, the quartile bucket of ``slate_size``. Bucket
      edges come from ``describe()`` on the row-level ``slate_size`` column
      (truncated to ints), so dates with more rows weigh more in the quartiles.
    * ``matchup`` -- the sorted ``(team, opp)`` pair, so both sides of a game
      share the same label.

    Rows belonging to single-game slates (``slate_size <= 1``) are dropped
    before categorizing. The result is saved with
    ``fm.save_dataframe(..., 'season-data-slates')``.

    Args:
        **kwargs: Accepted for call compatibility; currently unused.

    Raises:
        ValueError: If no rows remain after removing single-game slates.
    """
    df: pd.DataFrame = fm.load_clean_data()

    # transform() broadcasts the per-date distinct-team count back onto every
    # row in a single pass, instead of re-scanning the frame once per date.
    teams_per_date = df.groupby('date')['team'].transform('nunique')
    df['slate_size'] = (teams_per_date // 2).astype('uint8')

    # Single-game slates are a different format, so they are excluded.
    # .copy() makes the filtered frame independent of ``df`` so the column
    # assignments below do not raise SettingWithCopyWarning.
    slates = df.loc[df['slate_size'] > 1].copy()
    if slates.empty:
        raise ValueError('no slates with more than one game to categorize')

    # Bucket edges: [min, 25%), [25%, 50%), [50%, 75%), [75%, max].
    stats = slates['slate_size'].describe()
    edges = [int(stats[key]) for key in ('min', '25%', '50%', '75%')]
    edges.append(int(stats['max']) + 1)  # +1 so the largest slate size is included

    for category, (low, high) in enumerate(zip(edges, edges[1:]), start=1):
        in_bucket = slates['slate_size'].isin(list(range(low, high)))
        slates.loc[in_bucket, 'slate_cat'] = category

    slates['slate_cat'] = slates['slate_cat'].astype('uint8')

    # Label each matchup to see if some games command disproportionate amounts
    # of fantasy points; sorting makes (A, B) and (B, A) the same label.
    slates['matchup'] = [
        tuple(sorted(pair)) for pair in zip(slates['team'], slates['opp'])
    ]

    fm.save_dataframe(slates, 'season-data-slates')

    return None

In [None]:
# Run the slate categorization; it returns None and hands its result to
# fm.save_dataframe under the name 'season-data-slates'.
categorizing_slates()