#### Note: 

- A slate is defined as a single day of daily fantasy competitions; it is effectively synonymous with the date --> a slate is all games played that day
- For the time being, this dataset will not accommodate unusual schedules, as it does not include start times for games; it will just include all games played that day.
    - `slate_size` is going to be a column to refer to how many games were played on a given day, i.e. the number of teams for each day divided by 2
    - This will be useful to analyze patterns in optimal lineups, for example:
        - How many players from a team / game would one want to include in their 9-player lineup
        - Is it better to focus on certain games and ignore others, or is it better to try and have the best players from each team
        - How much of an impact does `total_pts` and `diff_score` have on fpts outcomes

In [None]:
import pandas as pd
import numpy as np

# pandas settings....

# Project-local helper; the shared `fm` instance is used below to load the
# cleaned season data (`load_clean_data`) and to persist results (`save_dataframe`).
from file_manager import FileManager
fm = FileManager()

In [55]:
def categorizing_slates(**kwargs) -> None:
    """Add ``slate_size`` and ``slate_cat`` to the cleaned data and save it.

    Loads the cleaned data via ``fm.load_clean_data()`` (which must provide
    ``date`` and ``team`` columns), then:

    1. Sets ``slate_size`` to the number of distinct teams appearing on each
       date, floor-divided by 2 (i.e. the number of games that day).
    2. Drops every row belonging to a single-game slate (``slate_size <= 1``).
    3. Sets ``slate_cat`` to 1-4 according to which quartile bucket of the
       remaining ``slate_size`` values the row falls in. Quartile edges are
       truncated to integers and each bucket is closed on the left, so a size
       equal to an edge belongs to the higher category.
    4. Saves the result through ``fm.save_dataframe`` as
       ``'season-data-slates'``.

    NOTE: the quartiles are taken over all remaining rows (one row per
    player-game), not over distinct dates, so larger slates carry more weight.

    Args:
        **kwargs: Accepted for call compatibility; currently unused.

    Raises:
        ValueError: If no rows remain after removing single-game slates.
    """
    df: pd.DataFrame = fm.load_clean_data()

    # Distinct teams per date, broadcast back onto every row of that date in
    # one vectorized pass instead of one boolean scan per date.
    teams_per_date = df.groupby('date')['team'].transform('nunique')
    df['slate_size'] = (teams_per_date // 2).astype('uint8')

    # Single-game slates are a different contest format, so they are excluded.
    # The explicit copy makes the later column assignment operate on an
    # independent frame rather than on a slice of the loaded data.
    df = df.loc[df['slate_size'] > 1].copy()
    if df.empty:
        raise ValueError('no multi-game slates found; nothing to categorize')

    # Integer-truncated 25% / 50% / 75% edges of the remaining slate sizes.
    quartile_edges = (
        df['slate_size'].quantile([0.25, 0.5, 0.75]).astype(int).to_numpy()
    )

    # Category = 1 + number of edges that are <= slate_size, which yields the
    # left-closed buckets [min, q25), [q25, q50), [q50, q75), [q75, max].
    # Coinciding edges simply leave the corresponding category unused.
    bucket_index = np.searchsorted(
        quartile_edges, df['slate_size'].to_numpy(), side='right'
    )
    df['slate_cat'] = (bucket_index + 1).astype('uint8')

    fm.save_dataframe(df, 'season-data-slates')

    return None

In [56]:
# Build the slate-annotated dataset and save it; the function returns None.
categorizing_slates()

Unnamed: 0,date,name,team,opp,fpts,mp,fppm,home,w,w_pts,...,blk_perc,tov_perc,usg,ortg,drtg,bpm,total_pts,pts_diff,slate_size,slate_cat
0,2022-10-18,Matt Ryan,LAL,GS,6.2,17.816667,0.347989,0,GS,123,...,0.0,0.2,0.107,60,111,-10.6,232,-14,2,1
1,2022-10-18,Kendrick Nunn,LAL,GS,19.6,23.05,0.850325,0,GS,123,...,0.0,0.25,0.198,102,111,-0.3,232,-14,2,1
2,2022-10-18,Lonnie Walker,LAL,GS,20.1,29.283333,0.686397,0,GS,123,...,0.03,0.202,0.129,78,112,-5.5,232,-14,2,1
3,2022-10-18,Patrick Beverley,LAL,GS,25.4,24.666667,1.02973,0,GS,123,...,0.072,0.167,0.093,74,97,0.4,232,-14,2,1
4,2022-10-18,Russell Westbrook,LAL,GS,35.7,30.683333,1.163498,0,GS,123,...,0.0,0.22,0.226,108,107,-0.3,232,-14,2,1
5,2022-10-18,Sam Hauser,BOS,PHI,0.0,3.316667,0.0,1,BOS,126,...,0.0,0.0,0.0,0,125,-11.9,243,9,2,1
6,2022-10-18,Blake Griffin,BOS,PHI,8.5,8.283333,1.026157,1,BOS,126,...,0.0,0.0,0.16,103,120,-12.3,243,9,2,1
7,2022-10-18,Noah Vonleh,BOS,PHI,10.4,20.166667,0.515702,1,BOS,126,...,0.052,0.0,0.046,101,114,-4.0,243,9,2,1
8,2022-10-18,Al Horford,BOS,PHI,13.5,23.1,0.584416,1,BOS,126,...,0.0,0.0,0.139,104,123,-6.9,243,9,2,1
9,2022-10-18,Derrick White,BOS,PHI,13.6,24.033333,0.565881,1,BOS,126,...,0.0,0.225,0.085,100,120,-3.3,243,9,2,1


In [34]:
# Build the slate-annotated dataset and save it; the function returns None.
# NOTE(review): the table displayed under this cell has an empty `slate_cat`
# column — presumably output from an earlier revision of the function; confirm.
categorizing_slates()

Unnamed: 0,date,name,team,opp,fpts,mp,fppm,home,w,w_pts,...,blk_perc,tov_perc,usg,ortg,drtg,bpm,total_pts,pts_diff,slate_size,slate_cat
45,2022-10-19,Joshua Primo,SA,CHA,21.1,25.850000,0.816248,1,CHA,129,...,0.029,0.190,0.237,82,128,-9.1,231,-27,12,
46,2022-10-19,Jusuf Nurkic,POR,SAC,19.9,25.183333,0.790205,0,POR,115,...,0.000,0.288,0.233,71,103,-10.5,223,7,12,
47,2022-10-19,Anfernee Simons,POR,SAC,34.6,35.850000,0.965132,0,POR,115,...,0.000,0.000,0.270,103,107,-1.6,223,7,12,
48,2022-10-19,Justise Winslow,POR,SAC,23.6,26.316667,0.896770,0,POR,115,...,0.000,0.000,0.145,135,108,0.2,223,7,12,
49,2022-10-19,Devin Vassell,SA,CHA,20.4,28.583333,0.713703,1,CHA,129,...,0.000,0.097,0.280,72,129,-12.7,231,-27,12,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18357,2023-02-14,Devin Booker,PHO,SAC,46.0,29.233333,1.573546,1,PHO,120,...,0.000,0.042,0.366,136,112,8.7,229,11,5,
18358,2023-02-14,Josh Okogie,PHO,SAC,28.7,39.433333,0.727811,1,PHO,120,...,0.000,0.130,0.174,125,122,-0.1,229,11,5,
18359,2023-02-14,Torrey Craig,PHO,SAC,17.1,30.033333,0.569367,1,PHO,120,...,0.036,0.333,0.133,84,117,-4.3,229,11,5,
18360,2023-02-14,Gary Harris,ORL,TOR,8.7,22.733333,0.382698,0,TOR,123,...,0.000,0.000,0.058,195,132,0.9,236,-10,5,
