# Prepare Master DataFrame
In this notebook, I am looking at merging the player stats data frames. The overview of the purpose is below.

Purpose:
- Merge the data
- Determine what players to remove from the model based on playing time (lack of data)

Results:
- All data sets were included except unless they had irrelevant (to offense) or overlapping data.
- 

# Importing Libraries and Data

In [1]:
# Importing libraries
import pandas as pd

In [2]:
# Importing data sets
advanced = pd.read_csv('./data/general_advanced', index_col=0)
touches = pd.read_csv('./data/tracking_touches', index_col=0)
drives = pd.read_csv('./data/tracking_drives', index_col=0)
defensive_impact = pd.read_csv('./data/tracking_defensive_impact', index_col=0)
passing = pd.read_csv('./data/tracking_passing', index_col=0)
shooting_efficiency = pd.read_csv('./data/tracking_shooting_efficiency', index_col=0)
speed_distance = pd.read_csv('./data/tracking_speed_distance', index_col=0)
rebounding = pd.read_csv('./data/tracking_rebounding', index_col=0)
catch_shoot = pd.read_csv('./data/tracking_catch_shoot', index_col=0)
pullup_shooting = pd.read_csv('./data/tracking_pullup_shooting', index_col=0)
elbow_touSches = pd.read_csv('./data/tracking_elbow_touches', index_col=0)
post_ups = pd.read_csv('./data/tracking_post_ups', index_col=0)
paint_touches = pd.read_csv('./data/tracking_paint_touches', index_col=0)
offensive_rebounding = pd.read_csv('./data/tracking_offensive_rebounding', index_col=0)
defensive_rebounding = pd.read_csv('./data/tracking_defensive_rebounding', index_col=0)
hustle = pd.read_csv('./data/hustle', index_col=0)
shooting_tight = pd.read_csv('./data/shooting_tight', index_col=0)
shooting_very_tight = pd.read_csv('./data/shooting_very_tight', index_col=0)
shooting_open = pd.read_csv('./data/shooting_open', index_col=0)
shooting_very_open = pd.read_csv('./data/shooting_very_open', index_col=0)

# Merging the Data Frames
#### Making it possible to view the data frames

In [17]:
pd.options.display.max_columns = 100

#### Adding season to all of the data frames

In [68]:
# Function to add the season to a data frame
def add_season(df):
    
    # If we already have season, return the data frame
    if 'SEASON' in df.columns:
        return df
    
    # Initialize variables up for the loop
    df = df.reset_index()
    season = 2019 # Since we immediately reduce it by 1
    
    # Loop through the data frame and add season
    for i in df.index:
        
        # When the "index" column hits 0, change to the next season
        if df.loc[i, 'index'] == 0:
            season -= 1
        
        # Save the season in the data frame
        df.loc[i, "SEASON"] = season
    
    # Turn the season in an int instead of a float
    df['SEASON'] = df['SEASON'].astype('int64')
    
    return df

In [76]:
advanced = add_season(advanced)
touches = add_season(touches)
drives = add_season(drives)
defensive_impact = add_season(defensive_impact)
passing = add_season(passing)
shooting_efficiency = add_season(shooting_efficiency)
speed_distance = add_season(speed_distance)
rebounding = add_season(rebounding)
catch_shoot = add_season(catch_shoot)
pullup_shooting = add_season(pullup_shooting)
elbow_touches = add_season(elbow_touches)
post_ups = add_season(post_ups)
paint_touches = add_season(paint_touches)
hustle = add_season(hustle)
shooting_tight = add_season(shooting_tight)
shooting_very_tight = add_season(shooting_very_tight)
shooting_open = add_season(shooting_open)
shooting_very_open = add_season(shooting_very_open)

#### Updating the different shooting columns to have different names

In [113]:
# Creating a list of columns they share
columns = list(shooting_tight.columns)

In [120]:
# Function to change the column names based on a prefix and condition
def update_columns(df, columns, prefix, condition):
    
    # Creating the new columns
    new_columns = [prefix + column if condition in column else column for column in columns]
    
    # Updating the columns for the dataframe
    df.columns = new_columns
    
    return df

In [121]:
# Updating the dataframes
shooting_open = update_columns(shooting_open, columns, 'OPEN_', 'FG')
shooting_very_open = update_columns(shooting_very_open, columns, 'VERY_OPEN_', 'FG')
shooting_tight = update_columns(shooting_tight, columns, 'TIGHT_', 'FG')
shooting_very_tight = update_columns(shooting_very_tight, columns, 'VERY_TIGHT_', 'FG')

#### Saving columns to use

In [156]:
# Putting in the potentially relevant non-overlapping columns
# Defensive impact not used because it did not have useful information
# Shooting efficiency not included since it is in other data sets
columns_dict = {
    'advanced': ['SEASON', 'PLAYER_ID', 'PLAYER_NAME', 'TEAM_ID', 'TEAM_ABBREVIATION', 'AGE', 'GP', 'W', 'L', 'MIN',
                 'OFF_RATING', 'DEF_RATING', 'NET_RATING', 'AST_PCT', 'AST_TO', 'AST_RATIO', 'TM_TOV_PCT', 'EFG_PCT',
                 'TS_PCT', 'USG_PCT', 'PACE', 'FGM', 'FGA', 'FGM_PG', 'FGA_PG', 'FG_PCT'],
    'touches': ['SEASON', 'PLAYER_ID', 'PLAYER_NAME', 'POINTS', 'TOUCHES', 'FRONT_CT_TOUCHES', 'TIME_OF_POSS',
                'AVG_SEC_PER_TOUCH', 'AVG_DRIB_PER_TOUCH', 'PTS_PER_TOUCH', 'PTS_PER_ELBOW_TOUCH',
                'PTS_PER_POST_TOUCH', 'PTS_PER_PAINT_TOUCH'],
    'drives': ['SEASON', 'PLAYER_ID', 'PLAYER_NAME', 'DRIVES', 'DRIVE_FGM', 'DRIVE_FGA', 'DRIVE_FG_PCT', 'DRIVE_FTM',
               'DRIVE_FTA', 'DRIVE_FT_PCT', 'DRIVE_PTS', 'DRIVE_PTS_PCT', 'DRIVE_PASSES', 'DRIVE_PASSES_PCT',
               'DRIVE_AST', 'DRIVE_AST_PCT', 'DRIVE_TOV', 'DRIVE_TOV_PCT', 'DRIVE_PF', 'DRIVE_PF_PCT', ],
    'passing': ['SEASON', 'PLAYER_ID', 'PLAYER_NAME', 'PASSES_MADE', 'PASSES_RECEIVED', 'AST', 'FT_AST',
                'SECONDARY_AST', 'POTENTIAL_AST', 'AST_POINTS_CREATED', 'AST_ADJ', 'AST_TO_PASS_PCT', 
                'AST_TO_PASS_PCT_ADJ'],
    'speed_distance': ['SEASON', 'PLAYER_ID', 'PLAYER_NAME', 'DIST_FEET', 'DIST_MILES', 'DIST_MILES_OFF',
                       'DIST_MILES_DEF', 'AVG_SPEED', 'AVG_SPEED_OFF', 'AVG_SPEED_DEF'],
    'rebounding': ['SEASON', 'PLAYER_ID', 'PLAYER_NAME', 'OREB', 'OREB_CONTEST', 'OREB_UNCONTEST', 'OREB_CONTEST_PCT',
                   'OREB_CHANCES', 'OREB_CHANCE_PCT', 'OREB_CHANCE_DEFER', 'OREB_CHANCE_PCT_ADJ', 'AVG_OREB_DIST',
                   'DREB', 'DREB_CONTEST', 'DREB_UNCONTEST', 'DREB_CONTEST_PCT', 'DREB_CHANCES', 'DREB_CHANCE_PCT',
                   'DREB_CHANCE_DEFER', 'DREB_CHANCE_PCT_ADJ', 'AVG_DREB_DIST', 'REB', 'REB_CONTEST', 'REB_UNCONTEST', 
                   'REB_CONTEST_PCT', 'REB_CHANCES', 'REB_CHANCE_PCT', 'REB_CHANCE_DEFER', 'REB_CHANCE_PCT_ADJ',
                   'AVG_REB_DIST'],
    'catch_shoot': ['SEASON', 'PLAYER_ID', 'PLAYER_NAME', 'CATCH_SHOOT_FGM', 'CATCH_SHOOT_FGA', 'CATCH_SHOOT_FG_PCT',
                    'CATCH_SHOOT_PTS', 'CATCH_SHOOT_FG3M', 'CATCH_SHOOT_FG3A', 'CATCH_SHOOT_FG3_PCT',
                    'CATCH_SHOOT_EFG_PCT'],
    'pullup_shooting': ['SEASON', 'PLAYER_ID', 'PLAYER_NAME', 'PULL_UP_FGM', 'PULL_UP_FGA', 'PULL_UP_FG_PCT',
                        'PULL_UP_FG3M', 'PULL_UP_FG3A', 'PULL_UP_FG3_PCT', 'PULL_UP_PTS', 'PULL_UP_EFG_PCT'],
    'elbow_touches': ['SEASON', 'PLAYER_ID', 'PLAYER_NAME', 'ELBOW_TOUCHES', 'ELBOW_TOUCH_FGM', 'ELBOW_TOUCH_FGA',
                      'ELBOW_TOUCH_FG_PCT', 'ELBOW_TOUCH_FTM', 'ELBOW_TOUCH_FTA', 'ELBOW_TOUCH_FT_PCT',
                      'ELBOW_TOUCH_PTS', 'ELBOW_TOUCH_PASSES', 'ELBOW_TOUCH_AST', 'ELBOW_TOUCH_AST_PCT', 
                      'ELBOW_TOUCH_TOV', 'ELBOW_TOUCH_TOV_PCT', 'ELBOW_TOUCH_FOULS'],
    'post_ups': ['SEASON', 'PLAYER_ID', 'PLAYER_NAME', 'POST_TOUCHES', 'POST_TOUCH_FGM', 'POST_TOUCH_FGA',
                 'POST_TOUCH_FG_PCT', 'POST_TOUCH_FTM', 'POST_TOUCH_FTA', 'POST_TOUCH_FT_PCT', 'POST_TOUCH_PTS',
                 'POST_TOUCH_PTS_PCT', 'POST_TOUCH_PASSES', 'POST_TOUCH_PASSES_PCT', 'POST_TOUCH_AST',
                 'POST_TOUCH_AST_PCT', 'POST_TOUCH_TOV', 'POST_TOUCH_TOV_PCT', 'POST_TOUCH_FOULS',
                 'POST_TOUCH_FOULS_PCT'],
    'paint_touches': ['SEASON', 'PLAYER_ID', 'PLAYER_NAME', 'PAINT_TOUCHES', 'PAINT_TOUCH_FGM','PAINT_TOUCH_FGA',
                      'PAINT_TOUCH_FG_PCT', 'PAINT_TOUCH_FTM','PAINT_TOUCH_FTA', 'PAINT_TOUCH_FT_PCT',
                      'PAINT_TOUCH_PTS','PAINT_TOUCH_PTS_PCT', 'PAINT_TOUCH_PASSES', 'PAINT_TOUCH_PASSES_PCT',
                      'PAINT_TOUCH_AST', 'PAINT_TOUCH_AST_PCT', 'PAINT_TOUCH_TOV','PAINT_TOUCH_TOV_PCT',
                      'PAINT_TOUCH_FOULS', 'PAINT_TOUCH_FOULS_PCT'],
    'hustle': ['SEASON', 'PLAYER_ID', 'PLAYER_NAME', 'CONTESTED_SHOTS', 'CONTESTED_SHOTS_2PT', 'CONTESTED_SHOTS_3PT',
               'CHARGES_DRAWN', 'DEFLECTIONS', 'LOOSE_BALLS_RECOVERED', 'SCREEN_ASSISTS', 'BOX_OUTS'],
    'shooting_tight': ['SEASON', 'PLAYER_ID', 'PLAYER_NAME', 'TIGHT_FGA_FREQUENCY', 'TIGHT_FGM', 'TIGHT_FGA',
                       'TIGHT_FG_PCT', 'TIGHT_EFG_PCT', 'TIGHT_FG2A_FREQUENCY', 'TIGHT_FG2M', 'TIGHT_FG2A',
                       'TIGHT_FG2_PCT', 'TIGHT_FG3A_FREQUENCY', 'TIGHT_FG3M', 'TIGHT_FG3A', 'TIGHT_FG3_PCT'],
    'shooting_very_tight': ['SEASON', 'PLAYER_ID', 'PLAYER_NAME', 'VERY_TIGHT_FGA_FREQUENCY', 'VERY_TIGHT_FGM',
                            'VERY_TIGHT_FGA', 'VERY_TIGHT_FG_PCT', 'VERY_TIGHT_EFG_PCT', 'VERY_TIGHT_FG2A_FREQUENCY',
                            'VERY_TIGHT_FG2M', 'VERY_TIGHT_FG2A', 'VERY_TIGHT_FG2_PCT', 'VERY_TIGHT_FG3A_FREQUENCY',
                            'VERY_TIGHT_FG3M', 'VERY_TIGHT_FG3A', 'VERY_TIGHT_FG3_PCT'],
    'shooting_open': ['SEASON', 'PLAYER_ID', 'PLAYER_NAME', 'OPEN_FGM', 'OPEN_FGA', 'OPEN_FG_PCT', 'OPEN_EFG_PCT',
                      'OPEN_FG2A_FREQUENCY', 'OPEN_FG2M', 'OPEN_FG2A', 'OPEN_FG2_PCT', 'OPEN_FG3A_FREQUENCY',
                      'OPEN_FG3M', 'OPEN_FG3A', 'OPEN_FG3_PCT'],
    'shooting_very_open': ['SEASON', 'PLAYER_ID', 'PLAYER_NAME', 'VERY_OPEN_FGA_FREQUENCY', 'VERY_OPEN_FGM',
                           'VERY_OPEN_FGA', 'VERY_OPEN_FG_PCT', 'VERY_OPEN_EFG_PCT', 'VERY_OPEN_FG2A_FREQUENCY',
                           'VERY_OPEN_FG2M', 'VERY_OPEN_FG2A', 'VERY_OPEN_FG2_PCT', 'VERY_OPEN_FG3A_FREQUENCY',
                           'VERY_OPEN_FG3M', 'VERY_OPEN_FG3A', 'VERY_OPEN_FG3_PCT']
}

#### Merging dataframes

In [157]:
# Making a dictionary of the dataframes
df_dict = {
    'advanced': advanced,
    'touches': touches,
    'drives': drives,
    'passing': passing,
    'speed_distance': speed_distance,
    'rebounding': rebounding,
    'catch_shoot': catch_shoot,
    'pullup_shooting': pullup_shooting,
    'elbow_touches': elbow_touches,
    'post_ups': post_ups,
    'paint_touches': paint_touches,
    'hustle': hustle,
    'shooting_tight': shooting_tight,
    'shooting_very_tight': shooting_very_tight,
    'shooting_open': shooting_open,
    'shooting_very_open': shooting_very_open
}

In [158]:
# Function to merge a bunch of dataframes together
def merge_df(df_dict, columns_dict, how, on):
    
    # Initiating to show we need a master df
    have_master = False
    
    # Looping over dataframes
    for key, df in df_dict.items():
        
        # Getting the columns for the dataframe
        columns = columns_dict[key]
        
        # Merging the dataframes
        if have_master == False:
            master_df = df
            have_master = True
        else:
            master_df = pd.merge(master_df, df[columns], how=how, on=on)

    return master_df

In [159]:
# Saving the columns to merge on
on_columns = ["PLAYER_ID", "SEASON", "PLAYER_NAME"]

In [160]:
# Creating the master dataframe
master_df = merge_df(df_dict, columns_dict, 'outer', on_columns)

In [169]:
# Peeking at the master dataframe
master_df[master_df['PLAYER_NAME']=='Al Horford']

Unnamed: 0,index,PLAYER_ID,PLAYER_NAME,TEAM_ID,TEAM_ABBREVIATION,AGE,GP,W,L,W_PCT,MIN,OFF_RATING,DEF_RATING,NET_RATING,AST_PCT,AST_TO,AST_RATIO,OREB_PCT,DREB_PCT,REB_PCT,TM_TOV_PCT,EFG_PCT,TS_PCT,USG_PCT,PACE,PIE,FGM,FGA,FGM_PG,FGA_PG,FG_PCT,GP_RANK,W_RANK,L_RANK,W_PCT_RANK,MIN_RANK,OFF_RATING_RANK,DEF_RATING_RANK,NET_RATING_RANK,AST_PCT_RANK,AST_TO_RANK,AST_RATIO_RANK,OREB_PCT_RANK,DREB_PCT_RANK,REB_PCT_RANK,TM_TOV_PCT_RANK,EFG_PCT_RANK,TS_PCT_RANK,USG_PCT_RANK,PACE_RANK,...,TIGHT_FGM,TIGHT_FGA,TIGHT_FG_PCT,TIGHT_EFG_PCT,TIGHT_FG2A_FREQUENCY,TIGHT_FG2M,TIGHT_FG2A,TIGHT_FG2_PCT,TIGHT_FG3A_FREQUENCY,TIGHT_FG3M,TIGHT_FG3A,TIGHT_FG3_PCT,VERY_TIGHT_FGA_FREQUENCY,VERY_TIGHT_FGM,VERY_TIGHT_FGA,VERY_TIGHT_FG_PCT,VERY_TIGHT_EFG_PCT,VERY_TIGHT_FG2A_FREQUENCY,VERY_TIGHT_FG2M,VERY_TIGHT_FG2A,VERY_TIGHT_FG2_PCT,VERY_TIGHT_FG3A_FREQUENCY,VERY_TIGHT_FG3M,VERY_TIGHT_FG3A,VERY_TIGHT_FG3_PCT,OPEN_FGM,OPEN_FGA,OPEN_FG_PCT,OPEN_EFG_PCT,OPEN_FG2A_FREQUENCY,OPEN_FG2M,OPEN_FG2A,OPEN_FG2_PCT,OPEN_FG3A_FREQUENCY,OPEN_FG3M,OPEN_FG3A,OPEN_FG3_PCT,VERY_OPEN_FGA_FREQUENCY,VERY_OPEN_FGM,VERY_OPEN_FGA,VERY_OPEN_FG_PCT,VERY_OPEN_EFG_PCT,VERY_OPEN_FG2A_FREQUENCY,VERY_OPEN_FG2M,VERY_OPEN_FG2A,VERY_OPEN_FG2_PCT,VERY_OPEN_FG3A_FREQUENCY,VERY_OPEN_FG3M,VERY_OPEN_FG3A,VERY_OPEN_FG3_PCT
6,6,201143,Al Horford,1610612738,BOS,32.0,72,47,25,0.653,31.6,108.2,101.1,7.1,0.225,2.57,26.6,0.052,0.197,0.128,10.3,0.553,0.575,0.187,97.74,0.134,368,753,5.1,10.5,0.489,154,52,276,86,65,123,72,58,88,66,71,170,123,139,278,133,156,235,414,...,1.58,2.91,0.542,0.545,0.286,1.57,2.87,0.545,0.004,0.01,0.04,0.333,0.143,0.87,1.43,0.606,0.606,0.143,0.87,1.43,0.606,0.0,0.0,0.0,,0.99,2.13,0.463,0.503,0.168,0.81,1.68,0.483,0.045,0.17,0.45,0.387,0.354,1.62,3.55,0.457,0.616,0.095,0.49,0.96,0.515,0.259,1.13,2.59,0.436
545,5,201143,Al Horford,1610612738,BOS,31.0,68,46,22,0.676,32.3,110.7,105.8,5.0,0.239,2.93,25.7,0.049,0.183,0.118,8.8,0.527,0.553,0.199,98.96,0.125,379,801,5.6,11.8,0.473,192,62,183,50,59,56,219,70,65,28,63,190,138,156,150,163,184,171,189,...,1.75,2.93,0.598,0.598,0.255,1.75,2.91,0.601,0.001,0.0,0.01,0.0,0.156,1.09,1.78,0.612,0.616,0.155,1.07,1.76,0.608,0.001,0.01,0.01,1.0,1.06,2.44,0.434,0.458,0.179,0.94,2.04,0.46,0.035,0.12,0.4,0.296,0.374,1.68,4.26,0.393,0.526,0.099,0.54,1.13,0.481,0.274,1.13,3.13,0.362
1030,4,201143,Al Horford,1610612737,ATL,30.0,82,48,34,0.585,32.1,103.1,98.2,4.9,0.165,2.46,17.8,0.062,0.18,0.122,7.3,0.547,0.565,0.206,99.75,0.137,529,1048,6.5,12.8,0.505,1,48,307,133,60,221,52,76,126,48,152,150,145,143,59,88,114,154,130,...,1.82,2.95,0.616,0.616,0.237,1.82,2.93,0.621,0.002,0.0,0.02,0.0,0.114,0.88,1.41,0.621,0.621,0.114,0.88,1.41,0.621,0.0,0.0,0.0,,1.48,2.63,0.56,0.579,0.189,1.38,2.34,0.589,0.024,0.1,0.29,0.333,0.434,2.28,5.37,0.425,0.516,0.207,1.3,2.56,0.51,0.227,0.98,2.8,0.348
1506,4,201143,Al Horford,1610612737,ATL,29.0,76,56,20,0.737,30.5,107.6,101.0,6.6,0.178,2.44,17.8,0.068,0.193,0.134,7.3,0.544,0.563,0.225,95.85,0.145,519,965,6.8,12.7,0.538,87,11,158,32,84,53,140,52,117,64,153,148,115,124,53,76,104,109,263,...,2.42,3.62,0.669,0.669,0.295,2.42,3.61,0.672,0.001,0.0,0.01,0.0,0.128,0.78,1.57,0.496,0.496,0.128,0.78,1.57,0.496,0.0,0.0,0.0,,1.41,2.72,0.517,0.522,0.219,1.38,2.67,0.517,0.004,0.03,0.05,0.5,0.352,2.22,4.3,0.517,0.531,0.319,2.11,3.89,0.541,0.033,0.12,0.41,0.29
1999,5,201143,Al Horford,1610612737,ATL,28.0,29,16,13,0.552,33.0,104.4,100.7,3.7,0.139,1.19,12.7,0.078,0.212,0.145,10.7,0.571,0.588,0.246,97.04,0.153,238,420,8.2,14.5,0.567,372,332,102,194,54,188,101,126,162,256,263,128,77,100,223,37,60,57,184,...,2.52,3.83,0.658,0.658,0.273,2.52,3.83,0.658,0.0,0.0,0.0,,0.17,1.48,2.38,0.623,0.623,0.17,1.48,2.38,0.623,0.0,0.0,0.0,,1.93,3.59,0.538,0.538,0.254,1.93,3.55,0.544,0.002,0.0,0.03,0.0,0.3,2.28,4.21,0.541,0.557,0.276,2.14,3.86,0.554,0.025,0.14,0.34,0.4
2480,4,201143,Al Horford,1610612737,ATL,27.0,74,42,32,0.568,37.3,104.8,101.5,3.3,0.149,1.63,15.6,0.084,0.229,0.158,9.6,0.545,0.56,0.221,93.61,0.143,576,1060,7.8,14.3,0.543,146,99,272,159,14,130,153,109,153,161,199,121,48,65,128,70,105,121,293,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2948,3,201143,Al Horford,1610612737,ATL,26.0,11,7,4,0.636,31.6,106.1,99.3,6.8,0.113,1.5,15.4,0.088,0.179,0.133,10.2,0.553,0.585,0.178,91.56,0.13,57,103,5.2,9.4,0.553,428,390,35,86,65,64,143,60,208,179,190,114,130,123,177,43,41,262,397,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3428,5,201143,Al Horford,1610612737,ATL,25.0,77,41,36,0.532,35.1,104.5,104.4,0.2,0.169,2.24,19.2,0.081,0.233,0.158,8.6,0.558,0.587,0.199,90.93,0.15,513,921,6.7,12.0,0.557,103,96,319,179,37,180,223,193,112,70,131,130,36,61,83,50,57,171,418,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


# Removing Players
#### Removing players who played only one year

In [218]:
# Creating a list of player IDs to keep
vet_id_list = [ID for ID, n in master_df['PLAYER_ID'].value_counts().items() if n > 1]

In [227]:
# Saving the dataframe with only "veteran" players
master_df = master_df[master_df['PLAYER_ID'].isin(vet_id_list)].reset_index(drop=True)

#### Removing players who played too few minutes over their career

In [228]:
# Creating a total minutes column
master_df['TOTAL_MIN'] = master_df['MIN'] * master_df['GP']

In [264]:
# Finding the minutes played for the 301st player each year (based on )
sum_total_min = 0
for season in master_df['SEASON'].unique():
    sum_total_min += sorted(master_df[master_df['SEASON'] == season]['TOTAL_MIN'], reverse=True)[301]
min_cutoff = sum_total_min/len(master_df['SEASON'].unique())

In [267]:
# Finding which players have played enough minutes
below_min_id_list = master_df[master_df['TOTAL_MIN'] < min_cutoff]['PLAYER_ID'].unique()
above_min_id_list = master_df[master_df['TOTAL_MIN'] >= min_cutoff]['PLAYER_ID'].unique()

In [275]:
# Keeping players who were above (or equal to) the cutoff in at least one season
master_df = master_df[master_df['PLAYER_ID'].isin(above_min_id_list)].reset_index(drop=True)

# Saving Data

In [276]:
master_df.to_csv('./data/master_df')