Essential imports

In [119]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

Essential Functions

In [120]:
"""Generates season stat projections for players. Function inputs: df, current player_id &
     current player season. Outputs: projected stats for next season"""
def player_comparison_tool(df, current_player_season, current_player_id,
                           seasons=None, stats=('pts', 'oreb', 'dreb', 'ast'),
                           n_neighbors=10):
    """Project a player's next-season stats from the most similar player-seasons.

    Every row of ``df`` is compared with the target player-season using the
    mean absolute difference of the ``<stat>_norm`` columns (this is the metric
    the notebook has always computed; it is not a Euclidean distance). The
    ``n_neighbors`` closest rows, excluding the target row itself, each
    contribute the stats of *their* following season, weighted by
    ``1 / distance``.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs ``player_id``, ``season_id`` and, for every entry of ``stats``,
        both the raw column and its ``<stat>_norm`` column. It is not modified.
    current_player_season : season id of the row to project from.
    current_player_id : player id of the row to project from.
    seasons : sequence of season ids in chronological order, optional
        Defaults to the notebook-level ``season_list``.
    stats : sequence of str, optional
        Stats used both for the similarity and for the projection.
    n_neighbors : int, optional
        Number of closest player-seasons to consider.

    Returns
    -------
    dict or None
        ``{'player_id', 'proj_season_id', 'proj_<stat>', ...}``, or ``None``
        (after printing a message) when the player-season is not in ``df``.
        A projected stat is NaN when no neighbour has a following season in
        ``df`` (this used to raise ZeroDivisionError).

    Raises
    ------
    ValueError
        If the player-season matches more than one row, or a season id is
        missing from ``seasons``.
    IndexError
        If ``current_player_season`` is the last entry of ``seasons``.
    """
    if seasons is None:
        seasons = season_list  # notebook-level chronological list
    seasons = list(seasons)
    stats = list(stats)

    is_target = ((df['season_id'] == current_player_season)
                 & (df['player_id'] == current_player_id)).values
    if not is_target.any():
        print('Can\'t find player with id {} and season {}'.format(current_player_id, current_player_season))
        return None
    if is_target.sum() > 1:
        raise ValueError('player id {} has more than one row for season {}'.format(
            current_player_id, current_player_season))

    # Everything below works on row positions so that the index of `df`
    # (possibly non-unique or a MultiIndex after groupby.apply) never matters.
    target_pos = int(np.flatnonzero(is_target)[0])
    # Report the id exactly as it is stored in the frame, as a plain Python scalar.
    current_player_id = df['player_id'].values[target_pos:target_pos + 1].tolist()[0]

    proj_season = seasons[seasons.index(current_player_season) + 1]
    print('Projecting player_id {0} for season {1}'.format(current_player_id, proj_season))

    norm_values = df[['{}_norm'.format(stat) for stat in stats]].values.astype(float)
    distances = np.abs(norm_values - norm_values[target_pos]).sum(axis=1) / len(stats)

    # Stable sort gives a deterministic order for ties; the target row is
    # removed by identity rather than by assuming it sorts first.
    ranked = np.argsort(distances, kind='stable')
    neighbours = ranked[ranked != target_pos][:n_neighbors]

    player_ids = df['player_id'].values
    season_ids = df['season_id'].values
    stat_values = df[stats].values.astype(float)
    # Floor for the distance so an identical neighbour gets a huge, finite
    # weight instead of an infinite one (which turned the result into NaN).
    min_distance = np.finfo(float).eps

    weighted_sum = np.zeros(len(stats))
    total_weight = 0.0
    for pos in neighbours:
        season_pos = seasons.index(season_ids[pos])
        if season_pos + 1 >= len(seasons):
            continue  # no later season exists to learn from
        follow_ups = np.flatnonzero((player_ids == player_ids[pos])
                                    & (season_ids == seasons[season_pos + 1]))
        if follow_ups.size == 0:
            continue  # the neighbour has no row for the following season
        # If the follow-up season is duplicated, take the copy ranked closest
        # to the target, which is the row the previous ranked scan returned.
        follow_up = follow_ups[np.argmin(distances[follow_ups])]
        weight = 1.0 / max(distances[pos], min_distance)
        weighted_sum += stat_values[follow_up] * weight
        total_weight += weight

    projected_stats = {'player_id': current_player_id, 'proj_season_id': proj_season}
    for stat, total in zip(stats, weighted_sum):
        projected_stats['proj_' + stat] = (
            float(total / total_weight) if total_weight > 0 else float('nan'))
    return projected_stats
#-------------------------------


"""Function to normalize stats and add the new normalized column """
def vorp(df):
    """Return ``df`` with a min-max normalised ``<col>_norm`` column per stat.

    One column is added for every name in the notebook-level ``cols_to_norm``,
    computed with ``normalize``. The input frame is left untouched: this
    function is used as a ``groupby(...).apply`` callback, and pandas does not
    support callbacks that mutate the group they receive.

    NOTE(review): despite the name, nothing here computes a
    value-over-replacement figure; it only adds normalised columns.
    """
    normalised = {
        '{}_norm'.format(col_name): normalize(df[col_name])
        for col_name in cols_to_norm
    }
    return df.assign(**normalised)
#--------------------


"""Function to normalize individual stats """
def normalize(col):
    """Min-max scale ``col`` to the range [0, 1].

    ``col`` is a pandas Series (or NumPy array). A constant column has a zero
    range; it is mapped to 0.0 everywhere instead of the all-NaN result that
    ``0 / 0`` would produce, so it cannot poison later distance calculations.
    """
    low = col.min()
    span = col.max() - low
    # Swap a zero range for 1: the numerator is already 0 for those values.
    safe_span = np.where(span == 0, 1, span)
    return (col - low) / safe_span
#---------------------------


"""Function to calculate euclidean distance between two points """ 
def calc_distance(u, v):
    """Return the Euclidean distance between ``u`` and ``v``.

    For array inputs this is the square root of the summed squared
    differences; for two scalars it reduces to their absolute difference.
    """
    squared_gap = (u - v) ** 2
    total = np.sum(squared_gap)
    return np.sqrt(total)
#--------------------------------


"""Function to find the player data based on player id and season id """ 
def find_player(df, player_id, season):
    """Return the first row of ``df`` for ``player_id`` in ``season``.

    The row is returned in the form produced by ``DataFrame.itertuples`` (so
    columns are read as attributes, e.g. ``row.pts``); ``None`` is returned
    when no row matches.
    """
    # Filter with a vectorised mask instead of testing every row in Python;
    # only the (normally single-row) match is turned into a tuple.
    matches = df[(df['season_id'] == season) & (df['player_id'] == player_id)]
    return next(matches.itertuples(), None)

In [121]:
# Chronological season ids from '1996-97' to '2018-19': the starting year
# followed by the last two digits of the next year.
season_list = [
    '{}-{:02d}'.format(start_year, (start_year + 1) % 100)
    for start_year in range(1996, 2019)
]

In [122]:
# Stat columns that receive a min-max normalised `<name>_norm` companion.
cols_to_norm = ['pts', 'oreb', 'dreb', 'ast']

In [123]:
# Season whose player rows are used as the starting point for the projections.
current_player_season = "2017-18"

In [124]:
# Accumulator: one projection dict per player is appended further down.
final_projections = list()

In [125]:
# Load the per-game player stats table from the csv folder
# (the first line of the file holds the column names).
df = pd.read_csv(
    '../nba-stats-csv/player_general_traditional_per_game_data.csv',
    header=0,
)

In [126]:
# Leave the 2018-19 season out of the data the projections are built from.
df_new = df.loc[df['season_id'] != '2018-19']

In [127]:
# Discard every row that has a missing value in any column.
df_cleaned = df_new.dropna(axis=0, how='any')

In [128]:
# Keep only player seasons with strictly more than `min_gp` games played.
min_gp = 10
df_filter = df_cleaned.loc[df_cleaned['gp'].gt(min_gp)]

In [129]:
# Run `vorp` once per season so each `<stat>_norm` column is min-max scaled
# against the other players of the same season rather than the whole table.
# NOTE(review): the index/columns returned by groupby(...).apply depend on the
# installed pandas version - confirm `season_id` is still a column afterwards.
df_final = df_filter.groupby(['season_id']).apply(vorp) # add the per-season normalised stat columns

## Simulate

In [130]:
# Player info table; its `season_id` and `player_id` columns decide below
# which players get a projection.
player_df = pd.read_csv(filepath_or_buffer='../nba-stats-csv/player_info.csv')

In [131]:
# Player id / player name table, merged onto the projections by `player_id`
# further down.
df_names = pd.read_csv(filepath_or_buffer='../nba-stats-csv/player_id_player_name.csv')

In [132]:
# Restrict the player info table to the 2017-18 season.
player_filter = player_df.loc[player_df['season_id'] == '2017-18']

In [133]:
# Plain Python list of the ids of the players selected above.
player_ids_2018 = list(player_filter['player_id'])

In [134]:
# Display the ids that the projection loop below iterates over.
# NOTE(review): this prints the whole list; a slice would keep the output short.
player_ids_2018 #all players we will use for modelling

[201166.0,
 203932.0,
 1626151.0,
 1628935.0,
 1627846.0,
 203940.0,
 201143.0,
 2744.0,
 202329.0,
 1626210.0,
 202692.0,
 1628409.0,
 203518.0,
 1627936.0,
 203458.0,
 1627816.0,
 1628035.0,
 203459.0,
 101161.0,
 203083.0,
 2738.0,
 201281.0,
 203460.0,
 101106.0,
 1626150.0,
 1628510.0,
 203952.0,
 1627790.0,
 1626148.0,
 203076.0,
 201229.0,
 1628469.0,
 1628499.0,
 203382.0,
 201167.0,
 203085.0,
 202340.0,
 1628389.0,
 203463.0,
 1628500.0,
 1627732.0,
 202687.0,
 201933.0,
 1626246.0,
 201628.0,
 1626171.0,
 203992.0,
 202711.0,
 203078.0,
 201148.0,
 1627742.0,
 201943.0,
 203464.0,
 1627362.0,
 1627744.0,
 201572.0,
 203998.0,
 1627854.0,
 1627741.0,
 203710.0,
 203468.0,
 101139.0,
 1628403.0,
 1626166.0,
 1627747.0,
 2546.0,
 1626224.0,
 202718.0,
 101112.0,
 1628429.0,
 1627767.0,
 1627778.0,
 1628449.0,
 1626191.0,
 101108.0,
 203991.0,
 203469.0,
 202332.0,
 201147.0,
 202709.0,
 201584.0,
 1626245.0,
 1626156.0,
 201571.0,
 1628391.0,
 1626177.0,
 1627745.0,
 203081.0,


In [135]:
# Project every selected player from their 2017-18 season.
# The accumulator is rebuilt here so that re-running this cell replaces the
# previous results instead of appending a second copy of every projection.
final_projections = []
current_player_season = '2017-18'
for baller_id in player_ids_2018:
    current_player_id = baller_id
    projections = player_comparison_tool(df_final, current_player_season, current_player_id)
    if projections is None:
        # The player has no row for this season in df_final; the function
        # has already printed a message, so just move on.
        continue
    final_projections.append(projections)

Projecting player_id 201166.0 for season 2018-19
Projecting player_id 203932.0 for season 2018-19
Can't find player with id 1626151.0 and season 2017-18
Can't find player with id 1628935.0 and season 2017-18
Projecting player_id 1627846.0 for season 2018-19
Can't find player with id 203940.0 and season 2017-18
Projecting player_id 201143.0 for season 2018-19
Projecting player_id 2744.0 for season 2018-19
Projecting player_id 202329.0 for season 2018-19
Can't find player with id 1626210.0 and season 2017-18
Projecting player_id 202692.0 for season 2018-19
Projecting player_id 1628409.0 for season 2018-19
Projecting player_id 203518.0 for season 2018-19
Projecting player_id 1627936.0 for season 2018-19
Projecting player_id 203458.0 for season 2018-19
Projecting player_id 1627816.0 for season 2018-19
Projecting player_id 1628035.0 for season 2018-19
Projecting player_id 203459.0 for season 2018-19
Projecting player_id 101161.0 for season 2018-19
Projecting player_id 203083.0 for season 20

Projecting player_id 202066.0 for season 2018-19
Projecting player_id 203914.0 for season 2018-19
Projecting player_id 1627780.0 for season 2018-19
Projecting player_id 201588.0 for season 2018-19
Can't find player with id 1627777.0 and season 2017-18
Projecting player_id 1627834.0 for season 2018-19
Projecting player_id 101123.0 for season 2018-19
Can't find player with id 1628492.0 and season 2017-18
Projecting player_id 203507.0 for season 2018-19
Projecting player_id 203922.0 for season 2018-19
Projecting player_id 201609.0 for season 2018-19
Can't find player with id 202330.0 and season 2017-18
Projecting player_id 203476.0 for season 2018-19
Projecting player_id 202328.0 for season 2018-19
Projecting player_id 1627824.0 for season 2018-19
Projecting player_id 203084.0 for season 2018-19
Projecting player_id 202355.0 for season 2018-19
Projecting player_id 1627740.0 for season 2018-19
Projecting player_id 203546.0 for season 2018-19
Projecting player_id 101133.0 for season 2018-19

Projecting player_id 201601.0 for season 2018-19
Projecting player_id 203512.0 for season 2018-19
Can't find player with id 1626242.0 and season 2017-18
Projecting player_id 202337.0 for season 2018-19
Projecting player_id 1628379.0 for season 2018-19
Projecting player_id 1628436.0 for season 2018-19
Can't find player with id 2736.0 and season 2017-18
Projecting player_id 1627781.0 for season 2018-19
Projecting player_id 1627763.0 for season 2018-19
Projecting player_id 1627098.0 for season 2018-19
Projecting player_id 1626259.0 for season 2018-19
Projecting player_id 1627736.0 for season 2018-19
Projecting player_id 1628370.0 for season 2018-19
Can't find player with id 1628493.0 and season 2017-18
Projecting player_id 1938.0 for season 2018-19
Can't find player with id 202705.0 and season 2017-18
Projecting player_id 201188.0 for season 2018-19
Projecting player_id 101162.0 for season 2018-19
Projecting player_id 201158.0 for season 2018-19
Projecting player_id 1627875.0 for season 2

Projecting player_id 1626203.0 for season 2018-19
Projecting player_id 2772.0 for season 2018-19
Projecting player_id 202344.0 for season 2018-19
Projecting player_id 203504.0 for season 2018-19
Projecting player_id 1626168.0 for season 2018-19
Can't find player with id 203590.0 and season 2017-18
Projecting player_id 202684.0 for season 2018-19
Projecting player_id 203584.0 for season 2018-19
Projecting player_id 1627786.0 for season 2018-19
Projecting player_id 1628463.0 for season 2018-19
Projecting player_id 1628416.0 for season 2018-19
Projecting player_id 203898.0 for season 2018-19
Projecting player_id 204020.0 for season 2018-19
Can't find player with id 1628399.0 and season 2017-18
Projecting player_id 1627755.0 for season 2018-19
Projecting player_id 203092.0 for season 2018-19
Projecting player_id 201936.0 for season 2018-19
Projecting player_id 1627820.0 for season 2018-19
Projecting player_id 2199.0 for season 2018-19
Projecting player_id 1626145.0 for season 2018-19
Can't

In [136]:
# One row per projected player, one column per key of the projection dicts.
test_projections = pd.DataFrame(data=final_projections)

In [137]:
# Attach the names table to the projections; players whose id is missing
# from either table are dropped by the inner join.
final_stat_df = test_projections.merge(df_names, on='player_id', how='inner')

In [138]:

# Columns kept in the slimmed-down player info export.
player_info_columns = ['player_name', 'player_id', 'proj_season_id']

In [139]:
# Identification columns only, without the projected stats.
player_info = final_stat_df.loc[:, player_info_columns]

In [140]:
# Write both result tables to the csv folder without the index column.
final_stat_df.to_csv(path_or_buf='../nba-stats-csv/player_proj_df.csv', index=False)
player_info.to_csv(path_or_buf='../nba-stats-csv/player_info_df.csv', index=False)