In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import requests
from pulp import *
import pulp
import warnings 
warnings.filterwarnings('ignore')

### Upload and Clean Player Stats

In [3]:
# 2017/2018 Player Stats
url = 'https://www.basketball-reference.com/leagues/NBA_2018_per_game.html#per_game_stats::none'
html = requests.get(url).content
df_list = pd.read_html(html)
playerstats_17_18 = df_list[-1]
playerstats_17_18['Season']='2017-2018 Regular Season'
playerstats_17_18 = playerstats_17_18.replace({'OKC': 'Oklahoma City', 'BRK': 'Brooklyn','MIA': 'Miami','ORL': 'Orlando',
                'MIN': 'Minnesota', 'SAS': 'San Antonio', 'BOS': 'Boston', 'NOP': 'New Orleans',
                'POR': 'Portland', 'PHI': 'Philadelphia', 'HOU': 'Houston', 'IND': 'Indiana',
                'MIL': 'Milwaukee', 'TOR': 'Toronto', 'CHI': 'Chicago', 'DEN': 'Denver',
                'ATL': 'Atlanta', 'CHO': 'Charlotte', 'NYK': 'New York', 'LAL': 'LA Lakers',
                'DAL': 'Dallas', 'WAS': 'Washington', 'GSW': 'Golden State', 'LAC': 'LA Clippers', 
                'PHO': 'Phoenix', 'SAC': 'Sacramento', 'DET': 'Detroit', 'UTA': 'Utah',
                'MEM': 'Mamphis','CLE': 'Cleveland'})
playerstats_17_18 = playerstats_17_18.rename(columns={"Tm": "Team"})
playerstats_17_18 = playerstats_17_18[playerstats_17_18.Rk.str.contains("Rk") == False]

In [4]:
# 2016/2017 Player Stats
url = 'https://www.basketball-reference.com/leagues/NBA_2017_per_game.html#per_game_stats::none'
html = requests.get(url).content
df_list = pd.read_html(html)
playerstats_16_17 = df_list[-1]
playerstats_16_17 = playerstats_16_17.replace({'OKC': 'Oklahoma City', 'BRK': 'Brooklyn','MIA': 'Miami','ORL': 'Orlando',
                'MIN': 'Minnesota', 'SAS': 'San Antonio', 'BOS': 'Boston', 'NOP': 'New Orleans',
                'POR': 'Portland', 'PHI': 'Philadelphia', 'HOU': 'Houston', 'IND': 'Indiana',
                'MIL': 'Milwaukee', 'TOR': 'Toronto', 'CHI': 'Chicago', 'DEN': 'Denver',
                'ATL': 'Atlanta', 'CHO': 'Charlotte', 'NYK': 'New York', 'LAL': 'LA Lakers',
                'DAL': 'Dallas', 'WAS': 'Washington', 'GSW': 'Golden State', 'LAC': 'LA Clippers', 
                'PHO': 'Phoenix', 'SAC': 'Sacramento', 'DET': 'Detroit', 'UTA': 'Utah',
                'MEM': 'Mamphis','CLE': 'Cleveland'})
playerstats_16_17 = playerstats_16_17.rename(columns={"Tm": "Team"})
playerstats_16_17['Season']='2016-2017 Regular Season'
playerstats_16_17 = playerstats_16_17[playerstats_16_17.Rk.str.contains("Rk") == False]

### Upload and Clean Team Stats

In [5]:
# 2017-2018 Team Stats
url = 'http://www.espn.com/nba/hollinger/teamstats'
html = requests.get(url).content
df_list = pd.read_html(html)
team_stats_17_18 = df_list[-1]
team_stats_17_18 = team_stats_17_18.drop(0, 1)
team_stats_17_18 = team_stats_17_18.drop(0, 0)
team_stats_17_18 = team_stats_17_18.drop(1, 0)
team_stats_17_18 = team_stats_17_18.rename(columns={1: "Team", 
                                                    2:'team_pace',
                                                    3: 'team_ast',
                                                    4: 'team_to',
                                                    5: 'team_orr',
                                                    6: 'team_drr',
                                                    7: 'team_rebr',
                                                    8: 'team_eff%',
                                                    9: 'team_ts%',
                                                    10: 'team_offeff',
                                                    11: 'team_defeff'})
team_stats_17_18['Season']='2017-2018 Regular Season'

In [6]:
# 2016-2017 Team Stats
url = 'http://www.espn.com/nba/hollinger/teamstats/_/year/2017'
html = requests.get(url).content
df_list = pd.read_html(html)
team_stats_16_17 = df_list[-1]
team_stats_16_17 = team_stats_16_17.drop(0, 1)
team_stats_16_17 = team_stats_16_17.drop(0, 0)
team_stats_16_17 = team_stats_16_17.drop(1, 0)
team_stats_16_17 = team_stats_16_17.rename(columns={1: "Team", 
                                                    2:'team_pace',
                                                    3: 'team_ast',
                                                    4: 'team_to',
                                                    5: 'team_orr',
                                                    6: 'team_drr',
                                                    7: 'team_rebr',
                                                    8: 'team_eff%',
                                                    9: 'team_ts%',
                                                    10: 'team_offeff',
                                                    11: 'team_defeff'})
team_stats_16_17['Season']='2016-2017 Regular Season'

### Upload and Clean Draft Kings Data

In [7]:
# DK_16_17
dk_16_17 = pd.read_excel('/Users/sauce/Desktop/DraftKings/RawData/NBA-2016-2017-DFS-Dataset.xlsx')
dk_16_17 = dk_16_17.rename(index=str, columns={"DATASET": "season", 
                                   "POSITION": "dk_position", 
                                   'Unnamed: 9': 'fanduual_Pos',
                                   'Unnamed: 10': 'yahoo_pos',
                                   'SALARY ($)': 'dk_salary',
                                   'Unnamed: 15': 'fanduel_points_scored',
                                   'Yahoo-$': 'yahoo_points_scored',
                                   'Unnamed: 12': 'fanDuel-$',
                                   'Unnamed: 13': 'yahoo-$',
                                   'FANTASY POINTS SCORED': 'draftkings_points_scored',
                                   'Unnamed: 16': 'yahoo_points_scored'
                                 })
dk_16_17 = dk_16_17.iloc[1:]
dk_16_17 = dk_16_17.drop(columns=['fanduual_Pos', 
                      'yahoo_pos', 
                      'fanDuel-$', 
                      'yahoo-$', 
                      'fanduel_points_scored', 
                      'yahoo_points_scored'])
dk_16_17 = dk_16_17.dropna(subset=["dk_position"])
dk_16_17 = dk_16_17.dropna(subset=["USAGE \nRATE (%)"])
dk_16_17 = dk_16_17.round({'USAGE \nRATE (%)': 2})
dk_16_17 = dk_16_17[dk_16_17['USAGE \nRATE (%)'] < 100]

In [8]:
dk_17_18 = pd.read_excel('/Users/sauce/Desktop/DraftKings/RawData/NBA-2017-2018-DFS-Dataset.xlsx')
dk_17_18 = dk_17_18.rename(index=str, columns={"DATASET": "season", 
                                   "POSITION": "dk_position", 
                                   'Unnamed: 9': 'fanduual_Pos',
                                   'Unnamed: 10': 'yahoo_pos',
                                   'SALARY ($)': 'dk_salary',
                                   'Unnamed: 15': 'fanduel_points_scored',
                                   'Yahoo-$': 'yahoo_points_scored',
                                   'Unnamed: 12': 'fanDuel-$',
                                   'Unnamed: 13': 'yahoo-$',
                                   'FANTASY POINTS SCORED': 'draftkings_points_scored',
                                   'Unnamed: 16': 'yahoo_points_scored'
                                 })
dk_17_18 = dk_17_18.iloc[1:]
dk_17_18 = dk_17_18.drop(columns=['fanduual_Pos', 
                      'yahoo_pos', 
                      'fanDuel-$', 
                      'yahoo-$', 
                      'fanduel_points_scored', 
                      'yahoo_points_scored'])
dk_17_18 = dk_17_18.dropna(subset=["dk_position"])
dk_17_18 = dk_17_18.dropna(subset=["USAGE \nRATE (%)"])
dk_17_18 = dk_17_18.round({'USAGE \nRATE (%)': 2})
dk_17_18 = dk_17_18[dk_17_18['USAGE \nRATE (%)'] < 100]  

### Upload and Clean Rest Data

In [9]:
# Box Score Stats 2017-2018
BS_17_18 = pd.read_excel('/Users/sauce/Desktop/DraftKings/RawData/2017-2018_NBA_Box_Score_Team-Stats.xlsx')
BS_17_18 = BS_17_18.drop(columns=['VENUE', '1Q', '2Q', '3Q', '4Q', 'OT1',
        'OT2', 'OT3', 'OT4', 'F', 'MIN', 'FG', 'FGA', '3P', '3PA', 'FT',
        'FTA', 'OR', 'DR', 'TOT', 'A', 'PF', 'ST', 'TO', 'TO TO', 'BL',
        'PTS', 'POSS', 'PACE', 'OEFF', 'DEFF','STARTING LINEUPS', 'Unnamed: 36', 'Unnamed: 37', 'Unnamed: 38',
        'Unnamed: 39', 'MAIN REF', 'CREW', 'OPENING ODDS',
        'OPENING SPREAD', 'OPENING TOTAL', 'MOVEMENTS', 'CLOSING ODDS',
        'MONEYLINE', 'HALFTIME', 'BOX SCORE', 'ODDS'])
BS_17_18 = BS_17_18.rename(columns={"TEAMS": "Team"})
restTeam = BS_17_18['Team']
restTeamEven = restTeam[::2]
restTeamOdd = restTeam[1::2]
rte = []
for i in restTeamEven:
    rte.append(i)
rto = []
for i in restTeamOdd:
    rto.append(i)
rest_opp = [None]*(len(rte)+len(rto))
rest_opp[::2] = rto
rest_opp[1::2] = rte
BS_17_18["Opponent"] = rest_opp

In [10]:
# Box Score Stats 2016-2017
BS_16_17 = pd.read_excel('/Users/sauce/Desktop/DraftKings/RawData/2016-2017_NBA_Box_Score_Team-Stats.xlsx')
BS_16_17 = BS_16_17.drop(columns=['VENUE', '1Q', '2Q', '3Q', '4Q', 'OT1',
        'OT2', 'OT3', 'OT4', 'F', 'MIN', 'FG', 'FGA', '3P', '3PA', 'FT',
        'FTA', 'OR', 'DR', 'TOT', 'A', 'PF', 'ST', 'TO', 'TO TO', 'BL',
        'PTS', 'POSS', 'PACE', 'OEFF', 'DEFF','STARTING LINEUPS', 'Unnamed: 36', 'Unnamed: 37', 'Unnamed: 38',
        'Unnamed: 39', 'MAIN REF', 'CREW', 'OPENING ODDS',
        'OPENING SPREAD', 'OPENING TOTAL', 'MOVEMENTS', 'CLOSING ODDS',
        'MONEYLINE', 'HALFTIME', 'BOX SCORE', 'ODDS'])
BS_16_17 = BS_16_17.rename(columns={"TEAMS": "Team"})
restTeam = BS_16_17['Team']
restTeamEven = restTeam[::2]
restTeamOdd = restTeam[1::2]
rte = []
for i in restTeamEven:
    rte.append(i)
rto = []
for i in restTeamOdd:
    rto.append(i)
rest_opp = [None]*(len(rte)+len(rto))
rest_opp[::2] = rto
rest_opp[1::2] = rte
BS_16_17["Opponent"] = rest_opp

## Combine Data

In [11]:
BoxScore = BS_16_17.append(BS_17_18)
DraftKings = dk_16_17.append(dk_17_18)
TeamStats = team_stats_16_17.append(team_stats_17_18)
PlayerStats = playerstats_16_17.append(playerstats_17_18)
Opponents = team_stats_16_17.append(team_stats_17_18)

### Clean and Join Data

In [12]:
Opponents = Opponents.rename(columns = {'Team' : 'opponent',
                                       'team_pace':'opp_pace',
                                       'team_ast': 'opp_ast',
                                       'team_to': 'opp_tov',
                                       'team_orr':'opp_orr',
                                       'team_drr': 'opp_drr',
                                       'team_rebr': 'opp_reb_rate',
                                       'team_eff%': 'opp_eff%',
                                       'team_ts%': 'opp_ts%',
                                       'team_offeff': 'opp_offeff',
                                       'team_defeff': 'opp_defeff'})
DraftKings = DraftKings.rename(columns ={'DATE':'game_date',
                                        'PLAYER': 'player',
                                        'TEAM': 'team',
                                        'OPPONENT': 'opponent',
                                        'VENUE\nR/H': 'venue',
                                        'MINUTES':'minutes',
                                        'USAGE \nRATE (%)': 'usage_rate',
                                        'dk_position':'position',
                                        'dk_salary': 'salary',
                                        'draftkings_points_scored': 'fantasy_points'})
BoxScore = BoxScore.rename(columns = {'DATASET':'season',
                                     'DATE':'game_date',
                                     'Team': 'team',
                                     'REST DAYS': 'rest',
                                     'Opponent':'opponent'})
df1 = pd.merge(DraftKings, BoxScore, on = ['season', 'game_date', 'team', 'opponent'])

PlayerStats = PlayerStats.drop(columns=['Rk','G', 'GS', 'MP', 'FG', 'FGA',
       'FG%', '3PA', '3P%', '2P', '2PA', '2P%', 'eFG%', 'FT', 'FTA',
       'FT%', 'ORB', 'DRB', 'PF', 'Pos'])

PlayerStats = PlayerStats.rename(columns = {'Player':'player',
                                            'Age':'age', 
                                            'Team':'team', 
                                            '3P':'avg_threes', 
                                            'TRB': 'avg_reb', 
                                            'AST': 'avg_ast', 
                                            'STL': 'avg_stl', 
                                            'BLK':'avg_blk',
                                            'TOV':'avg_tov', 
                                            'PS/G': 'avg_pts', 
                                            'Season': 'season'})

df2 = pd.merge(df1, PlayerStats, on = ['season', 'player', 'team'])

TeamStats = TeamStats.drop(columns = ['team_eff%', 'team_ts%', 'team_orr', 'team_drr'])
TeamStats = TeamStats.rename(columns = {'Season': 'season', 'Team':'team', 'team_rebr': 'team_reb_rate'
                                       , 'team_to': 'team_tov'})

df3 = pd.merge(df2, TeamStats, on = ['team', 'season'])

Opponents = Opponents.drop(columns = ['opp_orr', 'opp_drr', 'opp_eff%', 'opp_ts%'])
Opponents = Opponents.rename(columns = {'Season':'season'})

data1 = pd.merge(df3, Opponents, on = ['opponent', 'season'])

data2 = pd.read_csv('/Users/sauce/Desktop/DraftKings/ready_data/Mock4.csv')

data = data1.append(data2)
data = data.drop(columns = ['age', 'opp_rebr', 'opp_to', 'team_rebr', 'team_to'])

data[['avg_ast', 'avg_blk', 'avg_pts', 'avg_reb', 'avg_stl',
       'avg_threes', 'avg_tov', 'fantasy_points',
       'opp_ast', 'opp_defeff', 'opp_offeff', 'opp_pace', 'opp_reb_rate',
       'opp_tov',  'salary',
        'team_ast', 'team_defeff', 'team_offeff',
       'team_pace', 'team_reb_rate', 'team_tov' ]] = data[['avg_ast', 'avg_blk', 'avg_pts', 'avg_reb', 'avg_stl',
       'avg_threes', 'avg_tov', 'fantasy_points',
       'opp_ast', 'opp_defeff', 'opp_offeff', 'opp_pace', 'opp_reb_rate',
       'opp_tov',  'salary',
        'team_ast', 'team_defeff', 'team_offeff',
       'team_pace', 'team_reb_rate', 'team_tov' ]].apply(pd.to_numeric)

data17 = data.groupby(['player', 'opponent', 'season']).mean()
data17 = data17.reset_index()
data17 = data17.drop(columns = ['minutes', 'opp_ast', 'opp_defeff', 'opp_offeff', 'opp_pace',
       'opp_reb_rate', 'opp_tov', 'salary', 'team_ast', 'team_defeff',
       'team_offeff', 'team_pace', 'team_reb_rate', 'team_tov',
       'usage_rate', 'avg_ast', 'avg_blk', 'avg_pts',
       'avg_reb', 'avg_stl', 'avg_threes', 'avg_tov'])
data17 = data17.rename(columns = {'fantasy_points' : 'avg_points_vs_opp'})
data = pd.merge(data, data17, on= ['player', 'opponent', 'season'])

data18 = data.groupby(['season', 'opponent', 'position']).mean()
data18 = data18.reset_index()
data18 = data18.drop(columns = ['minutes', 'opp_ast', 'opp_defeff', 'opp_offeff', 'opp_pace',
       'opp_reb_rate', 'opp_tov', 'salary', 'team_ast', 'team_defeff',
       'team_offeff', 'team_pace', 'team_reb_rate', 'team_tov',
       'usage_rate', 'avg_ast', 'avg_blk', 'avg_pts',
       'avg_reb', 'avg_stl', 'avg_threes', 'avg_tov', 'avg_points_vs_opp'])
data18 = data18.rename(columns = {'fantasy_points' : 'opp_pos_avg'})
data = pd.merge(data, data18, on= ['position', 'opponent', 'season'])

### Feature Engineering

In [13]:
#Avg 10 data
data["game_date"] = pd.to_datetime(data.game_date)
data.set_index('game_date', inplace=True)
data.sort_index(inplace=True)
df_rolling = data.groupby(['player']).rolling(10).mean().rename(columns={'season':'season1', 'player':'player1'}).reset_index()
data = data.reset_index()
df_rolling = df_rolling.drop(columns=['player'])
df_rolling = df_rolling.rename(columns = {'season1': 'season', 'player1': 'player'})
data = pd.merge(data, df_rolling, on=['player', 'season', 'game_date'], left_index= True , suffixes=['', '_AVG10'])

# Avg 3 data
data["game_date"] = pd.to_datetime(data.game_date)
data.set_index('game_date', inplace=True)
data.sort_index(inplace=True)
df_rolling = data.groupby(['player']).rolling(3).mean().rename(columns={'season':'season1', 'player':'player1'}).reset_index()
data = data.reset_index()
df_rolling = df_rolling.drop(columns=['player'])
df_rolling = df_rolling.rename(columns = {'season1': 'season', 'player1': 'player'})
df = pd.merge(data, df_rolling, on=['player', 'season', 'game_date'], left_index= True , suffixes=['', '_AVG3'])

# Drop useless features
df = df.drop(columns=['team_AVG10','opponent_AVG10','venue_AVG10','rest_AVG10',
                      'avg_threes_AVG10','avg_reb_AVG10','avg_ast_AVG10','avg_stl_AVG10',
                      'avg_blk_AVG10','avg_tov_AVG10','avg_pts_AVG10','avg_points_vs_opp_AVG10',
                      'team_pace_AVG10','team_ast_AVG10','team_tov_AVG10','team_reb_rate_AVG10',
                      'team_defeff_AVG10','opp_pace_AVG10','opp_ast_AVG10','opp_tov_AVG10',
                      'opp_reb_rate_AVG10','opp_offeff_AVG10','salary_AVG10',
                      'team_AVG3', 'opponent_AVG3', 'venue_AVG3','rest_AVG3',
                      'avg_threes_AVG3', 'avg_reb_AVG3','avg_ast_AVG3', 'avg_stl_AVG3', 
                      'avg_blk_AVG3', 'avg_tov_AVG3','avg_pts_AVG3', 
                      'avg_points_vs_opp_AVG3', 'team_pace_AVG3','team_ast_AVG3', 
                      'team_tov_AVG3', 'team_reb_rate_AVG3','team_defeff_AVG3', 
                      'opp_pace_AVG3','opp_ast_AVG3', 'opp_tov_AVG3', 'opp_reb_rate_AVG3',
                      'opp_offeff_AVG3', 'opp_pos_avg_AVG3','salary_AVG3',
                      'team_AVG10_AVG3','opponent_AVG10_AVG3', 'venue_AVG10_AVG3', 
                      'minutes_AVG10_AVG3','usage_rate_AVG10_AVG3', 'rest_AVG10_AVG3',
                      'avg_threes_AVG10_AVG3', 'avg_reb_AVG10_AVG3',
                      'avg_ast_AVG10_AVG3', 'avg_stl_AVG10_AVG3', 'avg_blk_AVG10_AVG3',
                      'avg_tov_AVG10_AVG3', 'avg_pts_AVG10_AVG3',
                      'avg_points_vs_opp_AVG10_AVG3', 'team_pace_AVG10_AVG3',
                      'team_ast_AVG10_AVG3', 'team_tov_AVG10_AVG3',
                      'team_reb_rate_AVG10_AVG3', 'team_offeff_AVG10_AVG3',
                      'team_defeff_AVG10_AVG3', 'opp_pace_AVG10_AVG3',
                      'opp_ast_AVG10_AVG3', 'opp_tov_AVG10_AVG3',
                      'opp_reb_rate_AVG10_AVG3', 'opp_offeff_AVG10_AVG3',
                      'opp_defeff_AVG10_AVG3', 'opp_pos_avg_AVG10_AVG3',
                      'salary_AVG10_AVG3', 'fantasy_points_AVG10_AVG3', 'position_AVG10_AVG3', 'position_AVG3', 'position_AVG10'])

# Fill NAs
df['fantasy_points_AVG3'] = df['fantasy_points_AVG3'].fillna(df.groupby(['player', 'season'])['fantasy_points'].transform('mean'))
df['opp_defeff_AVG3'] = df['opp_defeff_AVG3'].fillna(df.groupby(['opponent', 'season'])['opp_defeff'].transform('mean'))
df['team_offeff_AVG3'] = df['team_offeff_AVG3'].fillna(df.groupby(['team', 'season'])['team_offeff'].transform('mean'))
df['usage_rate_AVG3'] = df['usage_rate_AVG3'].fillna(df.groupby(['player', 'season'])['usage_rate'].transform('mean'))
df['fantasy_points_AVG10'] = df['fantasy_points_AVG10'].fillna(df.groupby(['player', 'season'])['fantasy_points'].transform('mean'))
df['minutes_AVG3'] = df['minutes_AVG3'].fillna(df.groupby(['player', 'season'])['minutes'].transform('mean'))
df['opp_pos_avg_AVG10'] = df['opp_pos_avg_AVG10'].fillna(df.groupby(['opponent', 'season'])['opp_pos_avg'].transform('mean'))
df['opp_defeff_AVG10'] = df['opp_defeff_AVG10'].fillna(df.groupby(['opponent', 'season'])['opp_defeff'].transform('mean'))
df['team_offeff_AVG10'] = df['team_offeff_AVG10'].fillna(df.groupby(['team', 'season'])['team_offeff'].transform('mean'))
df['usage_rate_AVG10'] = df['usage_rate_AVG10'].fillna(df.groupby(['player', 'season'])['usage_rate'].transform('mean'))
df['minutes_AVG10'] = df['minutes_AVG10'].fillna(df.groupby(['player', 'season'])['minutes'].transform('mean'))

df = df.dropna()

# # #Scale data
# # from sklearn.preprocessing import StandardScaler
# # sc_X = StandardScaler()
# # X_train = sc_X.fit_transform(X)

### Run Multiple Linear Regression Model

In [None]:
#Drop non predictive columns 
df2 = df.drop(columns=['game_date', 'minutes', 'usage_rate', 'position', 'season', 'player', 'team', 'opponent',
                    'salary'])
# Encode Dummies
df2 = pd.get_dummies(df2, columns=['venue', 'rest'], drop_first=True)

# Grab Target Variable and remove it from data.
y = df2['fantasy_points']
X = df2.drop(columns = ['fantasy_points'])

# #Scale data
# from sklearn.preprocessing import StandardScaler
# sc_X = StandardScaler()
# X_train = sc_X.fit_transform(X)


from sklearn.cross_validation import train_test_split
X_train2, X_test2, y_train2, y_test2 = train_test_split(X, y, test_size = 0.2, random_state = 0)

from sklearn.linear_model import LinearRegression
Mregressor2 = LinearRegression(normalize=True)
Mregressor2.fit(X_train2, y_train2)

y_pred2 = Mregressor2.predict(X_test2)

from sklearn import metrics
print('Mean Squared Error:', metrics.mean_squared_error(y_test2, y_pred2))
print('R2 Score:', metrics.r2_score(y_test2, y_pred2))


In [225]:
# Load mock data with all Rest variables.
data2 = pd.read_csv('/Users/sauce/Desktop/DraftKings/mock5.csv')

data = df.append(data2)

data = data.set_index(pd.DatetimeIndex(data['game_date']))

data2 = data.loc['2016-12-12'] # Any date can be selected here.

data1 = data.loc['2011-11-11']

data = data2.append(data1)

data = data.rename(columns = {'game_date': 'game_date1'})

data = data.reset_index()

data = data.drop(columns = ['game_date1'])

df = data

In [280]:
# Save varibales for final table
players = df['player']
position = df['position']
salary = df['salary']

#Drop non predictive columns 
df2 = df.drop(columns=['game_date', 'minutes', 'usage_rate', 'position', 'season', 'player', 'team', 'opponent',
                    'salary'])
# Encode Dummies
df2 = pd.get_dummies(df2, columns=['venue', 'rest'], drop_first=True)

# Grab Target Variable and remove it from data.
y = df2['fantasy_points']
X = df2.drop(columns = ['fantasy_points'])

pred = Mregressor2.predict(X)

In [282]:
print('Mean Squared Error:', metrics.mean_squared_error(pred, y))
print('R2 Score:', metrics.r2_score(y, pred))

Mean Squared Error: 34.863818816567594
R2 Score: 0.8298004965496585


In [283]:
table = pd.DataFrame({'Player': players, 'Position':position, 'Salary': salary, 'Projected_Points':pred})

In [284]:
df = table
df = table.reset_index()

In [285]:
import pulp

In [286]:
table

Unnamed: 0,Player,Position,Projected_Points,Salary
0,Kemba Walker,PG,27.796,7500.000
1,Ben McLemore,SG,9.903,3000.000
2,D'Angelo Russell,PG,17.471,6300.000
3,Tony Snell,SG/SF,17.103,3300.000
4,Meyers Leonard,PF,5.416,3000.000
5,Jake Layman,PF,4.436,3000.000
6,Darren Collison,PG,29.892,4900.000
7,Arron Afflalo,SG,5.037,3000.000
8,Dorian Finney-Smith,SF,27.762,3100.000
9,Clint Capela,C,17.622,5300.000


### Optimize Line-Up

Code borrowed from this GitHub repository https://github.com/coaltunbey/nba-lineup-optimization/blob/master/main.ipynb

In [200]:
# df = table.reset_index()
# df = df.iloc[9:]

In [287]:
multiples = df[(df['Position'].str.contains('/') == True) | (df['Position'].str.contains('-') == True)]
multiples['Position'] = multiples['Position'].str.replace('-', ',')
multiples['Position'] = multiples['Position'].str.replace('/', ',')
multiples['Position'] = multiples['Position'].str.split(',')

pd.set_option('display.float_format', lambda x: '{:.3f}'.format(x))

# Create columns for each position
multiples_dummies = pd.get_dummies(multiples['Position'].apply(pd.Series).stack()).sum(level=0)

# Merge it to multiples data
multiples = pd.concat([multiples, multiples_dummies], axis=1)

# Remove unnecessary column
del multiples['Position']

def multiple_position_handler(x):
    if 'F' in x:
        if(x['F'] == 1):
            x['PF'] = 1
            x['SF'] = 1

    if 'G' in x:
        if(x['G'] == 1):
            x['SG'] = 1
            x['PG'] = 1
        
    return x

multiples = multiples.apply(lambda x : multiple_position_handler(x), axis=1)

# Remove unnecessary columns
if 'F' in multiples:
    del multiples['F']

if 'G' in multiples:
    del multiples['G']
    
not_multiples = df[(df['Position'].str.contains('/') == False) & (df['Position'].str.contains('-') == False)]

not_multiples = pd.get_dummies(not_multiples, prefix=['Postion'], columns=['Position'])

def not_multiple_position_handler(x):
    if 'Position_F' in x:
        if(x['Position_F'] == 1):
            x['Position_PF'] = 1
            x['Position_SF'] = 1
            
    if 'Position_G' in x:
        if(x['Position_G'] == 1):
            x['Position_SG'] = 1
            x['Position_PG'] = 1
        
    return x

not_multiples = not_multiples.apply(lambda x : not_multiple_position_handler(x), axis=1)

# Remove unnecessary columns
if 'Position_F' in not_multiples:
    del not_multiples['Position_F']

if 'Position_G' in not_multiples:
    del not_multiples['Position_G']
    
# Column renaming
not_multiples.columns = multiples.columns.tolist()

# Merge multiples and non_multiples
main = pd.concat([multiples, not_multiples])

# Initialize required lists for PulP package
player_ids = main['index'].astype(str)
player_salaries = main['Salary']
player_scores = main['Projected_Points']
player_c = main['C']
player_pf = main['PF']
player_pg = main['PG']
player_sf = main['SF']
player_sg = main['SG']

player_salariesx = dict(zip(player_ids, player_salaries))
player_scoresx = dict(zip(player_ids, player_scores))

player_cx = dict(zip(player_ids, player_c))
player_pfx = dict(zip(player_ids, player_pf))
player_pgx = dict(zip(player_ids, player_pg))
player_sfx = dict(zip(player_ids, player_sf))
player_sgx = dict(zip(player_ids, player_sg))

player_ids = main['index'].astype(str).tolist()

W = 50000
maxplayer = 8
minplayer = 5

x = LpVariable.dicts('index', player_ids, 0, 1, LpBinary)

prob = LpProblem('knapsack', LpMaximize)

cost = lpSum([ player_scoresx[i]*x[i] for i in player_ids])
prob += cost

# Declare constraints

# Do not exceed $50,000
prob += lpSum([player_salariesx[i]*x[i] for i in player_ids]) <= W

# Select at least 5, at most 8 players
prob += lpSum([x[i] for i in player_ids]) <= maxplayer
prob += lpSum([x[i] for i in player_ids]) >= minplayer

# Select at least one player for each position
prob += lpSum([player_cx[i]*x[i] for i in player_ids]) >= 1
prob += lpSum([player_pfx[i]*x[i] for i in player_ids]) >= 1
prob += lpSum([player_pgx[i]*x[i] for i in player_ids]) >= 1
prob += lpSum([player_sfx[i]*x[i] for i in player_ids]) >= 1
prob += lpSum([player_sgx[i]*x[i] for i in player_ids]) >= 1

# Select extra players for F and G positions
prob += lpSum([player_sgx[i]*x[i] + player_pgx[i]*x[i] for i in player_ids]) == 4
prob += lpSum([player_sfx[i]*x[i] + player_pfx[i]*x[i] for i in player_ids]) == 4

# Solve LP
prob.solve()
print(LpStatus[prob.status])

# Collect results
result = {}

for i in player_ids: 
    #print(i, value(x[i]))
    result[float(i)] = value(x[i])
        
squad = []

for i,k in result.items():
    if k == 1:
        squad.append(i)
        
df[df['index'].isin(squad)]

Optimal


Unnamed: 0,index,Player,Position,Projected_Points,Salary
8,8,Dorian Finney-Smith,SF,27.762,3100.0
10,10,Bradley Beal,SG,39.859,6700.0
11,11,Patrick Beverley,PG,35.107,4900.0
15,15,Blake Griffin,PF,52.513,8500.0
16,16,James Harden,PG/SG,63.098,11400.0
43,43,Nikola Jokic,C,42.549,5800.0
54,54,Marreese Speights,PF/C,28.311,4000.0
73,73,Trevor Booker,PF,34.481,5600.0


In [289]:
print(value(prob.objective))
print(sum([ player_salariesx[i]*value(x[i]) for i in player_ids]))

323.67982744666324
50000.0
