I'm going to pretend that this is a new day with new data and that I want to optomize my line-up for tonight's game. The daft kings scoring is set up in the following way:

- Each player is assigned an salary by draft kings for that given day.
- Each draft kings competitor is given $50,000 to use to draft their lineup.
- The players play in the games, their fantasy points are calculated by how well they play in the games.

The goal is to get as many points as possible with your team.

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn import metrics
from sklearn.model_selection import cross_val_score
import pickle
import warnings 
from pulp import *
warnings.filterwarnings('ignore')

In [7]:
# Load Data
data1 = pd.read_csv('/Users/sauce/Desktop/DraftKings/ready_data/DraftKingsCleaned.csv', header = None)

data2 = pd.read_csv('/Users/sauce/Desktop/DraftKings/ready_data/mock_rest2.csv', header = None)

data = data1.append(data2)

# rename columns
data = data.rename(columns={0: "season", 
                        1:'game_date',
                        2: 'player',
                        3: 'position',    
                        4: 'team',
                        5: 'opponent',
                        6: 'venue',
                        7: 'minutes',
                        8: 'usage_rate',
                        9: 'rest',
                        10: 'avg_threes',
                        11: 'avg_reb',
                        12: 'avg_ast',
                        13: 'avg_stl',
                        14: 'avg_blk',
                        15: 'avg_tov',
                        16: 'avg_pts',
                        17: 'avg_points_vs_opp',
                        18: 'team_pace',
                        19: 'team_ast',
                        20: 'team_tov',
                        21: 'team_reb_rate',
                        22: 'team_offeff',
                        23: 'team_defeff',
                        24: 'opp_pace',
                        25: 'opp_ast',
                        26: 'opp_tov',
                        27: 'opp_reb_rate',
                        28: 'opp_offeff',
                        29: 'opp_defeff',
                        30: 'opp_pos_avg',
                        31: 'salary',
                        32: 'fantasy_points',
                       })

data = data.set_index(pd.DatetimeIndex(data['game_date']))

data2 = data.loc['2016-12-12']

data1 = data.loc['2011-11-11']

data = data2.append(data1)

data = data.rename(columns = {'game_date': 'game_date1'})

data = data.reset_index()

data = data.drop(columns = ['game_date1'])

# Avg 10 data
data["game_date"] = pd.to_datetime(data.game_date)
data.set_index('game_date', inplace=True)
data.sort_index(inplace=True)
df_rolling = data.groupby(['player']).rolling(10).mean().rename(columns={'season':'season1', 'player':'player1'}).reset_index()
data = data.reset_index()
df_rolling = df_rolling.drop(columns=['player'])
df_rolling = df_rolling.rename(columns = {'season1': 'season', 'player1': 'player'})
data = pd.merge(data, df_rolling, on=['player', 'season', 'game_date'], left_index= True , suffixes=['', '_AVG10'])

# Avg 3 data
data["game_date"] = pd.to_datetime(data.game_date)
data.set_index('game_date', inplace=True)
data.sort_index(inplace=True)
df_rolling = data.groupby(['player']).rolling(3).mean().rename(columns={'season':'season1', 'player':'player1'}).reset_index()
data = data.reset_index()
df_rolling = df_rolling.drop(columns=['player'])
df_rolling = df_rolling.rename(columns = {'season1': 'season', 'player1': 'player'})
df = pd.merge(data, df_rolling, on=['player', 'season', 'game_date'], left_index= True , suffixes=['', '_AVG3'])

# Drop useless features
df = df.drop(columns=['team_AVG10','opponent_AVG10','venue_AVG10','rest_AVG10',
                      'avg_threes_AVG10','avg_reb_AVG10','avg_ast_AVG10','avg_stl_AVG10',
                      'avg_blk_AVG10','avg_tov_AVG10','avg_pts_AVG10','avg_points_vs_opp_AVG10',
                      'team_pace_AVG10','team_ast_AVG10','team_tov_AVG10','team_reb_rate_AVG10',
                      'team_defeff_AVG10','opp_pace_AVG10','opp_ast_AVG10','opp_tov_AVG10',
                      'opp_reb_rate_AVG10','opp_offeff_AVG10','salary_AVG10',
                      'team_AVG3', 'opponent_AVG3', 'venue_AVG3','rest_AVG3',
                      'avg_threes_AVG3', 'avg_reb_AVG3','avg_ast_AVG3', 'avg_stl_AVG3', 
                      'avg_blk_AVG3', 'avg_tov_AVG3','avg_pts_AVG3', 
                      'avg_points_vs_opp_AVG3', 'team_pace_AVG3','team_ast_AVG3', 
                      'team_tov_AVG3', 'team_reb_rate_AVG3','team_defeff_AVG3', 
                      'opp_pace_AVG3','opp_ast_AVG3', 'opp_tov_AVG3', 'opp_reb_rate_AVG3',
                      'opp_offeff_AVG3', 'opp_pos_avg_AVG3','salary_AVG3',
                      'team_AVG10_AVG3','opponent_AVG10_AVG3', 'venue_AVG10_AVG3', 
                      'minutes_AVG10_AVG3','usage_rate_AVG10_AVG3', 'rest_AVG10_AVG3',
                      'avg_threes_AVG10_AVG3', 'avg_reb_AVG10_AVG3',
                      'avg_ast_AVG10_AVG3', 'avg_stl_AVG10_AVG3', 'avg_blk_AVG10_AVG3',
                      'avg_tov_AVG10_AVG3', 'avg_pts_AVG10_AVG3',
                      'avg_points_vs_opp_AVG10_AVG3', 'team_pace_AVG10_AVG3',
                      'team_ast_AVG10_AVG3', 'team_tov_AVG10_AVG3',
                      'team_reb_rate_AVG10_AVG3', 'team_offeff_AVG10_AVG3',
                      'team_defeff_AVG10_AVG3', 'opp_pace_AVG10_AVG3',
                      'opp_ast_AVG10_AVG3', 'opp_tov_AVG10_AVG3',
                      'opp_reb_rate_AVG10_AVG3', 'opp_offeff_AVG10_AVG3',
                      'opp_defeff_AVG10_AVG3', 'opp_pos_avg_AVG10_AVG3',
                      'salary_AVG10_AVG3', 'fantasy_points_AVG10_AVG3', 'position_AVG10_AVG3', 'position_AVG3', 'position_AVG10'])

# Fill NAs
df['fantasy_points_AVG3'] = df['fantasy_points_AVG3'].fillna(df.groupby(['player', 'season'])['fantasy_points'].transform('mean'))
df['opp_defeff_AVG3'] = df['opp_defeff_AVG3'].fillna(df.groupby(['opponent', 'season'])['opp_defeff'].transform('mean'))
df['team_offeff_AVG3'] = df['team_offeff_AVG3'].fillna(df.groupby(['team', 'season'])['team_offeff'].transform('mean'))
df['usage_rate_AVG3'] = df['usage_rate_AVG3'].fillna(df.groupby(['player', 'season'])['usage_rate'].transform('mean'))
df['fantasy_points_AVG10'] = df['fantasy_points_AVG10'].fillna(df.groupby(['player', 'season'])['fantasy_points'].transform('mean'))
df['minutes_AVG3'] = df['minutes_AVG3'].fillna(df.groupby(['player', 'season'])['minutes'].transform('mean'))
df['opp_pos_avg_AVG10'] = df['opp_pos_avg_AVG10'].fillna(df.groupby(['opponent', 'season'])['opp_pos_avg'].transform('mean'))
df['opp_defeff_AVG10'] = df['opp_defeff_AVG10'].fillna(df.groupby(['opponent', 'season'])['opp_defeff'].transform('mean'))
df['team_offeff_AVG10'] = df['team_offeff_AVG10'].fillna(df.groupby(['team', 'season'])['team_offeff'].transform('mean'))
df['usage_rate_AVG10'] = df['usage_rate_AVG10'].fillna(df.groupby(['player', 'season'])['usage_rate'].transform('mean'))
df['minutes_AVG10'] = df['minutes_AVG10'].fillna(df.groupby(['player', 'season'])['minutes'].transform('mean'))



players = df['player']
position = df['position']
salary = df['salary']

#Drop non predictive columns 
df2 = df.drop(columns=['game_date', 'minutes', 'usage_rate', 'position', 'season', 'player', 'team', 'opponent',
                    'salary'])
# Encode Dummies
df2 = pd.get_dummies(df2, columns=['venue', 'rest'], drop_first=True)

# Grab Target Variable and remove it from data.
y = df2['fantasy_points']
X = df2.drop(columns = ['fantasy_points'])


# #Scale data
# from sklearn.preprocessing import StandardScaler
# sc_X = StandardScaler()
# X_train = sc_X.fit_transform(X)

# Make Predictions with Linear Regression Model

loaded_model = pickle.load(open('linreg_model.sav', 'rb'))
pred = loaded_model.predict(X)

# Create Table with predictions

table = pd.DataFrame({'Player': players, 'Position':position, 'Salary': salary, 'Projected_Points':pred})

# table.to_csv('/Users/sauce/Desktop/DraftKings/ready_data/pred.csv',
#                         header=True,
#                         sep=',')

In [8]:
table.head()

Unnamed: 0,Player,Position,Projected_Points,Salary
109,Mock9,PG,7.881476,3300
101,Mock0,SG,37.11827,6400
102,Mock1,PF/C,38.034045,7100
103,Mock2,PG,45.024782,9300
108,Mock8,PF/C,66.805986,11700


In [9]:
df = table.reset_index()
df = df.iloc[9:]

In [10]:
df.head()

Unnamed: 0,index,Player,Position,Projected_Points,Salary
9,0,Aaron Brooks,PG,10.822813,3000
10,17,Chris Paul,PG,53.227731,8900
11,49,Jabari Parker,SF/PF,33.17052,6200
12,100,Mirza Teletovic,PF,8.459579,3200
13,1,Al Jefferson,C,11.880645,3400


### Optimize Linup

Code borrowed from https://github.com/coaltunbey/nba-lineup-optimization/blob/master/main.ipynb

In [92]:
import pulp

In [94]:
from pulp import *

In [95]:
multiples = df[(df['Position'].str.contains('/') == True) | (df['Position'].str.contains('-') == True)]

In [96]:
multiples['Position'] = multiples['Position'].str.replace('-', ',')
multiples['Position'] = multiples['Position'].str.replace('/', ',')
multiples['Position'] = multiples['Position'].str.split(',')

multiples.head()

Unnamed: 0,index,Player,Position,Projected_Points,Salary
11,49,Jabari Parker,"[SF, PF]",33.171,6200
20,15,CJ McCollum,"[PG, SG]",27.949,7300
23,133,Sam Dekker,"[SF, PF]",11.992,3900
31,34,Dwight Powell,"[PF, C]",17.67,4800
35,4,Allen Crabbe,"[SG, SF]",15.515,3800


In [97]:
pd.set_option('display.float_format', lambda x: '{:.3f}'.format(x))

In [98]:
# Create columns for each position
multiples_dummies = pd.get_dummies(multiples['Position'].apply(pd.Series).stack()).sum(level=0)

# Merge it to multiples data
multiples = pd.concat([multiples, multiples_dummies], axis=1)

# Remove unnecessary column
del multiples['Position']

multiples.head()

Unnamed: 0,index,Player,Projected_Points,Salary,C,PF,PG,SF,SG
11,49,Jabari Parker,33.171,6200,0,1,0,1,0
20,15,CJ McCollum,27.949,7300,0,0,1,0,1
23,133,Sam Dekker,11.992,3900,0,1,0,1,0
31,34,Dwight Powell,17.67,4800,1,1,0,0,0
35,4,Allen Crabbe,15.515,3800,0,0,0,1,1


In [99]:
def multiple_position_handler(x):
    if 'F' in x:
        if(x['F'] == 1):
            x['PF'] = 1
            x['SF'] = 1

    if 'G' in x:
        if(x['G'] == 1):
            x['SG'] = 1
            x['PG'] = 1
        
    return x

In [100]:
multiples = multiples.apply(lambda x : multiple_position_handler(x), axis=1)

In [101]:
# Remove unnecessary columns
if 'F' in multiples:
    del multiples['F']

if 'G' in multiples:
    del multiples['G']

In [102]:
multiples.head()

Unnamed: 0,index,Player,Projected_Points,Salary,C,PF,PG,SF,SG
11,49,Jabari Parker,33.171,6200,0,1,0,1,0
20,15,CJ McCollum,27.949,7300,0,0,1,0,1
23,133,Sam Dekker,11.992,3900,0,1,0,1,0
31,34,Dwight Powell,17.67,4800,1,1,0,0,0
35,4,Allen Crabbe,15.515,3800,0,0,0,1,1


In [103]:
not_multiples = df[(df['Position'].str.contains('/') == False) & (df['Position'].str.contains('-') == False)]

not_multiples.head()

Unnamed: 0,index,Player,Position,Projected_Points,Salary
9,0,Aaron Brooks,PG,10.823,3000
10,17,Chris Paul,PG,53.228,8900
12,100,Mirza Teletovic,PF,8.46,3200
13,1,Al Jefferson,C,11.881,3400
14,31,Deron Williams,PG,38.355,5700


In [104]:
not_multiples = pd.get_dummies(not_multiples, prefix=['Postion'], columns=['Position'])

not_multiples.head()

Unnamed: 0,index,Player,Projected_Points,Salary,Postion_C,Postion_PF,Postion_PG,Postion_SF,Postion_SG
9,0,Aaron Brooks,10.823,3000,0,0,1,0,0
10,17,Chris Paul,53.228,8900,0,0,1,0,0
12,100,Mirza Teletovic,8.46,3200,0,1,0,0,0
13,1,Al Jefferson,11.881,3400,1,0,0,0,0
14,31,Deron Williams,38.355,5700,0,0,1,0,0


In [105]:
def not_multiple_position_handler(x):
    if 'Position_F' in x:
        if(x['Position_F'] == 1):
            x['Position_PF'] = 1
            x['Position_SF'] = 1
            
    if 'Position_G' in x:
        if(x['Position_G'] == 1):
            x['Position_SG'] = 1
            x['Position_PG'] = 1
        
    return x

In [106]:
not_multiples = not_multiples.apply(lambda x : not_multiple_position_handler(x), axis=1)

In [107]:
# Remove unnecessary columns
if 'Position_F' in not_multiples:
    del not_multiples['Position_F']

if 'Position_G' in not_multiples:
    del not_multiples['Position_G']

In [108]:
not_multiples.head()

Unnamed: 0,index,Player,Projected_Points,Salary,Postion_C,Postion_PF,Postion_PG,Postion_SF,Postion_SG
9,0,Aaron Brooks,10.823,3000,0,0,1,0,0
10,17,Chris Paul,53.228,8900,0,0,1,0,0
12,100,Mirza Teletovic,8.46,3200,0,1,0,0,0
13,1,Al Jefferson,11.881,3400,1,0,0,0,0
14,31,Deron Williams,38.355,5700,0,0,1,0,0


In [109]:
# Column renaming
not_multiples.columns = multiples.columns.tolist()

# Merge multiples and non_multiples
main = pd.concat([multiples, not_multiples])

main.head()

Unnamed: 0,index,Player,Projected_Points,Salary,C,PF,PG,SF,SG
11,49,Jabari Parker,33.171,6200,0,1,0,1,0
20,15,CJ McCollum,27.949,7300,0,0,1,0,1
23,133,Sam Dekker,11.992,3900,0,1,0,1,0
31,34,Dwight Powell,17.67,4800,1,1,0,0,0
35,4,Allen Crabbe,15.515,3800,0,0,0,1,1


In [115]:
# Initialize required lists for PulP package
player_ids = main['index'].astype(str)
player_salaries = main['Salary']
player_scores = main['Projected_Points']
player_c = main['C']
player_pf = main['PF']
player_pg = main['PG']
player_sf = main['SF']
player_sg = main['SG']

player_salariesx = dict(zip(player_ids, player_salaries))
player_scoresx = dict(zip(player_ids, player_scores))

player_cx = dict(zip(player_ids, player_c))
player_pfx = dict(zip(player_ids, player_pf))
player_pgx = dict(zip(player_ids, player_pg))
player_sfx = dict(zip(player_ids, player_sf))
player_sgx = dict(zip(player_ids, player_sg))

player_ids = main['index'].astype(str).tolist()

In [116]:
W = 50000
maxplayer = 8
minplayer = 5

In [117]:
x = LpVariable.dicts('index', player_ids, 0, 1, LpBinary)

In [118]:
prob = LpProblem('knapsack', LpMaximize)

In [119]:
cost = lpSum([ player_scoresx[i]*x[i] for i in player_ids])
prob += cost

In [120]:
# Declare constraints

# Do not exceed $50,000
prob += lpSum([player_salariesx[i]*x[i] for i in player_ids]) <= W

# Select at least 5, at most 8 players
prob += lpSum([x[i] for i in player_ids]) <= maxplayer
prob += lpSum([x[i] for i in player_ids]) >= minplayer

# Select at least one player for each position
prob += lpSum([player_cx[i]*x[i] for i in player_ids]) >= 1
prob += lpSum([player_pfx[i]*x[i] for i in player_ids]) >= 1
prob += lpSum([player_pgx[i]*x[i] for i in player_ids]) >= 1
prob += lpSum([player_sfx[i]*x[i] for i in player_ids]) >= 1
prob += lpSum([player_sgx[i]*x[i] for i in player_ids]) >= 1

# Select extra players for F and G positions
prob += lpSum([player_sgx[i]*x[i] + player_pgx[i]*x[i] for i in player_ids]) == 4
prob += lpSum([player_sfx[i]*x[i] + player_pfx[i]*x[i] for i in player_ids]) == 4

In [121]:
# Solve LP
prob.solve()
print(LpStatus[prob.status])

Optimal


In [127]:
# Collect results
result = {}

for i in player_ids: 
    #print(i, value(x[i]))
    result[float(i)] = value(x[i])
        
squad = []

for i,k in result.items():
    if k == 1:
        squad.append(i)

In [128]:
df[df['index'].isin(squad)]

Unnamed: 0,index,Player,Position,Projected_Points,Salary
14,31,Deron Williams,PG,38.355,5700
38,92,Mason Plumlee,C,43.921,5500
57,138,Terrence Ross,SG/SF,34.219,3700
62,29,DeMarcus Cousins,PF/C,69.372,10900
80,68,Josh McRoberts,PF/C,29.261,3900
87,55,James Harden,PG/SG,68.237,11400
128,33,Dorian Finney-Smith,SF,33.628,3100
144,115,Nikola Jokic,C,50.576,5800


In [None]:
print(value(prob.objective))
print(sum([ player_salariesx[i]*value(x[i]) for i in player_ids]))