### DK Salaries Optimizer

In [1]:
import pandas as pd
import numpy as np
from pulp import *

In [2]:
# Columns kept from the salary CSV; everything else in the file is discarded.
useful_fields = ['Name', 'Roster Position', 'Salary', 'TeamAbbrev', 'AvgPointsPerGame']

# Load the file and keep only those columns, in the order listed above.
dk_data = pd.read_csv("data/dksalaries_mlb.csv")[useful_fields]

# Preview the first rows.
dk_data.head()

Unnamed: 0,Name,Roster Position,Salary,TeamAbbrev,AvgPointsPerGame
0,Yu Darvish,P,11000,CHC,26.25
1,Shane Bieber,P,11000,CLE,33.76
2,Max Scherzer,P,11000,WAS,20.84
3,Chris Sale,P,10800,BOS,0.0
4,Eduardo Rodriguez,P,10500,BOS,0.0


In [51]:
# Names that are removed from the player pool before the model is built.
# NOTE(review): every entry contains an apostrophe -- confirm these are real
# injuries rather than a workaround for names that caused trouble downstream.
injured_players = [
    "Darren O'Day",
    "Travis d'Arnaud",
    "Ryan O'Hearn",
    "Peter O'Brien",
    "Brian O'Grady",
    "Ke'Bryan Hayes",
    "Logan O'Hoppe",
]

In [52]:
# Drop the listed players. The explicit .copy() makes dk_data an independent
# frame, so the per-position columns assigned to it later do not trigger
# pandas' SettingWithCopyWarning.
dk_data = dk_data[~dk_data['Name'].isin(injured_players)].copy()

In [53]:
# Roster slots for an MLB lineup; one column of decision variables is built
# per entry.
mlb_roster_positions = [
    'P',
    'C',
    '1B', '2B', '3B',
    'SS',
    'OF',
]

# Lineup limits: total salary budget and maximum number of players.
SALARY_CAP = 50000
MAX_PLAYERS = 10


def make_vars(name, position, roster_position):
    """Return the decision variable for placing a player in one roster slot.

    Args:
        name: Player name; used as the prefix of the variable name.
        position: Roster slot being considered, e.g. 'OF'.
        roster_position: The player's eligible slots as a '/'-separated
            string, e.g. '2B/OF'.

    Returns:
        A binary LpVariable named '<name>_<position>' when the player is
        eligible for ``position``; otherwise the integer 0, which contributes
        nothing to any sum it takes part in.
    """
    # A missing eligibility value (e.g. NaN read from the CSV) means the
    # player cannot be placed in any slot.
    if not isinstance(roster_position, str):
        return 0
    # Compare whole tokens: a plain substring test would also accept a slot
    # code that is merely part of a longer eligibility code.
    eligible_slots = [slot.strip() for slot in roster_position.split('/')]
    if position in eligible_slots:
        # cat='Binary' already bounds the variable to the range 0..1.
        return LpVariable(name + '_' + position, cat='Binary')
    return 0

# Add one column per roster slot: each cell holds whatever make_vars returns
# for that player and slot (a decision variable when eligible, otherwise 0).
for position in mlb_roster_positions:
    dk_data[position] = dk_data.apply(
        lambda player: make_vars(player['Name'], position, player['Roster Position']),
        axis=1,
    )

In [54]:
# Maximisation problem that the objective and constraints are attached to.
prob = LpProblem(name="OptimizeDK", sense=LpMaximize)

In [55]:
# Shortcuts to the name, salary and projected-points columns of dk_data.
names, salaries, projected_points = (
    dk_data[column] for column in ('Name', 'Salary', 'AvgPointsPerGame')
)

In [63]:
# Inspect the last 30 rows of the player table.
dk_data.tail(30)

Unnamed: 0,Name,Roster Position,Salary,TeamAbbrev,AvgPointsPerGame,P,C,1B,2B,3B,SS,OF
1220,Nick Ciuffo,C,2000,TEX,0.0,0,Nick_Ciuffo_C,0,0,0,0,0
1221,Sam Huff,C,2000,TEX,0.0,0,Sam_Huff_C,0,0,0,0,0
1222,Adolis Garcia,OF,2000,TEX,0.67,0,0,0,0,0,0,Adolis_Garcia_OF
1223,Yadiel Rivera,2B/OF,2000,TEX,1.67,0,0,0,Yadiel_Rivera_2B,0,0,Yadiel_Rivera_OF
1224,Sherten Apostel,3B,2000,TEX,0.0,0,0,0,0,Sherten_Apostel_3B,0,0
1225,Rob Refsnyder,1B/OF,2000,TEX,2.4,0,0,Rob_Refsnyder_1B,0,0,0,Rob_Refsnyder_OF
1226,Scott Heineman,OF,2000,TEX,3.71,0,0,0,0,0,0,Scott_Heineman_OF
1227,Ronald Guzman,1B,2000,TEX,9.0,0,0,Ronald_Guzman_1B,0,0,0,0
1228,Scott Kingery,2B/OF,2000,PHI,2.91,0,0,0,Scott_Kingery_2B,0,0,Scott_Kingery_OF
1229,Victor Robles,OF,2000,WAS,5.41,0,0,0,0,0,0,Victor_Robles_OF


In [57]:
# Print the Name column of the current player table.
print(dk_data['Name'])

0              Yu Darvish
1            Shane Bieber
2            Max Scherzer
3              Chris Sale
4       Eduardo Rodriguez
5           Lucas Giolito
6              Aaron Nola
7         Carlos Carrasco
8          Marcus Stroman
9        Mike Foltynewicz
10           Zack Wheeler
11         Walker Buehler
12        Clayton Kershaw
13           Jacob deGrom
14           Corey Kluber
15         Patrick Corbin
16            Gerrit Cole
17      Stephen Strasburg
18           Zack Greinke
19            Julio Urias
20             Zac Gallen
21            Kenta Maeda
22            Mike Soroka
23        Cristian Javier
24             Jon Lester
25           Aaron Civale
26            Blake Snell
27         Kyle Hendricks
28              Max Fried
29             Lance Lynn
              ...        
1220          Nick Ciuffo
1221             Sam Huff
1222        Adolis Garcia
1223        Yadiel Rivera
1224      Sherten Apostel
1225        Rob Refsnyder
1226       Scott Heineman
1227        

In [58]:
# OBJECTIVE: total projected points over every (player, slot) variable.
# Adding a plain expression (not a comparison) to 'prob' sets it as the
# objective; the second tuple element is the objective's name.
points_by_slot = [dk_data[slot] * projected_points for slot in mlb_roster_positions]
prob += lpSum(points_by_slot), 'Projected Points'

In [59]:
# CONSTRAINTS

# Total salary of the selected (player, slot) variables must fit under the cap.
prob += lpSum(salaries * dk_data[slot] for slot in mlb_roster_positions) <= SALARY_CAP

# At most MAX_PLAYERS selections overall.
prob += lpSum(dk_data[slot] for slot in mlb_roster_positions) <= MAX_PLAYERS

# Minimum number of players per roster slot: two pitchers, three outfielders
# and one of each remaining slot. A list of pairs keeps the constraints in a
# fixed order.
minimum_per_slot = [('P', 2), ('C', 1), ('1B', 1), ('2B', 1), ('3B', 1), ('SS', 1), ('OF', 3)]
for slot, minimum in minimum_per_slot:
    prob += lpSum([dk_data[slot]]) >= minimum

# A player eligible for several slots may fill at most one of them.
for _, player in dk_data.iterrows():
    prob += lpSum(player[slot] for slot in mlb_roster_positions) <= 1


In [60]:
# Display the assembled model.
prob

OptimizeDK:
MAXIMIZE
7.06*AJ_Pollock_OF + 5.39*Aaron_Bummer_P + 19.34*Aaron_Civale_P + 6.31*Aaron_Hicks_OF + 11.44*Aaron_Judge_OF + 3.34*Aaron_Loup_P + 24.2*Aaron_Nola_P + 5.66*Aaron_Slegers_P + 4.43*Abraham_Toro_3B + 4.6*Adalberto_Mondesi_SS + 2.02*Adam_Cimber_P + 5.47*Adam_Duvall_OF + 7.28*Adam_Eaton_OF + 5.0*Adam_Engel_OF + 5.68*Adam_Frazier_2B + 5.68*Adam_Frazier_OF + 2.47*Adam_Haseley_OF + 4.5*Adam_Kolarek_P + 1.95*Adam_Morgan_P + 2.69*Adam_Ottavino_P + 4.88*Adam_Plutko_P + 13.25*Adbert_Alzolay_P + 1.67*Adeiny_Hechavarria_2B + 1.67*Adeiny_Hechavarria_SS + 0.67*Adolis_Garcia_OF + 10.91*Adrian_Houser_P + -0.6*Albert_Abreu_P + 1.25*Albert_Almora_Jr._OF + 6.6*Alec_Bohm_3B + 10.21*Alec_Mills_P + 2.5*Aledmys_Diaz_1B + 2.5*Aledmys_Diaz_OF + 3.89*Alex_Avila_C + 8.46*Alex_Bregman_3B + 1.75*Alex_Claudio_P + 9.81*Alex_Cobb_P + 3.46*Alex_Colome_P + 5.4*Alex_Dickerson_OF + 4.9*Alex_Gordon_OF + 1.6*Alex_Jackson_C + 8.39*Alex_Verdugo_OF + 5.15*Alex_Wood_P + 6.3*Alex_Young_P + 0.75*Ali_Sanchez_C 

In [61]:
# The problem is solved using PuLP's choice of Solver
# NOTE(review): the saved output of this cell is a PulpSolverError raised while
# executing the bundled CBC binary; the cause is not visible from this notebook
# -- re-run on a fresh kernel and confirm the solve completes.
prob.solve()

PulpSolverError: Pulp: Error while executing C:\Users\prido\Anaconda3\lib\site-packages\pulp\apis\..\solverdir\cbc\win\64\cbc.exe

In [None]:
# Show the solver outcome as text (e.g. 'Optimal', 'Infeasible') instead of
# the raw integer status code.
LpStatus[prob.status]

In [None]:
# Report the objective value at the solution returned by the solver.
total_points = value(prob.objective)
print("Total Estimated Points = ", total_points)


In [None]:
# Print the name of each variable that was selected in the solution.
# Solvers report binary values as floats, so compare against 0.5 rather than
# testing for exactly 1; variables with no value (None) are skipped.
for v in prob.variables():
    if v.varValue is not None and v.varValue > 0.5:
        print(v.name)