### DK Salaries Optimizer

In [1]:
import pandas as pd
import numpy as np
from pulp import *

In [2]:
# Read the DK salaries CSV and keep only the columns the optimizer uses:
# player name, eligible roster position(s), salary, team, and average points per game.
dk_data = pd.read_csv("DKSalaries_mlb.csv").loc[
    :, ['Name', 'Roster Position', 'Salary', 'TeamAbbrev', 'AvgPointsPerGame']
]

# Preview the first rows of the trimmed frame
dk_data.head()

Unnamed: 0,Name,Roster Position,Salary,TeamAbbrev,AvgPointsPerGame
0,Yu Darvish,P,11000,CHC,26.25
1,Shane Bieber,P,11000,CLE,33.76
2,Max Scherzer,P,11000,WAS,20.84
3,Chris Sale,P,10800,BOS,0.0
4,Eduardo Rodriguez,P,10500,BOS,0.0


In [3]:
# Lineup limits: total salary budget and roster size
MAX_PLAYERS = 10
SALARY_CAP = 50000

# Roster positions for MLB
mlb_roster_positions = ['P', 'C', '1B', '2B', '3B', 'SS', 'OF']

# Add one 0/1 indicator column per position: 1 when the position label appears
# (as a substring) in the player's 'Roster Position' value, otherwise 0.
for position in mlb_roster_positions:
    dk_data[position] = [int(position in eligible) for eligible in dk_data['Roster Position']]

In [4]:
# Create the optimisation model; objective and constraints are attached in later cells.
prob = LpProblem(name="OptimizeDK", sense=LpMaximize)

In [5]:
# Pull out the per-player columns used by the model; each is a Series sharing dk_data's index.
names, salaries, projected_points = (
    dk_data[column] for column in ('Name', 'Salary', 'AvgPointsPerGame')
)

In [6]:
# Preview the frame again to confirm the position indicator columns are present
dk_data.head(5)

Unnamed: 0,Name,Roster Position,Salary,TeamAbbrev,AvgPointsPerGame,P,C,1B,2B,3B,SS,OF
0,Yu Darvish,P,11000,CHC,26.25,1,0,0,0,0,0,0
1,Shane Bieber,P,11000,CLE,33.76,1,0,0,0,0,0,0
2,Max Scherzer,P,11000,WAS,20.84,1,0,0,0,0,0,0
3,Chris Sale,P,10800,BOS,0.0,1,0,0,0,0,0,0
4,Eduardo Rodriguez,P,10500,BOS,0.0,1,0,0,0,0,0,0


In [7]:
# One binary decision variable per row of dk_data, keyed by the row's index label:
# 1 means the player is in the lineup, 0 means they are not.
# LpVariable is referenced directly (it is in scope through `from pulp import *`), the same
# way LpProblem and lpSum are used in this notebook, instead of going through a `pulp`
# name that is never explicitly imported.
players = LpVariable.dicts("player", dk_data.index, lowBound=0, cat='Binary')

In [8]:
# OBJECTIVE FUNCTION
# The objective is added to 'prob' first: maximise the total projected points of the
# selected players. lpSum is used instead of the built-in sum() because it accumulates
# the expression in place (sum() creates a new expression for every term) and it matches
# how the constraints are built.
prob += lpSum(projected_points[i] * players[i] for i in players), 'Projected Points'

In [None]:
# CONSTRAINTS

# Total salary of the selected players must be at or under the cap
prob += lpSum(salaries[i] * players[i] for i in players) <= SALARY_CAP

# Select at most MAX_PLAYERS players
prob += lpSum(players[i] for i in players) <= MAX_PLAYERS

# Minimum number of selected players at each roster position.
POSITION_MINIMUMS = {'P': 2, 'C': 1, '1B': 1, '2B': 1, '3B': 1, 'SS': 1, 'OF': 3}

# Each constraint sums the decision variables of the players whose 0/1 indicator for that
# position is set. (Multiplying the whole indicator column by every variable, as in
# `dk_data['P'] * players[i]`, scales each variable by the column total rather than by the
# player's own flag, which makes the minimums trivially satisfiable.)
# NOTE(review): a player flagged for several positions counts toward each of those
# minimums at once; confirm that is acceptable for the lineup rules being modelled.
for slot, min_count in POSITION_MINIMUMS.items():
    prob += lpSum(players[i] for i in players if dk_data.loc[i, slot] == 1) >= min_count



In [None]:
# prob  # uncomment to display the assembled problem (objective, constraints, variables)

In [None]:
# The problem is solved using PuLP's choice of solver.
prob.solve()

# Show the outcome as text (for example 'Optimal' or 'Infeasible') rather than a bare
# status code, so a failed solve is obvious before the results are read below.
LpStatus[prob.status]

In [None]:
# Print the name of every decision variable that was selected in the solution.
# varValue is a float (or None if the problem has not been solved), so compare against
# 0.5 instead of testing exact equality with 1.
for v in prob.variables():
    if v.varValue is not None and v.varValue > 0.5:
        print(v.name)

In [None]:
# Map each player's index label to the value the solver assigned to its decision variable
flow = {idx: decision.varValue for idx, decision in players.items()}

In [None]:
# One record per player: its index label and the solved value of its decision variable
output = [{'Player': p, 'Selected': flow[p]} for p in players]

In [None]:
# Turn the records into a frame indexed by player label so it lines up with dk_data's index
dfOptResults = pd.DataFrame.from_records(output).set_index('Player')

# Attach the 'Selected' column to the player data (index-on-index left join)
data = dk_data.merge(dfOptResults, how='left', left_index=True, right_index=True)

In [None]:
# Show the selected lineup. 'Selected' holds the solver's float value for each player
# (missing when no value is available), so cast to float and compare against 0.5 rather
# than testing exact equality with 1.
data[data['Selected'].astype(float) > 0.5]