In [1]:
from os.path import expanduser
import sys
import csv
import pulp
import copy
import pandas as pd
from tqdm import tqdm
import os
import import_ipynb
import Modeling_Functions

importing Jupyter notebook from Modeling_Functions.ipynb


In [2]:
# Run configuration for the optimizer cell below.
# Number of lineups the generation loop will attempt to produce.
num_lineups_to_generate: int = 20
# Upper bound used for the per-lineup player-overlap setting.
max_player_overlap: int = 4

In [3]:
class DraftKingsOptimizer:
    """Generate DraftKings NBA lineups with a binary linear program.

    Typical use: construct the optimizer, call ``create_indicators()`` once,
    call ``generate_lineups(self.type_1)`` to obtain 0/1 selection vectors,
    turn them into named rosters with ``fill_lineups`` and persist them with
    ``save_file``.

    The players CSV must provide the columns ``Name``, ``Position``,
    ``Salary`` and ``Predicted Points``; an optional ``Points`` column
    (actual results) is carried through to the output when present.
    """

    # For each position key: the roster slots (indices into ``header``) that
    # a player with that position may occupy, in order of preference
    # (dedicated slot, then the G/F flex slot, then UTIL).
    _SLOT_PREFERENCES = (
        ('PG', (0, 5, 7)),
        ('SG', (1, 5, 7)),
        ('SF', (2, 6, 7)),
        ('PF', (3, 6, 7)),
        ('C', (4, 7)),
    )
    # (position key, minimum selected, maximum selected) used by the LP.
    _POSITION_LIMITS = (
        ('PG', 1, 3),
        ('SG', 1, 3),
        ('SF', 1, 3),
        ('PF', 1, 3),
        ('C', 1, 2),
    )
    # Number of roster slots in one lineup (matches len(header)).
    _ROSTER_SIZE = 8

    def __init__(self, num_lineups, overlap, solver, players_filepath, output_filepath):
        """
        num_lineups: maximum number of lineups to generate.
        overlap: maximum number of players a lineup may share with any
            previously generated lineup.
        solver: pulp solver instance handed to ``LpProblem.solve``.
        players_filepath: CSV file with the player pool.
        output_filepath: where ``save_file`` writes the lineups.
        """
        self.num_lineups = num_lineups
        self.overlap = overlap
        self.solver = solver
        self.players_df = self.load_inputs(players_filepath)
        # One 0/1 list per position, filled by create_indicators().
        self.positions = {'PG':[], 'SG':[], 'SF':[], 'PF':[], 'C':[]}
        self.num_players = len(self.players_df.index)
        self.output_filepath = output_filepath
        # True when the input also carries actual fantasy points.
        self.actuals = 'Points' in self.players_df.columns
        self.salary_cap = 50000
        self.num_teams = None
        self.header = ['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'UTIL']

    def create_indicators(self):
        """
        Build, for every position key, a 0/1 list marking which players are eligible for it.
        A player is eligible when the key occurs in their ``Position`` string, so a
        multi-position entry such as 'PG/SG' is flagged for both keys.
        """
        # Start from empty lists so calling this more than once does not
        # append a second copy of every indicator.
        self.positions = {key: [] for key in self.positions}
        for pos in self.players_df.loc[:, 'Position']:
            for key in self.positions:
                self.positions[key].append(1 if key in pos else 0)

    def type_1(self, lineups):
        """ 
        Sets up the pulp LP problem, adds all of the constraints and solves for the maximum value for each generated lineup.
        Returns a single lineup (i.e all of the players either set to 0 or 1) indicating if a player was included in a lineup or not.
        Returns None when the solver does not report an optimal solution.

        ``lineups`` is the list of previously generated 0/1 lineups; each of them
        contributes one overlap constraint. ``create_indicators()`` must have been
        called first. Note that a player flagged for several positions is counted
        once per flagged position in the positional constraints.
        """
        player_range = range(self.num_players)
        # Read the columns positionally so they line up with the indicator
        # lists, which are also built in row order.
        salaries = self.players_df['Salary'].tolist()
        projections = self.players_df['Predicted Points'].tolist()

        #define the pulp object problem
        prob = pulp.LpProblem('NBA', pulp.LpMaximize)

        #define one binary selection variable per player
        lineup = [pulp.LpVariable("player_{}".format(i+1), cat="Binary") for i in player_range]

        def selected_count(*position_keys):
            # Number of selected players, weighted by how many of the given
            # position keys each player is flagged for.
            return pulp.lpSum(
                sum(self.positions[key][i] for key in position_keys) * lineup[i]
                for i in player_range
            )

        #add the roster size constraint
        prob += (pulp.lpSum(lineup[i] for i in player_range) == self._ROSTER_SIZE)

        #add the positional constraints
        for key, minimum, maximum in self._POSITION_LIMITS:
            prob += (minimum <= selected_count(key))
            prob += (selected_count(key) <= maximum)

        # Guards plus centers, and forwards plus centers, are each capped at 5.
        prob += (selected_count('PG', 'SG', 'C') <= 5)
        prob += (selected_count('PF', 'SF', 'C') <= 5)

        #add the salary constraint
        prob += (pulp.lpSum(salaries[i]*lineup[i] for i in player_range) <= self.salary_cap)

        #variance constraints - each lineup can't have more than the num overlap of any combination of players in any previous lineups
        for previous in lineups:
            prob += (pulp.lpSum(previous[k]*lineup[k] for k in player_range) <= self.overlap)

        #add the objective
        prob += pulp.lpSum(projections[i]*lineup[i] for i in player_range)
        #solve the problem
        status = prob.solve(self.solver)

        #check if the optimizer found an optimal solution
        if status != pulp.LpStatusOptimal:
            print('Only {} feasible lineups produced'.format(len(lineups)), '\n')
            return None

        # Puts the output of one lineup into a format that will be used later
        # (solver values are floats, so accept anything close to 1 as selected)
        return [1 if 0.9 <= variable.varValue <= 1.1 else 0 for variable in lineup]

    def load_inputs(self, filepath):
        """
        Returns the loaded data from the user filepath into a pandas dataframe.
        Exits via sys.exit with an 'INVALID FILEPATH' message when the file cannot be read.
        """
        try:
            data = pd.read_csv(filepath)
        except OSError:
            sys.exit('INVALID FILEPATH: {}'.format(filepath))
        return data

    def save_file(self, header, filled_lineups):
        """
        Save the filled lineups to CSV, including the projection column and,
        when the input had actuals, the actual points column.

        header: the roster slot names; it is not modified.
        filled_lineups: rows as produced by fill_lineups().
        The output path is self.output_filepath with its extension replaced by '.csv'.
        """
        header_copy = list(header)
        # Only the final extension is replaced; dots elsewhere in the path
        # (e.g. './out/x.csv' or a dotted directory name) are left alone.
        output_projection_path = os.path.splitext(self.output_filepath)[0] + '.csv'
        if self.actuals:
            header_copy.extend(('Projected Points', 'Points'))
        else:
            # append, not extend: extending with a bare string would add one
            # header cell per character.
            header_copy.append('Projected Points')
        # newline='' is what the csv module expects; without it some
        # platforms emit a blank line after every row.
        with open(output_projection_path, 'w', newline='') as f:
            writer = csv.writer(f)
            writer.writerow(header_copy)
            writer.writerows(filled_lineups)
        print("Saved lineups with projection to: {}".format(output_projection_path))
        return

    def generate_lineups(self, formula):
        """
        Calls formula(previous_lineups) up to self.num_lineups times and collects the results.
        Stops early as soon as formula returns an empty/None result.
        """
        lineups = []
        for _ in tqdm(range(self.num_lineups)):
            lineup = formula(lineups)
            if not lineup:
                break
            lineups.append(lineup)
        return lineups

    def _eligible_slots(self, num):
        """Roster slot indices player ``num`` may fill, most preferred first."""
        slots = []
        for key, preferred in self._SLOT_PREFERENCES:
            if self.positions[key][num] == 1:
                for slot in preferred:
                    if slot not in slots:
                        slots.append(slot)
        return slots

    def _assign_slots(self, selected):
        """
        Map roster slots to the selected players (row numbers).

        Each player first takes their most preferred free slot. When none is
        free, players already placed are moved along an augmenting path so
        that the newcomer still fits whenever any valid arrangement exists.
        Returns a dict {slot index: player row number}.
        """
        candidates = {num: self._eligible_slots(num) for num in selected}
        occupant = {}

        def relocate(num, visited):
            # Try to seat ``num``, recursively moving current occupants.
            for slot in candidates[num]:
                if slot in visited:
                    continue
                visited.add(slot)
                if slot not in occupant or relocate(occupant[slot], visited):
                    occupant[slot] = num
                    return True
            return False

        for num in selected:
            free_slot = next((slot for slot in candidates[num] if slot not in occupant), None)
            if free_slot is not None:
                occupant[free_slot] = num
            else:
                # If this fails the player cannot be seated at all and the
                # lineup keeps an empty slot.
                relocate(num, set())
        return occupant

    def fill_lineups(self, lineups):
        """ 
        Takes in the lineups with 1's and 0's indicating if the player is used in a lineup.
        Matches the player in the dataframe and replaces the value with their name.

        Returns one row per lineup: the eight slot names in header order, then the
        rounded total of 'Predicted Points', then (only when actuals exist) the
        rounded total of 'Points'. Totals include every selected player.
        """
        names = self.players_df['Name'].tolist()
        projections = self.players_df['Predicted Points'].tolist()
        actual_points = self.players_df['Points'].tolist() if self.actuals else None

        filled_lineups = []
        for lineup in lineups:
            # Row numbers of the players switched on in this lineup.
            selected = [num for num, flag in enumerate(lineup[:self.num_players])
                        if 0.9 < flag < 1.1]
            a_lineup = [""] * self._ROSTER_SIZE
            for slot, num in self._assign_slots(selected).items():
                a_lineup[slot] = names[num]
            a_lineup.append(round(sum(projections[num] for num in selected), 2))
            if self.actuals:
                a_lineup.append(round(sum(actual_points[num] for num in selected), 2))
            filled_lineups.append(a_lineup)
        return filled_lineups

In [4]:
# Predictions as a dataframe for interactive inspection. The optimizer below
# loads the same file itself, so nothing in this cell depends on `df`.
df = pd.read_csv('Predictions.csv')

#enter the parameters
players_filepath = os.path.join(os.getcwd(), "Predictions.csv")
output_filepath = os.path.expanduser("~/draftkings-nba-model/DraftKingsLineup.csv")
# Create the output folder up front so a missing directory cannot make the
# save fail after all lineups have already been solved.
os.makedirs(os.path.dirname(output_filepath), exist_ok=True)

optimizer = DraftKingsOptimizer(num_lineups=num_lineups_to_generate,
                                overlap=max_player_overlap,
                                solver=pulp.CPLEX_PY(msg=0),
                                players_filepath=players_filepath,
                                output_filepath=output_filepath)
optimizer.create_indicators()
#generate the lineups with the formula and the indicators
lineups = optimizer.generate_lineups(formula=optimizer.type_1)
#fill the lineups with player names - send in the positions indicator
filled_lineups = optimizer.fill_lineups(lineups)
#save the lineups
optimizer.save_file(optimizer.header, filled_lineups)
    


  0%|          | 0/20 [00:00<?, ?it/s]

  5%|▌         | 1/20 [00:00<00:03,  4.97it/s]

 10%|█         | 2/20 [00:00<00:03,  5.46it/s]

 15%|█▌        | 3/20 [00:00<00:03,  5.33it/s]

 20%|██        | 4/20 [00:00<00:03,  4.37it/s]

 25%|██▌       | 5/20 [00:01<00:03,  4.30it/s]

 35%|███▌      | 7/20 [00:01<00:02,  5.11it/s]

 40%|████      | 8/20 [00:01<00:02,  4.23it/s]

 45%|████▌     | 9/20 [00:02<00:03,  3.61it/s]

 50%|█████     | 10/20 [00:02<00:02,  3.60it/s]

 55%|█████▌    | 11/20 [00:02<00:02,  3.44it/s]

 60%|██████    | 12/20 [00:03<00:02,  3.09it/s]

 65%|██████▌   | 13/20 [00:03<00:02,  3.43it/s]

 70%|███████   | 14/20 [00:03<00:01,  3.07it/s]

 75%|███████▌  | 15/20 [00:04<00:01,  2.74it/s]

 80%|████████  | 16/20 [00:04<00:01,  2.72it/s]

 85%|████████▌ | 17/20 [00:04<00:01,  2.74it/s]

 90%|█████████ | 18/20 [00:05<00:00,  2.89it/s]

 95%|█████████▌| 19/20 [00:05<00:00,  2.94it/s]

100%|██████████| 20/20 [00:05<00:00,  2.72it/s]

100%|██████████| 20/20 [00:05<00:00,  3.39it/s]

Saved lineups with projection to: /Users/ethanariowitsch/draftkings-nba-model/DraftKingsLineup.csv



