All dependencies should be imported here

In [1]:
import time
import pandas as pd
import numpy as np
import re as re
from pulp import *
import os
import copy
import icecream
from contest_lineup_defs import DEFINITIONS




ModuleNotFoundError: No module named 'contest_lineup_defs'

In [34]:
# Display the lineup rule stored under DRAFTKINGS -> NFL -> CLASSIC in the imported DEFINITIONS mapping.
# NOTE(review): this needs `contest_lineup_defs` to be importable; the import cell's recorded output is a
# ModuleNotFoundError, so confirm the module is on the path before relying on this cell.
DEFINITIONS['DRAFTKINGS']['NFL']['CLASSIC']['LINEUP_RULE']

lineup.Lineup_Rule

Global constants are defined here. Generally these are site-specific rules describing what is allowed in particular lineups.

In [None]:
# Contest formats offered per sport on each site. Every sport gets its own list object,
# so editing one sport's formats never affects another sport.
FANDUEL_CONTEST_TYPES = {
    sport: ['SINGLE_GAME', 'FULL_ROSTER']
    for sport in ('NFL', 'NBA', 'MLB', 'NHL', 'NASCAR', 'MMA', 'CFB')
}
DRAFTKINGS_CONTEST_TYPES = {
    sport: ['CLASSIC', 'SHOWDOWN']
    for sport in ('NFL', 'NBA', 'MLB', 'NHL', 'NASCAR', 'MMA', 'CFB')
}



In [None]:
class ContestType():
    """Identifies a contest by the site, sport and contest type it belongs to."""

    def __init__(self, site, sport, contest_type):
        # The three identifiers are stored unchanged; no validation is performed here.
        self.site, self.sport, self.contest_type = site, sport, contest_type


Define the base object of our module, the `Slate_Player` object. It is more elementary than a 'player' object because we want the flexibility to create a `Slate_Player` with no existing data on the given player. This matters when, for example, a new last-minute player appears on the slate and we have neither existing data on them nor the time to "set up" the player in the back-end.

In [22]:
class Slate_Player():
    """A single player entry on a slate.

    Two Slate_Player objects are considered equal when their ``slate_name_id``
    values are equal; the hash is derived from the same field so instances can
    be used in sets and as dict keys.

    Parameters
    ----------
    name : display name of the player.
    slate_name_id : identifier used for equality, hashing and string output.
    position : position label of the player.
    var_name : name of the optimizer variable associated with the player.
    game_time : game-time information for the player.
    base_proj : mean used by ``simulate_score``.
    percent_own : ownership figure for the player.
    stdev : standard deviation used by ``simulate_score``.
    """
    def __init__(self, name, slate_name_id, position, var_name, game_time, base_proj, percent_own, stdev):
        self.name=name
        self.slate_name_id = slate_name_id
        self.position = position
        self.var_name = var_name
        self.game_time = game_time
        self.base_proj = base_proj
        self.percent_own = percent_own
        self.stdev=stdev
        self._sim_score=0

    def __str__(self):
        # str() keeps this valid even when the identifier is not a string (e.g. a numeric id)
        return str(self.slate_name_id)

    def __repr__(self):
        return str(self.slate_name_id)

    def __eq__(self, other):
        if isinstance(other, Slate_Player):
            return self.slate_name_id==other.slate_name_id
        return False

    def __hash__(self):
        # Defining __eq__ alone sets __hash__ to None; hash on the same field that __eq__ compares
        return hash(self.slate_name_id)

    def simulate_score(self):
        """Return one normal draw with mean ``base_proj`` and standard deviation ``stdev``."""
        #This method will not invoke correlation since only one player is in scope
        return np.random.normal(self.base_proj, self.stdev)



In [23]:
class Lineup_Rule():
    """Roster rule for a contest: slot order, flex eligibility and salary cap.

    Parameters
    ----------
    position_ordered_list : roster slots in output order (repeats allowed, flex slots included).
    flex_definitions : maps each flex slot name to the native positions allowed to fill it.
    salary_cap : salary cap for a lineup.

    Attributes
    ----------
    lineup_order, flex_definitions, salary_cap : the constructor arguments, stored as given.
    total_players : number of roster slots.
    players_per_posn : slot name -> number of slots with that name (flex slots included).
    position_min_dict : native position -> number of native slots (flex slot names removed).
    position_max_dict : native position -> native slots plus every flex slot the position may fill.
    """
    def __init__(self, position_ordered_list: list[str], flex_definitions: dict, salary_cap: int):
        self.lineup_order=position_ordered_list
        self.flex_definitions = flex_definitions
        self.salary_cap = salary_cap
        self.total_players = len(position_ordered_list)

        # Count how many slots each slot name has, preserving first-seen order
        players_per_posn={}
        for posn in self.lineup_order:
            players_per_posn[posn] = players_per_posn.get(posn, 0) + 1
        self.players_per_posn = players_per_posn

        # Native positions are the slot names that are not themselves flex slots
        self.position_min_dict = {posn: count for posn, count in players_per_posn.items() if posn not in flex_definitions}

        # A native position can additionally occupy every slot of each flex type it is eligible for.
        # Use the actual slot count of that flex type: 0 when the flex type is defined but not part
        # of this lineup, 2+ when the lineup has several slots of the same flex type.
        max_dict = dict(self.position_min_dict)
        for flex_name, eligible_positions in flex_definitions.items():
            flex_slots = players_per_posn.get(flex_name, 0)
            for posn in max_dict:
                if posn in eligible_positions:
                    max_dict[posn] += flex_slots
        self.position_max_dict=max_dict


    def __str__(self):
        return str(self.lineup_order)

    def __repr__(self):
        return str(self.lineup_order)

In [24]:

class Lineup():
    """A set of players together with the lineup rule they were built under.

    Parameters
    ----------
    players : list of Slate_Player-like objects (each must expose ``base_proj``,
        ``percent_own``, ``position`` and ``_sim_score``).
    lineup_rule : object exposing ``lineup_order`` and ``flex_definitions``.

    Attributes
    ----------
    base_proj : sum of the players' ``base_proj``.
    sum_ownership : sum of the players' ``percent_own``.
    lineup_sim_score : sum of the players' ``_sim_score``; 0 until ``calc_simulated_score`` is called.
    """
    # Annotations are quoted so the class does not depend on the order in which notebook cells are run
    def __init__(self, players: "list[Slate_Player]", lineup_rule: "Lineup_Rule"):
        self.players = players#A list of slate_player objects
        self.base_proj = sum(player.base_proj for player in self.players)
        self.sum_ownership = sum(player.percent_own for player in self.players)
        self.lineup_rule = lineup_rule
        self.lineup_sim_score=0

    def __str__(self):
        return str(self.players)

    def __repr__(self):
        return str(self.players)

    def __eq__(self, other):
        """Two lineups are equal when they hold the same players, regardless of order."""
        if not isinstance(other, Lineup):
            return False
        # Checking only one direction would make a lineup equal to any subset of itself
        # (including an empty lineup), so require equal size and membership both ways.
        if len(self.players) != len(other.players):
            return False
        return (all(player in self.players for player in other.players)
                and all(player in other.players for player in self.players))


    def calc_simulated_score(self):
        """Store the sum of the players' current ``_sim_score`` in ``lineup_sim_score``."""
        self.lineup_sim_score = sum(player._sim_score for player in self.players)


    def sort_player_list(self):
        """Reorder ``self.players`` in place to follow ``lineup_rule.lineup_order``.

        Slots whose name matches a position present in the lineup take a player of that
        position; any other slot is treated as a flex slot and takes the first remaining
        player from the positions listed for it in ``flex_definitions``.
        """
        position_output_order = self.lineup_rule.lineup_order
        flex_definitions = self.lineup_rule.flex_definitions
        ordered_player_list=[]
        player_dict_by_pos={}
        for player in self.players:
            player_dict_by_pos.setdefault(player.position, []).append(player)
        for pos in position_output_order:
            if pos in player_dict_by_pos:
                ordered_player_list.append(player_dict_by_pos[pos].pop(-1))
                continue
            for pos_ in flex_definitions[pos]:
                # .get: a flex-eligible position may have no player at all in this lineup
                candidates = player_dict_by_pos.get(pos_, [])
                if len(candidates)>0:
                    ordered_player_list.append(candidates.pop(-1))
                    break
        self.players = ordered_player_list



In [25]:

class Ship():
    """Placeholder class; the constructor accepts its arguments and does nothing yet."""
    # The annotation is quoted because Slate is defined in a later cell: an unquoted
    # `slate: Slate` is evaluated when this class is created and raises NameError on a fresh kernel.
    def __init__(self, slate: "Slate", n=10, *args, **kwargs):
        pass






In [26]:
class Slate():
    """A slate of players plus the lineup rule used to build optimal lineups from it.

    The constructor validates the input DataFrame, caches its columns as NumPy arrays and
    builds one Slate_Player per row. ``configure_optimization`` must be called once before
    any method that solves the optimizer (``calc_optimal_lineup``, ``calc_optimal_frequency``,
    ``produce_simulated_opto_lineups``) or resets its objective (``simulate_slate`` with
    ``apply_to_opto=True``).
    """
    def __init__(self, slate_df: pd.DataFrame, slate_lineup_rule: Lineup_Rule):
        """Build the slate from a DataFrame.

        Parameters
        ----------
        slate_df : DataFrame with (case-insensitive) columns 'position', 'name + id', 'name',
            'id', 'roster position', 'salary', 'game info', 'teamabbrev', 'proj', 'ownership'
            and 'opponent'. An optional 'stdev' column is used when present, otherwise zeros.
        slate_lineup_rule : Lineup_Rule describing slots, flex eligibility and salary cap.

        Raises
        ------
        ValueError : if any required column is missing.
        """
        required_cols = ['position', 'name + id', 'name', 'id', 'roster position', 'salary', 'game info', 'teamabbrev', 'proj', 'ownership', 'opponent']
        # Work on a copy: the header renaming and the 'stdev' column added below must not
        # leak back into the DataFrame owned by the caller.
        slate_df = slate_df.copy()
        slate_total_players = slate_df.shape[0]
        #make everything lower case internally so that case doesn't matter going fwd
        df_cols = [col.lower() for col in slate_df.columns.to_list()]
        slate_df.columns = df_cols
        missing_cols = [col for col in required_cols if col.lower() not in df_cols]
        if len(missing_cols)>0:
            raise ValueError('Missing the following required columns: ' + str(missing_cols))
        print('DataFrame has all required columns, new Slate object successfully completed')

        #initialize object data
        self.slate_df = slate_df
        self.slate_lineup_rule = slate_lineup_rule
        #initalize Numpy arrays for faster calculation. Numpy arrays are faster than Pandas series objects
        self.name_array = np.array(self.slate_df['name'])
        self.id_array = np.array(self.slate_df['id'])
        self.name_id_array = np.array(self.slate_df['name + id'])
        self.position_array = np.array(self.slate_df['position'])
        self.roster_position_array = np.array(self.slate_df['roster position'])
        self.salary_array = np.array(self.slate_df['salary'])
        self.game_info_array = np.array(self.slate_df['game info'])
        self.teamabbrev_array = np.array(self.slate_df['teamabbrev'])
        self.proj_array = np.array(self.slate_df['proj'])
        self.ownership_array = np.array(self.slate_df['ownership'])
        self.opponent_array = np.array(self.slate_df['opponent'])
        if 'stdev' not in self.slate_df.columns.to_list():
            # No spread supplied: simulate with zero standard deviation
            self.stdev_array = np.zeros(self.proj_array.shape)
            self.slate_df['stdev'] = self.stdev_array
        else:
            self.stdev_array = np.array(self.slate_df['stdev'])

        #initialize arrays for just the optimizer's use, so that you dont lose original data about projection, etc
        self._opto_proj_array = copy.deepcopy(self.proj_array)
        self._opto_stdev_array = copy.deepcopy(self.stdev_array)
        self._opto_gametime_array = copy.deepcopy(self.game_info_array)

        # Optimizer variable name per row: '<position>_<id>'
        self._opto_var_names = np.array([
            str(self.position_array[i]) + '_' + str(self.id_array[i])
            for i in range(slate_total_players)
        ])

        #Initialize a list of slate_player objects so that later work is faster and easier
        slate_player_list = [
            Slate_Player(name=self.name_array[i], slate_name_id=self.name_id_array[i], position=self.position_array[i], var_name=self._opto_var_names[i], game_time=self._opto_gametime_array[i], base_proj=self.proj_array[i], percent_own=self.ownership_array[i], stdev=self.stdev_array[i])
            for i in range(slate_total_players)
        ]
        self.slate_player_array = np.array(copy.deepcopy(slate_player_list))


    def __str__(self):
        return str(self.slate_df)

    def __repr__(self):
        return str(self.slate_df)

    def configure_optimization(self):
        """Build the LP problem (variables, constraints, initial objective) once per slate.

        Creates one binary variable per (position, id), per-position count constraints,
        a total-player constraint, a salary-cap constraint and a projection-maximising
        objective. Also stores the variables in row order in ``self._opto_bin_var`` so the
        objective can be replaced cheaply later.
        """
        #Do all the optimizer stuff that doesn't need to be repeated every time you call the solver
        self._optimizer_LpProblem = LpProblem('maximize', LpMaximize)
        self._optimizer_salary_cap = self.slate_lineup_rule.salary_cap
        self._optimizer_total_players = self.slate_lineup_rule.total_players
        self._optimizer_players_per_position = self.slate_lineup_rule.players_per_posn
        self._optimizer_flex_eligibility = self.slate_lineup_rule.flex_definitions

        #Here we convert lineup rules into something the LpSolver will understand
        #TODO: only the flex slot literally named 'FLEX' is understood; other flex slot names still need support
        flex_slots = self._optimizer_players_per_position.get('FLEX', 0)
        # Without a FLEX slot in the lineup nothing is flex-eligible, so every position is an equality
        flex_eligible = self._optimizer_flex_eligibility.get('FLEX', []) if flex_slots else []
        # All three dicts always exist so the constraint loop below never hits a missing attribute
        self._optimizer_position_constraint_equal_to = {}
        self._optimizer_position_constraint_greater_than = {}
        self._optimizer_position_constraint_less_than = {}
        for pos, count in self._optimizer_players_per_position.items():
            if pos == 'FLEX':
                continue
            if pos in flex_eligible:
                # At least the native slots, at most the native slots plus every FLEX slot
                self._optimizer_position_constraint_greater_than[pos] = count
                self._optimizer_position_constraint_less_than[pos] = count + flex_slots
            else:
                self._optimizer_position_constraint_equal_to[pos] = count

        #local variable for creating dictionaries by position
        availables = self.slate_df.groupby(['position','id','name + id','proj','salary']).agg('count').reset_index()
        salaries_by_position = {}
        projections_by_position = {}
        #Create a dictionaries by position for both salary and projection
        for pos in availables.position.unique():
            available_by_position = availables[availables.position==pos]
            salaries_by_position[pos] = list(available_by_position[['id', 'salary']].set_index('id').to_dict().values())[0]
            projections_by_position[pos] = list(available_by_position[['id', 'proj']].set_index('id').to_dict().values())[0]

        # One binary decision variable per player id, grouped by position
        _vars = {k: LpVariable.dict(k, v, cat='Binary') for k, v in projections_by_position.items()}#Changed v from a proj to an LpVariable
        # Collect the per-position expressions in plain lists and sum them once below,
        # rather than relying on `list += expression` being resolved by the expression's reflected add
        reward_terms=[]
        cost_terms=[]
        for k, v in _vars.items():
            position_count = lpSum([_vars[k][i] for i in v])
            cost_terms.append(lpSum([salaries_by_position[k][i] * _vars[k][i] for i in v]))
            reward_terms.append(lpSum([projections_by_position[k][i] * _vars[k][i] for i in v]))
            if k in self._optimizer_position_constraint_equal_to:
                self._optimizer_LpProblem += position_count == self._optimizer_position_constraint_equal_to[k]
            if k in self._optimizer_position_constraint_less_than:
                self._optimizer_LpProblem += position_count <= self._optimizer_position_constraint_less_than[k]
            if k in self._optimizer_position_constraint_greater_than:
                self._optimizer_LpProblem += position_count >= self._optimizer_position_constraint_greater_than[k]

        total_players_constraint = lpSum([v.values() for v in _vars.values()]) == self._optimizer_total_players
        self._optimizer_LpProblem += total_players_constraint
        salary_cap_constraint = lpSum(cost_terms) <= self._optimizer_salary_cap
        self._optimizer_LpProblem += salary_cap_constraint
        objective = lpSum(reward_terms)#no constraint so will be treated like the objective 
        self._optimizer_LpProblem += objective

        #Create a numpy array of optimization binary variables so it's easy to reset the objective quickly
        opto_bin_var=[]
        for i in range(self.id_array.shape[0]):
            var=_vars[self.position_array[i]][self.id_array[i]]
            opto_bin_var.append(var)
        self._opto_bin_var=np.array(opto_bin_var)

    def reset_optimizer_objective(self):
        """Replace the LP objective with sum(variable * current ``_opto_proj_array`` value)."""
        self._optimizer_LpProblem.objective = lpSum([
            self._opto_bin_var[i]*self._opto_proj_array[i]
            for i in range(self._opto_proj_array.shape[0])
        ])

    def calc_optimal_lineup(self):
        """Solve the configured LP and return the selected players as a Lineup."""
        self._optimizer_LpProblem.solve()
        # Map each variable's actual name to its row. Using the variable's own name (instead of
        # the '<position>_<id>' string) stays correct if the LP library rewrites characters in names.
        index_by_var_name = {}
        for row, var in enumerate(self._opto_bin_var):
            index_by_var_name.setdefault(var.name, row)  # first row wins on duplicates
        lineup_players=[]
        for _var in self._optimizer_LpProblem.variables():
            value = _var.value()
            # Solver values are floats; treat anything above 0.5 as "selected" instead of testing == 1
            if value is not None and value > 0.5:
                lineup_players.append(self.slate_player_array[index_by_var_name[_var.name]])
        return Lineup(lineup_players, self.slate_lineup_rule)

    def simulate_slate(self, apply_to_opto=True):
        """Draw one normal sample per player from (``proj_array``, ``stdev_array``).

        When ``apply_to_opto`` is true the sample becomes the optimizer's projection array
        and the LP objective is rebuilt from it. The sample is returned either way.
        """
        sim = np.random.normal(self.proj_array, self.stdev_array)
        if apply_to_opto:
            self._opto_proj_array = sim
            self.reset_optimizer_objective()
        return sim#Maybe shouldnt return anything

    def calc_optimal_frequency(self, n=1000):
        """Simulate the slate ``n`` times and report how often each player is in the optimal lineup.

        Returns a DataFrame with one row per player (including 'Optimal%' and
        'Leverage' = Optimal% - Ownership) and also writes it to 'BoomBustProbability.xlsx'.
        """
        opto_percent_array = np.zeros(self._opto_proj_array.shape,dtype=float)
        for _ in range(n):
            self.simulate_slate(apply_to_opto=True)
            optimal_lineup = self.calc_optimal_lineup()
            # Slate_Player equality is by slate_name_id, so a set of ids gives the same
            # membership answer without scanning the lineup for every slate row
            selected_ids = {player.slate_name_id for player in optimal_lineup.players}
            for j in range(opto_percent_array.shape[0]):
                if self.slate_player_array[j].slate_name_id in selected_ids:
                    opto_percent_array[j] += 1
        opto_percent_array=100*opto_percent_array/n
        boom_bust_df=pd.DataFrame({'Name':self.name_array,'Name_id':self.name_id_array,'Team':self.teamabbrev_array,'Opponent':self.opponent_array,'Position':self.position_array,'MedianProj':self.proj_array,'StDev':self.stdev_array,'Optimal%':opto_percent_array,'Ownership':self.ownership_array,'Leverage':opto_percent_array-self.ownership_array})
        boom_bust_df.to_excel('BoomBustProbability.xlsx',index=False)
        return boom_bust_df

    def produce_simulated_opto_lineups(self, n=1000):
        """Return a list of ``n`` optimal lineups, one per simulation, each sorted into slot order."""
        lineup_list=[]
        for _ in range(n):
            self.simulate_slate(apply_to_opto=True)
            optimal_lineup=self.calc_optimal_lineup()
            optimal_lineup.sort_player_list()
            lineup_list.append(optimal_lineup)
        return lineup_list

    


In [27]:

def read_slate_file(directory_path, file_name):
    """Load a slate file named ``file_name`` (without extension) from ``directory_path``.

    The extensions .csv, .xlsx, .xls and .xlsm are probed in that order and the first
    file that exists is loaded (CSV via ``pd.read_csv``, the rest via ``pd.read_excel``).
    Returns the loaded DataFrame, or None when no candidate file exists.
    """
    # (extension, reader) pairs in probing priority order
    readers_by_extension = (
        ('.csv', pd.read_csv),
        ('.xlsx', pd.read_excel),
        ('.xls', pd.read_excel),
        ('.xlsm', pd.read_excel),
    )
    for extension, reader in readers_by_extension:
        candidate_path = os.path.join(directory_path, file_name + extension)
        if os.path.exists(candidate_path):
            return reader(candidate_path)

    # Nothing matched any supported extension
    return None



In [29]:


path = './'
file = 'Week2_MNF_Proj'
new_slate_df=read_slate_file(path,file)
# read_slate_file returns None when no matching file exists; stop here with a clear message
# instead of failing inside Slate with an unrelated attribute error on None.
if new_slate_df is None:
    raise FileNotFoundError("No slate file named '" + file + "' (.csv/.xlsx/.xls/.xlsm) found in '" + path + "'")
my_lineup_rule = Lineup_Rule(position_ordered_list=['QB', 'RB', 'RB', 'WR', 'WR', 'WR', 'TE', 'FLEX', 'DST'], flex_definitions={'FLEX': ['RB', 'TE', 'WR']}, salary_cap=50000)
week2_main_slate=Slate(new_slate_df, slate_lineup_rule=my_lineup_rule)
week2_main_slate.configure_optimization()
# Each of the n simulations runs one solver call, so this cell is the slow one in the notebook
opto = week2_main_slate.calc_optimal_frequency(n=2500)
print(opto)
opto.to_csv('optimal_week2_MNF.csv', index=False)











DataFrame has all required columns, new Slate object successfully completed
Welcome to the CBC MILP Solver 
Version: 2.9.0 
Build Date: Feb 12 2015 

command line - /Users/matthewmoore/.local/share/virtualenvs/optimizer_v1-vT5i3nNP/lib/python3.11/site-packages/pulp/apis/../solverdir/cbc/osx/64/cbc /var/folders/1q/3wk2pn_x6rdgn29l0fs4j47r0000gn/T/a65af817e96f4b90bcdf692504b4462b-pulp.mps max branch printingOptions all solution /var/folders/1q/3wk2pn_x6rdgn29l0fs4j47r0000gn/T/a65af817e96f4b90bcdf692504b4462b-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 15 COLUMNS
At line 619 RHS
At line 630 BOUNDS
At line 725 ENDATA
Problem MODEL has 10 rows, 94 columns and 361 elements
Coin0008I MODEL read with 0 errors
Continuous objective value is 157.678 - 0.01 seconds
Cgl0003I 6 fixed, 0 tightened bounds, 0 strengthened rows, 0 substitutions
Cgl0003I 6 fixed, 0 tightened bounds, 0 strengthened rows, 0 substitutions
Cgl0003I 0 fixed, 1 tightened bounds, 0 strengt

In [223]:

class Payout_Structure():
    """Payout table of a contest.

    ``structure`` maps the first placing that achieves a payout to that payout. All
    placings between one key and the next listed key are assumed to receive the same
    payout. Example: if 300th place receives 4x their money and the next defined payout
    is 400th place receiving 3x, finishers in places 300-399 received 4x.
    """
    def __init__(self, structure: dict):
        # Stored as given; no validation or copying is done
        self.structure = structure

class Contest():
    """Bundles a slate, the lineups entered against it and the payout structure."""
    def __init__(self, slate: Slate, lineups: list[Lineup], payout_structure: Payout_Structure):
        # All three arguments are stored unchanged
        self.slate, self.lineups, self.payout_structure = slate, lineups, payout_structure

    def simulate_slate(self):
        """Run one simulation on the underlying slate and return whatever it returns."""
        return self.slate.simulate_slate()
    

my_payout_structure={1: 100, 2:0}

# NOTE(review): `my_lineups` is not defined in any visible cell — confirm it is created
# (e.g. from an earlier lineup-generation step) before this cell runs on a fresh kernel.
# Contest declares payout_structure as a Payout_Structure, so wrap the raw dict instead of passing it directly.
my_contest=Contest(slate=week2_main_slate, lineups=my_lineups, payout_structure=Payout_Structure(my_payout_structure))
my_contest.simulate_slate()




array([ 1.13742029e+01,  0.00000000e+00,  2.01912102e+01,  4.11839724e+01,
        2.77372889e+01,  1.00286564e+01,  3.70748455e+01,  3.99075696e+00,
        1.84802274e+01,  1.84565071e+01,  1.49569859e+01,  3.88389571e+01,
        1.22520561e+01,  3.06971829e+01,  8.82538279e+00,  6.98241856e+00,
        2.49794788e+01,  1.30354345e+01,  6.70183885e+00,  2.18989346e+01,
        5.26244251e+00,  2.51172689e+01,  2.40082409e+01,  1.67042072e+01,
        1.21492744e+01,  7.90601995e+00,  6.93004001e+00,  1.54551573e+01,
        2.36046669e+01,  1.63098503e+01,  1.51739225e+01,  0.00000000e+00,
        2.08126581e+01,  2.04906592e+01,  1.75324535e+01, -2.51891365e+00,
        2.40694628e+01,  8.77113872e+00,  1.58088381e+01,  2.96594907e+01,
        2.70524331e+00,  1.44100463e+01,  3.24437271e+01, -4.95176232e+00,
        1.37989577e+01,  1.46152055e+00,  2.04063587e+01,  1.98006516e+01,
        2.11039430e+01,  3.08784865e+01,  3.41291301e+01,  1.41666133e+01,
        2.10546192e+01,  

[Ryan Tannehill (29526648),
 Bijan Robinson (29526707),
 Joe Mixon (29526719),
 Ja'Marr Chase (29526977),
 Amon-Ra St. Brown (29526979),
 Marvin Jones Jr. (29527097),
 Cade Otton (29527459),
 Jayden Reed (29527115),
 Cowboys  (29527676)]