In [1]:
import sqlite3
import pandas as pd
import numpy as np
import cvxpy
from difflib import get_close_matches

## ATTEMPT 1

In [36]:
# Load both projection files, stack them, and keep only players who are
# projected to score a positive number of fantasy points
proj = pd.concat([pd.read_csv(path) for path in ("proj.csv", "proj2.csv")])
proj = proj.loc[proj["Proj"] > 0]

# Build each player's full name so the slate can be joined to the projections
slate = pd.read_csv("slate.csv")
names = slate["First Name"] + " " + slate["Last Name"]
slate["Name"] = names
# Attach the projected points with an inner join: slate players that have no
# row in 'proj' are dropped
slate = slate.merge(proj[["Name", "Proj"]], how="inner", on="Name")

In [3]:
# One row per player, one 0/1 column per position found in "Position"
# (multi-position strings are split on "/")
valid_positions = slate["Position"].str.get_dummies(sep="/")
# C and 1B share a single roster slot: flag players eligible at either one
catcher_or_first = valid_positions[["1B", "C"]].ne(0).any(axis=1)
valid_positions["C/1B"] = catcher_or_first.astype(int)
valid_positions = valid_positions.drop(columns=["1B", "C"])
# UTIL is open to every player eligible at any non-pitcher position
hitter_flags = valid_positions.drop(columns="P")
valid_positions["UTIL"] = hitter_flags.ne(0).any(axis=1).astype(int)
# Put the columns in the order used by the selection matrix
column_order = ["P", "C/1B", "2B", "3B", "SS", "OF", "UTIL"]
valid_positions = valid_positions[column_order]

In [4]:
# Decision matrix: one row per player, one column per roster slot.
# Column order: P, C/1B, 2B, 3B, SS, OF, UTIL
selection = cvxpy.Variable((len(slate), 7), boolean=True)
# Row sums of the decision matrix: 1 if the player is on the roster (in any
# slot), 0 otherwise
player_selected = cvxpy.sum(selection, axis=1)

# Each player can fill AT MOST one slot. This must be an inequality: with
# "== 1" every player on the slate is forced onto the roster, which
# contradicts the slot counts below (9 slots in total) and makes the problem
# infeasible for any slate that does not contain exactly 9 players.
individual = player_selected <= 1
# Ensure that each player is only selected for positions they can play
feasible_positions = selection <= valid_positions
# Ensure that we select the proper number of players for each slot
position_limits = [1, 1, 1, 1, 1, 3, 1]
positions = cvxpy.sum(selection, axis=0) == position_limits

player_teams = pd.get_dummies(slate["Team"])
# Indicator variable per team. It is bounded above by the number of selected
# players from that team, so it can only be 1 when the team is represented.
team_indicator_var = cvxpy.Variable(len(player_teams.columns), boolean=True)
team_indicator_con = team_indicator_var <= player_selected @ player_teams
# Must have players from at least 3 teams
teams = cvxpy.sum(team_indicator_var) >= 3
# Number of players per team, excluding the pitcher slot (column 0 of
# 'selection'), must be less than or equal to 4
players_per_team = cvxpy.sum(selection[:, 1:], axis=1) @ player_teams <= 4

# Must select players from at least 2 games
player_games = pd.get_dummies(slate["Game"])
# Indicator variable per game, bounded the same way as the team indicators
game_indicator_var = cvxpy.Variable(len(player_games.columns), boolean=True)
game_indicator_con = game_indicator_var <= player_selected @ player_games
games = cvxpy.sum(game_indicator_var) >= 2

# Total salary must be less than or equal to $35,000
salary = player_selected @ slate["Salary"] <= 35000

# Objective: total number of fantasy points expected from selected players
tfp = player_selected @ slate["Proj"]

In [14]:
# Maximise total projected points subject to the roster rules defined in the
# previous cell, then solve as a mixed-integer program with GLPK
objective = cvxpy.Maximize(tfp)
constraints = [
    individual,
    feasible_positions,
    positions,
    team_indicator_con,
    teams,
    players_per_team,
    game_indicator_con,
    games,
    salary,
]
problem = cvxpy.Problem(objective, constraints=constraints)
result = problem.solve(solver=cvxpy.GLPK_MI, verbose=True)

                                     CVXPY                                     
                                     v1.2.1                                    
(CVXPY) May 17 10:54:25 AM: Your problem has 1447 variables, 9 constraints, and 0 parameters.
(CVXPY) May 17 10:54:25 AM: It is compliant with the following grammars: DCP, DQCP
(CVXPY) May 17 10:54:25 AM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)
(CVXPY) May 17 10:54:25 AM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.
-------------------------------------------------------------------------------
                                  Compilation                                  
-------------------------------------------------------------------------------
(CVXPY) May 17 10:54:25 AM: Compiling problem (target solver=GLPK_MI).
(CVXPY) May 17 10:54:25 AM: Reduction chain: FlipObjective -> Dcp2Cone -> CvxAttr2Constr ->

## ATTEMPT 2

In [2]:
def close_matches(x, possible):
    """Return the entry of ``possible`` that is most similar to ``x``.

    Parameters
    ----------
    x : str
        Name to look up.
    possible : iterable of str
        Candidate names to match against.

    Returns
    -------
    str or float
        The best match reported by ``difflib.get_close_matches`` (default
        similarity cutoff), or ``np.nan`` when no candidate is similar enough
        or when ``x`` is not a string (e.g. a missing name).
    """
    # A missing name arrives as NaN (a float); difflib cannot compare it and
    # would raise TypeError, so treat it as "no match".
    if not isinstance(x, str):
        return np.nan
    # Only the top-ranked candidate is used, so ask for just one.
    matches = get_close_matches(x, possible, n=1)
    return matches[0] if matches else np.nan

In [3]:
# Projections: rename the player column so it lines up with the slate's "Name"
proj = pd.read_csv("proj.csv").rename(columns={"Player": "Name"})

slate = pd.read_csv("slate.csv")
names = slate["First Name"] + " " + slate["Last Name"]
slate["Name"] = names
# The two sources do not always spell a player's name the same way, so swap
# each slate name for its closest match among the projection names
slate["Name"] = slate["Name"].map(lambda player: close_matches(player, proj["Name"]))
# Inner join on the (matched) name to bring in the projected points; slate
# players with no row in 'proj' are dropped
slate = slate.merge(proj[["Name", "Projection"]], how="inner", on="Name")
# Players listed at several positions are treated as eligible only at the
# first position listed
slate["Position"] = slate["Position"].str.split("/").str[0]
# Keep only players with a positive projection
slate = slate.loc[slate["Projection"] > 0]

In [4]:
# One indicator column per position
player_positions = pd.get_dummies(slate["Position"])
# C and 1B share one roster slot, so a player at either position counts
# toward the combined C/1B column
catcher_or_first = player_positions[["1B", "C"]].astype(bool).any(axis=1)
player_positions["C/1B"] = catcher_or_first.astype(int)
# Selecting the final column order also discards the separate 1B and C columns
player_positions = player_positions[["P", "C/1B", "2B", "3B", "SS", "OF"]]

# Indicator matrices: which team / game each player belongs to
teams = pd.get_dummies(slate["Team"])
games = pd.get_dummies(slate["Game"])

In [5]:
# Decision variables: one binary per player (1 = on the roster), plus one
# binary per team and per game used to count how many distinct ones are used
selection = cvxpy.Variable(len(slate), boolean=True)
teams_var = cvxpy.Variable(teams.shape[1], boolean=True)
games_var = cvxpy.Variable(games.shape[1], boolean=True)

# Objective to maximize: total fantasy points expected for the selected roster
tfp = selection @ slate["Projection"]

# Salary cap: total salary must be less than or equal to $35,000
salary = selection @ slate["Salary"] <= 35000

# Roster size: exactly 9 players
total_players = cvxpy.sum(selection) == 9

# Position counts. One pitcher is always required and a pitcher cannot fill
# the UTIL slot, so P is fixed at 1. Every other position may hold one player
# more than its minimum, because the extra player can occupy UTIL.
# Column order: P, C/1B, 2B, 3B, SS, OF
position_counts = selection @ player_positions
positions_min = [1, 1, 1, 1, 1, 3]
positions_max = [1, 2, 2, 2, 2, 4]
positions_min_con = position_counts >= positions_min
positions_max_con = position_counts <= positions_max

# At least 3 different teams: a team's indicator is bounded above by the
# number of selected players from that team
teams_var_con = teams_var <= selection @ teams
teams_con = cvxpy.sum(teams_var) >= 3

# At least 2 different games, using the same indicator construction
games_var_con = games_var <= selection @ games
games_con = cvxpy.sum(games_var) >= 2

# At most 4 players from the same team, not counting the pitcher. Multiplying
# the selection element-wise by a 0/1 mask of non-pitchers removes pitchers
# from the per-team counts.
non_pitcher_mask = (player_positions["P"] == 0).astype(int)
players_teams = cvxpy.multiply(selection, non_pitcher_mask) @ teams <= 4

In [6]:
# Maximise total projected points subject to the roster rules defined in the
# previous cell, then solve as a mixed-integer program with GLPK
objective = cvxpy.Maximize(tfp)
constraints = [
    salary,
    teams_var_con,
    teams_con,
    games_var_con,
    games_con,
    players_teams,
    total_players,
    positions_max_con,
    positions_min_con,
]
problem = cvxpy.Problem(objective, constraints=constraints)
result = problem.solve(solver=cvxpy.GLPK_MI, verbose=True)

                                     CVXPY                                     
                                     v1.2.1                                    
(CVXPY) May 17 05:39:50 PM: Your problem has 422 variables, 9 constraints, and 0 parameters.
(CVXPY) May 17 05:39:50 PM: It is compliant with the following grammars: DCP, DQCP
(CVXPY) May 17 05:39:50 PM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)
(CVXPY) May 17 05:39:50 PM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.
-------------------------------------------------------------------------------
                                  Compilation                                  
-------------------------------------------------------------------------------
(CVXPY) May 17 05:39:50 PM: Compiling problem (target solver=GLPK_MI).
(CVXPY) May 17 05:39:50 PM: Reduction chain: FlipObjective -> Dcp2Cone -> CvxAttr2Constr -> 

In [7]:
# Fail with a clear message when there is no solution to read: after an
# unsuccessful solve (infeasible, solver error, ...) the variable has no value.
if selection.value is None:
    raise RuntimeError(f"No roster available, solver status: {problem.status}")

# Pick the selected rows with a boolean mask instead of a name-keyed dict: a
# dict silently merges players that share a name, which would shorten the
# roster. The solver reports floats, so threshold at 0.5 rather than "> 0"
# to stay robust against round-off around 0 and 1.
is_selected = np.asarray(selection.value) > 0.5
roster = slate.loc[is_selected, "Name"].tolist()

print(f"Expected Points: {problem.value}\n")
print("Roster:")
for x in roster:
    print(x)

Expected Points: 137.72000000000003

Roster:
Jose Berrios
Aaron Judge
Giancarlo Stanton
Brandon Belt
Wilmer Flores
Joc Pederson
Brandon Crawford
Anthony Rendon
Marcus Semien


In [None]:
# TODO: Support players who are eligible at multiple positions (e.g. 2B/SS);
# attempt 2 currently only considers the first position listed.