In [1]:
from bs4 import BeautifulSoup
import quopri
import pandas as pd
import cvxpy

In [2]:
def extract_row_data(row, dtype="current"):
    """Extract the fields of interest from one player row of a LineStar table.

    Cells are read purely by position, so the column indices below are tied
    to the layout of the saved page.

    Parameters
    ----------
    row : bs4.element.Tag
        A table row; its ``<td>`` children are looked up by index.
    dtype : {"current", "historical"}, default "current"
        Which table layout the row comes from. The historical layout has an
        extra "Scored" column, which shifts most later columns by one, and
        exposes additional fields.

    Returns
    -------
    dict
        Column name -> raw cell string (no numeric conversion is done here).

    Raises
    ------
    ValueError
        If ``dtype`` is not one of the supported layouts. Previously this
        case silently returned ``None``.
    """
    if dtype not in ("current", "historical"):
        raise ValueError(
            f"Unknown dtype {dtype!r}; expected 'current' or 'historical'"
        )

    cells = row.find_all("td")

    # These four fields sit in the same cells in both layouts.
    player = cells[5].find(class_="playername").text
    position = cells[4].text
    # Some teams only have two characters, causing an extra space
    # at the start, so strip that
    team = cells[5].find(class_="playerTeam").text[-3:].strip()
    salary = cells[9].text

    if dtype == "current":
        return {
            "Player": player,
            "Position": position,
            "Team": team,
            "Opponent": cells[13].text,
            "Salary": salary,
            # The projection is an editable <input>; read its value attribute.
            "Projection": cells[10].find("input").get("value"),
            "Floor": cells[18].text,
            "Ceiling": cells[19].text
        }

    # dtype == "historical"
    # NOTE(review): the original carried "TODO: get points actually scored";
    # the "Scored" field below may already cover that -- confirm.
    return {
        "Player": player,
        "Position": position,
        "Team": team,
        "Salary": salary,
        "Scored": cells[10].text,
        "Projection": cells[11].find("input").get("value"),
        "Consensus": cells[12].text,
        "Time": cells[13].text,
        "Opponent": cells[14].text,
        "Order": cells[16].text,
        "Bat/Arm": cells[17].text,
        "Consistent": cells[18].text,
        "Floor": cells[19].text,
        "Ceiling": cells[20].text,
        "Avg FP": cells[22].text,
        "Imp Runs": cells[23].text,
        "pOwn": cells[25].text,
        "actOwn": cells[26].text,
        "Leverage": cells[27].text,
        "Safety": cells[28].text
    }


def extract_linestar_data(filename, dtype="current"):
    """Parse a saved LineStar page (.mhtml) into a DataFrame of player rows.

    Parameters
    ----------
    filename : str or path-like
        Path to the saved page. Its contents are quoted-printable decoded
        before being handed to BeautifulSoup.
    dtype : {"current", "historical"}, default "current"
        Table layout, forwarded unchanged to ``extract_row_data``.

    Returns
    -------
    pandas.DataFrame
        One row per ``<tr class="playerCardRow">`` found in the first
        ``<table>`` of the document; all values are raw strings.

    Raises
    ------
    IndexError
        If the document contains no ``<table>`` element.
    """
    # Use a context manager so the file handle is closed even if decoding
    # or parsing fails (the original left it open).
    with open(filename, "r") as fh:
        raw = fh.read()

    decoded = quopri.decodestring(raw)
    # NOTE(review): no parser is named, so BeautifulSoup picks whichever is
    # installed; pin one if results must be identical across machines.
    soup = BeautifulSoup(decoded)

    # Only the first table on the page is read.
    table = soup.find_all("table")[0]
    row_data = [
        extract_row_data(row, dtype)
        for row in table.find_all("tr", class_="playerCardRow")
    ]

    return pd.DataFrame(row_data)

In [3]:
# TODO: ACCOUNT FOR 2 CHARACTER TEAMS
def make_games(pitchers):
    """Build the set of unique "AWAY@HOME" game strings from pitcher rows.

    Each row must expose ``Team`` and ``Opponent``. An opponent containing
    "@" means the row's team is the visitor; one containing "vs" means the
    row's team is at home. Rows matching neither are ignored.
    """
    matchups = set()
    for pitcher in pitchers.itertuples():
        opponent = pitcher.Opponent
        # Only the trailing three characters carry the opposing team code.
        tail = opponent[-3:]
        if "@" in opponent:
            # Two-character codes drag the "@" into the slice; drop it.
            away, home = pitcher.Team, tail.replace("@", "")
        elif "vs" in opponent:
            # Two-character codes drag a leading space into the slice; drop it.
            away, home = tail.strip(), pitcher.Team
        else:
            continue
        matchups.add("@".join((away, home)))
    return matchups

def make_game_strings(slate):
    """Return, for every row of ``slate``, the game string its team plays in.

    Games are derived from the pitcher rows (``Position == "P"``) via
    ``make_games``. The result always has exactly one entry per row, in row
    order, so it can be assigned straight back as a column.

    The previous implementation tested ``row.Team in game``, which is a
    substring match run against every game: a short team code could match
    an unrelated game, and a player could contribute zero or several
    entries, leaving the list misaligned with ``slate``.

    Parameters
    ----------
    slate : pandas.DataFrame
        Must contain "Position", "Team" and "Opponent" columns.

    Returns
    -------
    list
        "AWAY@HOME" strings; ``None`` for a team that appears in no game.
        If a team appears in more than one game, the alphabetically first
        game string is used.
    """
    games = make_games(slate[slate["Position"] == "P"])

    # Exact team-code -> game lookup. Sorting makes the choice deterministic
    # when a team shows up in more than one game string.
    game_by_team = {}
    for game in sorted(games):
        for team in game.split("@"):
            game_by_team.setdefault(team, game)

    return [game_by_team.get(team) for team in slate["Team"]]

In [4]:
# Load the slate and convert the scraped strings to numeric types.
slate = extract_linestar_data("proj.mhtml")
# Strip "$" and thousands separators before casting. The pattern is a raw
# string: "\$" in a normal string literal is an invalid escape sequence and
# triggers a warning on recent Python versions.
slate["Salary"] = slate["Salary"].replace(r"[\$,]", "", regex=True).astype(int)
slate["Projection"] = slate["Projection"].astype(float)
# Game strings are derived from rows whose Position is exactly "P", so this
# runs before Position is rewritten below.
slate["Game"] = make_game_strings(slate)
# For players that can fill multiple positions, assume they can only fill the first
# one listed.
# ASSUMPTION: because of the UTIL position, this should cause only minor error
slate["Position"] = slate["Position"].str.split("/", expand=True)[0]

In [5]:
# One-hot encode each player's position (one row per player, one column per
# position). dtype=int is requested explicitly: newer pandas versions return
# bool dummies by default, and mixing those with the integer "C/1B" column
# below would leave the frame with mixed dtypes.
positions = pd.get_dummies(slate["Position"], dtype=int)
# If a player plays either C or 1B, they can fill the 1B/C position
positions["C/1B"] = (positions["1B"].astype(bool) | positions["C"].astype(bool)).astype(int)
positions = positions.drop(columns=["1B", "C"])
# Reorder columns; the per-position limits in the model are listed in this order
positions = positions[["P", "C/1B", "2B", "3B", "SS", "OF"]]

# Player-by-team and player-by-game indicator matrices.
teams = pd.get_dummies(slate["Team"], dtype=int)
games = pd.get_dummies(slate["Game"], dtype=int)

In [6]:
# Decision variables.
# selection has one boolean entry per row of `slate` (1 = player is on the roster).
# teams_var / games_var have one boolean entry per team / game column; they are
# tied to `selection` by the linking constraints below.
selection = cvxpy.Variable(len(slate), boolean=True)
teams_var = cvxpy.Variable(len(teams.columns), boolean=True)
games_var = cvxpy.Variable(len(games.columns), boolean=True)

# Total salary must be less than or equal to $35,000
salary = selection @ slate["Salary"] <= 35000

# Must select players from at least 3 different teams
# `selection @ teams` counts the selected players per team, so a team's
# indicator can only be 1 if at least one of its players is selected.
teams_var_con = teams_var <= selection @ teams
teams_con = cvxpy.sum(teams_var) >= 3

# Must select players from at least 2 different games
# Same linking idea as for teams, using the per-game player counts.
games_var_con = games_var <= selection @ games
games_con = cvxpy.sum(games_var) >= 2

# No more than 4 players, not counting the pitcher, can be selected from the same team
# First term is our selected players multiplied by a boolean array where 1's indicate non-pitcher players.
# This filters the selected players so the constraint only applies to non-pitcher players
players_teams = cvxpy.multiply(selection, (~positions["P"].astype(bool)).astype(int)) @ teams <= 4

# Must have 9 players selected
total_players = cvxpy.sum(selection) == 9

# Max and min number of players we can select for each position
# (entries follow the column order of `positions`).
# Must always have 1 pitcher, who cannot fill the UTIL position
# We can select up to 1 additional player from each other position because
# the second can fill the UTIL position
positions_max = [1, 2, 2, 2, 2, 4]
positions_min = [1, 1, 1, 1, 1, 3]
positions_max_con = selection @ positions <= positions_max
positions_min_con = selection @ positions >= positions_min

# Maximize: Total number of fantasy points expected for selected roster
tfp = selection @ slate["Projection"]

In [7]:
# Collect every constraint built above and maximise total projected points.
constraints = [salary,
               teams_var_con,
               teams_con,
               games_var_con,
               games_con,
               players_teams,
               total_players,
               positions_max_con,
               positions_min_con]
problem = cvxpy.Problem(cvxpy.Maximize(tfp), constraints=constraints)
result = problem.solve(solver=cvxpy.GLPK_MI, verbose=True)

# Fail here with a clear message if the solver found no roster; otherwise
# selection.value is None and the reporting code fails with an opaque error.
if problem.status not in ("optimal", "optimal_inaccurate"):
    raise RuntimeError(
        f"Lineup optimisation did not produce a solution (status: {problem.status})"
    )

                                     CVXPY                                     
                                     v1.2.1                                    
(CVXPY) May 24 05:31:29 PM: Your problem has 276 variables, 9 constraints, and 0 parameters.
(CVXPY) May 24 05:31:29 PM: It is compliant with the following grammars: DCP, DQCP
(CVXPY) May 24 05:31:29 PM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)
(CVXPY) May 24 05:31:29 PM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.
-------------------------------------------------------------------------------
                                  Compilation                                  
-------------------------------------------------------------------------------
(CVXPY) May 24 05:31:29 PM: Compiling problem (target solver=GLPK_MI).
(CVXPY) May 24 05:31:29 PM: Reduction chain: FlipObjective -> Dcp2Cone -> CvxAttr2Constr -> 

In [8]:
# Boolean variables come back from the solver as floats, so threshold at 0.5
# rather than comparing with 0. Masking the slate rows directly (instead of
# building a dict keyed by name) keeps players who happen to share a name.
is_selected = selection.value > 0.5
roster = slate.loc[is_selected, "Player"].tolist()

print(f"Expected Points: {selection.value @ slate['Projection']}\n")
print("Roster:")
for x in roster:
    print(x)

Expected Points: 142.71

Roster:
Corbin Burnes (R)
Ronald Acuña Jr.
Rafael Devers
William Contreras
Brendan Donovan
Daulton Varsho
Tim Anderson
Kole Calhoun
Edwin Ríos
