In [1]:
import urllib, json
import pandas as pd
import re
from itertools import permutations

from pulp import *

# Fantasy Football Using Linear Progamming

I'll be using python, pandas and PuLP to make my decision. We'll be working off the naive assumption that whatever the person scored last time, he will score this time and we'll optimize for the highest possible score given our salary and position constraints.

First we have to download and clean up the data a bit.

In [2]:
LATEST_URL = "https://api.draftkings.com/draftgroups/v1/draftgroups/21434/draftables?format=json"

In [3]:
response = urllib.request.urlopen(LATEST_URL)
data = json.loads(response.read())
current = pd.DataFrame.from_dict(data["draftables"])

DraftKings has a Flex position that can be filled by any running back, wide receiver or tight end. Generally a player can only fill one role, so we need to add those eligible to the flex position back to our data frame and label them as position "FLEX". 

The previous points the player scored is nested inside a "draftStatAttributes" field. For instance:

In [4]:
# Remove players that are out or questionable
current = current[current.status == "None"]

# Add flex position
flex = current[current.position.isin(["RB","WR","TE"])].copy()
flex.position = "FLEX"

current = pd.concat([current, flex])

In [5]:
current.head()

Unnamed: 0,competition,displayName,draftAlerts,draftStatAttributes,draftableId,firstName,isDisabled,isSwappable,lastName,newsStatus,...,playerImage50,playerImage65,playerImageFull,position,rosterSlotId,salary,shortName,status,teamAbbreviation,teamId
0,"{'competitionId': 5523589, 'name': 'CLE @ NO',...",Alvin Kamara,[],"[{'id': 90, 'value': '32.0', 'sortValue': '32....",11239044,Alvin,False,False,Kamara,Breaking,...,https://d327rxwuxd0q0c.cloudfront.net/m/nfl_50...,https://d327rxwuxd0q0c.cloudfront.net/m/nfl_65...,https://d327rxwuxd0q0c.cloudfront.net/nfl/play...,RB,67,9500,A. Kamara,,NO,350
1,"{'competitionId': 5523589, 'name': 'CLE @ NO',...",Alvin Kamara,[],"[{'id': 90, 'value': '32.0', 'sortValue': '32....",11239045,Alvin,False,False,Kamara,Breaking,...,https://d327rxwuxd0q0c.cloudfront.net/m/nfl_50...,https://d327rxwuxd0q0c.cloudfront.net/m/nfl_65...,https://d327rxwuxd0q0c.cloudfront.net/nfl/play...,RB,70,9500,A. Kamara,,NO,350
6,"{'competitionId': 5523565, 'name': 'KC @ PIT',...",Antonio Brown,[],"[{'id': 90, 'value': '20.0', 'sortValue': '20....",11239276,Antonio,False,False,Brown,Breaking,...,https://d327rxwuxd0q0c.cloudfront.net/m/nfl_50...,https://d327rxwuxd0q0c.cloudfront.net/m/nfl_65...,https://d327rxwuxd0q0c.cloudfront.net/nfl/play...,WR,68,8800,A. Brown,,PIT,356
7,"{'competitionId': 5523565, 'name': 'KC @ PIT',...",Antonio Brown,[],"[{'id': 90, 'value': '20.0', 'sortValue': '20....",11239277,Antonio,False,False,Brown,Breaking,...,https://d327rxwuxd0q0c.cloudfront.net/m/nfl_50...,https://d327rxwuxd0q0c.cloudfront.net/m/nfl_65...,https://d327rxwuxd0q0c.cloudfront.net/nfl/play...,WR,70,8800,A. Brown,,PIT,356
8,"{'competitionId': 5523589, 'name': 'CLE @ NO',...",Michael Thomas,[],"[{'id': 90, 'value': '37.0', 'sortValue': '37....",11239278,Michael,False,False,Thomas,Recent,...,https://d327rxwuxd0q0c.cloudfront.net/m/nfl_50...,https://d327rxwuxd0q0c.cloudfront.net/m/nfl_65...,https://d327rxwuxd0q0c.cloudfront.net/nfl/play...,WR,68,8600,M. Thomas,,NO,350


The previous points the player scored is nested inside a "draftStatAttributes" field. For some reason its in a list. What we want is the "value" float in the list. It's not always the first element so we need to extract that.

In [6]:
def get_float(l, key):
    """ Returns first float value from a list of dictionaries based on key. Defaults to 0.0 """
    for d in l:
        try:
            return float(d.get(key))
        except:
            pass
    return 0.0

In [7]:
points = [get_float(x, "value") for x in current.draftStatAttributes]
current["points"] = points

We now have everything we need. A few of the records are duplicated, so we can just trim everything down and group by the fields we need: position, displayName, salary and points.

In [8]:
availables = current[["position", "displayName", "salary", "points"]].groupby(["position", "displayName", "salary", "points"]).agg("count")
availables = availables.reset_index()

In [9]:
availables[availables.position=="QB"].head(15)

Unnamed: 0,position,displayName,salary,points
328,QB,AJ McCarron,4600,0.0
329,QB,Aaron Rodgers,6800,20.5
330,QB,Alex Smith,6000,16.3
331,QB,Andrew Luck,6200,18.4
332,QB,Baker Mayfield,4600,0.0
333,QB,Ben Roethlisberger,6900,29.5
334,QB,Blaine Gabbert,4700,6.3
335,QB,Blake Bortles,5400,14.2
336,QB,Brandon Weeden,4600,0.0
337,QB,Brian Hoyer,4500,0.0


Since we have a constraint on position (i.e. only one QB, two RB, etc), we need to pivot our salaries and points on position. We also need to define the number of each position we will be constrained to.

In [10]:
salaries = {}
points = {}
for pos in availables.position.unique():
    available_pos = availables[availables.position == pos]
    salary = list(available_pos[["displayName","salary"]].set_index("displayName").to_dict().values())[0]
    point = list(available_pos[["displayName","points"]].set_index("displayName").to_dict().values())[0]
    salaries[pos] = salary
    points[pos] = point

pos_num_available = {
    "QB": 1,
    "RB": 2,
    "WR": 3,
    "TE": 1,
    "FLEX": 1,
    "DST": 1
}

In [11]:
pos_num_available = {
    "QB": 1,
    "RB": 2,
    "WR": 3,
    "TE": 1,
    "FLEX": 1,
    "DST": 1
}

If we look at the salaries variable, it's just a dictionary of player names and salaries pivoted on position. points is the same.

In [12]:
salaries["DST"]

{'49ers ': 2800,
 'Bills ': 2100,
 'Broncos ': 3300,
 'Browns ': 2000,
 'Buccaneers ': 2200,
 'Cardinals ': 2100,
 'Chargers ': 3600,
 'Chiefs ': 2200,
 'Colts ': 2300,
 'Dolphins ': 3100,
 'Eagles ': 3400,
 'Falcons ': 2700,
 'Jaguars ': 2800,
 'Jets ': 2500,
 'Lions ': 2300,
 'Packers ': 2500,
 'Panthers ': 2900,
 'Patriots ': 3000,
 'Raiders ': 2300,
 'Rams ': 3700,
 'Redskins ': 2700,
 'Saints ': 3200,
 'Steelers ': 2700,
 'Texans ': 2600,
 'Titans ': 2400,
 'Vikings ': 2600}

In [13]:
SALARY_CAP = 50000

Now we have to define our variables. We want a variables for each position (e.g. QB). There will be an index for each player and the variable will be binary (0 or 1) meant to represent whether the player is included or excluded.

In [14]:
_vars = {k: LpVariable.dict(k, v, cat="Binary") for k, v in points.items()}

Now we can setup our problem. Our cost will just be our salaries indexed for the player times 0 if we are choosing that player and 1 if we're not. Same is true for our reward. And finally we have a constraint on the positions available that we had defined earlier.

In [15]:
prob = LpProblem("Fantasy", LpMaximize)
rewards = []
costs = []
position_constraints = []

# Setting up the reward
for k, v in _vars.items():
    costs += lpSum([salaries[k][i] * _vars[k][i] for i in v])
    rewards += lpSum([points[k][i] * _vars[k][i] for i in v])
    prob += lpSum([_vars[k][i] for i in v]) <= pos_num_available[k]
    
prob += lpSum(rewards)
prob += lpSum(costs) <= SALARY_CAP


In [16]:
prob.solve()

1

The prob object is now solved. It has a variables function that has all our variables and each variable has a varValue which will be either 0 or 1. Below is a helper function to display the results.

In [17]:
def summary(prob):
    div = '---------------------------------------\n'
    print("Variables:\n")
    score = str(prob.objective)
    constraints = [str(const) for const in prob.constraints.values()]
    for v in prob.variables():
        score = score.replace(v.name, str(v.varValue))
        constraints = [const.replace(v.name, str(v.varValue)) for const in constraints]
        if v.varValue != 0:
            print(v.name, "=", v.varValue)
    print(div)
    print("Constraints:")
    for constraint in constraints:
        constraint_pretty = " + ".join(re.findall("[0-9\.]*\*1.0", constraint))
        if constraint_pretty != "":
            print("{} = {}".format(constraint_pretty, eval(constraint_pretty)))
    print(div)
    print("Score:")
    score_pretty = " + ".join(re.findall("[0-9\.]+\*1.0", score))
    print("{} = {}".format(score_pretty, eval(score)))

In [18]:
summary(prob)

Variables:

DST_Jets_ = 1.0
FLEX_Jared_Cook = 1.0
QB_Ryan_Fitzpatrick = 1.0
RB_James_Conner = 1.0
RB_Melvin_Gordon_III = 1.0
TE_Jared_Cook = 1.0
WR_DeSean_Jackson = 1.0
WR_Emmanuel_Sanders = 1.0
WR_Michael_Thomas = 1.0
---------------------------------------

Constraints:
2500*1.0 + 3600*1.0 + 5500*1.0 + 6700*1.0 + 7400*1.0 + 3600*1.0 + 4900*1.0 + 6200*1.0 + 8600*1.0 = 49000.0
---------------------------------------

Score:
17.5*1.0 + 30.0*1.0 + 39.6*1.0 + 28.9*1.0 + 30.6*1.0 + 30.0*1.0 + 30.3*1.0 + 32.5*1.0 + 37.0*1.0 = 276.4


## Greedy Search

How much better is our search versus what most people do? Earlier I said that I would look at the numbers and do a relative kind of comparison, or just pick the top QB in points, then the top RB in points, and so on. That's called a greedy search. To make it fair, let's consider all possible order combinations and pick the best one.

In [19]:
def eval_players(players):
    return sum([current[current.displayName == player].iloc[0].points for player in players])

In [20]:
def greedy(val):
    remaining = SALARY_CAP
    positions = current.position.unique()
    best_players = []
    best_so_far = -float("inf")
    for comb_position in permutations(positions):
        players = []
        for pos in comb_position:
            for _ in range(pos_num_available[pos]):
                available = current[(~current.displayName.isin(players)) & 
                                 (current.position == pos) & 
                                 (current.salary <= remaining)]
                if available.size > 0:
                    best = available.sort_values(val,ascending=False).iloc[0]
                    players.append(best.displayName)
                    remaining -= best.salary
        cur_eval = eval_players(players)
        if cur_eval > best_so_far:
            best_players = players
            best_so_far = cur_eval
    return best_players

How does it do?

In [21]:
greedy_points = greedy("points")
print(greedy_points)
eval_players(greedy_points)

['Alvin Kamara', 'Melvin Gordon III', 'Michael Thomas', 'Tyreek Hill', 'Emmanuel Sanders', 'Ryan Fitzpatrick', 'Jared Cook']


234.4

About 25 points behind our optimal choice, which isn't too bad. If you notice there is only 8 choices since we ran out of money by the time we got to the end. Let's try a points per salary dollar.

In [22]:
points_per_dollar = current.points / current.salary
current["points_per_dollar"] = points_per_dollar

In [23]:
points_per_dollar = current.points / current.salary
current["points_per_dollar"] = points_per_dollar
greedy_points = greedy("points_per_dollar")
print(greedy_points)
eval_players(greedy_points)

['Jalen Richard', 'James Conner', 'DeSean Jackson', 'Will Fuller V', 'Emmanuel Sanders', 'Ryan Fitzpatrick', 'Jared Cook', 'Jets ', 'Jesse James']


242.39999999999998

The linear programming method is not likely to bring you into elite status of Fantasy Football, but it greatly simplifies the problem. The basic assumption we were working on was that the points generated last week will be generated this week. Now all there is left is to solve for how many points we can expect a player to generate. This is a simpler problem. We can run a regression, enrich the data with other sources and look at trailing averages. But when we have our numbers, we can run it through this selection method.