In [None]:
# SOURCE:
# https://github.com/breeko/Fantasy_LP/blob/master/fantasy_lp_final.ipynb

In [None]:
# !python -m pip install pulp

In [1]:
import urllib, json
import pandas as pd
import re
from itertools import permutations

from pulp import *

# Fantasy Football Using Linear Progamming

I'll be using python, pandas and PuLP to make my decision. We'll be working off the naive assumption that whatever the person scored last time, he will score this time and we'll optimize for the highest possible score given our salary and position constraints.

First we have to download and clean up the data a bit.

In [2]:
LATEST_URL = "https://api.draftkings.com/draftgroups/v1/draftgroups/21434/draftables?format=json"

In [3]:
response = urllib.request.urlopen(LATEST_URL)
data = json.loads(response.read())
current = pd.DataFrame.from_dict(data["draftables"])

In [4]:
# Remove players that are out or questionable
current = current[current.status == "None"]

In [5]:
current.head()

Unnamed: 0,draftableId,firstName,lastName,displayName,shortName,playerId,playerDkId,position,rosterSlotId,salary,...,playerImage50,playerImage160,competition,draftStatAttributes,playerAttributes,playerGameAttributes,teamId,teamAbbreviation,draftAlerts,playerGameHash
0,11239044,Alvin,Kamara,Alvin Kamara,A. Kamara,750846,0,RB,67,9500,...,https://d327rxwuxd0q0c.cloudfront.net/m/nfl_50...,https://d327rxwuxd0q0c.cloudfront.net/m/nfl_re...,"{'competitionId': 5523589, 'name': 'CLE @ NO',...","[{'id': 90, 'value': '32.0', 'sortValue': '32....",[],[],350,NO,[],750846-5523589
1,11239045,Alvin,Kamara,Alvin Kamara,A. Kamara,750846,0,RB,70,9500,...,https://d327rxwuxd0q0c.cloudfront.net/m/nfl_50...,https://d327rxwuxd0q0c.cloudfront.net/m/nfl_re...,"{'competitionId': 5523589, 'name': 'CLE @ NO',...","[{'id': 90, 'value': '32.0', 'sortValue': '32....",[],[],350,NO,[],750846-5523589
2,11239046,Todd,Gurley II,Todd Gurley II,T. Gurley II,694641,0,RB,67,9200,...,https://d327rxwuxd0q0c.cloudfront.net/m/nfl_50...,https://d327rxwuxd0q0c.cloudfront.net/m/nfl_re...,"{'competitionId': 5523601, 'name': 'ARI @ LAR'...","[{'id': 90, 'value': '29.5', 'sortValue': '29....",[],[],343,LAR,[],694641-5523601
3,11239047,Todd,Gurley II,Todd Gurley II,T. Gurley II,694641,0,RB,70,9200,...,https://d327rxwuxd0q0c.cloudfront.net/m/nfl_50...,https://d327rxwuxd0q0c.cloudfront.net/m/nfl_re...,"{'competitionId': 5523601, 'name': 'ARI @ LAR'...","[{'id': 90, 'value': '29.5', 'sortValue': '29....",[],[],343,LAR,[],694641-5523601
6,11239276,Antonio,Brown,Antonio Brown,A. Brown,406214,0,WR,68,8800,...,https://d327rxwuxd0q0c.cloudfront.net/m/nfl_50...,https://d327rxwuxd0q0c.cloudfront.net/m/nfl_re...,"{'competitionId': 5523565, 'name': 'KC @ PIT',...","[{'id': 90, 'value': '20.0', 'sortValue': '20....",[],[],356,PIT,[],406214-5523565


The previous points the player scored is nested inside a "draftStatAttributes" field. For some reason its in a list. What we want is the "value" float in the list. It's not always the first element so we need to extract that.

In [6]:
def get_float(l, key):
    """ Returns first float value from a list of dictionaries based on key. Defaults to 0.0 """
    for d in l:
        try:
            return float(d.get(key))
        except:
            pass
    return 0.0

In [7]:
points = [get_float(x, "value") for x in current.draftStatAttributes]
current["points"] = points

We now have everything we need. A few of the records are duplicated, so we can just trim everything down and group by the fields we need: position, displayName, salary and points.

In [8]:
availables = current[["position", "displayName", "salary", "points"]].groupby(["position", "displayName", "salary", "points"]).agg("count")
availables = availables.reset_index()

In [11]:
availables[availables.position=="RB"].head(10)

Unnamed: 0,position,displayName,salary,points
86,RB,Aaron Jones,5000,0.0
87,RB,Adrian Peterson,5500,15.8
88,RB,Alex Armah,3000,3.3
89,RB,Alfred Blue,3900,6.6
90,RB,Alfred Morris,3600,6.4
91,RB,Alvin Kamara,9500,32.0
92,RB,Ameer Abdullah,3600,0.0
93,RB,Andy Janovich,3000,1.1
94,RB,Anthony Sherman,3000,5.3
95,RB,Austin Ekeler,4400,18.2


Since we have a constraint on position (i.e. only one QB, two RB, etc), we need to pivot our salaries and points on position. We also need to define the number of each position we will be constrained to.

DraftKings has a Flex position that can be filled by any running back, wide receiver or tight end. In order to allow an extra running back, wide receiver or tight end, we'll need to increase the allocation for these conditions by 1 and put in an extra condition that says that we cannot pick more 5 total for those.

In [12]:
salaries = {}
points = {}
for pos in availables.position.unique():
    available_pos = availables[availables.position == pos]
    salary = list(available_pos[["displayName","salary"]].set_index("displayName").to_dict().values())[0]
    point = list(available_pos[["displayName","points"]].set_index("displayName").to_dict().values())[0]
    salaries[pos] = salary
    points[pos] = point

pos_num_available = {
    "QB": 1,
    "RB": 3,
    "WR": 4,
    "TE": 2,
    "FLEX": 1,
    "DST": 1
}

pos_flex = {
    "QB": 0,
    "RB": 1,
    "WR": 1,
    "TE": 1,
    "FLEX": 0,
    "DST": 0
}

pos_flex_available = 5

If we look at the salaries variable, it's just a dictionary of player names and salaries pivoted on position. points is the same.

In [21]:
salaries["TE"]

{'A.J. Derby': 2500,
 'Alan Cross': 2500,
 'Alex Ellis': 2500,
 'Anthony Firkser': 2500,
 'Antony Auclair': 2500,
 'Austin Hooper': 2900,
 'Austin Seferian-Jenkins': 3200,
 'Beau Brinkley': 2500,
 'Benjamin Watson': 3400,
 'Cameron Brate': 2800,
 'Charles Clay': 3100,
 'Chris Herndon': 2500,
 'Chris Manhertz': 2500,
 'Cole Wick': 2500,
 'Dallas Goedert': 2700,
 'Dan Arnold': 2500,
 'Darren Fells': 2500,
 'David Njoku': 3000,
 'Demetrius Harris': 2700,
 'Derek Carrier': 2500,
 'Durham Smythe': 2500,
 'Dwayne Allen': 2600,
 'Eric Ebron': 3500,
 'Eric Saubert': 2500,
 'Eric Tomlinson': 2500,
 'Erik Swoope': 2500,
 'Gabe Holmes': 2500,
 'Garrett Celek': 2600,
 'Gavin Escobar': 2500,
 'George Kittle': 3800,
 'Gerald Everett': 2500,
 'Hakeem Valles': 2700,
 'Ian Thomas': 2900,
 'Jake Butt': 2700,
 "James O'Shaughnessy": 2500,
 'James Winchester': 2500,
 'Jared Cook': 3600,
 'Jason Croom': 2500,
 'Jeff Heuerman': 2500,
 'Jeremy Sprinkle': 2500,
 'Jesse James': 2800,
 'Jimmy Graham': 4800,
 'J

In [22]:
SALARY_CAP = 50000

Now we have to define our variables. We want a variables for each position (e.g. QB). There will be an index for each player and the variable will be binary (0 or 1) meant to represent whether the player is included or excluded.

In [23]:
_vars = {k: LpVariable.dict(k, v, cat="Binary") for k, v in points.items()}

Now we can setup our problem. Our cost will just be our salaries indexed for the player times 0 if we are choosing that player and 1 if we're not. Same is true for our reward. And finally we have a constraint on the positions available that we had defined earlier.

In [26]:
prob = LpProblem("Fantasy", LpMaximize)
rewards = []
costs = []
position_constraints = []

# Setting up the reward
for k, v in _vars.items():
    costs += lpSum([salaries[k][i] * _vars[k][i] for i in v])
    rewards += lpSum([points[k][i] * _vars[k][i] for i in v])
    prob += lpSum([_vars[k][i] for i in v]) <= pos_num_available[k]
    prob += lpSum([pos_flex[k] * _vars[k][i] for i in v]) <= pos_flex_available
    
prob += lpSum(rewards)
prob += lpSum(costs) <= SALARY_CAP

In [27]:
prob.solve()

1

The prob object is now solved. It has a variables function that has all our variables and each variable has a varValue which will be either 0 or 1. Below is a helper function to display the results.

In [28]:
def summary(prob):
    div = '---------------------------------------\n'
    print("Variables:\n")
    score = str(prob.objective)
    constraints = [str(const) for const in prob.constraints.values()]
    for v in prob.variables():
        score = score.replace(v.name, str(v.varValue))
        constraints = [const.replace(v.name, str(v.varValue)) for const in constraints]
        if v.varValue != 0:
            print(v.name, "=", v.varValue)
    print(div)
    print("Constraints:")
    for constraint in constraints:
        constraint_pretty = " + ".join(re.findall("[0-9\.]*\*1.0", constraint))
        if constraint_pretty != "":
            print("{} = {}".format(constraint_pretty, eval(constraint_pretty)))
    print(div)
    print("Score:")
    score_pretty = " + ".join(re.findall("[0-9\.]+\*1.0", score))
    print("{} = {}".format(score_pretty, eval(score)))

In [29]:
summary(prob)

Variables:

DST_Jets_ = 1.0
QB_Ryan_Fitzpatrick = 1.0
RB_James_Conner = 1.0
RB_Melvin_Gordon_III = 1.0
TE_Jared_Cook = 1.0
TE_Jesse_James = 1.0
WR_DeSean_Jackson = 1.0
WR_JuJu_Smith_Schuster = 1.0
WR_Kenny_Golladay = 1.0
WR_Will_Fuller_V = 1.0
---------------------------------------

Constraints:
2500*1.0 + 5500*1.0 + 6700*1.0 + 7400*1.0 + 3600*1.0 + 2800*1.0 + 4900*1.0 + 6400*1.0 + 4800*1.0 + 5300*1.0 = 49900.0
---------------------------------------

Score:
13.0*1.0 + 39.6*1.0 + 28.9*1.0 + 30.6*1.0 + 19.5*1.0 + 18.4*1.0 + 30.3*1.0 + 27.0*1.0 + 21.2*1.0 + 28.3*1.0 = 256.8


## Greedy Search

How much better is our search versus what most people do? Earlier I said that I would look at the numbers and do a relative kind of comparison, or just pick the top QB in points, then the top RB in points, and so on. That's called a greedy search. To make it fair, let's consider all possible order combinations and pick the best one.

In [20]:
def eval_players(players):
    return sum([current[current.displayName == player].iloc[0].points for player in players])

In [21]:
def greedy(val):
    remaining = SALARY_CAP
    positions = current.position.unique()
    best_players = []
    best_so_far = -float("inf")
    for comb_position in permutations(positions):
        players = []
        for pos in comb_position:
            for _ in range(pos_num_available[pos]):
                available = current[(~current.displayName.isin(players)) & 
                                 (current.position == pos) & 
                                 (current.salary <= remaining)]
                if available.size > 0:
                    best = available.sort_values(val,ascending=False).iloc[0]
                    players.append(best.displayName)
                    remaining -= best.salary
        cur_eval = eval_players(players)
        if cur_eval > best_so_far:
            best_players = players
            best_so_far = cur_eval
    return best_players

How does it do?

In [22]:
greedy_points = greedy("points")
print(greedy_points)
eval_players(greedy_points)

['Alvin Kamara', 'Melvin Gordon III', 'Todd Gurley II', 'Michael Thomas', 'Tyreek Hill', 'DeSean Jackson', 'Jesse James']


210.50000000000003

About 45 points behind our optimal choice, which isn't too bad. If you notice there is only 8 choices since we ran out of money by the time we got to the end. Let's try a points per salary dollar.

In [23]:
points_per_dollar = current.points / current.salary
current["points_per_dollar"] = points_per_dollar

In [24]:
points_per_dollar = current.points / current.salary
current["points_per_dollar"] = points_per_dollar
greedy_points = greedy("points_per_dollar")
print(greedy_points)
eval_players(greedy_points)

['James Conner', 'Austin Ekeler', 'Melvin Gordon III', 'DeSean Jackson', 'Will Fuller V', 'Kenny Golladay', 'Tyreek Hill', 'Ryan Fitzpatrick', 'Jesse James']


248.2

The linear programming method is not likely to bring you into elite status of Fantasy Football, but it greatly simplifies the problem. The basic assumption we were working on was that the points generated last week will be generated this week. Now all there is left is to solve for how many points we can expect a player to generate. This is a simpler problem. We can run a regression, enrich the data with other sources and look at trailing averages. But when we have our numbers, we can run it through this selection method.