### projection model => project points per player  
- [ ]    Basic Stats  
- [ ]    Usage  
- [ ]    Percentage of team scoring  
- [ ]    Injuries  
- [ ]    Bye week
- [ ]    Injuries to Key Teammates
- [ ]    Individual Matchups
- [ ]    Opposing Team (record, defense, injuries, etc.)
- [ ]    Home/Away
- [ ]    Weather
- [ ]    Expert Picks
- [x]    Other Fantasy Projections

### selection mechanism => choose optimal team within parameters
- [ ]    maximize value (pts/salary)
- [x]    Linear Optimization
    - Inspiration: https://github.com/breeko/Fantasy_LP/blob/master/fantasy_lp_final.ipynb

In [1]:
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np

In [2]:
# Season and week to process; both are interpolated into the input and output file paths used below.
year = 2023
week = 14

# This Week

## Rotowire

In [3]:
# https://www.rotowire.com/daily/nfl/dfs-opportunities.php?site=DraftKings  # choose full slate
def rotowire_csv(filepath):
    """Load a Rotowire projections CSV into the common player schema.

    Reads ``filepath`` with pandas, renames the PLAYER / POS / SAL / FPTS
    columns to name / pos / salary / prediction, relabels position ``'D'``
    as ``'Def'`` and returns only those four columns.
    """
    column_names = {'PLAYER': 'name', 'POS': 'pos', 'SAL': 'salary', 'FPTS': 'prediction'}
    players = pd.read_csv(filepath).rename(columns=column_names)
    # Defenses are labelled 'D' in this file; the rest of the notebook uses 'Def'.
    is_defense = players['pos'] == 'D'
    players.loc[is_defense, 'pos'] = 'Def'
    return players[['name', 'pos', 'salary', 'prediction']]
    
# Load this week's Rotowire export (path built from year/week) and display it.
rdf = rotowire_csv(f"{year}_fantasy_data/week{week}_rotowire-NFL-projected-roster-percent.csv")
rdf

Unnamed: 0,name,pos,salary,prediction
0,Justin Fields,QB,6800,20.89
1,JuJu Smith-Schuster,WR,3000,8.84
2,Russell Wilson,QB,5800,16.59
3,Desmond Ridder,QB,5000,14.29
4,Baker Mayfield,QB,5300,15.17
...,...,...,...,...
579,John FitzPatrick,TE,2500,0.00
580,Khari Blasingame,RB,4000,0.10
581,Tyler Goodson,RB,4000,0.00
582,Christian Watson,WR,4900,0.00


In [4]:
# Number of rows in the Rotowire frame whose position is 'Def'.
len(rdf[rdf['pos']=='Def'])

30

## Antonio's Official Data

In [7]:
# Folder holding the weekly salary CSVs.
# NOTE(review): absolute, user-specific path — this cell only works on a machine that has this folder.
sal_dir = f"/Users/adamschiller/OneDrive - BOOZ ALLEN HAMILTON/Weekly Salary Data"
# Raw salary file for the selected week; normalised by format_antonio_salary_data.
df = pd.read_csv(f"{sal_dir}/week{week}_salaries.csv")

def format_antonio_salary_data(df):
    """Return the salary data in the common name/pos/salary/prediction schema.

    Renames ``player`` -> ``name``, ``dk_salary`` -> ``salary`` and
    ``fantasy_score`` -> ``prediction``, relabels position ``'DST'`` as
    ``'Def'`` and keeps only those four columns.

    The input frame is left untouched: the rename produces a new frame and
    the position relabel is applied to that new frame only, so the cell can
    be re-run against the same raw ``df`` without surprises.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw salary data containing ``player``, ``pos``, ``dk_salary`` and
        ``fantasy_score`` columns.

    Returns
    -------
    pandas.DataFrame
        New frame with columns ``name``, ``pos``, ``salary``, ``prediction``.
    """
    column_names = {'player': 'name', 'dk_salary': 'salary', 'fantasy_score': 'prediction'}
    formatted = df.rename(columns=column_names)
    formatted.loc[formatted['pos'] == 'DST', 'pos'] = 'Def'
    return formatted[['name', 'pos', 'salary', 'prediction']]

# Normalise the salary data to name/pos/salary/prediction and display it.
adf = format_antonio_salary_data(df)
adf

Unnamed: 0,name,pos,salary,prediction
0,Zach Wilson,QB,4700,13.50
1,Baker Mayfield,QB,5300,15.12
2,Justin Fields,QB,6800,19.12
3,Geno Smith,QB,5700,15.67
4,Desmond Ridder,QB,5000,14.17
...,...,...,...,...
566,Trevor Siemian,QB,4700,0.00
567,C.J. Uzomah,TE,2500,0.00
568,Aaron Rodgers,QB,4000,0.00
569,Jesse Matthews,WR,3000,0.00


In [8]:
# Number of rows in the salary frame whose position is 'Def'.
len(adf[adf['pos']=='Def'])

30

## Daily Fantasy Fuel

In [10]:
# Build a lookup from the last word of each defense's name to the full name,
# so sources that only carry that word can be matched to the full name.
def defense_mapping(df):
    """Return ``{last word of name: full name}`` for every row whose pos is 'Def'."""
    defense_names = df.loc[df['pos'] == 'Def', 'name']
    lookup = {}
    for full_name in defense_names:
        # Names are split on single spaces; the final piece becomes the key.
        lookup[full_name.split(' ')[-1]] = full_name
    return lookup

# Lookup built from the salary frame (adf); read as a global by apply_defense_map.
defense_map = defense_mapping(adf)

def apply_defense_map(x):
    """Translate ``x`` through the global ``defense_map``; unknown keys are returned unchanged."""
    return defense_map.get(x, x)

In [11]:
# https://www.dailyfantasyfuel.com/nfl/projections/
def dff_csv(filepath):
    """Load a Daily Fantasy Fuel cheat-sheet CSV into the common player schema.

    Builds ``name`` from ``first_name`` and ``last_name``, renames
    ``position`` -> ``pos`` and ``ppg_projection`` -> ``prediction``, and
    returns the columns name / pos / injury_status / salary / prediction.
    Rows with position ``'DST'`` get their name from ``first_name`` passed
    through ``apply_defense_map`` and are relabelled ``'Def'``.
    """
    raw = pd.read_csv(filepath)
    raw['name'] = raw['first_name'] + ' ' + raw['last_name']
    players = raw.rename(columns={'position': 'pos', 'ppg_projection': 'prediction'})

    # Defense rows: replace the concatenated name with the mapped first_name
    # so it can line up with the names used by the other source.
    is_defense = players['pos'] == 'DST'
    mapped_names = players['first_name'].apply(apply_defense_map)
    players.loc[is_defense, 'name'] = mapped_names
    players.loc[is_defense, 'pos'] = 'Def'

    return players[['name', 'pos', 'injury_status', 'salary', 'prediction']]

# Load this week's Daily Fantasy Fuel cheat sheet (path built from year/week) and display it.
dff = dff_csv(f"{year}_fantasy_data/DFF_NFL_cheatsheet_week{week}.csv")
dff

Unnamed: 0,name,pos,injury_status,salary,prediction
0,Tyreek Hill,WR,,9800,25.3
1,Christian McCaffrey,RB,,9200,24.6
2,Jalen Hurts,QB,,8200,24.4
3,Josh Allen,QB,,8300,24.0
4,Dak Prescott,QB,,7400,23.8
...,...,...,...,...,...
399,Isiah Pacheco,RB,O,6700,0.0
400,Christian Watson,WR,O,4900,0.0
401,Kenny Pickett,QB,O,5000,0.0
402,Dalton Schultz,TE,O,5000,0.0


In [12]:
# Number of rows in the Daily Fantasy Fuel frame whose position is 'Def'.
len(dff[dff['pos']=='Def'])

30

## Merge sources

In [13]:
def merge_sources(df1, df2):
    """Combine two projection sources into one averaged projection per player.

    ``df2``'s ``injury_status`` and ``prediction`` are inner-joined onto
    ``df1`` by ``name``.  Players with a non-null injury status, or with a
    non-positive projection in either source, are dropped.

    Parameters
    ----------
    df1 : pandas.DataFrame
        Base frame; must contain ``name`` and ``prediction``.  All of its
        columns are carried through (``prediction`` becomes ``prediction_df1``).
    df2 : pandas.DataFrame
        Must contain ``name``, ``injury_status`` and ``prediction``
        (which becomes ``prediction_df2``).

    Returns
    -------
    pandas.DataFrame
        ``df1``'s columns plus ``prediction_df2``, ``prediction`` (row mean of
        the two sources) and ``pred_diff`` (absolute difference between them).
        The original merge index is kept, so it has gaps where rows were dropped.
    """
    merged = df1.merge(
        df2[['name', 'injury_status', 'prediction']],
        on='name',
        how='inner',
        suffixes=('_df1', '_df2'),
    )
    healthy = merged['injury_status'].isna()
    projected = (merged['prediction_df1'] > 0) & (merged['prediction_df2'] > 0)
    # Take an explicit copy of the filtered rows: adding columns to a
    # boolean-indexed slice triggers pandas' SettingWithCopyWarning.
    kept = merged.loc[healthy & projected].copy()
    kept['prediction'] = kept[['prediction_df1', 'prediction_df2']].mean(axis=1)
    kept['pred_diff'] = (kept['prediction_df1'] - kept['prediction_df2']).abs()
    # The per-source predictions are kept for inspection; only injury_status is dropped.
    return kept.drop(columns=['injury_status'])

In [14]:
# Combine the salary-file projections (suffix _df1) with the Daily Fantasy Fuel
# projections (suffix _df2); note this rebinds df, which previously held the raw salary file.
df = merge_sources(adf, dff)
df

Unnamed: 0,name,pos,salary,prediction_df1,prediction_df2,prediction,pred_diff
0,Zach Wilson,QB,4700,13.50,11.7,12.600,1.80
1,Baker Mayfield,QB,5300,15.12,15.1,15.110,0.02
2,Justin Fields,QB,6800,19.12,21.8,20.460,2.68
4,Desmond Ridder,QB,5000,14.17,13.5,13.835,0.67
5,Brock Purdy,QB,6500,17.93,20.7,19.315,2.77
...,...,...,...,...,...,...,...
351,Davis Allen,TE,2700,0.12,0.4,0.260,0.28
353,Khari Blasingame,RB,4000,0.10,0.1,0.100,0.00
359,Godwin Igwebuike,RB,4000,0.05,0.1,0.075,0.05
362,Jalen Nailor,WR,3000,0.13,1.1,0.615,0.97


In [15]:
# Show the players on whom the two sources disagree most first.
df.sort_values('pred_diff', ascending=False)

Unnamed: 0,name,pos,salary,prediction_df1,prediction_df2,prediction,pred_diff
280,Clyde Edwards-Helaire,RB,4200,2.28,10.4,6.340,8.12
270,Brevin Jordan,TE,3100,2.26,6.9,4.580,4.64
72,DJ Moore,WR,6500,13.57,18.2,15.885,4.63
100,Tyreek Hill,WR,9800,20.79,25.3,23.045,4.51
80,Keenan Allen,WR,8600,18.20,22.6,20.400,4.40
...,...,...,...,...,...,...,...
208,Demarcus Robinson,WR,3200,4.09,4.1,4.095,0.01
308,Devin Duvernay,WR,3000,0.90,0.9,0.900,0.00
37,Bijan Robinson,RB,6500,15.30,15.3,15.300,0.00
190,Treylon Burks,WR,3000,4.30,4.3,4.300,0.00


In [16]:
# Number of players left in the merged pool per position.
df['pos'].value_counts()

pos
WR     114
RB      78
TE      62
Def     30
QB      26
Name: count, dtype: int64

# Top Projected per Pos

In [22]:
def avg_per_dollar(df, col):
    """Rank players by value: ``col`` per $1,000 of salary.

    Returns a new frame with an added ``'Avg Value'`` column
    (``df[col] / (df['salary'] / 1000)``), sorted by that column in
    descending order.  The input frame is not modified, so frames that are
    passed on to other steps do not silently grow an extra column.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``col`` and ``salary``.
    col : str
        Name of the points column to value.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with ``'Avg Value'`` appended, best value first.
    """
    value = df[col] / (df['salary'] / 1000)
    # The column name contains a space, so it has to be passed via a dict.
    ranked = df.assign(**{'Avg Value': value})
    return ranked.sort_values('Avg Value', ascending=False)

# Merged pool ranked by averaged projection per $1,000 of salary (best value first).
pdf = avg_per_dollar(df, 'prediction')

In [23]:
# pdf is already sorted by 'Avg Value', so head(5) gives the five best-value quarterbacks.
top_QBs = pdf[pdf.pos=='QB']
top_QBs.head(5)

Unnamed: 0,name,pos,salary,prediction_rdf,prediction_dff,prediction,pred_diff,Avg Value
82,Joshua Dobbs,QB,5900,15.34,27.5,21.42,12.16,3.630508
76,Justin Fields,QB,7000,19.03,29.3,24.165,10.27,3.452143
1,Dak Prescott,QB,6800,20.69,22.5,21.595,1.81,3.175735
0,Brock Purdy,QB,6100,18.51,19.0,18.755,0.49,3.07459
70,Baker Mayfield,QB,5500,16.15,16.3,16.225,0.15,2.95


In [24]:
# Five best-value running backs (pdf is already sorted by 'Avg Value').
top_RBs = pdf[pdf.pos=='RB']
top_RBs.head(5)

Unnamed: 0,name,pos,salary,prediction_rdf,prediction_dff,prediction,pred_diff,Avg Value
2,Zach Charbonnet,RB,5300,14.81,14.0,14.405,0.81,2.717925
9,Christian McCaffrey,RB,8700,20.7,23.6,22.15,2.9,2.545977
8,Tony Pollard,RB,6700,16.36,16.3,16.33,0.06,2.437313
103,Rachaad White,RB,6100,13.99,15.7,14.845,1.71,2.433607
197,Khalil Herbert,RB,5100,9.54,14.8,12.17,5.26,2.386275


In [25]:
# Five best-value wide receivers (pdf is already sorted by 'Avg Value').
top_WRs = pdf[pdf.pos=='WR']
top_WRs.head(5)

Unnamed: 0,name,pos,salary,prediction_rdf,prediction_dff,prediction,pred_diff,Avg Value
126,DJ Moore,WR,6000,13.34,21.1,17.22,7.76,2.87
81,Drake London,WR,5000,12.82,13.1,12.96,0.28,2.592
79,Rashid Shaheed,WR,4300,11.38,10.7,11.04,0.68,2.567442
145,Jordan Addison,WR,5700,11.8,16.8,14.3,5.0,2.508772
18,CeeDee Lamb,WR,8700,19.44,23.5,21.47,4.06,2.467816


In [26]:
# Five best-value tight ends (pdf is already sorted by 'Avg Value').
top_TEs = pdf[pdf.pos=='TE']
top_TEs.head(5)

Unnamed: 0,name,pos,salary,prediction_rdf,prediction_dff,prediction,pred_diff,Avg Value
7,Jake Ferguson,TE,3900,9.69,10.9,10.295,1.21,2.639744
85,Evan Engram,TE,4200,10.66,11.1,10.88,0.44,2.590476
102,T.J. Hockenson,TE,5800,13.45,14.5,13.975,1.05,2.409483
14,Logan Thomas,TE,3500,7.99,8.2,8.095,0.21,2.312857
164,Trey McBride,TE,4700,9.24,12.4,10.82,3.16,2.302128


In [27]:
# Five best-value defenses (pdf is already sorted by 'Avg Value').
top_Defs = pdf[pdf.pos=='Def']
top_Defs.head(5)

Unnamed: 0,name,pos,salary,prediction_rdf,prediction_dff,prediction,pred_diff,Avg Value
83,Kansas City Chiefs,Def,3100,8.18,8.7,8.44,0.52,2.722581
115,Miami Dolphins,Def,3500,8.01,9.7,8.855,1.69,2.53
99,Denver Broncos,Def,3200,7.72,8.4,8.06,0.68,2.51875
95,Minnesota Vikings,Def,3200,7.67,7.8,7.735,0.13,2.417187
92,New England Patriots,Def,3200,7.61,7.7,7.655,0.09,2.392187


# Team Selection

$$\begin{aligned} 
objective &= maximize\ team\ points \\ 
team\ salary &\leq salary\ cap \\
team &= 1\ QB + (2|3)\ RB + (3|4)\ WR + (1|2)\ TE + 1\ DEF \\
team &= 9\ total\ players \\
\end{aligned}$$

In [39]:
# %pip install pulp

In [17]:
from pulp import *

class PulpSelection():
    """Pick the highest-scoring roster under a salary cap with a PuLP binary program.

    One binary decision variable is created per player, grouped by the
    ``pos`` column of ``df``.  The model maximises total points subject to:

    * total salary ``<= salary_cap``;
    * per-position head counts from ``pos_num_available`` — exact for
      positions outside ``flex_positions``, and "required or required + 1"
      for the flex-eligible positions;
    * exactly ``roster_size`` players overall.

    Constructing an instance builds and solves the model immediately.

    Parameters
    ----------
    df : pandas.DataFrame
        Player pool.  Must contain a ``pos`` column plus the three columns
        named below.  Every position present must be a key of
        ``pos_num_available`` (otherwise ``optimize`` raises ``KeyError``).
        Player names are used as dictionary keys, so within one position a
        repeated name collapses to a single entry.
    pts_col, sal_col, name_col : str
        Columns holding the points to maximise, the salary and the player name.
    salary_cap : int
        Maximum total salary of the selected roster.

    Attributes
    ----------
    vars : dict
        ``{pos: {player name: LpVariable}}``.
    salaries, points : dict
        ``{pos: {player name: value}}`` lookups used as coefficients.
    model : LpProblem
        The solved problem.
    status : int
        Value returned by ``LpProblem.solve()`` (also printed).
    players : dict
        Reverse lookup ``{LpVariable: player name}``.
    selection : pandas.DataFrame
        Rows of ``df`` for the chosen players.
    """

    # Required number of players per position (before the flex spot).
    pos_num_available = {
        "QB": 1,
        "RB": 2,
        "WR": 3,
        "TE": 1,
        "Def": 1
    }
    # Positions that may take one player more than required (the flex spot).
    flex_positions = ('RB', 'WR', 'TE')
    # Total number of players on a roster, flex included.
    roster_size = 9

    def __init__(self, df, pts_col="prediction", sal_col="salary", name_col="name", salary_cap=50000):
        self.df = df
        # Remembered so selection_df filters on the same column the variables were keyed by.
        self.name_col = name_col
        self.vars = self.populate_vars(pts_col, sal_col, name_col)
        self.model = self.optimize(salary_cap)
        self.players = self.player_names()
        self.selection = self.selection_df()

    def populate_vars(self, pts_col, sal_col, name_col):
        """Build the per-position lookups and the binary decision variables.

        Stores ``{pos: {name: salary}}`` on ``self.salaries`` and
        ``{pos: {name: points}}`` on ``self.points``, and returns
        ``{pos: {name: LpVariable}}``.
        """
        df = self.df
        salaries = {}
        points = {}
        for pos in df.pos.unique():
            by_name = df[df.pos == pos].set_index(name_col)
            salaries[pos] = by_name[sal_col].to_dict()
            points[pos] = by_name[pts_col].to_dict()

        self.salaries = salaries
        self.points = points
        return {pos: LpVariable.dict(pos, names, cat="Binary") for pos, names in points.items()}

    def player_names(self):
        """Return the reverse lookup ``{LpVariable: player name}`` across all positions."""
        return {
            var: name
            for by_name in self.vars.values()
            for name, var in by_name.items()
        }

    def optimize(self, salary_cap):
        """Build the model, solve it, print and store the solver status, and return the problem."""
        prob = LpProblem("FFModel", LpMaximize)
        # Plain lists of terms, summed once at the end.  (Accumulating with
        # `some_list += lpSum(...)` only works through the reflected-add
        # fallback of the expression type, which hides what is being built.)
        reward_terms = []
        cost_terms = []
        all_vars = []

        for pos, players in self.vars.items():
            pos_vars = list(players.values())
            all_vars.extend(pos_vars)
            cost_terms.extend(self.salaries[pos][name] * var for name, var in players.items())
            reward_terms.extend(self.points[pos][name] * var for name, var in players.items())

            required = self.pos_num_available[pos]
            if pos in self.flex_positions:
                # Flex-eligible: the required count, or one more.
                prob += lpSum(pos_vars) <= required + 1
                prob += lpSum(pos_vars) >= required
            else:
                prob += lpSum(pos_vars) == required

        # Exactly roster_size players: together with the bounds above this
        # means exactly one flex-eligible position carries an extra player.
        prob += lpSum(all_vars) == self.roster_size

        prob += lpSum(reward_terms)               # objective: total points
        prob += lpSum(cost_terms) <= salary_cap   # salary cap
        self.status = prob.solve()
        print(self.status)
        return prob

    def selection_df(self):
        """Return the rows of ``self.df`` whose player was selected by the solver.

        A variable counts as selected when its value is above 0.5; a value of
        ``None`` (no solution assigned) counts as not selected.  Rows are
        matched on ``name_col`` only, so a name that appears under several
        positions returns all of its rows.
        """
        selections = [
            self.players[var]
            for var in self.model.variables()
            if var in self.players and (var.varValue or 0) > 0.5
        ]
        team = self.df[self.df[self.name_col].isin(selections)]
        return team

In [18]:
# 2023 version
pts_col = 'prediction'
model = PulpSelection(df, pts_col=pts_col)
team = model.selection

Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /usr/local/Caskroom/miniconda/base/envs/default/lib/python3.10/site-packages/pulp/solverdir/cbc/osx/64/cbc /var/folders/cv/pyjmd76x58dbyxrhzycj6c880000gp/T/ef4a2e03b46e4f6fa2cdb09b6d966e93-pulp.mps max timeMode elapsed branch printingOptions all solution /var/folders/cv/pyjmd76x58dbyxrhzycj6c880000gp/T/ef4a2e03b46e4f6fa2cdb09b6d966e93-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 15 COLUMNS
At line 2130 RHS
At line 2141 BOUNDS
At line 2452 ENDATA
Problem MODEL has 10 rows, 310 columns and 1184 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 128.105 - 0.00 seconds
Cgl0004I processed model has 7 rows, 294 columns (294 integer (293 of which binary)) and 882 elements
Cutoff increment increased from 1e-05 to 0.004995
Cbc0038I Initial state - 2 integers unsatisfied sum - 0.290909
Cbc0038I Solut

In [19]:
# Totals of the chosen roster (points column as selected above), then the roster itself.
print("\nTotal points: {}     Total salary: {}\n".format(sum(team[pts_col]), sum(team['salary'])))
team


Total points: 127.61500000000002     Total salary: 49900



Unnamed: 0,name,pos,salary,prediction_df1,prediction_df2,prediction,pred_diff
2,Justin Fields,QB,6800,19.12,21.8,20.46,2.68
14,Jonathan Mingo,WR,3500,8.97,7.9,8.435,1.07
23,Ezekiel Elliott,RB,5400,13.33,13.8,13.565,0.47
24,Drake London,WR,4600,11.5,12.2,11.85,0.7
30,Atlanta Falcons,Def,2900,6.99,7.3,7.145,0.31
31,Jaxon Smith-Njigba,WR,4100,10.02,9.6,9.81,0.42
33,Christian McCaffrey,RB,9200,22.43,24.6,23.515,2.17
41,Jake Ferguson,TE,4400,10.28,11.3,10.79,1.02
44,CeeDee Lamb,WR,9000,20.59,23.5,22.045,2.91


## Save Picks to CSV

In [20]:
# Two destinations for the same file: a relative per-year folder and an
# absolute OneDrive folder whose name carries the week zero-padded to two digits.
# NOTE(review): the absolute path is user-specific; both folders must already exist.
my_dir = f"{year}_weekly_picks/"
grp_dir = f"/Users/adamschiller/OneDrive - BOOZ ALLEN HAMILTON/Weekly Team Picks/Week {format(week, '02d')}/"
fname = f"Adam_week{week}_picks.csv"

# save version in each location
# (the loop variable is deliberately not named `dir`, which would rebind the
# builtin dir() for every later cell of the session)
for out_dir in [my_dir, grp_dir]:
    # Both directory strings end in '/', so plain concatenation yields the file path.
    team[['pos','name','salary']].to_csv(out_dir+fname, index=False)

# Optimal Picks per Week

In [19]:
# salary + scoring
df = df.join(week_df[['name', 'fpts']].set_index('name'), on='name')
df

Unnamed: 0,name,pos,salary,prediction,fpts
0,Patrick Mahomes,QB,8100,25.420,21.34
1,Joe Burrow,QB,6300,20.025,14.80
2,Trevor Lawrence,QB,6500,20.465,15.74
4,Sam Howell,QB,5500,16.570,18.34
5,Dak Prescott,QB,6200,18.470,
...,...,...,...,...,...
351,Jason Brownlee,WR,3000,0.225,
354,Brandon Bolden,RB,4000,0.365,
355,Davis Allen,TE,2500,0.195,
356,Mike Boone,RB,4000,0.580,1.10


In [20]:
def show_optimal_picks(df, pts_col='fpts', sal_col='salary'):
    """Solve for the best possible roster using the points in ``pts_col``.

    Rows containing a missing value in any column are dropped first, the
    roster is optimised with ``PulpSelection``, its total points and total
    salary are printed, and the selected rows are returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Player pool with at least ``name``, ``pos``, ``pts_col`` and ``sal_col``.
    pts_col : str
        Column holding the points to maximise.
    sal_col : str
        Column holding the salaries.  It is forwarded to the optimiser so the
        salary cap and the printed total are computed from the same column.

    Returns
    -------
    pandas.DataFrame
        Rows of the cleaned pool for the selected players.
    """
    # Local name chosen so it does not shadow the notebook-level `week` number.
    scored = df.dropna()
    model = PulpSelection(scored, pts_col=pts_col, sal_col=sal_col)
    team = model.selection
    print("\nTotal points: {}     Total salary: {}\n".format(sum(team[pts_col]), sum(team[sal_col])))
    return team

In [21]:
# Best roster in hindsight: optimise on the 'fpts' column (the function's default) instead of projections.
show_optimal_picks(df)

Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /usr/local/Caskroom/miniconda/base/envs/default/lib/python3.10/site-packages/pulp/solverdir/cbc/osx/64/cbc /var/folders/cv/pyjmd76x58dbyxrhzycj6c880000gp/T/277c43891ee94cf3b082240ccce60552-pulp.mps max timeMode elapsed branch printingOptions all solution /var/folders/cv/pyjmd76x58dbyxrhzycj6c880000gp/T/277c43891ee94cf3b082240ccce60552-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 15 COLUMNS
At line 1690 RHS
At line 1701 BOUNDS
At line 1952 ENDATA
Problem MODEL has 10 rows, 250 columns and 947 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 242.62 - 0.00 seconds
Cgl0004I processed model has 7 rows, 230 columns (230 integer (226 of which binary)) and 690 elements
Cbc0038I Initial state - 0 integers unsatisfied sum - 0
Cbc0038I Solution found of -242.62
Cbc0038I Cleaned solution of -242.62
C

Unnamed: 0,name,pos,salary,prediction,fpts
11,Drake London,WR,4800,12.345,24.5
28,Jared Goff,QB,6400,16.83,25.42
63,Raheem Mostert,RB,6400,15.74,37.2
67,Cooper Kupp,WR,9000,20.78,30.8
69,Kyren Williams,RB,6500,15.255,24.8
88,Tyreek Hill,WR,9300,22.715,31.3
90,Adam Thielen,WR,5900,13.61,31.5
92,Minnesota Vikings,Def,3000,6.075,21.0
97,Dalton Schultz,TE,3600,8.195,16.1
