### projection model => project points per player  
- [ ]    Basic Stats  
- [ ]    Usage  
- [ ]    Percentage of team scoring  
- [ ]    Injuries  
- [ ]    Bye week
- [ ]    Injuries to Key Teammates
- [ ]    Individual Matchups
- [ ]    Opposing Team (record, defense, injuries, etc.)
- [ ]    Home/Away
- [ ]    Weather
- [ ]    Expert Picks
- [x]    Other Fantasy Projections

### selection mechanism => choose optimal team within parameters
- [ ]    maximize value (pts/salary)
- [x]    Linear Optimization
    - Inspiration: https://github.com/breeko/Fantasy_LP/blob/master/fantasy_lp_final.ipynb

In [2]:
import pandas as pd
# from matplotlib import pyplot as plt
import numpy as np

In [3]:
# Season and week being analysed; both are interpolated into the data file
# paths used by the cells below.
year = 2024
week = 8

# This Week

In [32]:
# Root folder for this season's files.
# NOTE(review): this is a hard-coded absolute path on one user's machine;
# the notebook will not run elsewhere without editing it.
primary_dir = f"/Users/adamschiller/OneDrive - BOOZ ALLEN HAMILTON/Fantasy Football/{year}"
# Folder holding the weekly salary/projection CSV exports read below.
sal_dir = f"{primary_dir}/Weekly Salary Data"

## Rotowire

In [12]:
# https://www.rotowire.com/daily/nfl/dfs-opportunities.php?site=DraftKings  # choose full slate
def rotowire_csv(filepath):
    """Load a Rotowire projection export and normalise it to the notebook schema.

    Args:
        filepath: path (or file-like object) accepted by ``pd.read_csv``. The
            file must contain the columns PLAYER, POS, SAL and FPTS.

    Returns:
        DataFrame with exactly the columns name, pos, salary, prediction, where
        the position code 'D' has been relabelled 'Def'.
    """
    column_names = {
        'PLAYER': 'name',
        'POS': 'pos',
        'SAL': 'salary',
        'FPTS': 'prediction',
    }
    table = pd.read_csv(filepath).rename(columns=column_names)
    # Use the same label for team defenses as the rest of the notebook.
    is_defense = table['pos'] == 'D'
    table.loc[is_defense, 'pos'] = 'Def'
    return table[['name', 'pos', 'salary', 'prediction']]
    
# Load this week's Rotowire export from the salary folder and display it.
rdf = rotowire_csv(f"{sal_dir}/week{week}_rotowire-NFL-projected-roster-percent.csv")
rdf

Unnamed: 0,name,pos,salary,prediction
0,Justin Herbert,QB,5400,16.11
1,Bo Nix,QB,5600,16.65
2,Matthew Stafford,QB,5500,15.80
3,Jalen McMillan,WR,3700,10.72
4,Brock Purdy,QB,6300,18.14
...,...,...,...,...
599,Colton Dowell,WR,3000,0.00
600,Tyler Ott,TE,2500,0.00
601,Sam Ehlinger,QB,4000,0.00
602,Brayden Willis,TE,2500,0.00


In [13]:
len(rdf[rdf['pos']=='Def'])

32

## Antonio's Official Data

In [15]:
# Raw weekly salary file; it is normalised by format_antonio_salary_data below.
df = pd.read_csv(f"{sal_dir}/week{week}_salaries.csv")

def format_antonio_salary_data(df):
    """Normalise the weekly salary table to the notebook schema.

    The input frame is left untouched: renaming and relabelling are done on a
    copy, so the function can be re-run on the same raw frame and callers do
    not see their columns renamed behind their back.

    Args:
        df: DataFrame containing the columns PLAYER, POS, SAL and FPTS.

    Returns:
        A new, independent DataFrame with exactly the columns name, pos,
        salary, prediction, where the position code 'D' is relabelled 'Def'.
    """
    renamed = df.rename(
        columns={'PLAYER': 'name', 'SAL': 'salary', 'FPTS': 'prediction', 'POS': 'pos'}
    )
    # Use the same label for team defenses as the rest of the notebook.
    renamed.loc[renamed['pos'] == 'D', 'pos'] = 'Def'
    # .copy() so later column assignments on the result never target a slice.
    return renamed[['name', 'pos', 'salary', 'prediction']].copy()

# Normalised salary/projection table; used below as the reference source for
# defense names and as the left side of the merge.
adf = format_antonio_salary_data(df)
adf

Unnamed: 0,name,pos,salary,prediction
0,Justin Herbert,QB,5400,16.11
1,Bo Nix,QB,5600,16.79
2,Matthew Stafford,QB,5500,15.74
3,Jalen McMillan,WR,3700,10.72
4,Brock Purdy,QB,6300,18.21
...,...,...,...,...
597,Brayden Willis,TE,2500,0.00
598,Rondale Moore,WR,3000,0.00
599,Clayton Tune,QB,4400,0.00
600,Elijah Mitchell,RB,4000,0.00


In [16]:
len(adf[adf['pos']=='Def'])

32

## Daily Fantasy Fuel

In [22]:
# map DFF defense names (team nickname only) to the full team names used in the salary data (adf)
def defense_mapping(df):
    """Build a lookup from the last word of each defense name to the full name.

    Args:
        df: DataFrame with 'pos' and 'name' columns.

    Returns:
        dict mapping the text after the final space of each 'Def' row's name
        (e.g. 'Chiefs') to the complete name (e.g. 'Kansas City Chiefs'). When
        two names share a last word, the later row wins.
    """
    defense_names = df.loc[df['pos'] == 'Def', 'name']
    lookup = {}
    for full_name in defense_names:
        last_word = full_name.rsplit(' ', 1)[-1]
        lookup[last_word] = full_name
    return lookup

# Module-level lookup read by apply_defense_map; built from the salary table
# (adf), so the full names match the ones used on the left side of the merge.
defense_map = defense_mapping(adf)

def apply_defense_map(x):
    """Translate a short defense name to its full name via ``defense_map``.

    Args:
        x: key to look up in the module-level ``defense_map`` dict.

    Returns:
        The mapped full name, or ``x`` unchanged when it is not a key.
    """
    return defense_map.get(x, x)

In [23]:
# https://www.dailyfantasyfuel.com/nfl?platform=dk&slate=1C369
# https://www.dailyfantasyfuel.com/nfl/projections/
def dff_csv(filepath):
    """Load a Daily Fantasy Fuel cheatsheet and normalise it to the notebook schema.

    Args:
        filepath: path accepted by ``pd.read_csv``. The file must contain the
            columns first_name, last_name, position, ppg_projection,
            injury_status and salary.

    Returns:
        DataFrame with the columns name, pos, injury_status, salary,
        prediction. Rows whose position is 'DST' are relabelled 'Def' and
        their name is replaced by ``apply_defense_map(first_name)``.
    """
    sheet = pd.read_csv(filepath)
    sheet['name'] = sheet['first_name'] + ' ' + sheet['last_name']
    sheet = sheet.rename(columns={'position': 'pos', 'ppg_projection': 'prediction'})

    # Defense rows get their name looked up from first_name so they line up
    # with the names in the other data source; all other rows keep
    # "first last".
    is_defense = sheet['pos'] == 'DST'
    sheet.loc[is_defense, 'name'] = sheet['first_name'].apply(apply_defense_map)
    sheet.loc[is_defense, 'pos'] = 'Def'

    return sheet[['name', 'pos', 'injury_status', 'salary', 'prediction']]

# Load this week's Daily Fantasy Fuel cheatsheet from the salary folder and
# display it.
dff = dff_csv(f"{sal_dir}/week{week}_DFF_NFL_cheatsheet.csv")
dff

Unnamed: 0,name,pos,injury_status,salary,prediction
0,Lamar Jackson,QB,,8000,22.6
1,Breece Hall,RB,,7300,21.5
2,Josh Allen,QB,,7800,20.7
3,Jalen Hurts,QB,,7500,20.3
4,Justin Jefferson,WR,,8800,19.8
...,...,...,...,...,...
428,JuJu Smith-Schuster,WR,O,4500,0.0
429,Michael Mayer,TE,O,2500,0.0
430,Jordan Travis,QB,O,4000,0.0
431,Mike Evans,WR,O,7300,0.0


In [24]:
len(dff[dff['pos']=='Def'])

32

## Merge sources

In [25]:
def merge_sources(df1, df2):
    """Combine two projection tables into one averaged projection per player.

    Players are matched on 'name' (inner join). Rows are kept only when the
    player has no injury status in ``df2`` and both sources project more than
    zero points. Neither input frame is modified.

    Args:
        df1: DataFrame with a 'name' and a 'prediction' column; all of its
            columns are carried into the result ('prediction' becomes
            'prediction_df1').
        df2: DataFrame with 'name', 'injury_status' and 'prediction' columns
            ('prediction' becomes 'prediction_df2').

    Returns:
        A new DataFrame with df1's columns, 'prediction_df2', 'prediction'
        (row-wise mean of the two sources) and 'pred_diff' (absolute
        difference between the two sources).
    """
    merged = df1.merge(
        df2[['name', 'injury_status', 'prediction']],
        on='name',
        how='inner',
        suffixes=('_df1', '_df2'),
    )

    healthy = merged['injury_status'].isna()
    both_positive = (merged['prediction_df1'] > 0) & (merged['prediction_df2'] > 0)

    # Take an explicit copy of the filtered rows so the column assignments
    # below write to an independent frame rather than to a slice of `merged`.
    out = merged.loc[healthy & both_positive].drop(columns=['injury_status']).copy()

    out['prediction'] = out[['prediction_df1', 'prediction_df2']].mean(axis=1)
    out['pred_diff'] = (out['prediction_df1'] - out['prediction_df2']).abs()
    return out

In [26]:
# Working table for the rest of the notebook: salary-file projections (df1)
# averaged with the Daily Fantasy Fuel projections (df2).
df = merge_sources(adf, dff)
df

Unnamed: 0,name,pos,salary,prediction_df1,prediction_df2,prediction,pred_diff
0,Justin Herbert,QB,5400,16.11,16.2,16.155,0.09
1,Bo Nix,QB,5600,16.79,19.3,18.045,2.51
2,Matthew Stafford,QB,5500,15.74,15.7,15.720,0.04
3,Jalen McMillan,WR,3700,10.72,10.4,10.560,0.32
4,Brock Purdy,QB,6300,18.21,18.7,18.455,0.49
...,...,...,...,...,...,...,...
383,Eric Saubert,TE,2500,0.26,0.5,0.380,0.24
385,Kenny McIntosh,RB,4000,0.52,0.6,0.560,0.08
386,Will Shipley,RB,4000,0.54,0.5,0.520,0.04
389,Cody Schrader,RB,4000,0.17,0.2,0.185,0.03


In [27]:
df.sort_values('pred_diff', ascending=False)

Unnamed: 0,name,pos,salary,prediction_df1,prediction_df2,prediction,pred_diff
156,Tony Pollard,RB,6500,12.16,18.4,15.280,6.24
151,J.K. Dobbins,RB,6900,13.33,19.2,16.265,5.87
354,Tyler Goodson,RB,5500,0.85,6.7,3.775,5.85
240,Kalif Raymond,WR,3400,3.79,8.2,5.995,4.41
179,Najee Harris,RB,6100,10.25,14.5,12.375,4.25
...,...,...,...,...,...,...,...
223,Devin Singletary,RB,5900,7.39,7.4,7.395,0.01
280,Ian Thomas,TE,2500,1.39,1.4,1.395,0.01
68,Seattle Seahawks,Def,2500,5.41,5.4,5.405,0.01
305,Durham Smythe,TE,2600,1.40,1.4,1.400,0.00


In [28]:
df['pos'].value_counts()

pos
WR     117
RB      88
TE      71
Def     32
QB      26
Name: count, dtype: int64

# Top Projected per Pos

In [22]:
def avg_per_dollar(df, col):
    """Rank players by points per $1,000 of salary.

    Adds an 'Avg Value' column to ``df`` itself (the caller's frame is
    modified) holding ``df[col]`` divided by salary in thousands.

    Args:
        df: DataFrame with a 'salary' column and the column named by ``col``.
        col: name of the points column to value.

    Returns:
        A copy of ``df`` sorted by 'Avg Value', highest first.
    """
    salary_in_thousands = df['salary'] / 1000
    df['Avg Value'] = df[col] / salary_in_thousands
    ranked = df.sort_values(by='Avg Value', ascending=False)
    return ranked

# Players ranked by averaged projection per $1,000 of salary. Note that
# avg_per_dollar also adds the 'Avg Value' column to df itself.
pdf = avg_per_dollar(df, 'prediction')

In [23]:
top_QBs = pdf[pdf.pos=='QB']
top_QBs.head(5)

Unnamed: 0,name,pos,salary,prediction_rdf,prediction_dff,prediction,pred_diff,Avg Value
82,Joshua Dobbs,QB,5900,15.34,27.5,21.42,12.16,3.630508
76,Justin Fields,QB,7000,19.03,29.3,24.165,10.27,3.452143
1,Dak Prescott,QB,6800,20.69,22.5,21.595,1.81,3.175735
0,Brock Purdy,QB,6100,18.51,19.0,18.755,0.49,3.07459
70,Baker Mayfield,QB,5500,16.15,16.3,16.225,0.15,2.95


In [24]:
top_RBs = pdf[pdf.pos=='RB']
top_RBs.head(5)

Unnamed: 0,name,pos,salary,prediction_rdf,prediction_dff,prediction,pred_diff,Avg Value
2,Zach Charbonnet,RB,5300,14.81,14.0,14.405,0.81,2.717925
9,Christian McCaffrey,RB,8700,20.7,23.6,22.15,2.9,2.545977
8,Tony Pollard,RB,6700,16.36,16.3,16.33,0.06,2.437313
103,Rachaad White,RB,6100,13.99,15.7,14.845,1.71,2.433607
197,Khalil Herbert,RB,5100,9.54,14.8,12.17,5.26,2.386275


In [25]:
top_WRs = pdf[pdf.pos=='WR']
top_WRs.head(5)

Unnamed: 0,name,pos,salary,prediction_rdf,prediction_dff,prediction,pred_diff,Avg Value
126,DJ Moore,WR,6000,13.34,21.1,17.22,7.76,2.87
81,Drake London,WR,5000,12.82,13.1,12.96,0.28,2.592
79,Rashid Shaheed,WR,4300,11.38,10.7,11.04,0.68,2.567442
145,Jordan Addison,WR,5700,11.8,16.8,14.3,5.0,2.508772
18,CeeDee Lamb,WR,8700,19.44,23.5,21.47,4.06,2.467816


In [26]:
top_TEs = pdf[pdf.pos=='TE']
top_TEs.head(5)

Unnamed: 0,name,pos,salary,prediction_rdf,prediction_dff,prediction,pred_diff,Avg Value
7,Jake Ferguson,TE,3900,9.69,10.9,10.295,1.21,2.639744
85,Evan Engram,TE,4200,10.66,11.1,10.88,0.44,2.590476
102,T.J. Hockenson,TE,5800,13.45,14.5,13.975,1.05,2.409483
14,Logan Thomas,TE,3500,7.99,8.2,8.095,0.21,2.312857
164,Trey McBride,TE,4700,9.24,12.4,10.82,3.16,2.302128


In [27]:
# players[players.Pos == 'Def'].head(3)
top_Defs = pdf[pdf.pos=='Def']
top_Defs.head(5)

Unnamed: 0,name,pos,salary,prediction_rdf,prediction_dff,prediction,pred_diff,Avg Value
83,Kansas City Chiefs,Def,3100,8.18,8.7,8.44,0.52,2.722581
115,Miami Dolphins,Def,3500,8.01,9.7,8.855,1.69,2.53
99,Denver Broncos,Def,3200,7.72,8.4,8.06,0.68,2.51875
95,Minnesota Vikings,Def,3200,7.67,7.8,7.735,0.13,2.417187
92,New England Patriots,Def,3200,7.61,7.7,7.655,0.09,2.392187


# Team Selection

$$\begin{aligned} 
objective &= maximize\ team\ points \\ 
team\ salary &\leq salary\ cap \\
team &= 1\ QB + (2|3)\ RB + (3|4)\ WR + (1|2)\ TE + 1\ DEF \\
team &= 9\ total\ players \\
\end{aligned}$$

In [39]:
# %pip install pulp

In [17]:
from pulp import *

class PulpSelection():
    """Choose the highest-scoring lineup that fits under a salary cap.

    One binary decision variable is created per player, grouped by the 'pos'
    column of the input table. The linear program maximizes total points
    subject to per-position counts, a fixed roster size and the salary cap.
    The model is built and solved in the constructor.

    Args:
        df: player table with a 'pos' column plus the three columns named
            below. Every position in 'pos' must be a key of
            ``pos_num_available``.
        pts_col: column holding the points to maximize.
        sal_col: column holding each player's salary.
        name_col: column holding player names (unique within a position).
        salary_cap: maximum total salary of the lineup.

    Attributes:
        df: the player table passed in.
        name_col: name of the player-name column.
        salaries, points: ``{pos: {name: value}}`` lookups.
        vars: ``{pos: {name: LpVariable}}`` binary decision variables.
        model: the solved ``LpProblem``.
        players: ``{LpVariable: name}`` reverse lookup.
        selection: rows of ``df`` for the chosen players.
    """

    # Minimum number of players required at each position.
    pos_num_available = {
        "QB": 1,
        "RB": 2,
        "WR": 3,
        "TE": 1,
        "Def": 1
    }

    # Positions that may take one player more than their minimum (flex spot).
    flex_positions = ('RB', 'WR', 'TE')

    # Exact lineup size: the 8 required starters plus 1 flex player.
    roster_size = 9

    def __init__(self, df, pts_col="prediction", sal_col="salary", name_col="name", salary_cap=50000):
        self.df = df
        # Remembered so selection_df filters on the same column the variables
        # were built from.
        self.name_col = name_col
        self.vars = self.populate_vars(pts_col, sal_col, name_col)
        self.model = self.optimize(salary_cap)
        self.players = self.player_names()
        self.selection = self.selection_df()

    def populate_vars(self, pts_col, sal_col, name_col):
        """Build the per-position salary/points lookups and decision variables.

        Sets ``self.salaries`` and ``self.points`` and returns
        ``{pos: {name: LpVariable}}`` with one binary variable per player.
        """
        df = self.df
        salaries = {}
        points = {}
        for pos in df['pos'].unique():
            by_name = df[df['pos'] == pos].set_index(name_col)
            salaries[pos] = by_name[sal_col].to_dict()
            points[pos] = by_name[pts_col].to_dict()

        self.salaries = salaries
        self.points = points
        return {pos: LpVariable.dict(pos, names, cat="Binary") for pos, names in points.items()}

    def player_names(self):
        """Return the reverse lookup ``{decision variable: player name}``."""
        return {
            var: name
            for by_name in self.vars.values()
            for name, var in by_name.items()
        }

    def optimize(self, salary_cap):
        """Build and solve the lineup model; return the solved ``LpProblem``.

        Prints the status code returned by ``prob.solve()``.

        Raises:
            KeyError: if a position in ``self.vars`` is missing from
                ``pos_num_available``.
        """
        prob = LpProblem("FFModel", LpMaximize)
        # Terms are collected in plain lists and summed once with lpSum,
        # instead of relying on `list += expression` being dispatched to the
        # expression's reflected add.
        cost_terms = []
        reward_terms = []
        all_vars = []

        for pos, players in self.vars.items():
            pos_vars = list(players.values())
            all_vars.extend(pos_vars)
            cost_terms.extend(self.salaries[pos][name] * var for name, var in players.items())
            reward_terms.extend(self.points[pos][name] * var for name, var in players.items())

            required = self.pos_num_available[pos]
            if pos in self.flex_positions:
                # Flex-eligible positions may hold one extra player.
                prob += lpSum(pos_vars) <= required + 1
                prob += lpSum(pos_vars) >= required
            else:
                prob += lpSum(pos_vars) == required

        # Exactly roster_size players, i.e. exactly one flex spot is filled.
        prob += lpSum(all_vars) == self.roster_size

        prob += lpSum(reward_terms)               # objective: total points
        prob += lpSum(cost_terms) <= salary_cap   # salary cap
        print(prob.solve())
        return prob

    def selection_df(self):
        """Return the rows of ``self.df`` for the players chosen by the solver.

        Rows are matched on (position, name), so a player who shares a name
        with a selected player at another position is not included. Variables
        without a value (``varValue is None``) count as not selected.
        """
        chosen = {
            (pos, name)
            for pos, by_name in self.vars.items()
            for name, var in by_name.items()
            # 0.5 threshold: binary values can come back with round-off.
            if var.varValue is not None and var.varValue > 0.5
        }
        row_keys = zip(self.df['pos'], self.df[self.name_col])
        is_selected = [key in chosen for key in row_keys]
        team = self.df.loc[is_selected]
        return team

In [29]:
# Optimize the lineup on the averaged projection; the model is built and
# solved inside the PulpSelection constructor.
pts_col = 'prediction'
model = PulpSelection(df, pts_col=pts_col)
# Rows of df for the players the solver selected.
team = model.selection

Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /Users/adamschiller/bah_projects/model_football/.venv/lib/python3.12/site-packages/pulp/solverdir/cbc/osx/64/cbc /var/folders/pl/yvv0vk397x34b_lzgcmsd43w0000gp/T/1b99aa2416814e108301053b7f48aa81-pulp.mps -max -timeMode elapsed -branch -printingOptions all -solution /var/folders/pl/yvv0vk397x34b_lzgcmsd43w0000gp/T/1b99aa2416814e108301053b7f48aa81-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 15 COLUMNS
At line 2296 RHS
At line 2307 BOUNDS
At line 2642 ENDATA
Problem MODEL has 10 rows, 334 columns and 1278 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 133.152 - 0.00 seconds
Cgl0004I processed model has 7 rows, 314 columns (314 integer (313 of which binary)) and 942 elements
Cutoff increment increased from 1e-05 to 0.004995
Cbc0038I Initial state - 2 integers unsatisfied sum - 0.210526
Cbc

In [20]:
print("\nTotal points: {}     Total salary: {}\n".format(sum(team[pts_col]), sum(team['salary'])))
team


Total points: 130.63     Total salary: 50000



Unnamed: 0,name,pos,salary,prediction
3,Jalen McMillan,WR,3700,10.72
6,Kyler Murray,QB,6400,18.66
8,Cade Otton,TE,3500,9.99
22,Tyreek Hill,WR,7000,19.13
27,Jaylen Waddle,WR,5400,13.77
29,Los Angeles Chargers,Def,3300,8.23
32,Breece Hall,RB,7300,18.49
38,De'Von Achane,RB,6200,14.87
50,Bijan Robinson,RB,7200,16.77


In [30]:
print("\nTotal points: {}     Total salary: {}\n".format(sum(team[pts_col]), sum(team['salary'])))
team


Total points: 132.72     Total salary: 50000



Unnamed: 0,name,pos,salary,prediction_df1,prediction_df2,prediction,pred_diff
1,Bo Nix,QB,5600,16.79,19.3,18.045,2.51
3,Jalen McMillan,WR,3700,10.72,10.4,10.56,0.32
8,Cade Otton,TE,3500,9.99,10.2,10.095,0.21
21,Tyreek Hill,WR,7000,19.13,18.7,18.915,0.43
28,Los Angeles Chargers,Def,3300,8.23,8.5,8.365,0.27
31,Breece Hall,RB,7300,18.49,21.5,19.995,3.01
52,Aaron Jones,RB,6600,14.85,18.5,16.675,3.65
72,Tee Higgins,WR,6500,14.48,15.1,14.79,0.62
156,Tony Pollard,RB,6500,12.16,18.4,15.28,6.24


## Save Picks to CSV

In [36]:
picks_dir = f"{primary_dir}/Weekly Team Picks"
fname = f"{picks_dir}/Adam_week{week}_picks.csv"

# Write the lineup once. The earlier loop iterated over my_dir/grp_dir, which
# are not defined in this notebook, and never used the loop variable, so it
# could only ever write this same file.
team[['pos', 'name', 'salary']].to_csv(fname, index=False)

# Optimal Picks per Week

In [19]:
# salary + scoring
df = df.join(week_df[['name', 'fpts']].set_index('name'), on='name')
df

Unnamed: 0,name,pos,salary,prediction,fpts
0,Patrick Mahomes,QB,8100,25.420,21.34
1,Joe Burrow,QB,6300,20.025,14.80
2,Trevor Lawrence,QB,6500,20.465,15.74
4,Sam Howell,QB,5500,16.570,18.34
5,Dak Prescott,QB,6200,18.470,
...,...,...,...,...,...
351,Jason Brownlee,WR,3000,0.225,
354,Brandon Bolden,RB,4000,0.365,
355,Davis Allen,TE,2500,0.195,
356,Mike Boone,RB,4000,0.580,1.10


In [20]:
def show_optimal_picks(df, pts_col='fpts', sal_col='salary'):
    """Solve for the best possible lineup given a points column and print its totals.

    Rows containing any missing value are dropped before optimizing.

    Args:
        df: player table accepted by ``PulpSelection``.
        pts_col: column holding the points to maximize.
        sal_col: column holding each player's salary; it is used both by the
            optimizer and for the printed salary total.

    Returns:
        DataFrame of the selected players.
    """
    scored = df.dropna()
    # Forward sal_col so the optimizer and the printed total use the same
    # salary column.
    model = PulpSelection(scored, pts_col=pts_col, sal_col=sal_col)
    team = model.selection
    print("\nTotal points: {}     Total salary: {}\n".format(sum(team[pts_col]), sum(team[sal_col])))
    return team

In [21]:
# Best lineup in hindsight: optimizes on the 'fpts' column (the function's
# default) instead of the projection.
show_optimal_picks(df)

Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /usr/local/Caskroom/miniconda/base/envs/default/lib/python3.10/site-packages/pulp/solverdir/cbc/osx/64/cbc /var/folders/cv/pyjmd76x58dbyxrhzycj6c880000gp/T/277c43891ee94cf3b082240ccce60552-pulp.mps max timeMode elapsed branch printingOptions all solution /var/folders/cv/pyjmd76x58dbyxrhzycj6c880000gp/T/277c43891ee94cf3b082240ccce60552-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 15 COLUMNS
At line 1690 RHS
At line 1701 BOUNDS
At line 1952 ENDATA
Problem MODEL has 10 rows, 250 columns and 947 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 242.62 - 0.00 seconds
Cgl0004I processed model has 7 rows, 230 columns (230 integer (226 of which binary)) and 690 elements
Cbc0038I Initial state - 0 integers unsatisfied sum - 0
Cbc0038I Solution found of -242.62
Cbc0038I Cleaned solution of -242.62
C

Unnamed: 0,name,pos,salary,prediction,fpts
11,Drake London,WR,4800,12.345,24.5
28,Jared Goff,QB,6400,16.83,25.42
63,Raheem Mostert,RB,6400,15.74,37.2
67,Cooper Kupp,WR,9000,20.78,30.8
69,Kyren Williams,RB,6500,15.255,24.8
88,Tyreek Hill,WR,9300,22.715,31.3
90,Adam Thielen,WR,5900,13.61,31.5
92,Minnesota Vikings,Def,3000,6.075,21.0
97,Dalton Schultz,TE,3600,8.195,16.1
