In [None]:
# model -> project points per player
#-    Stats
#     Usage
#     Percentage of team scoring
#-    Injuries
#-    Bye week
#     Injuries to Key Teammates / Individual Matchups
#-    Opposing Team (record, defense, injuries, etc.)
#-    Home/Away
#     Weather
#     Expert Picks
#     Other Fantasy Projections

# maximize value (pts/salary)

In [1]:
%matplotlib inline

%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np

In [3]:
def format_stats(df):
    df = df.dropna()
    df = df[df['Week'] != 'Week']  # remove 'filler' rows
    # update historical teams
    m = {'oak':'lvr', 'sdg':'lac', 'stl':'lar'}
    df.replace({'Oppt': m, 'Team': m}, inplace=True)
    df = df[(df.Oppt != '-') & (df.Pos != 'PK')]
    df = df.astype({'Week':int, 'Year':int, 'GID':int, 'Name':'string', 'Pos':'string', 'Team':'string',
                    'h/a':'string', 'Oppt':'string', 'FD points':float, 'FD salary':int})
    return df.rename(columns={'FD points': 'Points', 'FD salary': 'Salary'})

In [4]:
def add_averages(df):
    df.sort_values(['Year','Week'], inplace=True)
    df['prev_pts'] = df.groupby(['Year','GID'])['Points'].shift() #.fillna(0.0)
    df['avg_pts'] = df.groupby(['Year','GID'])['prev_pts'].transform(lambda x: x.expanding(3).mean())
    df['pts_std'] = df.groupby(['Year','GID'])['prev_pts'].transform(lambda x: x.expanding(3).std())
#     df['last3_avg'] = df.groupby(['Year','GID'])['prev_pts'].transform(lambda x: x.rolling(3,3).mean())
    df['ewm5'] = df.groupby(['Year','GID'])['prev_pts'].transform(lambda x: x.ewm(span=5, min_periods=3).mean())
    df['avg_rank'] = df.groupby(['Year','Week','Pos'])['avg_pts'].rank("dense", ascending=False, na_option='bottom')
    df['prev_rank'] = df.groupby(['Year','Week','Pos'])['prev_pts'].rank("dense", ascending=False, na_option='bottom')
    return df

In [5]:
# fix name differences over time
def use_most_recent_name(df):
    newest_names = df.groupby('GID').agg({'Name':'last'}).astype('string')
    df = df.set_index('GID')
    df.update(newest_names)
    return df.astype({'Name':'string'}).reset_index()

In [6]:
def opp_pos_norm(df):
    o = df.groupby(['Year','Week','Oppt','Pos']).agg({'Points': 'sum'})
    o['prev_pts'] = o.groupby(['Year','Oppt','Pos'])['Points'].shift() #.fillna(0.0)
    o['running_avg'] = o.groupby(['Year','Oppt','Pos'])['prev_pts'].transform(lambda x: x.expanding().mean())
    o['running_league_avg'] = o.groupby(['Year','Week','Pos'])['running_avg'].transform('mean')
    o['running_league_std'] = o.groupby(['Year','Week','Pos'])['running_avg'].transform('std')
    o['norm'] = (o['running_avg']-o['running_league_avg'])/o['running_league_std']
    ndf = df.join(o[['norm']], on=['Year','Week','Oppt','Pos'], how='left')
    return ndf.astype({'Pos':'string', 'Oppt':'string'})

In [7]:
def points_adjusted(df, col, mult_factor=2):
    df['Pts Adjusted'] = df[col] + (mult_factor*df['norm'])
    return df

In [75]:
# def remove_potential_injuries(df):
#     df['inj'] = df.groupby(['Year','GID'])['Points'].transform(lambda x: x.shift(-1)==0.0)
#     return df[~df.inj].drop('inj', axis=1)

In [8]:
def add_gp_and_season_totals(df):
    df['gp'] = df.groupby(['Year','GID'])['Points'].transform(lambda x: x.shift()>0.0)
    df['gp'] = df.groupby(['Year','GID'])['gp'].transform(lambda x: x.expanding().sum())
    df['season_points'] = df.groupby(['Year','GID'])['Points'].transform(lambda x: x.shift().expanding().sum())
    return df

In [9]:
def enrich_df(res):
    res = res.copy()
    res = add_averages(res)
    res = opp_pos_norm(res)
#     res = remove_potential_injuries(res)
    res = add_gp_and_season_totals(res)
    res = res[res.Week > 4]
    res = use_most_recent_name(res)
    res = res.fillna(0.0)
    res = points_adjusted(res, 'ewm5', mult_factor=1.5)
    return res

## Historical DF

In [None]:
# # last 10 years
# for year in range(2010,2021):
#     f = open(f"historical_fantasy_data/{year}_fantasy_points.txt", "w", newline='')
#     for week in range(1,18):
#         url = "http://rotoguru1.com/cgi-bin/fyday.pl?week="+str(week)+"&game=fd&year="+str(year)+"&scsv=1"
#         r = requests.get(url)
#         soup = BeautifulSoup(r.content, 'html.parser')
#         data = soup.find_all('pre')[-1]
#         f.write(data.text)
#     f.close()

In [9]:
# enrich and combine into one dataframe
def historical_df():
    res = pd.DataFrame()
    for year in range(2010,2021):
        df = format_stats(pd.read_csv(f"historical_fantasy_data/{year}_fantasy_points.txt", delimiter=';'))
        res = res.append(df)
    return res

In [92]:
res = historical_df()
rdf = enrich_df(res)
rdf

Unnamed: 0,GID,Week,Year,Name,Pos,Team,h/a,Oppt,Points,Salary,prev_pts,avg_pts,pts_std,ewm5,avg_rank,prev_rank,norm,Pts Adjusted
0,1232,5,2011,"Roethlisberger, Ben",QB,pit,h,ten,28.12,7800,8.34,12.305000,4.206166,11.841538,22.0,23.0,-1.216577,10.016673
1,1241,5,2011,"Cassel, Matt",QB,kan,a,ind,27.48,7500,16.40,9.680000,7.210455,11.754462,24.0,18.0,-0.265659,11.355973
2,1155,5,2011,"Vick, Michael",QB,phi,a,buf,25.60,9600,31.14,19.660000,9.572906,21.121538,6.0,3.0,1.467186,23.322317
3,1378,5,2011,"Newton, Cam",QB,car,h,nor,24.66,8500,33.46,27.435000,8.427216,27.418154,3.0,2.0,0.327539,27.909462
4,1225,5,2011,"Manning, Eli",QB,nyg,h,sea,24.30,8300,18.54,18.760000,4.994851,19.539385,8.0,10.0,-0.394480,18.947665
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
51102,7011,17,2020,Detroit,Def,det,h,min,-1.00,3400,4.00,2.400000,3.112188,2.003185,28.0,9.0,0.664761,3.000326
51103,7015,17,2020,Kansas City,Def,kan,h,lac,-1.00,4200,7.00,7.333333,7.016986,6.604938,7.0,6.0,0.294807,7.047149
51104,7002,17,2020,Atlanta,Def,atl,a,tam,-1.00,3300,5.00,4.866667,6.801961,5.482783,19.0,8.0,-0.753396,4.352689
51105,7006,17,2020,Chicago,Def,chi,h,gnb,-1.00,4100,6.00,6.200000,5.414267,6.656157,14.0,7.0,-1.709110,4.092492


In [76]:
# check for null values
rdf[rdf.isna().any(axis=1)]

Unnamed: 0,GID,Week,Year,Name,Pos,Team,h/a,Oppt,Points,Salary,prev_pts,avg_pts,pts_std,ewm5,avg_rank,prev_rank,norm,Pts Adjusted


## Encode Categorical Variables

In [26]:
# numerical encoding for categorical variables
def encode_cats(res):
    res = res.copy()
    team_idx = {p:i for i,p in enumerate(sorted(res.Team.unique()))}
    pos_idx = {p:i for i,p in enumerate(sorted(res.Pos.unique()))}
    res.replace({'h/a': {'h': 1, 'a': 0}, 'Pos': pos_idx, 'Team': team_idx, 'Oppt': team_idx}, inplace=True)
    return res

In [84]:
tdf = encode_cats(rdf)

In [44]:
# null value check per column
tdf.isna().any()

GID             False
Week            False
Year            False
Name            False
Pos             False
Team            False
h/a             False
Oppt            False
Points          False
Salary          False
prev_pts        False
avg_pts         False
pts_std         False
ewm5            False
avg_rank        False
prev_rank       False
norm            False
Pts Adjusted    False
dtype: bool

In [None]:
# res['Year'] = pd.Categorical(res['Year'])
# res['Week'] = pd.Categorical(res['Week'])
# res['GID'] = pd.Categorical(res['GID'])
# res['Team'] = pd.Categorical(res['Team'])
# res['h/a'] = pd.Categorical(res['h/a'])
# res['Oppt'] = pd.Categorical(res['Oppt'])
# res['Pos'] = pd.Categorical(res['Pos'])

### Plot Skewness

In [None]:
import seaborn as sns
from scipy import stats

In [None]:
# Figure
plt.figure(figsize=(12, 4))
plt.suptitle("Visualising the skewness of the Points target variable")

# Distribution Plot
plt.subplot(1, 2, 1)
sns.histplot(res["Points"], stat = "density", kde = True)
plt.title('Distribution Plot')

# Probability Plot
plt.subplot(1, 2, 2)
stats.probplot(res['Points'], plot=plt)

plt.tight_layout()
plt.show()
plt.clf()

## Regression Model Comparison

In [25]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error#, mean_squared_log_error as MSLE

from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LassoCV, LinearRegression, RidgeCV, Ridge, Lasso
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, BaggingRegressor, \
                             ExtraTreesRegressor, GradientBoostingRegressor

from xgboost import XGBRegressor

SEED = 42

In [85]:
tdf = tdf.set_index('Name')

y = tdf.Points
X = tdf.drop(['Points'], axis=1)

train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.20)

In [47]:
# current heuristic
print ("Mean Absolute Error using Decision Tress :", mean_absolute_error(test_y, test_X['Pts Adjusted']))
print ("Mean Squared Error using Decision Tress :", mean_squared_error(test_y, test_X['Pts Adjusted']))

Mean Absolute Error using Decision Tress : 4.181625237478876
Mean Squared Error using Decision Tress : 34.95471941283859


In [None]:
model = DecisionTreeRegressor(criterion='mse')  
model.fit(train_X, train_y) 
preds = model.predict(test_X)
print ("Mean Absolute Error using Decision Tress :", mean_absolute_error(test_y, preds))
print ("Mean Squared Error using Decision Tress :", mean_squared_error(test_y, preds))

In [None]:
feat_importance = {col: imp for col,imp in zip(test_X.columns, model.feature_importances_)}
feat_importance

In [48]:
model = RandomForestRegressor(criterion='mse')  
model.fit(train_X, train_y) 
preds = model.predict(test_X)
print ("Mean Absolute Error using Decision Tress :", mean_absolute_error(test_y, preds))
print ("Mean Squared Error using Decision Tress :", mean_squared_error(test_y, preds))

Mean Absolute Error using Decision Tress : 3.977296380356094
Mean Squared Error using Decision Tress : 30.96290914318137


In [66]:
# Only Imp Feats
model = RandomForestRegressor(criterion='mse')  
model.fit(train_X, train_y) 
preds = model.predict(test_X)
print ("Mean Absolute Error using Decision Tress :", mean_absolute_error(test_y, preds))
print ("Mean Squared Error using Decision Tress :", mean_squared_error(test_y, preds))

Mean Absolute Error using Decision Tress : 3.9499088045392288
Mean Squared Error using Decision Tress : 30.320428562758753


In [67]:
feat_importance = {col: imp for col,imp in zip(test_X.columns, model.feature_importances_)}
feat_importance

{'GID': 0.08669241900429432,
 'Salary': 0.27739472636389734,
 'prev_pts': 0.07393348051399191,
 'avg_pts': 0.21744934257092455,
 'pts_std': 0.07873289599756898,
 'ewm5': 0.10022417996242758,
 'norm': 0.07760390047039901,
 'Pts Adjusted': 0.08796905511649634}

In [None]:
# Ridge Regression (L2 Regularization)
ridge = RidgeCV(normalize=False)
ridge.fit(train_X, train_y)
best_alpha = ridge.alpha_
print(best_alpha)
preds = ridge.predict(test_X)
print ("Mean Absolute Error using Decision Tress :", mean_absolute_error(test_y, preds))
print ("Mean Squared Error using Decision Tress :", mean_squared_error(test_y, preds))

In [None]:
# Lasso Regression (L1 Regularization)
lasso = LassoCV(alphas=None, max_iter=100000, normalize=True)
lasso.fit(train_X, train_y)
best_alpha = lasso.alpha_
print(best_alpha)
preds = lasso.predict(test_X)
print ("Mean Absolute Error using Decision Tress :", mean_absolute_error(test_y, preds))
print ("Mean Squared Error using Decision Tress :", mean_squared_error(test_y, preds))

In [None]:
forest = RandomForestRegressor(random_state = SEED)
forest.fit(train_X, train_y)
preds = forest.predict(test_X)
print ("Mean Absolute Error using Decision Tress :", mean_absolute_error(test_y, preds))
print ("Mean Squared Error using Decision Tress :", mean_squared_error(test_y, preds))

In [None]:
ada = AdaBoostRegressor(random_state = SEED)
ada.fit(train_X, train_y)
preds = ada.predict(test_X)
print ("Mean Absolute Error using Decision Tress :", mean_absolute_error(test_y, preds))
print ("Mean Squared Error using Decision Tress :", mean_squared_error(test_y, preds))

In [None]:
bagging = BaggingRegressor(random_state = SEED)
bagging.fit(train_X, train_y)
preds = bagging.predict(test_X)
print ("Mean Absolute Error using Decision Tress :", mean_absolute_error(test_y, preds))
print ("Mean Squared Error using Decision Tress :", mean_squared_error(test_y, preds))

In [None]:
ETR = ExtraTreesRegressor(random_state = SEED)
ETR.fit(train_X, train_y)
preds = ETR.predict(test_X)
print ("Mean Absolute Error using Decision Tress :", mean_absolute_error(test_y, preds))
print ("Mean Squared Error using Decision Tress :", mean_squared_error(test_y, preds))

In [86]:
GBR = GradientBoostingRegressor(random_state = SEED)
GBR.fit(train_X, train_y)
preds = GBR.predict(test_X)
print ("Mean Absolute Error using Decision Tress :", mean_absolute_error(test_y, preds))
print ("Mean Squared Error using Decision Tress :", mean_squared_error(test_y, preds))

Mean Absolute Error using Decision Tress : 3.8428768582090345
Mean Squared Error using Decision Tress : 29.371090708613796


In [80]:
XGB = XGBRegressor(random_state = SEED)
XGB.fit(train_X, train_y)
preds = XGB.predict(test_X)
print ("Mean Absolute Error using Decision Tress :", mean_absolute_error(test_y, preds))
print ("Mean Squared Error using Decision Tress :", mean_squared_error(test_y, preds))

Mean Absolute Error using Decision Tress : 3.864580542043452
Mean Squared Error using Decision Tress : 30.434276268944544


## This Year

In [10]:
from bs4 import BeautifulSoup
import requests

In [11]:
year = 2021

f = open(f"2021_fantasy_data/{year}_fantasy_points.txt", "w", newline='')
for week in range(1,19):
    url = "http://rotoguru1.com/cgi-bin/fyday.pl?week="+str(week)+"&game=fd&year="+str(year)+"&scsv=1"
    r = requests.get(url)
    soup = BeautifulSoup(r.content, 'html.parser')
    data = soup.find_all('pre')[-1]
    f.write(data.text)
f.close()

In [12]:
year_df = format_stats(pd.read_csv(f"2021_fantasy_data/{year}_fantasy_points.txt", delimiter=';'))
year_df

Unnamed: 0,Week,Year,GID,Name,Pos,Team,h/a,Oppt,Points,Salary
0,1,2021,1537,"Murray, Kyler",QB,ari,a,ten,34.56,8400
1,1,2021,1523,"Mahomes II, Patrick",QB,kan,h,cle,33.28,8800
2,1,2021,1490,"Goff, Jared",QB,det,h,sfo,29.92,6500
3,1,2021,1465,"Winston, Jameis",QB,nor,h,gnb,29.62,6700
4,1,2021,1131,"Brady, Tom",QB,tam,h,dal,29.16,7800
...,...,...,...,...,...,...,...,...,...,...
5733,13,2021,7032,Houston,Def,hou,h,ind,2.00,3300
5734,13,2021,7003,Baltimore,Def,bal,a,pit,2.00,4800
5735,13,2021,7021,New York J,Def,nyj,h,phi,1.00,3500
5736,13,2021,7006,Chicago,Def,chi,h,ari,0.00,3600


### Test historical regression predictions on this year's data

In [354]:
ydf = encode_cats(enrich_df(year_df))

tst_y = ydf.Points
tst_X = ydf.drop(['Points'], axis=1).set_index('Name')

In [355]:
# current heuristic
print ("Mean Absolute Error using Decision Tress :", mean_absolute_error(tst_y, tst_X['Pts Adjusted']))
print ("Mean Squared Error using Decision Tress :", mean_squared_error(tst_y, tst_X['Pts Adjusted']))

Mean Absolute Error using Decision Tress : 4.194664500279587
Mean Squared Error using Decision Tress : 34.22182014092551


In [356]:
# Gradient Boosting Regression
preds = GBR.predict(tst_X)
print ("Mean Absolute Error using Decision Tress :", mean_absolute_error(tst_y, preds))
print ("Mean Squared Error using Decision Tress :", mean_squared_error(tst_y, preds))

Mean Absolute Error using Decision Tress : 3.8960175521083538
Mean Squared Error using Decision Tress : 29.02786616611834


## Re-train regression on all data

In [357]:
full = res.append(year_df)
full = full.sort_values(['Year','Week']).reset_index()
full

Unnamed: 0,index,Week,Year,GID,Name,Pos,Team,h/a,Oppt,Points,Salary
0,0,1,2011,1131,"Brady, Tom",QB,nwe,a,mia,35.98,9200
1,1,1,2011,1309,"Henne, Chad",QB,mia,h,nwe,35.54,6800
2,2,1,2011,1378,"Newton, Cam",QB,car,a,ari,31.68,6700
3,3,1,2011,1151,"Brees, Drew",QB,nor,a,gnb,29.06,8900
4,4,1,2011,1242,"Fitzpatrick, Ryan",QB,buf,a,kan,24.62,7900
...,...,...,...,...,...,...,...,...,...,...,...
72243,4874,11,2021,7031,Washington,Def,was,a,car,1.00,3500
72244,4875,11,2021,7017,Minnesota,Def,min,h,gnb,1.00,3400
72245,4876,11,2021,7030,Tennessee,Def,ten,h,hou,0.00,4600
72246,4877,11,2021,7026,LA Chargers,Def,lac,h,pit,-1.00,4200


In [354]:
fdf = encode_cats(enrich_df(year_df))

full_y = fdf.Points
full_X = fdf.drop(['Points'], axis=1).set_index('Name')

In [355]:
# train GBR on full training set
GBR = GradientBoostingRegressor(random_state = SEED)
GBR.fit(full_X, full_y)

GradientBoostingRegressor(random_state=42)

# Upcoming week

## Download and Format data

In [13]:
wk = max(year_df.Week)+1
wk

14

In [18]:
# !pip install selenium
# download chrome driver from: https://chromedriver.chromium.org/home
# unzip chromedriver.zip
# $ mv <Path>/chromedriver /usr/local/bin/.
# $ xattr -d com.apple.quarantine /usr/local/bin/chromedriver 

In [14]:
from selenium import webdriver

In [15]:
driver = webdriver.Chrome()
driver.get("https://www.dailyfantasycafe.com/tools/salary-exploitation/nfl")
elem = driver.find_element('id', "DataTables_Table_0")
week_df = pd.read_html(elem.get_attribute('outerHTML'))[0]
driver.close()

In [16]:
week_df

Unnamed: 0,Name,Position,Team,Opponent Opp,DraftKings,FanDuel,Yahoo
0,A.J. Brown,WR,TEN,JAX,"$6,800","$6,500",-
1,A.J. Green,WR,ARI,LAR,"$5,000","$5,600",$15
2,Aaron Fuller,WR,SEA,@HOU,"$3,000","$4,500",-
3,Aaron Jones,RB,GB,CHI,"$6,200","$7,500",$28
4,Aaron Rodgers,QB,GB,CHI,"$7,200","$7,900",$31
...,...,...,...,...,...,...,...
607,Zach Ertz,TE,ARI,LAR,"$5,300","$5,300",$19
608,Zach Gentry,TE,PIT,@MIN,"$2,800","$4,600",$10
609,Zach Wilson,QB,NYJ,NO,"$5,300","$6,600",$22
610,Zack Moss,RB,BUF,@TB,"$4,500","$5,400",$10


In [17]:
def format_salary_df(df):
    df = df[df.FanDuel!='-']
    df = df[df.Position.isin(['QB','WR','RB','TE','D'])]
    df = df.rename(columns={'Position':'Pos', 'Opponent  Opp':'Oppt'})
    df.loc[df.Pos=='D', 'Pos'] = 'Def'
    df['Salary'] = df['FanDuel'].str.replace('[\$,]', '', regex=True).astype(int)
    return df[['Name','Pos','Team','Oppt','Salary']]

def last_name_first(df):
    s = df[df['Pos']!='Def']['Name']
    s = s.str.split(' ', expand=True).fillna('')
    s['last_name'] = s.iloc[:, 1:].agg(' '.join, axis=1).str.strip()
    s['join_name'] = s[['last_name',0]].agg(', '.join, axis=1)
    df.loc[df.Pos!='Def', 'Name'] = s.join_name
    return df

def add_home_away_update_team_names(df):
    team_names = {'gb':'gnb','jax':'jac','kc':'kan','lv':'lvr','ne':'nwe','no':'nor','sf':'sfo','tb':'tam'}
    df['h/a'] = df['Oppt'].apply(lambda x: 'a' if '@' in x else 'h')
    df['Oppt'] = df.Oppt.str.replace('@','').str.lower().replace(team_names)
    df['Team'] = df.Team.str.lower().replace(team_names)
    return df

def add_GIDs(salary, df):    
    d = df[df.Pos=='Def'].groupby(['GID'], as_index=False).agg('last')
    s = salary[salary.Pos=='Def'].drop(columns=['Name']).copy()
    s = s.join(d[['GID','Team','Name']].set_index('Team'), on='Team', how='inner')

    d = df[df.Pos!='Def'].groupby(['GID'], as_index=False).agg('last')
    p = salary[salary.Pos!='Def'].copy()
    p = p.join(d[['GID','Name']].set_index('Name'), on='Name', how='inner')
    return pd.concat([p,s])

def reformat_next_week_df(week_df, year_df, week, year='2021'):
    df = week_df.copy()
    df = format_salary_df(df)
    df = last_name_first(df)
    df = add_home_away_update_team_names(df)
    df = add_GIDs(df, year_df)
    df['Week'] = week
    df['Year'] = year
    df['Points'] = 0.0
    df = df.astype({'Week':int, 'Year':int, 'GID':int, 'Name':'string', 'Pos':'string', 'Team':'string',
                    'h/a':'string', 'Oppt':'string', 'Points':float, 'Salary':int})
    return df[['Week','Year','GID','Name','Pos','Team','h/a','Oppt','Points','Salary']]

In [18]:
wdf = reformat_next_week_df(week_df, year_df, wk)
wdf

Unnamed: 0,Week,Year,GID,Name,Pos,Team,h/a,Oppt,Points,Salary
0,14,2021,5728,"Brown, A.J.",WR,ten,h,jac,0.0,6500
1,14,2021,5091,"Green, A.J.",WR,ari,h,lar,0.0,5600
3,14,2021,5543,"Jones, Aaron",RB,gnb,h,chi,0.0,7500
4,14,2021,1252,"Rodgers, Aaron",QB,gnb,h,chi,0.0,7900
5,14,2021,5412,"Humphries, Adam",WR,was,h,dal,0.0,5000
...,...,...,...,...,...,...,...,...,...,...
518,14,2021,7027,San Francisco,Def,sfo,a,cin,0.0,3900
525,14,2021,7028,Seattle,Def,sea,a,hou,0.0,4500
541,14,2021,7029,Tampa Bay,Def,tam,h,buf,0.0,3800
550,14,2021,7030,Tennessee,Def,ten,h,jac,0.0,4700


## Add upcoming week to year

In [19]:
ydf = pd.concat([year_df, wdf], ignore_index=True)
ydf = enrich_df(ydf)
ydf

Unnamed: 0,GID,Week,Year,Name,Pos,Team,h/a,Oppt,Points,Salary,prev_pts,avg_pts,pts_std,ewm5,avg_rank,prev_rank,norm,gp,season_points,Pts Adjusted
0,1552,5,2021,"Herbert, Justin",QB,lac,h,cle,42.82,7700,21.28,20.805000,7.277699,22.236308,14.0,12.0,-0.445161,4.0,83.22,21.568566
1,1527,5,2021,"Jackson, Lamar",QB,bal,h,ind,41.88,8200,19.44,22.995000,7.568785,22.231385,9.0,16.0,0.286384,4.0,91.98,22.660961
2,1131,5,2021,"Brady, Tom",QB,tam,h,mia,37.74,8400,11.06,24.635000,9.058482,21.597231,4.0,27.0,-0.378998,4.0,98.54,21.028734
3,1529,5,2021,"Allen, Josh",QB,buf,a,kan,36.50,8800,21.02,23.275000,9.451312,24.415692,7.0,13.0,1.746978,4.0,93.10,27.036159
4,1465,5,2021,"Winston, Jameis",QB,nor,a,was,26.76,7000,13.04,16.630000,8.772389,14.715077,21.0,25.0,2.084038,4.0,66.52,17.841134
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4070,7027,14,2021,San Francisco,Def,sfo,a,cin,0.00,3900,9.00,6.500000,4.602371,8.261497,12.0,6.0,0.097820,11.0,78.00,8.408227
4071,7028,14,2021,Seattle,Def,sea,a,hou,0.00,4500,9.00,5.500000,3.580249,7.000827,17.0,6.0,1.467307,11.0,66.00,9.201788
4072,7029,14,2021,Tampa Bay,Def,tam,h,buf,0.00,3800,11.00,8.666667,6.813534,10.211943,4.0,4.0,-1.057684,10.0,104.00,8.625417
4073,7030,14,2021,Tennessee,Def,ten,h,jac,0.00,4700,-2.00,6.166667,6.603489,3.815495,13.0,15.0,1.595697,9.0,74.00,6.209040


In [23]:
zdf = ydf[ydf.Week==wk].copy()

### Regression

In [27]:
tmp = encode_cats(ydf)

trn = tmp[tmp.Week<wk]
tst = tmp[tmp.Week==wk]

In [28]:
trn_y = trn.Points
trn_x = trn.drop(['Points'], axis=1).set_index('Name')

tst_x = tst.drop(['Points'], axis=1).set_index('Name')

# train GBR on full training set
GBR = GradientBoostingRegressor(random_state = SEED)
GBR.fit(trn_x, trn_y)

GradientBoostingRegressor(random_state=42)

In [29]:
zdf['Predictions'] = GBR.predict(tst_x)
zdf

Unnamed: 0,GID,Week,Year,Name,Pos,Team,h/a,Oppt,Points,Salary,...,avg_pts,pts_std,ewm5,avg_rank,prev_rank,norm,gp,season_points,Pts Adjusted,Predictions
3587,5728,14,2021,"Brown, A.J.",WR,ten,h,jac,0.0,6500,...,10.350000,8.608297,9.261523,29.0,37.0,0.761455,10.0,103.50,10.403707,10.660973
3588,5091,14,2021,"Green, A.J.",WR,ari,h,lar,0.0,5600,...,8.245455,5.650197,5.654595,39.0,70.0,-0.015995,11.0,90.70,5.630603,5.823628
3589,5543,14,2021,"Jones, Aaron",RB,gnb,h,chi,0.0,7500,...,13.154545,10.427403,8.413905,17.0,50.0,-0.450640,11.0,144.70,7.737945,11.018229
3590,1252,14,2021,"Rodgers, Aaron",QB,gnb,h,chi,0.0,7900,...,20.801818,8.280060,23.963711,7.0,3.0,0.333010,11.0,228.82,24.463225,18.422904
3591,5412,14,2021,"Humphries, Adam",WR,was,h,dal,0.0,5000,...,3.766667,2.754610,3.978453,90.0,43.0,0.740506,12.0,45.20,5.089212,5.463619
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4070,7027,14,2021,San Francisco,Def,sfo,a,cin,0.0,3900,...,6.500000,4.602371,8.261497,12.0,6.0,0.097820,11.0,78.00,8.408227,7.807352
4071,7028,14,2021,Seattle,Def,sea,a,hou,0.0,4500,...,5.500000,3.580249,7.000827,17.0,6.0,1.467307,11.0,66.00,9.201788,9.589268
4072,7029,14,2021,Tampa Bay,Def,tam,h,buf,0.0,3800,...,8.666667,6.813534,10.211943,4.0,4.0,-1.057684,10.0,104.00,8.625417,8.382038
4073,7030,14,2021,Tennessee,Def,ten,h,jac,0.0,4700,...,6.166667,6.603489,3.815495,13.0,15.0,1.595697,9.0,74.00,6.209040,7.925892


### Remove injured players

Download injury report here:

https://www.rotowire.com/football/injury-report.php

In [20]:
def load_injury_report():
    inj = pd.read_csv('2021_fantasy_data/nfl-injury-report.csv')
    inj = inj[inj['Pos'].isin(['QB','RB','WR','TE'])]
    inj = inj[inj['Team']!='FA']
    inj.rename(columns={"Player": "Name"}, inplace=True)
    return last_name_first(inj)

In [21]:
def remove_injuries(df, inj):
    df = df.join(inj[['Name','Status']].set_index('Name'), on='Name', how='left')
    df = df[df['Status'].isna()]
    return df.drop('Status', axis=1)

In [30]:
inj = load_injury_report()
zdf = remove_injuries(zdf, inj)
zdf

Unnamed: 0,GID,Week,Year,Name,Pos,Team,h/a,Oppt,Points,Salary,...,avg_pts,pts_std,ewm5,avg_rank,prev_rank,norm,gp,season_points,Pts Adjusted,Predictions
3588,5091,14,2021,"Green, A.J.",WR,ari,h,lar,0.0,5600,...,8.245455,5.650197,5.654595,39.0,70.0,-0.015995,11.0,90.7,5.630603,5.823628
3589,5543,14,2021,"Jones, Aaron",RB,gnb,h,chi,0.0,7500,...,13.154545,10.427403,8.413905,17.0,50.0,-0.450640,11.0,144.7,7.737945,11.018229
3591,5412,14,2021,"Humphries, Adam",WR,was,h,dal,0.0,5000,...,3.766667,2.754610,3.978453,90.0,43.0,0.740506,12.0,45.2,5.089212,5.463619
3594,2553,14,2021,"Peterson, Adrian",RB,sea,a,hou,0.0,5600,...,6.150000,2.868798,6.233846,46.0,27.0,0.884006,4.0,24.6,7.559855,6.796841
3595,5825,14,2021,"Dillon, AJ",RB,gnb,h,chi,0.0,7500,...,9.658333,7.507264,14.838009,29.0,13.0,-0.450640,11.0,115.9,14.162050,12.542300
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4070,7027,14,2021,San Francisco,Def,sfo,a,cin,0.0,3900,...,6.500000,4.602371,8.261497,12.0,6.0,0.097820,11.0,78.0,8.408227,7.807352
4071,7028,14,2021,Seattle,Def,sea,a,hou,0.0,4500,...,5.500000,3.580249,7.000827,17.0,6.0,1.467307,11.0,66.0,9.201788,9.589268
4072,7029,14,2021,Tampa Bay,Def,tam,h,buf,0.0,3800,...,8.666667,6.813534,10.211943,4.0,4.0,-1.057684,10.0,104.0,8.625417,8.382038
4073,7030,14,2021,Tennessee,Def,ten,h,jac,0.0,4700,...,6.166667,6.603489,3.815495,13.0,15.0,1.595697,9.0,74.0,6.209040,7.925892


### Filter by avg points scored or games played

In [31]:
def final_filter(df):
    df = df[df.season_points>df.season_points.mean()] # scored less than the average season points
#     df = df[df.gp>df.gp.mean()] # played less than average games
    return df

In [32]:
zdf = final_filter(zdf)
len(zdf)

144

### Regression Predict

In [392]:
preds = GBR.predict(encode_cats(zdf).drop(['Points'], axis=1).set_index('Name'))

pdf['Predictions'] = preds
pdf

Unnamed: 0,GID,Week,Year,Name,Pos,Team,h/a,Oppt,Points,Salary,prev_pts,total_avg,last3_avg,ewm5,norm,Pts Adjusted,Predictions
3035,5820,12,2021,"Taylor, Jonathan",RB,ind,h,tam,0.0,9800,51.90,22.581818,35.500000,33.139542,-0.753587,32.009162,4.589930
3054,1552,12,2021,"Herbert, Justin",QB,lac,a,den,0.0,7800,35.28,23.378000,26.640000,25.578624,-1.231152,23.731896,11.773828
3089,1527,12,2021,"Jackson, Lamar",QB,bal,h,cle,0.0,8500,16.42,24.420000,24.046667,23.217690,0.199085,23.516318,7.347566
2994,1554,12,2021,"Hurts, Jalen",QB,phi,a,nyg,0.0,8400,30.78,22.985455,22.293333,23.010275,0.235510,23.363540,12.017624
3043,1529,12,2021,"Allen, Josh",QB,buf,a,nor,0.0,8800,18.16,24.044000,17.220000,21.197866,0.526066,21.986966,6.320569
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2870,4792,12,2021,"Thompson, Colin",TE,car,a,mia,0.0,4000,0.00,0.000000,0.000000,0.000000,0.271924,0.407886,6.114157
3209,2820,12,2021,"Jones, Taiwan",RB,buf,a,nor,0.0,4500,0.00,0.000000,0.000000,0.000000,-1.372363,-2.058545,5.873935
2829,4754,12,2021,"Dillon, Brandon",TE,min,a,sfo,0.0,4000,0.00,0.000000,0.000000,0.000000,-1.090116,-1.635173,5.780014
3099,4532,12,2021,"Stocker, Luke",TE,min,a,sfo,0.0,4100,0.00,0.440000,0.333333,0.307109,-1.090116,-1.328064,7.222000


#### Check non-matching columns

In [None]:
m = df.merge(salary, on='Name', how='outer', suffixes=['', '_'], indicator=True)

In [None]:
x = m[m._merge=='right_only']
x[~x['Points'].isna()]

### Top Projected per Pos

In [183]:
def avg_per_dollar(df, col):
    df['Avg Value'] = df[col] / (df['Salary'] / 1000)
    return df.sort_values('Avg Value', ascending=False)

In [184]:
# pdf.sort_values('Predictions', ascending=False, inplace=True)
pdf = avg_per_dollar(zdf, 'Pts Adjusted')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Avg Value'] = df[col] / (df['Salary'] / 1000)


In [122]:
top_QBs = pdf[pdf.Pos=='QB']
top_QBs.head(10)

Unnamed: 0,GID,Week,Year,Name,Pos,Team,h/a,Oppt,Points,Salary,...,avg_pts,pts_std,ewm5,avg_rank,prev_rank,norm,gp,season_points,Pts Adjusted,Avg Value
3450,1552,13,2021,"Herbert, Justin",QB,lac,a,cin,0.0,8200,...,23.227273,10.344418,24.277372,3.0,7.0,-0.484674,11.0,255.5,23.550362,2.871995
3325,1441,13,2021,"Carr, Derek",QB,lvr,h,was,0.0,7500,...,18.078182,5.803271,16.853468,11.0,10.0,2.571777,11.0,198.86,20.711133,2.761484
3286,1501,13,2021,"Prescott, Dak",QB,dal,a,nor,0.0,8200,...,20.968,8.242325,19.998999,8.0,2.0,0.971128,10.0,209.68,21.45569,2.616548
3475,1415,13,2021,"Cousins, Kirk",QB,min,a,det,0.0,7700,...,20.038182,5.797706,19.488781,9.0,16.0,-0.086749,11.0,220.42,19.358657,2.514111
3635,1400,13,2021,"Taylor, Tyrod",QB,hou,h,ind,0.0,7000,...,16.888,5.354337,16.335924,16.0,17.0,0.693435,5.0,84.44,17.376077,2.482297
3364,1426,13,2021,"Smith, Geno",QB,sea,h,sfo,0.0,6100,...,11.056,8.016825,14.665592,28.0,5.0,0.24933,4.0,55.28,15.039588,2.465506
3510,1340,13,2021,"Stafford, Matthew",QB,lar,h,jac,0.0,7800,...,21.576364,5.474302,19.666174,7.0,3.0,-0.317855,11.0,237.34,19.189392,2.460178
3601,1131,13,2021,"Brady, Tom",QB,tam,a,atl,0.0,8100,...,23.129091,8.348626,18.267627,4.0,24.0,1.011002,11.0,254.42,19.784129,2.442485
3616,1488,13,2021,"Siemian, Trevor",QB,nor,h,dal,0.0,7000,...,16.484,6.804563,16.479526,17.0,35.0,0.219221,5.0,82.42,16.808358,2.401194
3595,1439,13,2021,"Bridgewater, Teddy",QB,den,a,kan,0.0,7100,...,15.965455,5.346947,15.124293,19.0,19.0,1.118417,11.0,175.62,16.801918,2.366467


In [123]:
top_RBs = pdf[pdf.Pos=='RB']
top_RBs.head(10)

Unnamed: 0,GID,Week,Year,Name,Pos,Team,h/a,Oppt,Points,Salary,...,avg_pts,pts_std,ewm5,avg_rank,prev_rank,norm,gp,season_points,Pts Adjusted,Avg Value
3488,5534,13,2021,"Fournette, Leonard",RB,tam,a,atl,0.0,7700,...,15.536364,10.556256,21.945997,7.0,1.0,0.591025,11.0,170.9,22.832535,2.965264
3427,5535,13,2021,"Mixon, Joe",RB,cin,h,lac,0.0,9400,...,18.418182,7.924496,24.5491,3.0,2.0,0.955815,11.0,202.6,25.982823,2.76413
3433,5820,13,2021,"Taylor, Jonathan",RB,ind,a,hou,0.0,10500,...,22.175,12.469536,27.953054,1.0,10.0,0.400579,12.0,266.1,28.553922,2.719421
3346,5947,13,2021,"Mitchell, Elijah",RB,sfo,a,sea,0.0,7600,...,13.8875,6.714895,16.505964,12.0,4.0,1.949689,8.0,111.1,19.430497,2.556644
3400,5537,13,2021,"Conner, James",RB,ari,a,chi,0.0,7300,...,14.527273,9.445644,18.554835,11.0,8.0,-0.507234,11.0,159.8,17.793984,2.437532
3221,5575,13,2021,"Ekeler, Austin",RB,lac,a,cin,0.0,9000,...,19.745455,9.464393,21.70376,2.0,6.0,0.100751,11.0,217.2,21.854886,2.428321
3530,5789,13,2021,"Gaskin, Myles",RB,mia,h,nyg,0.0,6400,...,10.908333,7.578493,13.871889,20.0,9.0,0.866977,12.0,130.9,15.172355,2.37068
3279,5208,13,2021,"Patterson, Cordarrelle",RB,atl,h,tam,0.0,7400,...,16.56,8.278781,17.177456,5.0,3.0,-0.865361,10.0,165.6,15.879414,2.145867
3411,5905,13,2021,"Williams, Javonte",RB,den,a,kan,0.0,5700,...,9.6,4.142704,11.851924,25.0,7.0,-0.448934,11.0,105.6,11.178523,1.961144
3334,2930,13,2021,"Freeman, Devonta",RB,bal,a,pit,0.0,6100,...,7.79,5.808509,10.149355,35.0,28.0,0.100751,10.0,77.9,10.300481,1.688603


In [124]:
top_WRs = pdf[pdf.Pos=='WR']
top_WRs.head(10)

Unnamed: 0,GID,Week,Year,Name,Pos,Team,h/a,Oppt,Points,Salary,...,avg_pts,pts_std,ewm5,avg_rank,prev_rank,norm,gp,season_points,Pts Adjusted,Avg Value
3413,5926,13,2021,"Waddle, Jaylen",WR,mia,h,nyg,0.0,6900,...,11.891667,6.965037,16.174466,20.0,1.0,-0.107022,12.0,142.7,16.013932,2.32086
3374,5761,13,2021,"Renfrow, Hunter",WR,lvr,h,was,0.0,6400,...,11.136364,4.246945,12.434619,23.0,8.0,1.161286,11.0,122.5,14.176548,2.215086
3302,5857,13,2021,"Mooney, Darnell",WR,chi,h,ari,0.0,6700,...,10.827273,6.380767,14.991797,26.0,15.0,-0.283654,11.0,119.1,14.566315,2.174077
3452,5835,13,2021,"Jefferson, Justin",WR,min,a,det,0.0,8000,...,15.690909,7.766005,17.520757,5.0,26.0,-0.240635,11.0,172.6,17.159805,2.144976
3277,5552,13,2021,"Kupp, Cooper",WR,lar,h,jac,0.0,9000,...,21.018182,8.111698,18.196918,1.0,14.0,0.303432,11.0,231.2,18.652065,2.072452
3201,5333,13,2021,"Thielen, Adam",WR,min,a,det,0.0,7600,...,14.572727,7.213334,16.095134,7.0,2.0,-0.240635,11.0,160.3,15.734182,2.070287
3337,5754,13,2021,"Johnson, Diontae",WR,pit,h,bal,0.0,7200,...,14.05,3.50563,14.190414,11.0,17.0,0.083274,10.0,140.5,14.315326,1.98824
3633,5485,13,2021,"Hill, Tyreek",WR,kan,h,den,0.0,8500,...,17.327273,11.795261,16.489071,3.0,12.0,-0.033131,11.0,190.6,16.439376,1.934044
3637,5844,13,2021,"Jefferson, Van",WR,lar,h,jac,0.0,5800,...,8.954545,5.861632,10.659039,37.0,10.0,0.303432,11.0,98.5,11.114186,1.916239
3347,5927,13,2021,"Moore, Elijah",WR,nyj,h,phi,0.0,6100,...,9.91,8.781983,13.735747,34.0,39.0,-1.566218,9.0,99.1,11.38642,1.866626


In [125]:
top_TEs = pdf[pdf.Pos=='TE']
top_TEs.head(10)

Unnamed: 0,GID,Week,Year,Name,Pos,Team,h/a,Oppt,Points,Salary,...,avg_pts,pts_std,ewm5,avg_rank,prev_rank,norm,gp,season_points,Pts Adjusted,Avg Value
3640,4569,13,2021,"Ertz, Zach",TE,ari,a,chi,0.0,5500,...,8.745455,6.944979,12.969694,9.0,1.0,-0.978455,11.0,96.2,11.502011,2.091275
3607,4582,13,2021,"Kelce, Travis",TE,kan,h,den,0.0,7300,...,13.636364,6.019514,13.932032,1.0,4.0,-1.297353,11.0,150.0,11.986002,1.641918
3499,4721,13,2021,"Andrews, Mark",TE,bal,a,pit,0.0,7300,...,12.736364,8.736049,13.03415,2.0,6.0,-0.714876,11.0,140.1,11.961836,1.638608
3312,4750,13,2021,"Knox, Dawson",TE,buf,h,nwe,0.0,5900,...,11.222222,5.984099,11.929618,4.0,3.0,-1.539781,9.0,101.0,9.619946,1.630499
3567,4494,13,2021,"Gronkowski, Rob",TE,tam,a,atl,0.0,7000,...,12.716667,8.752466,11.541053,3.0,5.0,-0.311373,5.0,76.3,11.073993,1.581999
3365,4703,13,2021,"Kittle, George",TE,sfo,a,sea,0.0,6300,...,9.6125,5.336515,8.443172,6.0,41.0,0.313407,8.0,76.9,8.913282,1.414807
3245,4656,13,2021,"Uzomah, C.J.",TE,cin,h,lac,0.0,5200,...,7.1,8.360861,4.919997,17.0,38.0,1.406771,10.0,78.1,7.030154,1.351953
3288,4742,13,2021,"Schultz, Dalton",TE,dal,a,nor,0.0,6200,...,9.381818,6.335427,9.232804,7.0,8.0,-0.711622,11.0,103.2,8.16537,1.316995
3407,4437,13,2021,"Cook, Jared",TE,lac,a,cin,0.0,5100,...,6.690909,4.287529,6.524239,18.0,16.0,-0.011999,11.0,73.6,6.50624,1.275733
3366,4705,13,2021,"Everett, Gerald",TE,sea,h,sfo,0.0,4900,...,6.1,4.04753,7.855146,23.0,9.0,-1.199732,9.0,54.9,6.055548,1.235826


In [126]:
# players[players.Pos == 'Def'].head(3)
top_Defs = pdf[pdf.Pos=='Def']
top_Defs.head(10)

Unnamed: 0,GID,Week,Year,Name,Pos,Team,h/a,Oppt,Points,Salary,...,avg_pts,pts_std,ewm5,avg_rank,prev_rank,norm,gp,season_points,Pts Adjusted,Avg Value
3662,7016,13,2021,Miami,Def,mia,h,nyg,0.0,4000,...,7.5,7.868233,13.649916,9.0,1.0,0.274114,10.0,90.0,14.061086,3.515272
3658,7015,13,2021,Kansas City,Def,kan,h,den,0.0,3300,...,5.727273,4.713617,9.536074,16.0,4.0,0.416982,9.0,63.0,10.161546,3.079256
3672,7029,13,2021,Tampa Bay,Def,tam,a,atl,0.0,4300,...,8.454545,7.104416,9.813306,5.0,7.0,2.179019,9.0,93.0,13.081834,3.042287
3664,7018,13,2021,New England,Def,nwe,a,buf,0.0,5000,...,11.25,8.136618,16.369166,1.0,5.0,-1.106943,12.0,135.0,14.708752,2.94175
3666,7020,13,2021,New York G,Def,nyg,a,mia,0.0,3800,...,6.454545,5.627853,9.071263,13.0,6.0,0.670969,11.0,71.0,10.077716,2.65203
3653,7010,13,2021,Denver,Def,den,a,kan,0.0,4100,...,8.272727,6.512924,9.989863,6.0,3.0,0.131246,11.0,91.0,10.186731,2.484569
3651,7007,13,2021,Cincinnati,Def,cin,h,lac,0.0,3900,...,6.818182,5.564498,10.143787,12.0,2.0,-0.344981,10.0,75.0,9.626316,2.468286
3668,7023,13,2021,Philadelphia,Def,phi,a,nyj,0.0,4800,...,7.0,6.980492,8.187098,11.0,12.0,1.512302,10.0,84.0,10.455551,2.17824
3655,7032,13,2021,Houston,Def,hou,h,ind,0.0,3300,...,5.545455,5.888355,8.332258,18.0,11.0,-1.118848,9.0,61.0,6.653985,2.016359
3656,7013,13,2021,Indianapolis,Def,ind,a,hou,0.0,4500,...,8.0,4.242641,7.130895,7.0,15.0,1.226566,12.0,96.0,8.970745,1.993499


# Team

In [42]:
from pulp import *

class PulpSelection():
    def __init__(self, df, pts_col="Pts Adjusted", sal_col="Salary", name_col="Name", salary_cap=60000):
        self.df = df
        self.vars = self.populate_vars(pts_col, sal_col, name_col)
        self.model = self.optimize(salary_cap)
        self.players = self.player_names()
        self.selection = self.selection_df()
        
    def populate_vars(self, pts_col, sal_col, name_col):
        df = self.df
        salaries = {}
        points = {}
        for pos in df.Pos.unique():
            available_pos = df[df.Pos == pos]
            salary = list(available_pos[[name_col,sal_col]].set_index(name_col).to_dict().values())[0]
            point = list(available_pos[[name_col,pts_col]].set_index(name_col).to_dict().values())[0]
            salaries[pos] = salary
            points[pos] = point
            
        self.salaries = salaries
        self.points = points
        return {k: LpVariable.dict(k, v, cat="Binary") for k, v in points.items()}

    def player_names(self):
        players = {}
        for d in self.vars.values():
            for k,v in d.items():
                players[v] = k
        return players
            
    pos_num_available = {
        "QB": 1,
        "RB": 2,
        "WR": 3,
        "TE": 1,
        "Def": 1
    }

    def optimize(self, salary_cap):
        prob = LpProblem("FFModel", LpMaximize)
        rewards = []
        costs = []

        for pos, players in self.vars.items():
            costs += lpSum([self.salaries[pos][i] * self.vars[pos][i] for i in players])
            rewards += lpSum([self.points[pos][i] * self.vars[pos][i] for i in players])
            if pos in ['RB','WR','TE']:
                prob += lpSum([self.vars[pos][i] for i in players]) <= self.pos_num_available[pos]+1
                prob += lpSum([self.vars[pos][i] for i in players]) >= self.pos_num_available[pos]
            else:
                prob += lpSum([self.vars[pos][i] for i in players]) == self.pos_num_available[pos]
        prob += lpSum(prob.variables()) <= 9    # flex -> max of 9 total players

        prob += lpSum(rewards)
        prob += lpSum(costs) <= salary_cap
        print(prob.solve())
        return prob
    
    def selection_df(self):
        selections = [self.players[p] for p in self.model.variables() if p.varValue > 0]
        team = self.df[self.df.Name.isin(selections)]
        return team

In [44]:
pts_col = 'Pts Adjusted'
team = PulpSelection(zdf, pts_col=pts_col).selection
print("\nTotal points: {}     Total salary: {}\n".format(sum(team[pts_col]), sum(team['Salary'])))
team

1

Total points: 156.5019790722527     Total salary: 59600



Unnamed: 0,GID,Week,Year,Name,Pos,Team,h/a,Oppt,Points,Salary,...,avg_pts,pts_std,ewm5,avg_rank,prev_rank,norm,gp,season_points,Pts Adjusted,Predictions
3601,5562,14,2021,"Kamara, Alvin",RB,nor,a,nyj,0.0,9000,...,17.75,7.664575,19.266646,3.0,9.0,3.281758,8.0,142.0,24.189282,18.610539
3652,5831,14,2021,"Lamb, CeeDee",WR,dal,a,was,0.0,7800,...,13.972727,9.491269,13.34443,11.0,14.0,0.966293,11.0,153.7,14.793869,13.132159
3744,5754,14,2021,"Johnson, Diontae",WR,pit,a,min,0.0,7500,...,15.181818,5.015141,18.341602,7.0,2.0,1.89737,11.0,167.0,21.187657,12.677518
3774,4703,14,2021,"Kittle, George",TE,sfo,a,cin,0.0,7100,...,12.444444,9.85382,17.566089,3.0,1.0,-0.022663,9.0,112.0,17.532094,8.932914
3852,1552,14,2021,"Herbert, Justin",QB,lac,h,nyg,0.0,8400,...,23.481667,9.902306,24.9501,3.0,3.0,-0.073922,12.0,281.78,24.839218,19.221315
3854,5835,14,2021,"Jefferson, Justin",WR,min,h,pit,0.0,8500,...,16.891667,8.49293,21.746407,3.0,1.0,0.368075,12.0,202.7,22.298519,15.206543
3891,5534,14,2021,"Fournette, Leonard",RB,tam,h,buf,0.0,7600,...,15.8,10.106344,20.855594,7.0,8.0,-0.537737,12.0,189.6,20.048988,17.751004
4061,7015,14,2021,Kansas City,Def,kan,h,lvr,0.0,3700,...,6.666667,5.548683,12.043374,11.0,2.0,-0.287348,10.0,80.0,11.612352,7.813064


In [36]:
pts_col = 'Predictions'
team = PulpSelection(zdf, pts_col=pts_col).selection
print("\nTotal points: {}     Total salary: {}\n".format(sum(team[pts_col]), sum(team['Salary'])))
team

1

Total points: 128.62797146158192     Total salary: 59700



Unnamed: 0,GID,Week,Year,Name,Pos,Team,h/a,Oppt,Points,Salary,...,avg_pts,pts_std,ewm5,avg_rank,prev_rank,norm,gp,season_points,Pts Adjusted,Predictions
3683,5208,14,2021,"Patterson, Cordarrelle",RB,atl,a,car,0.0,7800,...,16.063636,8.024621,15.127943,5.0,20.0,-1.185848,11.0,176.7,13.349172,17.750691
3724,5206,14,2021,"Hopkins, DeAndre",WR,ari,h,lar,0.0,7300,...,13.144444,6.23741,11.678963,16.0,28.0,-0.015995,9.0,118.3,11.654971,12.882616
3840,1529,14,2021,"Allen, Josh",QB,buf,a,tam,0.0,8800,...,23.403333,8.796048,19.68741,4.0,27.0,0.119781,12.0,280.84,19.867081,24.57041
3854,5835,14,2021,"Jefferson, Justin",WR,min,h,pit,0.0,8500,...,16.891667,8.49293,21.746407,3.0,1.0,0.368075,12.0,202.7,22.298519,15.206543
3891,5534,14,2021,"Fournette, Leonard",RB,tam,h,buf,0.0,7600,...,15.8,10.106344,20.855594,7.0,8.0,-0.537737,12.0,189.6,20.048988,17.751004
3964,4494,14,2021,"Gronkowski, Rob",TE,tam,h,buf,0.0,7000,...,13.728571,8.426489,14.465177,1.0,2.0,-1.41734,6.0,96.1,12.339167,13.21588
3988,5424,14,2021,"Diggs, Stefon",WR,buf,a,tam,0.0,8200,...,13.941667,5.356298,13.656105,12.0,38.0,-0.507138,12.0,167.3,12.895398,17.66156
4071,7028,14,2021,Seattle,Def,sea,a,hou,0.0,4500,...,5.5,3.580249,7.000827,17.0,6.0,1.467307,11.0,66.0,9.201788,9.589268


In [46]:
team[['Pos','Name','Salary']].to_csv(f'weekly_picks/Adam_week{wk}_picks.csv', index=False)