In [None]:
# model -> project points per player
#-    Stats
#     Usage
#     Percentage of team scoring
#-    Injuries
#-    Bye week
#     Injuries to Key Teammates / Individual Matchups
#-    Opposing Team (record, defense, injuries, etc.)
#-    Home/Away
#     Weather
#     Expert Picks
#     Other Fantasy Projections

# maximize value (pts/salary)

In [1]:
%matplotlib inline

%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np

In [3]:
def format_stats(df):
    df = df.dropna()
    df = df[df['Week'] != 'Week']  # remove 'filler' rows
    # update historical teams
    m = {'oak':'lvr', 'sdg':'lac', 'stl':'lar'}
    df.replace({'Oppt': m, 'Team': m}, inplace=True)
    df = df[(df.Oppt != '-') & (df.Pos != 'PK')]
    df = df.astype({'Week':int, 'Year':int, 'GID':int, 'Name':'string', 'Pos':'string', 'Team':'string',
                    'h/a':'string', 'Oppt':'string', 'FD points':float, 'FD salary':int})
    return df.rename(columns={'FD points': 'Points', 'FD salary': 'Salary'})

In [4]:
def add_averages(df):
    df.sort_values(['Year','Week'], inplace=True)
    df['prev_pts'] = df.groupby(['Year','GID'])['Points'].shift() #.fillna(0.0)
    df['avg_pts'] = df.groupby(['Year','GID'])['prev_pts'].transform(lambda x: x.expanding(3).mean())
    df['pts_std'] = df.groupby(['Year','GID'])['prev_pts'].transform(lambda x: x.expanding(3).std())
#     df['last3_avg'] = df.groupby(['Year','GID'])['prev_pts'].transform(lambda x: x.rolling(3,3).mean())
    df['ewm5'] = df.groupby(['Year','GID'])['prev_pts'].transform(lambda x: x.ewm(span=5, min_periods=3).mean())
    df['avg_rank'] = df.groupby(['Year','Week','Pos'])['avg_pts'].rank("dense", ascending=False, na_option='bottom')
    df['prev_rank'] = df.groupby(['Year','Week','Pos'])['prev_pts'].rank("dense", ascending=False, na_option='bottom')
    return df

In [5]:
# fix name differences over time
def use_most_recent_name(df):
    newest_names = df.groupby('GID').agg({'Name':'last'}).astype('string')
    df = df.set_index('GID')
    df.update(newest_names)
    return df.astype({'Name':'string'}).reset_index()

In [6]:
def opp_pos_norm(df):
    o = df.groupby(['Year','Week','Oppt','Pos']).agg({'Points': 'sum'})
    o['prev_pts'] = o.groupby(['Year','Oppt','Pos'])['Points'].shift() #.fillna(0.0)
    o['running_avg'] = o.groupby(['Year','Oppt','Pos'])['prev_pts'].transform(lambda x: x.expanding().mean())
    o['running_league_avg'] = o.groupby(['Year','Week','Pos'])['running_avg'].transform('mean')
    o['running_league_std'] = o.groupby(['Year','Week','Pos'])['running_avg'].transform('std')
    o['norm'] = (o['running_avg']-o['running_league_avg'])/o['running_league_std']
    ndf = df.join(o[['norm']], on=['Year','Week','Oppt','Pos'], how='left')
    return ndf.astype({'Pos':'string', 'Oppt':'string'})

In [7]:
def points_adjusted(df, col, mult_factor=2):
    df['Pts Adjusted'] = df[col] + (mult_factor*df['norm'])
    return df

In [75]:
# def remove_potential_injuries(df):
#     df['inj'] = df.groupby(['Year','GID'])['Points'].transform(lambda x: x.shift(-1)==0.0)
#     return df[~df.inj].drop('inj', axis=1)

In [8]:
def add_gp_and_season_totals(df):
    df['gp'] = df.groupby(['Year','GID'])['Points'].transform(lambda x: x.shift()>0.0)
    df['gp'] = df.groupby(['Year','GID'])['gp'].transform(lambda x: x.expanding().sum())
    df['season_points'] = df.groupby(['Year','GID'])['Points'].transform(lambda x: x.shift().expanding().sum())
    return df

In [9]:
def enrich_df(res):
    res = res.copy()
    res = add_averages(res)
    res = opp_pos_norm(res)
#     res = remove_potential_injuries(res)
    res = add_gp_and_season_totals(res)
    res = res[res.Week > 4]
    res = use_most_recent_name(res)
    res = res.fillna(0.0)
    res = points_adjusted(res, 'ewm5', mult_factor=1.5)
    return res

## Historical DF

In [None]:
# # last 10 years
# for year in range(2010,2021):
#     f = open(f"historical_fantasy_data/{year}_fantasy_points.txt", "w", newline='')
#     for week in range(1,18):
#         url = "http://rotoguru1.com/cgi-bin/fyday.pl?week="+str(week)+"&game=fd&year="+str(year)+"&scsv=1"
#         r = requests.get(url)
#         soup = BeautifulSoup(r.content, 'html.parser')
#         data = soup.find_all('pre')[-1]
#         f.write(data.text)
#     f.close()

In [9]:
# enrich and combine into one dataframe
def historical_df():
    res = pd.DataFrame()
    for year in range(2010,2021):
        df = format_stats(pd.read_csv(f"historical_fantasy_data/{year}_fantasy_points.txt", delimiter=';'))
        res = res.append(df)
    return res

In [92]:
res = historical_df()
rdf = enrich_df(res)
rdf

Unnamed: 0,GID,Week,Year,Name,Pos,Team,h/a,Oppt,Points,Salary,prev_pts,avg_pts,pts_std,ewm5,avg_rank,prev_rank,norm,Pts Adjusted
0,1232,5,2011,"Roethlisberger, Ben",QB,pit,h,ten,28.12,7800,8.34,12.305000,4.206166,11.841538,22.0,23.0,-1.216577,10.016673
1,1241,5,2011,"Cassel, Matt",QB,kan,a,ind,27.48,7500,16.40,9.680000,7.210455,11.754462,24.0,18.0,-0.265659,11.355973
2,1155,5,2011,"Vick, Michael",QB,phi,a,buf,25.60,9600,31.14,19.660000,9.572906,21.121538,6.0,3.0,1.467186,23.322317
3,1378,5,2011,"Newton, Cam",QB,car,h,nor,24.66,8500,33.46,27.435000,8.427216,27.418154,3.0,2.0,0.327539,27.909462
4,1225,5,2011,"Manning, Eli",QB,nyg,h,sea,24.30,8300,18.54,18.760000,4.994851,19.539385,8.0,10.0,-0.394480,18.947665
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
51102,7011,17,2020,Detroit,Def,det,h,min,-1.00,3400,4.00,2.400000,3.112188,2.003185,28.0,9.0,0.664761,3.000326
51103,7015,17,2020,Kansas City,Def,kan,h,lac,-1.00,4200,7.00,7.333333,7.016986,6.604938,7.0,6.0,0.294807,7.047149
51104,7002,17,2020,Atlanta,Def,atl,a,tam,-1.00,3300,5.00,4.866667,6.801961,5.482783,19.0,8.0,-0.753396,4.352689
51105,7006,17,2020,Chicago,Def,chi,h,gnb,-1.00,4100,6.00,6.200000,5.414267,6.656157,14.0,7.0,-1.709110,4.092492


In [76]:
# check for null values
rdf[rdf.isna().any(axis=1)]

Unnamed: 0,GID,Week,Year,Name,Pos,Team,h/a,Oppt,Points,Salary,prev_pts,avg_pts,pts_std,ewm5,avg_rank,prev_rank,norm,Pts Adjusted


## Encode Categorical Variables

In [26]:
# numerical encoding for categorical variables
def encode_cats(res):
    res = res.copy()
    team_idx = {p:i for i,p in enumerate(sorted(res.Team.unique()))}
    pos_idx = {p:i for i,p in enumerate(sorted(res.Pos.unique()))}
    res.replace({'h/a': {'h': 1, 'a': 0}, 'Pos': pos_idx, 'Team': team_idx, 'Oppt': team_idx}, inplace=True)
    return res

In [84]:
tdf = encode_cats(rdf)

In [44]:
# null value check per column
tdf.isna().any()

GID             False
Week            False
Year            False
Name            False
Pos             False
Team            False
h/a             False
Oppt            False
Points          False
Salary          False
prev_pts        False
avg_pts         False
pts_std         False
ewm5            False
avg_rank        False
prev_rank       False
norm            False
Pts Adjusted    False
dtype: bool

In [None]:
# res['Year'] = pd.Categorical(res['Year'])
# res['Week'] = pd.Categorical(res['Week'])
# res['GID'] = pd.Categorical(res['GID'])
# res['Team'] = pd.Categorical(res['Team'])
# res['h/a'] = pd.Categorical(res['h/a'])
# res['Oppt'] = pd.Categorical(res['Oppt'])
# res['Pos'] = pd.Categorical(res['Pos'])

### Plot Skewness

In [None]:
import seaborn as sns
from scipy import stats

In [None]:
# Figure
plt.figure(figsize=(12, 4))
plt.suptitle("Visualising the skewness of the Points target variable")

# Distribution Plot
plt.subplot(1, 2, 1)
sns.histplot(res["Points"], stat = "density", kde = True)
plt.title('Distribution Plot')

# Probability Plot
plt.subplot(1, 2, 2)
stats.probplot(res['Points'], plot=plt)

plt.tight_layout()
plt.show()
plt.clf()

## Regression Model Comparison

In [25]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error#, mean_squared_log_error as MSLE

from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LassoCV, LinearRegression, RidgeCV, Ridge, Lasso
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, BaggingRegressor, \
                             ExtraTreesRegressor, GradientBoostingRegressor

from xgboost import XGBRegressor

SEED = 42

In [85]:
tdf = tdf.set_index('Name')

y = tdf.Points
X = tdf.drop(['Points'], axis=1)

train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.20)

In [47]:
# current heuristic
print ("Mean Absolute Error using Decision Tress :", mean_absolute_error(test_y, test_X['Pts Adjusted']))
print ("Mean Squared Error using Decision Tress :", mean_squared_error(test_y, test_X['Pts Adjusted']))

Mean Absolute Error using Decision Tress : 4.181625237478876
Mean Squared Error using Decision Tress : 34.95471941283859


In [None]:
model = DecisionTreeRegressor(criterion='mse')  
model.fit(train_X, train_y) 
preds = model.predict(test_X)
print ("Mean Absolute Error using Decision Tress :", mean_absolute_error(test_y, preds))
print ("Mean Squared Error using Decision Tress :", mean_squared_error(test_y, preds))

In [None]:
feat_importance = {col: imp for col,imp in zip(test_X.columns, model.feature_importances_)}
feat_importance

In [48]:
model = RandomForestRegressor(criterion='mse')  
model.fit(train_X, train_y) 
preds = model.predict(test_X)
print ("Mean Absolute Error using Decision Tress :", mean_absolute_error(test_y, preds))
print ("Mean Squared Error using Decision Tress :", mean_squared_error(test_y, preds))

Mean Absolute Error using Decision Tress : 3.977296380356094
Mean Squared Error using Decision Tress : 30.96290914318137


In [66]:
# Only Imp Feats
model = RandomForestRegressor(criterion='mse')  
model.fit(train_X, train_y) 
preds = model.predict(test_X)
print ("Mean Absolute Error using Decision Tress :", mean_absolute_error(test_y, preds))
print ("Mean Squared Error using Decision Tress :", mean_squared_error(test_y, preds))

Mean Absolute Error using Decision Tress : 3.9499088045392288
Mean Squared Error using Decision Tress : 30.320428562758753


In [67]:
feat_importance = {col: imp for col,imp in zip(test_X.columns, model.feature_importances_)}
feat_importance

{'GID': 0.08669241900429432,
 'Salary': 0.27739472636389734,
 'prev_pts': 0.07393348051399191,
 'avg_pts': 0.21744934257092455,
 'pts_std': 0.07873289599756898,
 'ewm5': 0.10022417996242758,
 'norm': 0.07760390047039901,
 'Pts Adjusted': 0.08796905511649634}

In [None]:
# Ridge Regression (L2 Regularization)
ridge = RidgeCV(normalize=False)
ridge.fit(train_X, train_y)
best_alpha = ridge.alpha_
print(best_alpha)
preds = ridge.predict(test_X)
print ("Mean Absolute Error using Decision Tress :", mean_absolute_error(test_y, preds))
print ("Mean Squared Error using Decision Tress :", mean_squared_error(test_y, preds))

In [None]:
# Lasso Regression (L1 Regularization)
lasso = LassoCV(alphas=None, max_iter=100000, normalize=True)
lasso.fit(train_X, train_y)
best_alpha = lasso.alpha_
print(best_alpha)
preds = lasso.predict(test_X)
print ("Mean Absolute Error using Decision Tress :", mean_absolute_error(test_y, preds))
print ("Mean Squared Error using Decision Tress :", mean_squared_error(test_y, preds))

In [None]:
forest = RandomForestRegressor(random_state = SEED)
forest.fit(train_X, train_y)
preds = forest.predict(test_X)
print ("Mean Absolute Error using Decision Tress :", mean_absolute_error(test_y, preds))
print ("Mean Squared Error using Decision Tress :", mean_squared_error(test_y, preds))

In [None]:
ada = AdaBoostRegressor(random_state = SEED)
ada.fit(train_X, train_y)
preds = ada.predict(test_X)
print ("Mean Absolute Error using Decision Tress :", mean_absolute_error(test_y, preds))
print ("Mean Squared Error using Decision Tress :", mean_squared_error(test_y, preds))

In [None]:
bagging = BaggingRegressor(random_state = SEED)
bagging.fit(train_X, train_y)
preds = bagging.predict(test_X)
print ("Mean Absolute Error using Decision Tress :", mean_absolute_error(test_y, preds))
print ("Mean Squared Error using Decision Tress :", mean_squared_error(test_y, preds))

In [None]:
ETR = ExtraTreesRegressor(random_state = SEED)
ETR.fit(train_X, train_y)
preds = ETR.predict(test_X)
print ("Mean Absolute Error using Decision Tress :", mean_absolute_error(test_y, preds))
print ("Mean Squared Error using Decision Tress :", mean_squared_error(test_y, preds))

In [86]:
GBR = GradientBoostingRegressor(random_state = SEED)
GBR.fit(train_X, train_y)
preds = GBR.predict(test_X)
print ("Mean Absolute Error using Decision Tress :", mean_absolute_error(test_y, preds))
print ("Mean Squared Error using Decision Tress :", mean_squared_error(test_y, preds))

Mean Absolute Error using Decision Tress : 3.8428768582090345
Mean Squared Error using Decision Tress : 29.371090708613796


In [80]:
XGB = XGBRegressor(random_state = SEED)
XGB.fit(train_X, train_y)
preds = XGB.predict(test_X)
print ("Mean Absolute Error using Decision Tress :", mean_absolute_error(test_y, preds))
print ("Mean Squared Error using Decision Tress :", mean_squared_error(test_y, preds))

Mean Absolute Error using Decision Tress : 3.864580542043452
Mean Squared Error using Decision Tress : 30.434276268944544


## This Year

In [10]:
from bs4 import BeautifulSoup
import requests

In [11]:
year = 2021

f = open(f"2021_fantasy_data/{year}_fantasy_points.txt", "w", newline='')
for week in range(1,19):
    url = "http://rotoguru1.com/cgi-bin/fyday.pl?week="+str(week)+"&game=fd&year="+str(year)+"&scsv=1"
    r = requests.get(url)
    soup = BeautifulSoup(r.content, 'html.parser')
    data = soup.find_all('pre')[-1]
    f.write(data.text)
f.close()

In [12]:
year_df = format_stats(pd.read_csv(f"2021_fantasy_data/{year}_fantasy_points.txt", delimiter=';'))
year_df

Unnamed: 0,Week,Year,GID,Name,Pos,Team,h/a,Oppt,Points,Salary
0,1,2021,1537,"Murray, Kyler",QB,ari,a,ten,34.56,8400
1,1,2021,1523,"Mahomes II, Patrick",QB,kan,h,cle,33.28,8800
2,1,2021,1490,"Goff, Jared",QB,det,h,sfo,29.92,6500
3,1,2021,1465,"Winston, Jameis",QB,nor,h,gnb,29.62,6700
4,1,2021,1131,"Brady, Tom",QB,tam,h,dal,29.16,7800
...,...,...,...,...,...,...,...,...,...,...
6148,14,2021,7024,Pittsburgh,Def,pit,a,min,0.00,3800
6149,14,2021,7022,Las Vegas,Def,lvr,a,kan,-1.00,3100
6150,14,2021,7032,Houston,Def,hou,h,sea,-1.00,3400
6151,14,2021,7020,New York G,Def,nyg,a,lac,-2.00,3800


### Test historical regression predictions on this year's data

In [354]:
ydf = encode_cats(enrich_df(year_df))

tst_y = ydf.Points
tst_X = ydf.drop(['Points'], axis=1).set_index('Name')

In [355]:
# current heuristic
print ("Mean Absolute Error using Decision Tress :", mean_absolute_error(tst_y, tst_X['Pts Adjusted']))
print ("Mean Squared Error using Decision Tress :", mean_squared_error(tst_y, tst_X['Pts Adjusted']))

Mean Absolute Error using Decision Tress : 4.194664500279587
Mean Squared Error using Decision Tress : 34.22182014092551


In [356]:
# Gradient Boosting Regression
preds = GBR.predict(tst_X)
print ("Mean Absolute Error using Decision Tress :", mean_absolute_error(tst_y, preds))
print ("Mean Squared Error using Decision Tress :", mean_squared_error(tst_y, preds))

Mean Absolute Error using Decision Tress : 3.8960175521083538
Mean Squared Error using Decision Tress : 29.02786616611834


## Re-train regression on all data

In [357]:
full = res.append(year_df)
full = full.sort_values(['Year','Week']).reset_index()
full

Unnamed: 0,index,Week,Year,GID,Name,Pos,Team,h/a,Oppt,Points,Salary
0,0,1,2011,1131,"Brady, Tom",QB,nwe,a,mia,35.98,9200
1,1,1,2011,1309,"Henne, Chad",QB,mia,h,nwe,35.54,6800
2,2,1,2011,1378,"Newton, Cam",QB,car,a,ari,31.68,6700
3,3,1,2011,1151,"Brees, Drew",QB,nor,a,gnb,29.06,8900
4,4,1,2011,1242,"Fitzpatrick, Ryan",QB,buf,a,kan,24.62,7900
...,...,...,...,...,...,...,...,...,...,...,...
72243,4874,11,2021,7031,Washington,Def,was,a,car,1.00,3500
72244,4875,11,2021,7017,Minnesota,Def,min,h,gnb,1.00,3400
72245,4876,11,2021,7030,Tennessee,Def,ten,h,hou,0.00,4600
72246,4877,11,2021,7026,LA Chargers,Def,lac,h,pit,-1.00,4200


In [354]:
fdf = encode_cats(enrich_df(year_df))

full_y = fdf.Points
full_X = fdf.drop(['Points'], axis=1).set_index('Name')

In [355]:
# train GBR on full training set
GBR = GradientBoostingRegressor(random_state = SEED)
GBR.fit(full_X, full_y)

GradientBoostingRegressor(random_state=42)

# Upcoming week

## Download and Format data

In [13]:
wk = max(year_df.Week)+1
wk

15

In [18]:
# !pip install selenium
# download chrome driver from: https://chromedriver.chromium.org/home
# unzip chromedriver.zip
# $ mv <Path>/chromedriver /usr/local/bin/.
# $ xattr -d com.apple.quarantine /usr/local/bin/chromedriver 

In [14]:
from selenium import webdriver

In [15]:
driver = webdriver.Chrome()
driver.get("https://www.dailyfantasycafe.com/tools/salary-exploitation/nfl")
elem = driver.find_element('id', "DataTables_Table_0")
week_df = pd.read_html(elem.get_attribute('outerHTML'))[0]
driver.close()

In [16]:
week_df

Unnamed: 0,Name,Position,Team,Opponent Opp,DraftKings,FanDuel,Yahoo
0,A.J. Brown,WR,TEN,@PIT,"$6,700","$6,500",-
1,A.J. Green,WR,ARI,@DET,"$4,900","$5,700",$14
2,Aaron Fuller,WR,SEA,@LAR,"$3,000","$4,500",-
3,Aaron Jones,RB,GB,@BAL,"$6,600","$7,500",$27
4,Aaron Rodgers,QB,GB,@BAL,"$7,500","$7,600",$32
...,...,...,...,...,...,...,...
703,Zach Gentry,TE,PIT,TEN,"$2,600","$4,300",$10
704,Zach Pascal,WR,IND,NE,"$3,400","$5,300",$10
705,Zach Wilson,QB,NYJ,@MIA,"$5,200","$6,500",$20
706,Zack Moss,RB,BUF,CAR,"$4,400","$5,200",$10


In [17]:
def format_salary_df(df):
    df = df[df.FanDuel!='-']
    df = df[df.Position.isin(['QB','WR','RB','TE','D'])]
    df = df.rename(columns={'Position':'Pos', 'Opponent  Opp':'Oppt'})
    df.loc[df.Pos=='D', 'Pos'] = 'Def'
    df['Salary'] = df['FanDuel'].str.replace('[\$,]', '', regex=True).astype(int)
    return df[['Name','Pos','Team','Oppt','Salary']]

def last_name_first(df):
    s = df[df['Pos']!='Def']['Name']
    s = s.str.split(' ', expand=True).fillna('')
    s['last_name'] = s.iloc[:, 1:].agg(' '.join, axis=1).str.strip()
    s['join_name'] = s[['last_name',0]].agg(', '.join, axis=1)
    df.loc[df.Pos!='Def', 'Name'] = s.join_name
    return df

def add_home_away_update_team_names(df):
    team_names = {'gb':'gnb','jax':'jac','kc':'kan','lv':'lvr','ne':'nwe','no':'nor','sf':'sfo','tb':'tam'}
    df['h/a'] = df['Oppt'].apply(lambda x: 'a' if '@' in x else 'h')
    df['Oppt'] = df.Oppt.str.replace('@','').str.lower().replace(team_names)
    df['Team'] = df.Team.str.lower().replace(team_names)
    return df

def add_GIDs(salary, df):    
    d = df[df.Pos=='Def'].groupby(['GID'], as_index=False).agg('last')
    s = salary[salary.Pos=='Def'].drop(columns=['Name']).copy()
    s = s.join(d[['GID','Team','Name']].set_index('Team'), on='Team', how='inner')

    d = df[df.Pos!='Def'].groupby(['GID'], as_index=False).agg('last')
    p = salary[salary.Pos!='Def'].copy()
    p = p.join(d[['GID','Name']].set_index('Name'), on='Name', how='inner')
    return pd.concat([p,s])

def reformat_next_week_df(week_df, year_df, week, year='2021'):
    df = week_df.copy()
    df = format_salary_df(df)
    df = last_name_first(df)
    df = add_home_away_update_team_names(df)
    df = add_GIDs(df, year_df)
    df['Week'] = week
    df['Year'] = year
    df['Points'] = 0.0
    df = df.astype({'Week':int, 'Year':int, 'GID':int, 'Name':'string', 'Pos':'string', 'Team':'string',
                    'h/a':'string', 'Oppt':'string', 'Points':float, 'Salary':int})
    return df[['Week','Year','GID','Name','Pos','Team','h/a','Oppt','Points','Salary']]

In [18]:
wdf = reformat_next_week_df(week_df, year_df, wk)
wdf

Unnamed: 0,Week,Year,GID,Name,Pos,Team,h/a,Oppt,Points,Salary
0,15,2021,5728,"Brown, A.J.",WR,ten,a,pit,0.0,6500
1,15,2021,5091,"Green, A.J.",WR,ari,a,det,0.0,5700
3,15,2021,5543,"Jones, Aaron",RB,gnb,a,bal,0.0,7500
4,15,2021,1252,"Rodgers, Aaron",QB,gnb,a,bal,0.0,7600
5,15,2021,5412,"Humphries, Adam",WR,was,a,phi,0.0,5200
...,...,...,...,...,...,...,...,...,...,...
609,15,2021,7027,San Francisco,Def,sfo,h,atl,0.0,4300
616,15,2021,7028,Seattle,Def,sea,a,lar,0.0,3600
632,15,2021,7029,Tampa Bay,Def,tam,h,nor,0.0,4300
640,15,2021,7030,Tennessee,Def,ten,a,pit,0.0,4100


## Add upcoming week to year

In [19]:
ydf = pd.concat([year_df, wdf], ignore_index=True)
ydf = enrich_df(ydf)
ydf

Unnamed: 0,GID,Week,Year,Name,Pos,Team,h/a,Oppt,Points,Salary,prev_pts,avg_pts,pts_std,ewm5,avg_rank,prev_rank,norm,gp,season_points,Pts Adjusted
0,1552,5,2021,"Herbert, Justin",QB,lac,h,cle,42.82,7700,21.28,20.805000,7.277699,22.236308,14.0,12.0,-0.445161,4.0,83.22,21.568566
1,1527,5,2021,"Jackson, Lamar",QB,bal,h,ind,41.88,8200,19.44,22.995000,7.568785,22.231385,9.0,16.0,0.286384,4.0,91.98,22.660961
2,1131,5,2021,"Brady, Tom",QB,tam,h,mia,37.74,8400,11.06,24.635000,9.058482,21.597231,4.0,27.0,-0.378998,4.0,98.54,21.028734
3,1529,5,2021,"Allen, Josh",QB,buf,a,kan,36.50,8800,21.02,23.275000,9.451312,24.415692,7.0,13.0,1.746978,4.0,93.10,27.036159
4,1465,5,2021,"Winston, Jameis",QB,nor,a,was,26.76,7000,13.04,16.630000,8.772389,14.715077,21.0,25.0,2.084038,4.0,66.52,17.841134
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4542,7027,15,2021,San Francisco,Def,sfo,h,atl,0.00,4300,9.00,6.692308,4.460654,8.508936,14.0,8.0,1.729120,12.0,87.00,11.102616
4543,7028,15,2021,Seattle,Def,sea,a,lar,0.00,3600,6.00,5.538462,3.430631,6.665495,18.0,11.0,-0.874274,12.0,72.00,5.354083
4544,7029,15,2021,Tampa Bay,Def,tam,h,nor,0.00,4300,5.00,8.384615,6.602253,8.465656,5.0,12.0,-0.330281,11.0,109.00,7.970234
4545,7030,15,2021,Tennessee,Def,ten,a,pit,0.00,4100,21.00,7.307692,7.543039,9.573248,11.0,3.0,-0.174855,10.0,95.00,9.310965


In [21]:
this_week_df = ydf[ydf.Week==wk].copy()

### Regression

In [27]:
tmp = encode_cats(ydf)

trn = tmp[tmp.Week<wk]
tst = tmp[tmp.Week==wk]

In [28]:
trn_y = trn.Points
trn_x = trn.drop(['Points'], axis=1).set_index('Name')

tst_x = tst.drop(['Points'], axis=1).set_index('Name')

# train GBR on full training set
GBR = GradientBoostingRegressor(random_state = SEED)
GBR.fit(trn_x, trn_y)

GradientBoostingRegressor(random_state=42)

In [29]:
this_week_df['Predictions'] = GBR.predict(tst_x)
this_week_df

Unnamed: 0,GID,Week,Year,Name,Pos,Team,h/a,Oppt,Points,Salary,...,avg_pts,pts_std,ewm5,avg_rank,prev_rank,norm,gp,season_points,Pts Adjusted,Predictions
3587,5728,14,2021,"Brown, A.J.",WR,ten,h,jac,0.0,6500,...,10.350000,8.608297,9.261523,29.0,37.0,0.761455,10.0,103.50,10.403707,10.660973
3588,5091,14,2021,"Green, A.J.",WR,ari,h,lar,0.0,5600,...,8.245455,5.650197,5.654595,39.0,70.0,-0.015995,11.0,90.70,5.630603,5.823628
3589,5543,14,2021,"Jones, Aaron",RB,gnb,h,chi,0.0,7500,...,13.154545,10.427403,8.413905,17.0,50.0,-0.450640,11.0,144.70,7.737945,11.018229
3590,1252,14,2021,"Rodgers, Aaron",QB,gnb,h,chi,0.0,7900,...,20.801818,8.280060,23.963711,7.0,3.0,0.333010,11.0,228.82,24.463225,18.422904
3591,5412,14,2021,"Humphries, Adam",WR,was,h,dal,0.0,5000,...,3.766667,2.754610,3.978453,90.0,43.0,0.740506,12.0,45.20,5.089212,5.463619
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4070,7027,14,2021,San Francisco,Def,sfo,a,cin,0.0,3900,...,6.500000,4.602371,8.261497,12.0,6.0,0.097820,11.0,78.00,8.408227,7.807352
4071,7028,14,2021,Seattle,Def,sea,a,hou,0.0,4500,...,5.500000,3.580249,7.000827,17.0,6.0,1.467307,11.0,66.00,9.201788,9.589268
4072,7029,14,2021,Tampa Bay,Def,tam,h,buf,0.0,3800,...,8.666667,6.813534,10.211943,4.0,4.0,-1.057684,10.0,104.00,8.625417,8.382038
4073,7030,14,2021,Tennessee,Def,ten,h,jac,0.0,4700,...,6.166667,6.603489,3.815495,13.0,15.0,1.595697,9.0,74.00,6.209040,7.925892


### Remove injured players

Download injury report here:

https://www.rotowire.com/football/injury-report.php

In [22]:
def load_injury_report():
    inj = pd.read_csv('2021_fantasy_data/nfl-injury-report.csv')
    inj = inj[inj['Pos'].isin(['QB','RB','WR','TE'])]
    inj = inj[inj['Team']!='FA']
    inj.rename(columns={"Player": "Name"}, inplace=True)
    return last_name_first(inj)

In [23]:
def remove_injuries(df, inj):
    df = df.join(inj[['Name','Status']].set_index('Name'), on='Name', how='left')
    df = df[df['Status'].isna()]
    return df.drop('Status', axis=1)

In [24]:
inj = load_injury_report()
this_week_df = remove_injuries(this_week_df, inj)
this_week_df

Unnamed: 0,GID,Week,Year,Name,Pos,Team,h/a,Oppt,Points,Salary,prev_pts,avg_pts,pts_std,ewm5,avg_rank,prev_rank,norm,gp,season_points,Pts Adjusted
3976,5091,15,2021,"Green, A.J.",WR,ari,a,det,0.0,5700,13.7,8.700000,5.612648,8.357227,38.0,24.0,-0.256484,12.0,104.4,7.972501
3977,5543,15,2021,"Jones, Aaron",RB,gnb,a,bal,0.0,7500,20.0,13.725000,10.136624,12.305934,15.0,10.0,-0.758740,12.0,164.7,11.167824
3979,5412,15,2021,"Humphries, Adam",WR,was,a,phi,0.0,5200,5.4,3.892308,2.675961,4.454749,97.0,59.0,-1.429476,13.0,50.6,2.310535
3983,2553,15,2021,"Peterson, Adrian",RB,sea,a,lar,0.0,5000,7.6,6.150000,2.868798,6.233846,53.0,36.0,-0.365775,4.0,24.6,5.685183
3984,5825,15,2021,"Dillon, AJ",RB,gnb,a,bal,0.0,7500,7.1,9.461538,7.222596,12.245351,33.0,38.0,-0.758740,12.0,123.0,11.107241
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4542,7027,15,2021,San Francisco,Def,sfo,h,atl,0.0,4300,9.0,6.692308,4.460654,8.508936,14.0,8.0,1.729120,12.0,87.0,11.102616
4543,7028,15,2021,Seattle,Def,sea,a,lar,0.0,3600,6.0,5.538462,3.430631,6.665495,18.0,11.0,-0.874274,12.0,72.0,5.354083
4544,7029,15,2021,Tampa Bay,Def,tam,h,nor,0.0,4300,5.0,8.384615,6.602253,8.465656,5.0,12.0,-0.330281,11.0,109.0,7.970234
4545,7030,15,2021,Tennessee,Def,ten,a,pit,0.0,4100,21.0,7.307692,7.543039,9.573248,11.0,3.0,-0.174855,10.0,95.0,9.310965


### Filter by avg points scored or games played

In [25]:
def final_filter(df):
    df = df[df.season_points>df.season_points.mean()] # scored less than the average season points
#     df = df[df.gp>df.gp.mean()] # played less than average games
    return df

In [26]:
this_week_df = final_filter(this_week_df)
len(this_week_df)

166

### Regression Predict

In [392]:
preds = GBR.predict(encode_cats(zdf).drop(['Points'], axis=1).set_index('Name'))

pdf['Predictions'] = preds
pdf

Unnamed: 0,GID,Week,Year,Name,Pos,Team,h/a,Oppt,Points,Salary,prev_pts,total_avg,last3_avg,ewm5,norm,Pts Adjusted,Predictions
3035,5820,12,2021,"Taylor, Jonathan",RB,ind,h,tam,0.0,9800,51.90,22.581818,35.500000,33.139542,-0.753587,32.009162,4.589930
3054,1552,12,2021,"Herbert, Justin",QB,lac,a,den,0.0,7800,35.28,23.378000,26.640000,25.578624,-1.231152,23.731896,11.773828
3089,1527,12,2021,"Jackson, Lamar",QB,bal,h,cle,0.0,8500,16.42,24.420000,24.046667,23.217690,0.199085,23.516318,7.347566
2994,1554,12,2021,"Hurts, Jalen",QB,phi,a,nyg,0.0,8400,30.78,22.985455,22.293333,23.010275,0.235510,23.363540,12.017624
3043,1529,12,2021,"Allen, Josh",QB,buf,a,nor,0.0,8800,18.16,24.044000,17.220000,21.197866,0.526066,21.986966,6.320569
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2870,4792,12,2021,"Thompson, Colin",TE,car,a,mia,0.0,4000,0.00,0.000000,0.000000,0.000000,0.271924,0.407886,6.114157
3209,2820,12,2021,"Jones, Taiwan",RB,buf,a,nor,0.0,4500,0.00,0.000000,0.000000,0.000000,-1.372363,-2.058545,5.873935
2829,4754,12,2021,"Dillon, Brandon",TE,min,a,sfo,0.0,4000,0.00,0.000000,0.000000,0.000000,-1.090116,-1.635173,5.780014
3099,4532,12,2021,"Stocker, Luke",TE,min,a,sfo,0.0,4100,0.00,0.440000,0.333333,0.307109,-1.090116,-1.328064,7.222000


#### Check non-matching columns

In [None]:
m = df.merge(salary, on='Name', how='outer', suffixes=['', '_'], indicator=True)

In [None]:
x = m[m._merge=='right_only']
x[~x['Points'].isna()]

### Top Projected per Pos

In [27]:
def avg_per_dollar(df, col):
    df['Avg Value'] = df[col] / (df['Salary'] / 1000)
    return df.sort_values('Avg Value', ascending=False)

In [28]:
# pdf.sort_values('Predictions', ascending=False, inplace=True)
pdf = avg_per_dollar(this_week_df, 'Pts Adjusted')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Avg Value'] = df[col] / (df['Salary'] / 1000)


In [29]:
top_QBs = pdf[pdf.Pos=='QB']
top_QBs.head(10)

Unnamed: 0,GID,Week,Year,Name,Pos,Team,h/a,Oppt,Points,Salary,...,avg_pts,pts_std,ewm5,avg_rank,prev_rank,norm,gp,season_points,Pts Adjusted,Avg Value
4013,1232,15,2021,"Roethlisberger, Ben",QB,pit,h,ten,0.0,6600,...,15.72,4.89416,19.615035,24.0,5.0,0.744787,12.0,188.64,20.732215,3.141245
4282,1552,15,2021,"Herbert, Justin",QB,lac,h,kan,0.0,8400,...,23.590769,9.488894,24.933314,3.0,6.0,0.688865,13.0,306.68,25.966611,3.091263
4466,1131,15,2021,"Brady, Tom",QB,tam,h,nor,0.0,8500,...,24.166154,8.031286,24.774602,2.0,2.0,0.468372,13.0,314.16,25.47716,2.997313
4248,1452,15,2021,"Garoppolo, Jimmy",QB,sfo,h,atl,0.0,6700,...,16.315,5.332829,17.46701,19.0,13.0,1.559651,12.0,195.78,19.806485,2.956192
4357,1340,15,2021,"Stafford, Matthew",QB,lar,h,sea,0.0,7700,...,21.909231,5.064067,21.912062,5.0,7.0,-0.264207,13.0,284.82,21.515752,2.794253
4312,1415,15,2021,"Cousins, Kirk",QB,min,a,chi,0.0,7300,...,19.743077,5.408431,18.492115,9.0,22.0,0.831865,13.0,256.66,19.739913,2.704098
4482,1488,15,2021,"Siemian, Trevor",QB,nor,a,tam,0.0,7000,...,16.484,6.804563,16.479526,18.0,37.0,0.880597,5.0,82.42,17.800422,2.542917
4320,1537,15,2021,"Murray, Kyler",QB,ari,a,det,0.0,8800,...,23.498,8.174342,21.647521,4.0,14.0,-0.100435,10.0,234.98,21.496869,2.442826
4459,1530,15,2021,"Hill, Taysom",QB,nor,a,tam,0.0,7400,...,9.755556,10.451793,16.742518,39.0,4.0,0.880597,9.0,87.8,18.063414,2.441002
4433,1412,15,2021,"Wilson, Russell",QB,sea,a,lar,0.0,7200,...,17.418,6.690633,17.503772,12.0,11.0,-0.322525,10.0,174.18,17.019984,2.363887


In [30]:
top_RBs = pdf[pdf.Pos=='RB']
top_RBs.head(10)

Unnamed: 0,GID,Week,Year,Name,Pos,Team,h/a,Oppt,Points,Salary,...,avg_pts,pts_std,ewm5,avg_rank,prev_rank,norm,gp,season_points,Pts Adjusted,Avg Value
4235,5905,15,2021,"Williams, Javonte",RB,den,h,cin,0.0,7000,...,11.784615,6.651171,18.188972,20.0,8.0,-0.136931,13.0,153.2,17.983575,2.569082
4260,5820,15,2021,"Taylor, Jonathan",RB,ind,h,nwe,0.0,10200,...,22.338462,11.95321,26.72908,2.0,4.0,-0.435122,13.0,290.4,26.076398,2.55651
4093,5559,15,2021,"Cook, Dalvin",RB,min,a,chi,0.0,9000,...,16.77,8.277956,21.802635,6.0,1.0,-0.183162,10.0,167.7,21.527892,2.391988
4068,5818,15,2021,"Edwards-Helaire, Clyde",RB,kan,a,lac,0.0,6500,...,11.3125,6.313124,13.174259,25.0,11.0,1.139049,8.0,90.5,14.882832,2.289666
3991,5562,15,2021,"Kamara, Alvin",RB,nor,a,tam,0.0,9400,...,18.277778,7.342305,20.373215,4.0,6.0,-0.601554,9.0,164.5,19.470884,2.071371
4079,5208,15,2021,"Patterson, Cordarrelle",RB,atl,a,sfo,0.0,7000,...,15.8,7.705488,14.379527,10.0,19.0,0.038747,12.0,189.6,14.437648,2.062521
4118,5713,15,2021,"Montgomery, David",RB,chi,h,min,0.0,6500,...,11.71,7.609125,12.971835,22.0,22.0,0.23523,9.0,117.1,13.324679,2.049951
4108,5708,15,2021,"Williams, Darrel",RB,kan,a,lac,0.0,6500,...,10.069231,7.153482,10.974267,30.0,21.0,1.139049,13.0,130.9,12.682841,1.951206
4146,2930,15,2021,"Freeman, Devonta",RB,bal,h,gnb,0.0,5900,...,8.816667,6.052923,11.801334,35.0,26.0,-0.601554,12.0,105.8,10.899003,1.847289
4384,5622,15,2021,"Chubb, Nick",RB,cle,h,lvr,0.0,8000,...,15.13,8.33174,12.434919,12.0,32.0,1.451109,10.0,151.3,14.611583,1.826448


In [31]:
top_WRs = pdf[pdf.Pos=='WR']
top_WRs.head(10)

Unnamed: 0,GID,Week,Year,Name,Pos,Team,h/a,Oppt,Points,Salary,...,avg_pts,pts_std,ewm5,avg_rank,prev_rank,norm,gp,season_points,Pts Adjusted,Avg Value
4150,5754,15,2021,"Johnson, Diontae",WR,pit,h,ten,0.0,7200,...,14.983333,4.830929,16.480053,7.0,27.0,1.860096,12.0,179.8,19.270198,2.676416
4284,5835,15,2021,"Jefferson, Justin",WR,min,a,chi,0.0,8600,...,16.9,8.131416,20.1561,4.0,11.0,0.849418,13.0,219.7,21.430227,2.491887
4115,5263,15,2021,"Adams, Davante",WR,gnb,a,bal,0.0,8500,...,17.283333,8.796366,20.789287,3.0,1.0,0.083294,12.0,207.4,20.914228,2.460497
4498,5386,15,2021,"Lockett, Tyler",WR,sea,a,lar,0.0,7300,...,12.715385,9.059327,16.517427,18.0,3.0,0.120086,13.0,165.3,16.697555,2.287336
4077,5552,15,2021,"Kupp, Cooper",WR,lar,h,sea,0.0,9000,...,21.453846,7.490952,21.45984,1.0,2.0,-0.814845,13.0,278.9,20.237572,2.248619
4127,5726,15,2021,"Samuel, Deebo",WR,sfo,h,atl,0.0,8200,...,18.108333,7.651317,16.794173,2.0,29.0,0.925165,12.0,217.3,18.18192,2.217307
4461,5839,15,2021,"Higgins, Tee",WR,cin,a,den,0.0,6800,...,12.154545,6.182291,15.396765,21.0,22.0,-0.405813,11.0,133.7,14.788045,2.174713
4194,5761,15,2021,"Renfrow, Hunter",WR,lvr,a,cle,0.0,7500,...,12.261538,4.99175,16.212576,20.0,5.0,-0.433948,13.0,159.4,15.561655,2.074887
4366,5253,15,2021,"Evans, Mike",WR,tam,h,nor,0.0,7400,...,14.384615,8.146251,14.164665,9.0,8.0,0.654641,13.0,187.0,15.146627,2.046841
4238,5926,15,2021,"Waddle, Jaylen",WR,mia,h,nyj,0.0,7100,...,12.015385,6.683418,15.278373,22.0,26.0,-0.57159,13.0,156.2,14.420988,2.031125


In [32]:
top_TEs = pdf[pdf.Pos=='TE']
top_TEs.head(10)

Unnamed: 0,GID,Week,Year,Name,Pos,Team,h/a,Oppt,Points,Salary,...,avg_pts,pts_std,ewm5,avg_rank,prev_rank,norm,gp,season_points,Pts Adjusted,Avg Value
4184,4703,15,2021,"Kittle, George",TE,sfo,h,atl,0.0,7800,...,13.96,10.453622,20.969751,1.0,1.0,0.079091,10.0,139.6,21.088387,2.703639
4344,4721,15,2021,"Andrews, Mark",TE,bal,h,gnb,0.0,7400,...,13.084615,8.659566,15.02541,3.0,3.0,-0.338662,13.0,170.1,14.517417,1.961813
4091,4725,15,2021,"Goedert, Dallas",TE,phi,h,was,0.0,6000,...,8.841667,6.618632,11.893968,8.0,2.0,-0.250232,12.0,106.1,11.518619,1.91977
4122,4750,15,2021,"Knox, Dawson",TE,buf,h,car,0.0,6500,...,10.809091,6.170325,10.991203,5.0,4.0,-0.350859,11.0,118.9,10.464915,1.609987
4509,4569,15,2021,"Ertz, Zach",TE,ari,a,det,0.0,5300,...,8.030769,6.660629,8.307017,12.0,23.0,0.146175,13.0,104.4,8.52628,1.608732
4424,4494,15,2021,"Gronkowski, Rob",TE,tam,h,nor,0.0,7000,...,13.1,8.001428,12.465424,2.0,15.0,-0.884485,7.0,104.8,11.138697,1.591242
4494,4680,15,2021,"Higbee, Tyler",TE,lar,h,sea,0.0,5200,...,6.625,3.373998,5.957043,20.0,20.0,1.213427,12.0,79.5,7.777183,1.495612
4397,4802,15,2021,"Freiermuth, Pat",TE,pit,h,ten,0.0,5300,...,7.923077,4.982829,9.207077,13.0,12.0,-1.262597,13.0,103.0,7.313182,1.379846
4474,4582,15,2021,"Kelce, Travis",TE,kan,a,lac,0.0,7500,...,12.184615,6.538584,8.497423,4.0,31.0,1.15549,13.0,158.4,10.230659,1.364088
4185,4705,15,2021,"Everett, Gerald",TE,sea,a,lar,0.0,4900,...,5.763636,4.452701,6.45274,24.0,14.0,-0.030683,10.0,63.4,6.406715,1.307493


In [33]:
# players[players.Pos == 'Def'].head(3)
top_Defs = pdf[pdf.Pos=='Def']
top_Defs.head(10)

Unnamed: 0,GID,Week,Year,Name,Pos,Team,h/a,Oppt,Points,Salary,...,avg_pts,pts_std,ewm5,avg_rank,prev_rank,norm,gp,season_points,Pts Adjusted,Avg Value
4530,7015,15,2021,Kansas City,Def,kan,a,lac,0.0,3700,...,8.0,7.164728,16.0495,7.0,1.0,-0.679991,11.0,104.0,15.029514,4.062031
4523,7009,15,2021,Dallas,Def,dal,a,nyg,0.0,4600,...,10.692308,6.700555,15.009877,2.0,2.0,0.252568,13.0,139.0,15.38873,3.345376
4526,7012,15,2021,Green Bay,Def,gnb,a,bal,0.0,4500,...,7.846154,5.683986,10.87026,8.0,5.0,1.107414,12.0,102.0,12.531381,2.784751
4534,7016,15,2021,Miami,Def,mia,h,nyj,0.0,5000,...,7.615385,7.544738,12.091939,9.0,8.0,1.146271,11.0,99.0,13.811345,2.762269
4536,7018,15,2021,New England,Def,nwe,a,ind,0.0,4500,...,11.0,7.842194,13.565036,1.0,9.0,-1.223984,13.0,143.0,11.72906,2.606458
4542,7027,15,2021,San Francisco,Def,sfo,h,atl,0.0,4300,...,6.692308,4.460654,8.508936,14.0,8.0,1.72912,12.0,87.0,11.102616,2.582004
4528,7013,15,2021,Indianapolis,Def,ind,h,nwe,0.0,4100,...,8.769231,4.918568,10.772642,3.0,4.0,-0.641135,13.0,114.0,9.810941,2.392912
4521,7007,15,2021,Cincinnati,Def,cin,a,den,0.0,3400,...,6.615385,5.107812,7.494738,15.0,12.0,0.330281,12.0,86.0,7.990161,2.350047
4545,7030,15,2021,Tennessee,Def,ten,a,pit,0.0,4100,...,7.307692,7.543039,9.573248,11.0,3.0,-0.174855,10.0,95.0,9.310965,2.270967
4524,7010,15,2021,Denver,Def,den,h,cin,0.0,4000,...,8.076923,6.089082,8.655304,6.0,7.0,0.252568,13.0,105.0,9.034156,2.258539


# Team

In [39]:
from pulp import *

class PulpSelection():
    def __init__(self, df, pts_col="Pts Adjusted", sal_col="Salary", name_col="Name", salary_cap=60000):
        self.df = df
        self.vars = self.populate_vars(pts_col, sal_col, name_col)
        self.model = self.optimize(salary_cap)
        self.players = self.player_names()
        self.selection = self.selection_df()
        
    def populate_vars(self, pts_col, sal_col, name_col):
        df = self.df
        salaries = {}
        points = {}
        for pos in df.Pos.unique():
            available_pos = df[df.Pos == pos]
            salary = list(available_pos[[name_col,sal_col]].set_index(name_col).to_dict().values())[0]
            point = list(available_pos[[name_col,pts_col]].set_index(name_col).to_dict().values())[0]
            salaries[pos] = salary
            points[pos] = point
            
        self.salaries = salaries
        self.points = points
        return {k: LpVariable.dict(k, v, cat="Binary") for k, v in points.items()}

    def player_names(self):
        players = {}
        for d in self.vars.values():
            for k,v in d.items():
                players[v] = k
        return players
            
    pos_num_available = {
        "QB": 1,
        "RB": 2,
        "WR": 3,
        "TE": 1,
        "Def": 1
    }

    def optimize(self, salary_cap):
        prob = LpProblem("FFModel", LpMaximize)
        rewards = []
        costs = []

        for pos, players in self.vars.items():
            costs += lpSum([self.salaries[pos][i] * self.vars[pos][i] for i in players])
            rewards += lpSum([self.points[pos][i] * self.vars[pos][i] for i in players])
            if pos in ['RB','WR','TE']:
                prob += lpSum([self.vars[pos][i] for i in players]) <= self.pos_num_available[pos]+1
                prob += lpSum([self.vars[pos][i] for i in players]) >= self.pos_num_available[pos]
            else:
                prob += lpSum([self.vars[pos][i] for i in players]) == self.pos_num_available[pos]
        prob += lpSum(prob.variables()) <= 9    # flex -> max of 9 total players

        prob += lpSum(rewards)
        prob += lpSum(costs) <= salary_cap
        print(prob.solve())
        return prob
    
    def selection_df(self):
        selections = [self.players[p] for p in self.model.variables() if p.varValue > 0]
        team = self.df[self.df.Name.isin(selections)]
        return team

In [44]:
pts_col = 'Pts Adjusted'
team = PulpSelection(this_week_df, pts_col=pts_col).selection
print("\nTotal points: {}     Total salary: {}\n".format(sum(team[pts_col]), sum(team['Salary'])))
team

1

Total points: 162.52474233325853     Total salary: 59600



Unnamed: 0,GID,Week,Year,Name,Pos,Team,h/a,Oppt,Points,Salary,...,avg_pts,pts_std,ewm5,avg_rank,prev_rank,norm,gp,season_points,Pts Adjusted,Avg Value
4013,1232,15,2021,"Roethlisberger, Ben",QB,pit,h,ten,0.0,6600,...,15.72,4.89416,19.615035,24.0,5.0,0.744787,12.0,188.64,20.732215,3.141245
4115,5263,15,2021,"Adams, Davante",WR,gnb,a,bal,0.0,8500,...,17.283333,8.796366,20.789287,3.0,1.0,0.083294,12.0,207.4,20.914228,2.460497
4150,5754,15,2021,"Johnson, Diontae",WR,pit,h,ten,0.0,7200,...,14.983333,4.830929,16.480053,7.0,27.0,1.860096,12.0,179.8,19.270198,2.676416
4184,4703,15,2021,"Kittle, George",TE,sfo,h,atl,0.0,7800,...,13.96,10.453622,20.969751,1.0,1.0,0.079091,10.0,139.6,21.088387,2.703639
4235,5905,15,2021,"Williams, Javonte",RB,den,h,cin,0.0,7000,...,11.784615,6.651171,18.188972,20.0,8.0,-0.136931,13.0,153.2,17.983575,2.569082
4260,5820,15,2021,"Taylor, Jonathan",RB,ind,h,nwe,0.0,10200,...,22.338462,11.95321,26.72908,2.0,4.0,-0.435122,13.0,290.4,26.076398,2.55651
4284,5835,15,2021,"Jefferson, Justin",WR,min,a,chi,0.0,8600,...,16.9,8.131416,20.1561,4.0,11.0,0.849418,13.0,219.7,21.430227,2.491887
4530,7015,15,2021,Kansas City,Def,kan,a,lac,0.0,3700,...,8.0,7.164728,16.0495,7.0,1.0,-0.679991,11.0,104.0,15.029514,4.062031


In [36]:
pts_col = 'Predictions'
team = PulpSelection(this_week_df, pts_col=pts_col).selection
print("\nTotal points: {}     Total salary: {}\n".format(sum(team[pts_col]), sum(team['Salary'])))
team

1

Total points: 128.62797146158192     Total salary: 59700



Unnamed: 0,GID,Week,Year,Name,Pos,Team,h/a,Oppt,Points,Salary,...,avg_pts,pts_std,ewm5,avg_rank,prev_rank,norm,gp,season_points,Pts Adjusted,Predictions
3683,5208,14,2021,"Patterson, Cordarrelle",RB,atl,a,car,0.0,7800,...,16.063636,8.024621,15.127943,5.0,20.0,-1.185848,11.0,176.7,13.349172,17.750691
3724,5206,14,2021,"Hopkins, DeAndre",WR,ari,h,lar,0.0,7300,...,13.144444,6.23741,11.678963,16.0,28.0,-0.015995,9.0,118.3,11.654971,12.882616
3840,1529,14,2021,"Allen, Josh",QB,buf,a,tam,0.0,8800,...,23.403333,8.796048,19.68741,4.0,27.0,0.119781,12.0,280.84,19.867081,24.57041
3854,5835,14,2021,"Jefferson, Justin",WR,min,h,pit,0.0,8500,...,16.891667,8.49293,21.746407,3.0,1.0,0.368075,12.0,202.7,22.298519,15.206543
3891,5534,14,2021,"Fournette, Leonard",RB,tam,h,buf,0.0,7600,...,15.8,10.106344,20.855594,7.0,8.0,-0.537737,12.0,189.6,20.048988,17.751004
3964,4494,14,2021,"Gronkowski, Rob",TE,tam,h,buf,0.0,7000,...,13.728571,8.426489,14.465177,1.0,2.0,-1.41734,6.0,96.1,12.339167,13.21588
3988,5424,14,2021,"Diggs, Stefon",WR,buf,a,tam,0.0,8200,...,13.941667,5.356298,13.656105,12.0,38.0,-0.507138,12.0,167.3,12.895398,17.66156
4071,7028,14,2021,Seattle,Def,sea,a,hou,0.0,4500,...,5.5,3.580249,7.000827,17.0,6.0,1.467307,11.0,66.0,9.201788,9.589268


In [45]:
team[['Pos','Name','Salary']].to_csv(f'weekly_picks/Adam_week{wk}_picks.csv', index=False)