In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import xgboost 

import warnings
warnings.filterwarnings('ignore')

pd.set_option('display.max_columns', 100)

import xgboost
from sklearn.linear_model import LassoLars
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import TweedieRegressor

from sklearn.feature_selection import SelectKBest, f_regression, RFE
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler, QuantileTransformer

from prepare import x_y_split, rmse, select_kbest, rfe

from sklearn.model_selection import GridSearchCV

from hypopt import GridSearch

# Importing Data

In [2]:
df = pd.read_csv('season1.csv', index_col=0)

In [3]:
s_2018 = df[df['year']>2017]

In [4]:
passing = pd.read_csv('passing.csv')
rec = pd.read_csv('receive.csv')
rush = pd.read_csv('rush.csv')

In [463]:
def pass_rec_rush():
    
    passing = pd.read_csv('passing.csv', index_col=0)
    rec = pd.read_csv('receive.csv', index_col=0)
    rush = pd.read_csv('rush.csv', index_col=0)

    passing['date'] = pd.to_datetime(passing['date'])
    rush['date'] = pd.to_datetime(rush['date'])
    rec['date'] = pd.to_datetime(rec['date'])
    
    passing['year'] = passing['date'].dt.year
    rec['year'] = rec['date'].dt.year
    rush['year'] = rush['date'].dt.year
    
    return passing, rec, rush

In [5]:
passing['date'] = pd.to_datetime(passing['date'])
rush['date'] = pd.to_datetime(rush['date'])
rec['date'] = pd.to_datetime(rec['date'])

In [6]:
passing['year'] = passing['date'].dt.year
rec['year'] = rec['date'].dt.year
rush['year'] = rush['date'].dt.year

In [7]:
passing.drop(columns=['Unnamed: 0'], inplace=True)
rush.drop(columns=['Unnamed: 0'], inplace=True)
rec.drop(columns=['Unnamed: 0'], inplace=True)

In [88]:
def find_pts_averages(player):
    pts_var_avg = player.pts_var.abs().mean()
    return pts_var_avg

In [90]:
def var_avg_df(passing, rec, rush):
    weekly = pd.DataFrame(columns=['player', 'pts_var'])
    pts_var_avg = pd.DataFrame(columns=['player', 'pts_var', 'year'])

    for year in range(2022, 2017, -1):
        res = rec[rec['year']==year].groupby('player')['tgt'].mean()> 5
        res = res[res==True].index.to_list()

        rec_temp = rec[(rec['year']==year) & (rec['player'].isin(res))]
        weekly = weekly.append(rec_temp[['player'] + ['pts_var']])

        qbs = passing[passing['year']==year].groupby('player')['att'].mean()> 5
        qbs = qbs[qbs==True].index.to_list()

        pass_temp = passing[(passing['year']==year) & (passing['player'].isin(qbs))]
        weekly = weekly.append(pass_temp[['player'] + ['pts_var']])

        rbs = rush[rush['year']==year].groupby('player')['att'].mean()> 5
        rbs = rbs[rbs==True].index.to_list()

        rush_temp = rush[(rush['year']==year) & (rush['player'].isin(rbs))]
        weekly = weekly.append(rush_temp[['player'] + ['pts_var']])

        pts_avg = weekly.groupby('player', group_keys=False).apply(find_pts_averages)
        weekly_temp = pd.DataFrame(pts_avg, columns=['pts_var_avg'])

        weekly_temp['year'] = year

        pts_var_avg = pts_var_avg.append(weekly_temp)
    
    pts_var_avg = pts_var_avg.reset_index()
    
    pts_var_avg.drop(columns=['player','pts_var'], inplace=True)
    pts_var_avg.rename(columns={'index':'player'}, inplace=True)
    
    df1 = pd.merge(left=s_2018, right=pts_var_avg, on=['player','year'], how='left')
    df1.rename(columns={'pts_var':'pts_var_avg'}, inplace=True)
    
    qb_df = df1[df1['pos']=='QB']
    rb_df = df1[df1['pos']=='RB']
    wr_df = df1[df1['pos']=='WR']
    te_df = df1[df1['pos']=='TE']
    
    return qb_df, rb_df, wr_df, te_df

In [93]:
pts_var_avg = pts_var_avg.reset_index()

In [95]:
pts_var_avg.drop(columns=['player','pts_var'],inplace=True)

In [98]:
pts_var_avg.rename(columns={'index':'player'}, inplace=True)

In [101]:
df1 = pd.merge(left=s_2018, right=pts_var_avg, on=['player','year'], how='left')
df1.rename(columns={'pts_var':'pts_var_avg'}, inplace=True)

In [102]:
qb_df = df1[df1['pos']=='QB']
rb_df = df1[df1['pos']=='RB']
wr_df = df1[df1['pos']=='WR']
te_df = df1[df1['pos']=='TE']

# Cleaning Data

In [103]:
qb_df.drop(columns=['rk','pos','tgt','rec','rec_yards','y/r','rec_tds','vbd', 'team'], inplace=True)

In [104]:
qb_df['rating'] = round(((((((qb_df['cmp']/qb_df['pass_att'])-.3)*5) + 
                  ((qb_df['pass_yds']/qb_df['pass_att']-3)*.25) +
                  ((qb_df['pass_tds']/qb_df['pass_att'])*20) +
                  (2.375-((qb_df['int']/qb_df['pass_att'])*25)))/6)*100),2)

In [105]:
def add_target(group):
    group['target'] = group['ppr_pts'].shift(-1)
    group = group.fillna(0)
    return group

In [106]:
qb_df = add_target(qb_df)

In [107]:
qb_df = qb_df[(qb_df['player']!= 'Tom Brady') & (qb_df['player']!='Marcus Mariota')]

In [108]:
qb_df['comp%'] = round((qb_df['cmp'] / qb_df['pass_att']) * 100, 2)
qb_df['int%'] = round((qb_df['int'] / qb_df['pass_att']) * 100, 2)

In [109]:
qb_df.drop(columns=['cmp','pass_att','int'], inplace=True)

In [118]:
rb_df.drop(columns=['rk','team','pos','cmp','pass_att','pass_yds','pass_tds','int','vbd'], inplace=True)

In [119]:
rb_df = add_target(rb_df)

In [121]:
rb_df['rec%'] = round(((rb_df['rec']/rb_df['tgt'])*100),2)

In [127]:
wr_df.drop(columns=['rk','team','pos','cmp','pass_tds','pass_att','pass_yds','int','rush_att','rush_yard',
                    'y/a','rush_tds','vbd'],inplace=True)

In [128]:
wr_df = add_target(wr_df)

In [130]:
wr_df['rec%'] = round(((wr_df['rec']/wr_df['tgt'])*100),2)

In [131]:
wr_df = wr_df[wr_df['player']!='KaVontae Turpin']

In [138]:
te_df = add_target(te_df)

In [140]:
te_df['rec%'] = round(((te_df['rec']/te_df['tgt'])*100),2)

In [142]:
te_df = te_df[(te_df['player']!='Richard Rodgers') & (te_df['player']!='Feleipe Franks')]

In [246]:
qb_df.sort_values('ppr_pts',ascending=False).head(10)

Unnamed: 0,player,age,g,gs,pass_yds,pass_tds,rush_att,rush_yard,y/a,rush_tds,fmb,fl,rush_rec_tds,ppr_pts,pos_rank,year,adp,adp_by_pos,success,round,pts_var_avg,rating,target,comp%,int%
2282,Patrick Mahomes,27,17,17,5250.0,41.0,61.0,358.0,5.87,4.0,5.0,0.0,4,417.4,1,2022,50.0,5,1.0,5.0,4.738125,105.16,395.5,67.13,1.85
0,Patrick Mahomes,23,16,16,5097.0,50.0,60.0,272.0,4.53,2.0,9.0,2.0,2,417.1,1,2018,121.0,15,1.0,11.0,5.339379,113.84,355.0,66.03,2.07
550,Lamar Jackson,22,15,15,3127.0,36.0,176.0,1206.0,6.85,7.0,9.0,2.0,7,415.7,1,2019,101.0,11,1.0,9.0,4.871558,113.34,337.8,66.08,1.5
1686,Josh Allen,25,17,17,4407.0,36.0,122.0,763.0,6.25,6.0,8.0,3.0,6,402.6,1,2021,35.0,2,1.0,3.0,5.209105,92.17,380.8,63.31,2.32
1108,Josh Allen,24,16,16,4544.0,37.0,102.0,421.0,4.13,8.0,9.0,6.0,9,396.1,1,2020,106.0,11,1.0,9.0,5.19744,107.15,383.3,69.23,1.75
2283,Josh Allen,26,16,16,4283.0,35.0,124.0,762.0,6.15,7.0,13.0,5.0,7,395.5,2,2022,19.0,1,1.0,2.0,6.202891,96.61,378.0,63.32,2.47
1109,Aaron Rodgers,37,16,16,4299.0,48.0,38.0,149.0,3.92,3.0,4.0,2.0,3,383.3,2,2020,96.0,10,1.0,8.0,4.583357,121.53,378.7,70.72,0.95
1687,Justin Herbert,23,17,17,5014.0,38.0,63.0,302.0,4.79,3.0,1.0,1.0,3,380.8,2,2021,70.0,7,1.0,6.0,5.206312,97.66,374.7,65.92,2.23
1110,Kyler Murray,23,16,16,3971.0,26.0,133.0,819.0,6.16,11.0,9.0,4.0,11,378.7,3,2020,66.0,5,1.0,6.0,4.244581,94.31,374.4,67.2,2.15
2284,Jalen Hurts,24,15,15,3701.0,22.0,165.0,760.0,4.61,13.0,9.0,2.0,13,378.0,3,2022,41.0,2,1.0,4.0,5.688711,101.55,350.7,66.52,1.3


In [247]:
rb_df.sort_values('ppr_pts',ascending=False).head(10)

Unnamed: 0,player,age,g,gs,rush_att,rush_yard,y/a,rush_tds,tgt,rec,rec_yards,y/r,rec_tds,fmb,fl,rush_rec_tds,ppr_pts,pos_rank,year,adp,adp_by_pos,success,round,pts_var_avg,target,rec%
621,Christian McCaffrey,23,16,16,287.0,1387.0,4.83,15.0,142.0,116.0,1005.0,8.66,4.0,1.0,0.0,19,471.2,1,2019,3.0,3,1.0,1.0,5.383024,314.8,81.69
74,Saquon Barkley,21,16,16,261.0,1307.0,5.01,11.0,121.0,91.0,721.0,7.92,4.0,0.0,0.0,15,385.8,1,2018,7.0,5,1.0,1.0,4.700141,385.5,75.21
75,Christian McCaffrey,22,16,16,219.0,1098.0,5.01,7.0,124.0,107.0,867.0,8.1,6.0,4.0,1.0,13,385.5,2,2018,13.0,9,1.0,2.0,5.373108,372.1,86.29
1190,Alvin Kamara,25,15,10,187.0,932.0,4.98,16.0,107.0,83.0,756.0,9.11,5.0,1.0,0.0,21,377.8,1,2020,4.0,4,1.0,1.0,5.538449,337.8,77.57
1769,Jonathan Taylor,22,17,17,332.0,1811.0,5.45,18.0,51.0,40.0,360.0,9.0,2.0,4.0,2.0,20,373.1,1,2021,15.0,11,1.0,2.0,6.93986,343.8,78.43
2365,Austin Ekeler,27,17,17,204.0,915.0,4.49,13.0,127.0,107.0,722.0,6.75,5.0,5.0,3.0,18,372.7,1,2022,8.0,6,1.0,1.0,5.0,356.4,84.25
76,Todd Gurley,24,14,14,256.0,1251.0,4.89,17.0,81.0,59.0,580.0,9.83,4.0,1.0,1.0,21,372.1,3,2018,1.0,1,1.0,1.0,5.2375,354.2,72.84
2366,Christian McCaffrey,26,17,16,244.0,1139.0,4.67,8.0,108.0,85.0,741.0,8.72,5.0,1.0,0.0,13,356.4,2,2022,3.0,3,1.0,1.0,4.661333,328.3,78.7
77,Alvin Kamara,23,15,13,194.0,883.0,4.55,14.0,105.0,81.0,709.0,8.75,4.0,1.0,0.0,18,354.2,4,2018,6.0,4,1.0,1.0,5.143218,329.1,77.14
1770,Austin Ekeler,26,16,16,206.0,911.0,4.42,12.0,94.0,70.0,647.0,9.24,8.0,4.0,3.0,20,343.8,2,2021,9.0,7,1.0,1.0,4.914137,300.7,74.47


In [248]:
wr_df.sort_values('ppr_pts', ascending=False).head(10)

Unnamed: 0,player,age,g,gs,tgt,rec,rec_yards,y/r,rec_tds,fmb,fl,rush_rec_tds,ppr_pts,pos_rank,year,adp,adp_by_pos,success,round,pts_var_avg,target,rec%
1937,Cooper Kupp,28,17,17,191.0,145.0,1947.0,13.43,16.0,0.0,0.0,16,439.5,1,2021,43.0,16,1.0,4.0,7.0712,344.3,75.92
773,Michael Thomas,26,16,15,185.0,149.0,1725.0,11.58,9.0,1.0,0.0,9,374.6,1,2019,11.0,4,1.0,1.0,5.780724,276.1,80.54
2527,Justin Jefferson,23,17,17,184.0,128.0,1809.0,14.13,8.0,0.0,0.0,9,368.7,1,2022,6.0,2,1.0,1.0,9.1875,347.2,69.57
1353,Davante Adams,28,14,14,149.0,115.0,1374.0,11.95,18.0,1.0,1.0,18,358.4,1,2020,10.0,2,1.0,1.0,9.79393,328.9,77.18
2528,Tyreek Hill,28,17,17,170.0,119.0,1710.0,14.37,7.0,1.0,0.0,9,347.2,2,2022,24.0,8,1.0,2.0,7.964063,335.5,70.0
1938,Davante Adams,29,16,16,169.0,123.0,1553.0,12.63,11.0,0.0,0.0,11,344.3,2,2021,6.0,1,1.0,1.0,9.476429,339.0,72.78
1939,Deebo Samuel,25,16,15,121.0,77.0,1405.0,18.25,6.0,4.0,2.0,14,339.0,3,2021,92.0,35,1.0,8.0,6.639316,330.4,63.64
2529,Davante Adams,30,17,17,180.0,100.0,1516.0,15.16,14.0,1.0,0.0,14,335.5,3,2022,13.0,5,1.0,2.0,10.409375,316.6,55.56
228,Tyreek Hill,24,16,16,137.0,87.0,1479.0,17.0,12.0,0.0,0.0,14,334.0,1,2018,24.0,8,1.0,2.0,9.196425,333.5,63.5
229,DeAndre Hopkins,26,16,16,163.0,115.0,1572.0,13.67,11.0,2.0,2.0,11,333.5,2,2018,8.0,2,1.0,1.0,6.224722,329.6,70.55


In [250]:
te_df.sort_values('ppr_pts',ascending=False).head(10)

Unnamed: 0,rk,player,team,pos,age,g,gs,cmp,pass_att,pass_yds,pass_tds,int,rush_att,rush_yard,y/a,rush_tds,tgt,rec,rec_yards,y/r,rec_tds,fmb,fl,rush_rec_tds,ppr_pts,vbd,pos_rank,year,adp,adp_by_pos,success,round,pts_var_avg,target,rec%
2745,8,Travis Kelce,KAN,TE,33,17,17,0.0,0.0,0.0,0.0,0.0,2.0,5.0,2.5,0.0,152.0,110.0,1338.0,12.16,12.0,1.0,1.0,12,316.3,114.0,1,2022,17.0,1,1.0,2.0,5.707813,215.4,72.37
1572,5,Travis Kelce,KAN,TE,31,15,15,1.0,2.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,145.0,105.0,1416.0,13.49,11.0,1.0,1.0,11,312.8,117.0,1,2020,21.0,1,1.0,2.0,5.993762,278.6,72.41
2164,8,Mark Andrews,BAL,TE,26,17,9,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,153.0,107.0,1361.0,12.72,9.0,1.0,0.0,9,301.1,96.0,1,2021,54.0,5,1.0,5.0,7.512,262.8,69.93
438,8,Travis Kelce,KAN,TE,29,16,16,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,150.0,103.0,1336.0,12.97,10.0,2.0,1.0,10,294.6,109.0,1,2018,28.0,2,1.0,3.0,5.953235,280.3,68.67
439,17,Zach Ertz,PHI,TE,28,16,16,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,156.0,116.0,1163.0,10.03,8.0,1.0,0.0,8,280.3,81.0,2,2018,40.0,3,1.0,4.0,5.538907,258.7,74.36
1573,14,Darren Waller,LVR,TE,28,16,15,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,145.0,107.0,1196.0,11.18,9.0,2.0,2.0,9,278.6,81.0,2,2020,64.0,5,1.0,6.0,6.081935,176.6,73.79
2165,15,Travis Kelce,KAN,TE,32,16,16,0.0,0.0,0.0,0.0,0.0,2.0,3.0,1.5,1.0,134.0,92.0,1125.0,12.23,9.0,1.0,1.0,10,262.8,74.0,2,2021,8.0,1,1.0,1.0,6.307976,208.8,68.66
440,16,George Kittle,SFO,TE,25,16,16,0.0,0.0,0.0,0.0,0.0,1.0,10.0,10.0,0.0,136.0,88.0,1377.0,15.65,5.0,0.0,0.0,5,258.7,88.0,3,2018,141.0,13,1.0,12.0,7.037222,222.2,64.71
992,8,Travis Kelce,KAN,TE,30,16,16,0.0,0.0,0.0,0.0,0.0,1.0,4.0,4.0,1.0,136.0,97.0,1229.0,12.67,5.0,1.0,1.0,6,254.3,80.0,1,2019,18.0,1,1.0,2.0,5.545321,222.5,71.32
993,16,George Kittle,SFO,TE,26,14,14,0.0,0.0,0.0,0.0,0.0,5.0,22.0,4.4,0.0,107.0,85.0,1053.0,12.39,5.0,1.0,0.0,5,222.5,61.0,2,2019,28.0,2,1.0,3.0,7.500084,221.0,79.44


# Creating GridSearchCV XGB Model

In [148]:
parameters = {'learning_rate': [.03, .07, .1, .15, .2],
              'max_depth': [3, 4, 5, 6, 7],
              'min_child_weight': [1, 5, 15, 200],
              'subsample': [.65, 0.7, .8, .85],
              'colsample_bytree': [0.7, .8, .85],
              'n_estimators': [250, 500],
              'gamma':[0, 1, 10]}

In [149]:
xgb = xgboost.XGBRegressor()

In [150]:
xgb_grid = GridSearchCV(xgb,
                        parameters,
                        cv = 4,
                        n_jobs = -1,
                        verbose=True,
                        scoring='neg_root_mean_squared_error')


# QBs

In [184]:
X_train = qb_df[qb_df['year']<2020]
X_val = qb_df[qb_df['year']<2022]
X_test = qb_df[qb_df['year']==2022]

y_train = X_train['target']
X_train.drop(columns=['target'], inplace = True)

y_val = X_val['target']
X_val.drop(columns=['target'], inplace = True)

X_test.drop(columns=['target'], inplace = True)

ss = StandardScaler()

X_train[qb_cols] = ss.fit_transform(X_train[qb_cols])
X_val[qb_cols] = ss.transform(X_val[qb_cols])
X_test[qb_cols] = ss.transform(X_test[qb_cols])

In [199]:
qb_cols = ['age','g','gs','pass_yds','pass_tds','rush_att','rush_yard','y/a','rush_tds',
           'fmb','fl','rush_rec_tds','pos_rank','adp','adp_by_pos','round','ppr_pts',
           'comp%', 'int%', 'rating', 'pts_var_avg']

In [186]:
xgb_grid.fit(X_train[qb_cols],
             y_train)

#print(xgb_grid.best_score_)
print(xgb_grid.best_params_)

Fitting 4 folds for each of 7200 candidates, totalling 28800 fits
{'colsample_bytree': 0.85, 'gamma': 10, 'learning_rate': 0.2, 'max_depth': 4, 'min_child_weight': 5, 'n_estimators': 250, 'subsample': 0.8}


In [154]:
qb_xgb = xgboost.XGBRegressor(colsample_bytree=.85, gamma=10, eta=.2, max_depth=4,
                           min_child_weight=5,n_estimators=250,subsample.8)

In [458]:
def qb_xgb_modeling(df, cols):
    
    X_train = df[df['year']<2020]
    X_val = df[df['year']<2022]
    X_test = df[df['year']==2022]
    
    y_train = X_train['target']
    X_train.drop(columns=['target'], inplace = True)
    
    y_val = X_val['target']
    X_val.drop(columns=['target'], inplace = True)

    X_test.drop(columns=['target'], inplace = True)
    
    ss = StandardScaler()
    
    X_train[cols] = ss.fit_transform(X_train[cols])
    X_val[cols] = ss.transform(X_val[cols])
    X_test[cols] = ss.transform(X_test[cols])
    
    cols.append('success')
    
    xgb = xgboost.XGBRegressor(colsample_bytree=.85, gamma=10, eta=.2, max_depth=4,
                                  min_child_weight=5,n_estimators=250,subsample=.8)
    
    xgb.fit(X_train[cols], y_train, eval_set=[(X_train[cols], y_train), (X_val[cols], y_val)],
           early_stopping_rounds=25, verbose=False)
    xgb_preds = xgb.predict(X_train[cols])
    
    preds = pd.DataFrame({'actual':y_train,
                          'baseline':y_train.mean(),
                          'xgb_preds':xgb_preds})
    
    xgb_val_preds = xgb.predict(X_val[cols])
    
    val_preds = pd.DataFrame({'actual':y_val,
                              'baseline':y_train.mean(),
                              'xgb_val_preds':xgb_val_preds})
    
    
    pos_2023 = pd.DataFrame({'player':X_test['player'],
                             'preds':xgb.predict(X_test[cols])})
    
    val_2022 = X_val[X_val['year']==2021]
    pos_2022 = pd.DataFrame({'player':val_2022['player'],
                             'actual':val_2022['ppr_pts'],
                             'preds':xgb.predict(val_2022[cols])})
    
    return preds, val_preds, pos_2023, pos_2022

In [201]:
qb_preds, qb_val_preds, qb_2023, qb_2022 = qb_xgb_modeling(qb_df, qb_cols)

[0]	validation_0-rmse:139.11229	validation_1-rmse:138.63344
[1]	validation_0-rmse:115.06667	validation_1-rmse:115.44909
[2]	validation_0-rmse:95.52868	validation_1-rmse:96.84830
[3]	validation_0-rmse:80.60257	validation_1-rmse:82.81397
[4]	validation_0-rmse:69.18582	validation_1-rmse:72.26628
[5]	validation_0-rmse:59.19493	validation_1-rmse:63.68204
[6]	validation_0-rmse:51.32066	validation_1-rmse:57.10375
[7]	validation_0-rmse:44.94783	validation_1-rmse:51.85849
[8]	validation_0-rmse:39.82711	validation_1-rmse:48.14203
[9]	validation_0-rmse:35.69678	validation_1-rmse:45.59392
[10]	validation_0-rmse:33.18096	validation_1-rmse:43.12865
[11]	validation_0-rmse:30.50196	validation_1-rmse:41.54134
[12]	validation_0-rmse:28.62544	validation_1-rmse:40.31520
[13]	validation_0-rmse:27.45979	validation_1-rmse:39.46749
[14]	validation_0-rmse:25.96680	validation_1-rmse:39.24768
[15]	validation_0-rmse:25.20949	validation_1-rmse:38.60954
[16]	validation_0-rmse:24.19146	validation_1-rmse:38.12504
[17

In [351]:
qb_pred_xgb = qb_2023.sort_values('preds',ascending=False).head(25)

In [352]:
qb_pred_xgb = qb_pred_xgb.reset_index(drop=True)

In [353]:
qb_pred_xgb.index += 1

In [354]:
qb_pred_xgb = qb_pred_xgb.reset_index()

In [355]:
qb_pred_xgb.rename(columns={'index':'rnk'}, inplace=True)

In [356]:
qb_pred_xgb.head()

Unnamed: 0,rnk,player,preds
0,1,Patrick Mahomes,329.406921
1,2,Jalen Hurts,329.406921
2,3,Josh Allen,327.925293
3,4,Joe Burrow,325.28418
4,5,Trevor Lawrence,291.599945


In [364]:
qb_comb_temp = qb_comb_temp[(qb_comb_temp['player']!='Tom Brady') & (qb_comb_temp['player']!='Tim Boyle')]

In [366]:
qb_comb_temp.to_csv('qb_pred_comb.csv')

In [444]:
df = pd.read_csv('qb_pred_comb.csv',index_col=0)
df = df[['player','prediction','preds']]
df = df.fillna(method = 'bfill',axis=1).fillna(method = 'ffill',axis=1)
df['combined'] = (df.prediction+df.preds)/2
df = df.sort_values('combined',ascending=False).reset_index(drop=True)
df.index+=1
df['prediction'] = df['prediction'].astype(int)
df['preds'] = df['preds'].astype(int)
df['combined'] = df['combined'].astype(int)
df = df.head(25)
df.to_csv('qb_pred_comb.csv')

In [445]:
df

Unnamed: 0,player,prediction,preds,combined
1,Patrick Mahomes,362,329,346
2,Josh Allen,343,327,335
3,Joe Burrow,309,325,317
4,Jalen Hurts,284,329,307
5,Justin Herbert,293,282,288
6,Kirk Cousins,242,289,265
7,Jared Goff,219,281,250
8,Trevor Lawrence,208,291,249
9,Geno Smith,183,290,237
10,Daniel Jones,191,272,232


In [203]:
rmse(qb_preds, 'xgb_preds')

18.91617918267365

In [446]:
qb_val = rmse(qb_val_preds, 'xgb_val_preds')

# RBs

In [453]:
rb_cols = ['age','g','gs','rush_att','rush_yard','y/a','rush_tds','tgt','rec','rec_yards','y/r','rec_tds','fmb',
           'fl','rush_rec_tds','ppr_pts','pos_rank','adp','adp_by_pos','round', 'rec%', 'pts_var_avg']

In [191]:
X_train = rb_df[rb_df['year']<2020]
X_val = rb_df[rb_df['year']<2022]
X_test = rb_df[rb_df['year']==2022]

y_train = X_train['target']
X_train.drop(columns=['target'], inplace = True)

y_val = X_val['target']
X_val.drop(columns=['target'], inplace = True)

X_test.drop(columns=['target'], inplace = True)

ss = StandardScaler()

X_train[rb_cols] = ss.fit_transform(X_train[rb_cols])
X_val[rb_cols] = ss.transform(X_val[rb_cols])
X_test[rb_cols] = ss.transform(X_test[rb_cols])

In [192]:
xgb_grid.fit(X_train[rb_cols],
             y_train)

#print(xgb_grid.best_score_)
print(xgb_grid.best_params_)

Fitting 4 folds for each of 7200 candidates, totalling 28800 fits
{'colsample_bytree': 0.7, 'gamma': 1, 'learning_rate': 0.03, 'max_depth': 6, 'min_child_weight': 1, 'n_estimators': 250, 'subsample': 0.7}


In [None]:
rb_xgb = xgboost.XGBRegressor(colsample_bytree=.7, gamma=1, eta=.03, max_depth=6,
                            min_child_weight=1,n_estimators=250,subsample=.7)

In [454]:
def rb_xgb_modeling(df, cols):
    
    X_train = df[df['year']<2020]
    X_val = df[df['year']<2022]
    X_test = df[df['year']==2022]
    
    y_train = X_train['target']
    X_train.drop(columns=['target'], inplace = True)
    
    y_val = X_val['target']
    X_val.drop(columns=['target'], inplace = True)

    X_test.drop(columns=['target'], inplace = True)
    
    ss = StandardScaler()
    
    X_train[cols] = ss.fit_transform(X_train[cols])
    X_val[cols] = ss.transform(X_val[cols])
    X_test[cols] = ss.transform(X_test[cols])
    
    cols.append('success')
    
    xgb = xgboost.XGBRegressor(colsample_bytree=.7, gamma=1, eta=.03, max_depth=6,
                                min_child_weight=1,n_estimators=250,subsample=.7)
    
    xgb.fit(X_train[cols], y_train, eval_set=[(X_train[cols], y_train), (X_val[cols], y_val)],
           early_stopping_rounds=25, verbose=False)
    xgb_preds = xgb.predict(X_train[cols])
    
    preds = pd.DataFrame({'actual':y_train,
                          'baseline':y_train.mean(),
                          'xgb_preds':xgb_preds})
    
    xgb_val_preds = xgb.predict(X_val[cols])
    
    val_preds = pd.DataFrame({'actual':y_val,
                              'baseline':y_train.mean(),
                              'xgb_val_preds':xgb_val_preds})
    
    
    pos_2023 = pd.DataFrame({'player':X_test['player'],
                             'preds':xgb.predict(X_test[cols])})
    
    val_2022 = X_val[X_val['year']==2021]
    pos_2022 = pd.DataFrame({'player':val_2022['player'],
                             'actual':val_2022['ppr_pts'],
                             'preds':xgb.predict(val_2022[cols])})
    
    return preds, val_preds, pos_2023, pos_2022

In [455]:
rb_preds, rb_val_preds, rb_2023,rb_2022 = rb_xgb_modeling(rb_df, rb_cols)

In [456]:
rb_preds_xgb = rb_2023.sort_values('preds',ascending=False).head(50)

In [413]:
rb_preds = pd.read_csv('rb_pred.csv', index_col=0)
rb_preds.drop(columns=['pos'], inplace=True)

In [414]:
rb_preds_xgb = rb_preds_xgb.reset_index(drop=True)

In [419]:
rb_preds_comb = pd.merge(left=rb_preds, right=rb_preds_xgb, on='player', how='outer')

In [420]:
rb_preds_comb.to_csv('rb_pred_comb.csv')

In [461]:
def clean_preds(df, pos):
    
    rb_preds = pd.read_csv('rb_pred.csv', index_col=0)
    rb_preds.drop(columns=['pos'], inplace=True)
    
    df = df.reset_index(drop=True)
    
    df = pd.merge(left=rb_preds, right=df, on='player', how='outer')
    
    df = df[['player','prediction','preds']]
    df = df.fillna(method = 'bfill',axis=1).fillna(method = 'ffill',axis=1)
    df['combined'] = (df.prediction+df.preds)/2
    df = df.sort_values('combined',ascending=False).reset_index(drop=True)
    df.index+=1
    df['prediction'] = df['prediction'].astype(int)
    df['preds'] = df['preds'].astype(int)
    df['combined'] = df['combined'].astype(int)
    
    if (pos == 'qb') or (pos == 'te'):
        return df.head(25)
    else:
        return df.head(50)

In [425]:
df = pd.read_csv('rb_pred_comb.csv',index_col=0)
df = df[['player','prediction','preds']]
df = df.fillna(method = 'bfill',axis=1).fillna(method = 'ffill',axis=1)
df['combined'] = (df.prediction+df.preds)/2
df = df.sort_values('combined',ascending=False).reset_index(drop=True)
df.index+=1
df['prediction'] = df['prediction'].astype(int)
df['preds'] = df['preds'].astype(int)
df['combined'] = df['combined'].astype(int)
df = df.head(50)
df.to_csv('rb_pred_comb.csv')

In [426]:
df

Unnamed: 0,player,prediction,preds,combined
1,Austin Ekeler,261,309,285
2,Christian McCaffrey,238,306,272
3,Jordan Howard,253,253,253
4,Josh Jacobs,213,283,248
5,Derrick Henry,196,276,236
6,Saquon Barkley,197,252,224
7,Nick Chubb,191,254,222
8,Joe Mixon,199,226,212
9,Aaron Jones,192,227,209
10,Tony Pollard,179,231,205


In [209]:
rmse(rb_preds, 'xgb_preds')

13.523177329729165

In [447]:
rb_val = rmse(rb_val_preds, 'xgb_val_preds')

# WRs/TEs

In [233]:
wr_cols = ['age','g','gs','tgt','rec','rec_yards','y/r','rec_tds','fmb','fl','rush_rec_tds','ppr_pts','pos_rank',
           'adp','adp_by_pos','round', 'rec%','pts_var_avg']

In [194]:
X_train = wr_df[wr_df['year']<2020]
X_val = wr_df[wr_df['year']<2022]
X_test = wr_df[wr_df['year']==2022]

y_train = X_train['target']
X_train.drop(columns=['target'], inplace = True)

y_val = X_val['target']
X_val.drop(columns=['target'], inplace = True)

X_test.drop(columns=['target'], inplace = True)

ss = StandardScaler()

X_train[wr_cols] = ss.fit_transform(X_train[wr_cols])
X_val[wr_cols] = ss.transform(X_val[wr_cols])
X_test[wr_cols] = ss.transform(X_test[wr_cols])

In [195]:
xgb_grid.fit(X_train[wr_cols],
             y_train)

#print(xgb_grid.best_score_)
print(xgb_grid.best_params_)

Fitting 4 folds for each of 7200 candidates, totalling 28800 fits
{'colsample_bytree': 0.85, 'gamma': 0, 'learning_rate': 0.07, 'max_depth': 5, 'min_child_weight': 1, 'n_estimators': 500, 'subsample': 0.85}


In [None]:
wrte_xgb = xgboost.XGBRegressor(colsample_bytree=.85, gamma=0, eta=.07, max_depth=5,
                           min_child_weight=1,n_estimators=500,subsample=.85)

In [459]:
def wrte_xgb_modeling(df, cols):
    
    X_train = df[df['year']<2020]
    X_val = df[df['year']<2022]
    X_test = df[df['year']==2022]
    
    y_train = X_train['target']
    X_train.drop(columns=['target'], inplace = True)
    
    y_val = X_val['target']
    X_val.drop(columns=['target'], inplace = True)

    X_test.drop(columns=['target'], inplace = True)
    
    ss = StandardScaler()
    
    X_train[cols] = ss.fit_transform(X_train[cols])
    X_val[cols] = ss.transform(X_val[cols])
    X_test[cols] = ss.transform(X_test[cols])
    
    cols.append('success')
    
    xgb = xgboost.XGBRegressor(colsample_bytree=.85, gamma=0, eta=.07, max_depth=5,
                               min_child_weight=1,n_estimators=500,subsample=.85)
    
    xgb.fit(X_train[cols], y_train, eval_set=[(X_train[cols], y_train), (X_val[cols], y_val)],
           early_stopping_rounds=5)
    xgb_preds = xgb.predict(X_train[cols], verbose=False)
    
    preds = pd.DataFrame({'actual':y_train,
                          'baseline':y_train.mean(),
                          'xgb_preds':xgb_preds})
    
    xgb_val_preds = xgb.predict(X_val[cols])
    
    val_preds = pd.DataFrame({'actual':y_val,
                              'baseline':y_train.mean(),
                              'xgb_val_preds':xgb_val_preds})
    
    
    pos_2023 = pd.DataFrame({'player':X_test['player'],
                             'preds':xgb.predict(X_test[cols])})
    
    val_2022 = X_val[X_val['year']==2021]
    pos_2022 = pd.DataFrame({'player':val_2022['player'],
                             'actual':val_2022['ppr_pts'],
                             'preds':xgb.predict(val_2022[cols])})
    
    return preds, val_preds, pos_2023, pos_2022

In [235]:
wr_preds, wr_val_preds, wr_2023, wr_2022 = wrte_xgb_modeling(wr_df, wr_cols)

[0]	validation_0-rmse:108.36294	validation_1-rmse:109.89212
[1]	validation_0-rmse:101.43283	validation_1-rmse:103.09740
[2]	validation_0-rmse:94.86782	validation_1-rmse:96.68743
[3]	validation_0-rmse:88.67631	validation_1-rmse:90.66108
[4]	validation_0-rmse:83.05522	validation_1-rmse:85.00340
[5]	validation_0-rmse:77.66100	validation_1-rmse:79.78135
[6]	validation_0-rmse:72.56065	validation_1-rmse:74.64810
[7]	validation_0-rmse:67.83953	validation_1-rmse:70.09741
[8]	validation_0-rmse:63.43357	validation_1-rmse:65.71172
[9]	validation_0-rmse:59.39306	validation_1-rmse:61.67533
[10]	validation_0-rmse:55.55921	validation_1-rmse:57.83311
[11]	validation_0-rmse:51.98138	validation_1-rmse:54.27967
[12]	validation_0-rmse:48.64443	validation_1-rmse:50.96037
[13]	validation_0-rmse:45.54544	validation_1-rmse:47.86546
[14]	validation_0-rmse:42.65242	validation_1-rmse:44.99921
[15]	validation_0-rmse:39.99246	validation_1-rmse:42.37022
[16]	validation_0-rmse:37.50025	validation_1-rmse:39.91308
[17

[142]	validation_0-rmse:0.68331	validation_1-rmse:7.58791
[143]	validation_0-rmse:0.68016	validation_1-rmse:7.58830
[144]	validation_0-rmse:0.67150	validation_1-rmse:7.58820
[145]	validation_0-rmse:0.66871	validation_1-rmse:7.58663
[146]	validation_0-rmse:0.66321	validation_1-rmse:7.58659
[147]	validation_0-rmse:0.65705	validation_1-rmse:7.58445
[148]	validation_0-rmse:0.65431	validation_1-rmse:7.58343
[149]	validation_0-rmse:0.64996	validation_1-rmse:7.58264
[150]	validation_0-rmse:0.64700	validation_1-rmse:7.58096
[151]	validation_0-rmse:0.64165	validation_1-rmse:7.58031
[152]	validation_0-rmse:0.63916	validation_1-rmse:7.57977
[153]	validation_0-rmse:0.63381	validation_1-rmse:7.57978
[154]	validation_0-rmse:0.62838	validation_1-rmse:7.57892
[155]	validation_0-rmse:0.62295	validation_1-rmse:7.57853
[156]	validation_0-rmse:0.62064	validation_1-rmse:7.57830
[157]	validation_0-rmse:0.61794	validation_1-rmse:7.57821
[158]	validation_0-rmse:0.61536	validation_1-rmse:7.57724
[159]	validati

In [430]:
wr_preds_xgb = wr_2023.sort_values('preds',ascending=False).head(50)

In [431]:
wr_preds = pd.read_csv('wr_pred.csv', index_col=0)
wr_preds.drop(columns=['pos'], inplace=True)

In [432]:
wr_preds_comb = pd.merge(left=wr_preds, right=wr_preds_xgb, on='player', how='outer')

In [433]:
wr_preds_comb.to_csv('wr_pred_comb.csv')

In [434]:
df = pd.read_csv('wr_pred_comb.csv',index_col=0)
df = df[['player','prediction','preds']]
df = df.fillna(method = 'bfill',axis=1).fillna(method = 'ffill',axis=1)
df['combined'] = (df.prediction+df.preds)/2
df = df.sort_values('combined',ascending=False).reset_index(drop=True)
df.index+=1
df['prediction'] = df['prediction'].astype(int)
df['preds'] = df['preds'].astype(int)
df['combined'] = df['combined'].astype(int)
df = df.head(50)
df.to_csv('wr_pred_comb.csv')

In [435]:
df

Unnamed: 0,player,prediction,preds,combined
1,Tyreek Hill,268,322,295
2,Justin Jefferson,296,286,291
3,Stefon Diggs,245,312,279
4,Davante Adams,253,292,272
5,CeeDee Lamb,247,295,271
6,A.J. Brown,221,298,259
7,Amon-Ra St. Brown,220,263,241
8,Ja'Marr Chase,236,242,239
9,Jaylen Waddle,214,251,233
10,DeVonta Smith,193,248,220


In [237]:
rmse(wr_preds, 'xgb_preds')

0.4640998924589167

In [448]:
wr_val = rmse(wr_val_preds, 'xgb_val_preds')

In [239]:
te_cols = ['age','g','gs','tgt','rec','rec_yards','y/r','rec_tds','fmb','fl','rush_rec_tds','ppr_pts','pos_rank',
           'adp','adp_by_pos','round', 'rec%','pts_var_avg']

In [240]:
te_preds, te_val_preds, te_2023, te_2022 = wrte_xgb_modeling(te_df, te_cols)

[0]	validation_0-rmse:74.08398	validation_1-rmse:75.84581
[1]	validation_0-rmse:69.93834	validation_1-rmse:71.61931
[2]	validation_0-rmse:65.88765	validation_1-rmse:67.44133
[3]	validation_0-rmse:62.00945	validation_1-rmse:63.57528
[4]	validation_0-rmse:58.43131	validation_1-rmse:60.06154
[5]	validation_0-rmse:55.20065	validation_1-rmse:56.87655
[6]	validation_0-rmse:52.12962	validation_1-rmse:53.91140
[7]	validation_0-rmse:49.03821	validation_1-rmse:51.01485
[8]	validation_0-rmse:46.18208	validation_1-rmse:48.31129
[9]	validation_0-rmse:43.61430	validation_1-rmse:45.79328
[10]	validation_0-rmse:41.21801	validation_1-rmse:43.51333
[11]	validation_0-rmse:38.99994	validation_1-rmse:41.45489
[12]	validation_0-rmse:36.95866	validation_1-rmse:39.46019
[13]	validation_0-rmse:34.96949	validation_1-rmse:37.70848
[14]	validation_0-rmse:33.18301	validation_1-rmse:36.02322
[15]	validation_0-rmse:31.38439	validation_1-rmse:34.49426
[16]	validation_0-rmse:29.81877	validation_1-rmse:33.05830
[17]	va

In [436]:
te_preds_xgb = te_2023.sort_values('preds',ascending=False).head(25)

In [437]:
te_preds = pd.read_csv('te_pred.csv', index_col=0)
te_preds.drop(columns=['pos'], inplace=True)

In [438]:
te_preds_comb = pd.merge(left=te_preds, right=te_preds_xgb, on='player', how='outer')

In [439]:
te_preds_comb.to_csv('te_pred_comb.csv')

In [440]:
df = pd.read_csv('te_pred_comb.csv',index_col=0)
df = df[['player','prediction','preds']]
df = df.fillna(method = 'bfill',axis=1).fillna(method = 'ffill',axis=1)
df['combined'] = (df.prediction+df.preds)/2
df = df.sort_values('combined',ascending=False).reset_index(drop=True)
df.index+=1
df['prediction'] = df['prediction'].astype(int)
df['preds'] = df['preds'].astype(int)
df['combined'] = df['combined'].astype(int)
df = df.head(25)
df.to_csv('te_pred_comb.csv')

In [441]:
df

Unnamed: 0,player,prediction,preds,combined
1,Travis Kelce,228,243,236
2,Mark Andrews,191,180,185
3,T.J. Hockenson,161,202,182
4,George Kittle,168,169,169
5,Tommy Sweeney,152,152,152
6,Jacob Harris,152,152,152
7,Pat Freiermuth,131,161,146
8,Evan Engram,113,170,142
9,Dallas Goedert,140,131,135
10,Tyler Higbee,104,166,135


In [242]:
rmse(te_preds, 'xgb_preds')

6.741357861979092

In [449]:
te_val = rmse(te_val_preds, 'xgb_val_preds')

In [450]:
(qb_val + rb_val + wr_val + te_val)/4

22.026818849869027