<a href="https://colab.research.google.com/github/brianlawrence2/fantasy_football/blob/main/xgboost_optuna.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import auth
auth.authenticate_user()
print('Authenticated')

%load_ext google.colab.data_table

Authenticated


In [None]:
%pip install optuna

import numpy as np
import pandas as pd
import optuna

import sklearn.datasets
import sklearn.metrics
from sklearn.model_selection import train_test_split
import xgboost as xgb

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
%%bigquery --project fantasyvbd df
SELECT * FROM `fantasyvbd.fantasy_football.view_training_data`

In [None]:
df.head()



Unnamed: 0,player_id,display_name,season,prior_season,fantasy_points_per_game,prior_season_fantasy_points_per_game,games_played,passing_yards_per_game,passing_touchdowns_per_game,interceptions_per_game,...,career_yards_per_target,career_rushing_attempts_per_game,career_rushing_yards_per_game,career_rushing_touchdowns_per_game,career_rushing_attempt_to_touchdown_rate,career_rushing_yards_per_attempt,position_QB,position_RB,position_TE,position_WR
0,00-0027973,Andy Dalton,2017,2016,15.91875,18.04,16,262.875,1.125,0.5,...,9.0,3.311828,10.215054,0.193548,20.791667,3.084416,1,0,0,0
1,00-0023460,Jason Campbell,2010,2009,13.821538,16.145,16,226.125,1.3125,0.9375,...,,2.961538,15.230769,0.057692,32.75,5.142857,1,0,0,0
2,00-0031345,Jimmy Garoppolo,2019,2018,18.7075,18.673333,3,239.333333,1.666667,1.0,...,-1.5,2.0,2.25,0.041667,3.0,1.125,1,0,0,0
3,00-0033106,Jared Goff,2017,2016,21.150667,9.308571,7,155.571429,0.857143,1.0,...,,1.142857,2.285714,0.142857,8.0,2.0,1,0,0,0
4,00-0020531,Drew Brees,2016,2015,25.52,24.146667,15,324.666667,2.133333,0.733333,...,9.714286,1.769585,3.24424,0.064516,14.611111,1.833333,1,0,0,0


In [None]:
df = pd.get_dummies(df,columns=['position'])
X, y = df.loc[:, ~df.columns.isin(['display_name','season','prior_season','fantasy_points_per_game','games_played','player_id'])],df.iloc[:,4]

In [None]:

def objective(trial):
    train_x, valid_x, train_y, valid_y = train_test_split(X, y, test_size=0.25)
    dtrain = xgb.DMatrix(train_x, label=train_y)
    dvalid = xgb.DMatrix(valid_x, label=valid_y)

    param = {
        "verbosity": 0,
        "objective": "reg:linear",
        # use exact for small dataset.
        "tree_method": "exact",
        # defines booster, gblinear for linear functions.
        "booster": trial.suggest_categorical("booster", ["gbtree", "gblinear", "dart"]),
        # L2 regularization weight.
        "lambda": trial.suggest_float("lambda", 1e-8, 1.0, log=True),
        # L1 regularization weight.
        "alpha": trial.suggest_float("alpha", 1e-8, 1.0, log=True),
        # sampling ratio for training data.
        "subsample": trial.suggest_float("subsample", 0.2, 1.0),
        # sampling according to each tree.
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.2, 1.0),
    }

    if param["booster"] in ["gbtree", "dart"]:
        # maximum depth of the tree, signifies complexity of the tree.
        param["max_depth"] = trial.suggest_int("max_depth", 3, 9, step=2)
        # minimum child weight, larger the term more conservative the tree.
        param["min_child_weight"] = trial.suggest_int("min_child_weight", 2, 10)
        param["eta"] = trial.suggest_float("eta", 1e-8, 1.0, log=True)
        # defines how selective algorithm is.
        param["gamma"] = trial.suggest_float("gamma", 1e-8, 1.0, log=True)
        param["grow_policy"] = trial.suggest_categorical("grow_policy", ["depthwise", "lossguide"])

    if param["booster"] == "dart":
        param["sample_type"] = trial.suggest_categorical("sample_type", ["uniform", "weighted"])
        param["normalize_type"] = trial.suggest_categorical("normalize_type", ["tree", "forest"])
        param["rate_drop"] = trial.suggest_float("rate_drop", 1e-8, 1.0, log=True)
        param["skip_drop"] = trial.suggest_float("skip_drop", 1e-8, 1.0, log=True)

    bst = xgb.train(param, dtrain)
    preds = bst.predict(dvalid)
    pred_labels = np.rint(preds)
    rmse = sklearn.metrics.mean_squared_error(valid_y, pred_labels, squared=False)
    return rmse

In [None]:
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=1000, timeout=600)

print("Number of finished trials: ", len(study.trials))
print("Best trial:")
trial = study.best_trial

print("  Value: {}".format(trial.value))
print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

[32m[I 2022-08-24 22:24:48,494][0m A new study created in memory with name: no-name-995ab27e-284f-4322-a2e1-782f36e96069[0m
[32m[I 2022-08-24 22:24:48,785][0m Trial 0 finished with value: 4.895092057290282 and parameters: {'booster': 'gbtree', 'lambda': 0.0043872213842106695, 'alpha': 0.793669337236669, 'subsample': 0.8209833804959963, 'colsample_bytree': 0.43110448254248923, 'max_depth': 7, 'min_child_weight': 8, 'eta': 0.1088552987341349, 'gamma': 4.727016995243628e-08, 'grow_policy': 'depthwise'}. Best is trial 0 with value: 4.895092057290282.[0m
[32m[I 2022-08-24 22:24:48,965][0m Trial 1 finished with value: 8.910817628907997 and parameters: {'booster': 'gbtree', 'lambda': 1.0306955305780164e-08, 'alpha': 0.0050341280879103055, 'subsample': 0.5702565961701276, 'colsample_bytree': 0.5757999156096001, 'max_depth': 3, 'min_child_weight': 8, 'eta': 0.0004976886544049378, 'gamma': 0.00034229457170872256, 'grow_policy': 'lossguide'}. Best is trial 0 with value: 4.895092057290282.

Number of finished trials:  1000
Best trial:
  Value: 3.8163996726799487
  Params: 
    booster: gblinear
    lambda: 0.002006492333077248
    alpha: 1.9685317417493798e-05
    subsample: 0.3642607826092408
    colsample_bytree: 0.7574269939182978


In [None]:
optuna.visualization.plot_optimization_history(study)

In [None]:
optuna.visualization.plot_slice(study)

In [None]:
params = study.best_trial.params
dtrain = xgb.DMatrix(X,y)

In [None]:
from xgboost.sklearn import XGBRegressor
xgb_model = xgb.XGBRegressor(**params)
xgb_model.fit(X,y)



XGBRegressor(alpha=1.9685317417493798e-05, booster='gblinear',
             colsample_bytree=0.7574269939182978, lambda=0.002006492333077248,
             subsample=0.3642607826092408)

In [None]:
sklearn.metrics.mean_squared_error(y,xgb_model.predict(X), squared=False), sklearn.metrics.mean_absolute_error(y,xgb_model.predict(X))

(3.982654257097364, 2.9695025805834896)

In [72]:
%%bigquery --project fantasyvbd pred_df
select * from fantasyvbd.fantasy_football.view_2022_predictions

In [76]:
pred_X = pred_df.loc[:, ~pred_df.columns.isin(['display_name','season','prior_season','fantasy_points_per_game','games_played','player_id'])]
pred_X = pd.get_dummies(pred_X,columns=['position'])

#pred_X = xgb.DMatrix(pred_X)

In [91]:
pred_X = pred_X[xgb_model.get_booster().feature_names]
pred_X.head()



Unnamed: 0,prior_season_fantasy_points_per_game,passing_yards_per_game,passing_touchdowns_per_game,interceptions_per_game,passing_air_yards_per_game,passing_air_yard_rate,passing_adot,attempts_per_game,completions_per_game,completion_rate,...,career_yards_per_target,career_rushing_attempts_per_game,career_rushing_yards_per_game,career_rushing_touchdowns_per_game,career_rushing_attempt_to_touchdown_rate,career_rushing_yards_per_attempt,position_QB,position_RB,position_TE,position_WR
0,26.867059,312.705882,2.529412,0.705882,342.411765,0.913245,8.095967,42.294118,28.529412,0.674548,...,21.666667,2.08805,3.534591,0.091195,16.304167,1.692771,1,0,0,0
1,25.45625,257.1875,2.3125,0.25,254.875,1.009073,7.679849,33.1875,22.875,0.689266,...,-3.666667,3.239437,15.802817,0.159624,16.409804,4.878261,1,0,0,0
2,3.415385,,,,,,,,,,...,7.81451,,,,,,0,0,1,0
3,14.771765,233.411765,1.176471,0.705882,235.352941,0.991752,7.144643,32.941176,22.058824,0.669643,...,5.0,2.220721,6.635135,0.058559,15.142857,2.98783,1,0,0,0
4,14.91,169.0,1.5,0.0,168.0,1.005952,8.0,21.0,13.5,0.642857,...,17.5,2.044944,4.848315,0.089888,14.642857,2.370879,1,0,0,0


In [92]:
preds = xgb_model.predict(pred_X)

In [93]:
pred_df['preds'] = preds

In [94]:
pred_df.loc[pred_df.position == 'WR',['display_name','position','season','preds']].sort_values('preds', ascending=False)

Unnamed: 0,display_name,position,season,preds
188,Cooper Kupp,WR,2022,18.944038
416,Justin Jefferson,WR,2022,17.497499
357,Deebo Samuel,WR,2022,16.494938
52,Davante Adams,WR,2022,16.400221
495,Ja'Marr Chase,WR,2022,15.892972
...,...,...,...,...
367,Maurice Ffrench,WR,2022,1.872285
470,Racey McMath,WR,2022,1.715156
198,Rodney Adams,WR,2022,1.373369
336,Alex Bachman,WR,2022,1.273622


In [95]:
QB_replacement = pred_df.loc[pred_df.position == 'QB',['preds']].sort_values('preds', ascending=False).iloc[10,0]
RB_replacement = pred_df.loc[pred_df.position == 'RB',['preds']].sort_values('preds', ascending=False).iloc[30,0]
WR_replacement = pred_df.loc[pred_df.position == 'WR',['preds']].sort_values('preds', ascending=False).iloc[30,0]
TE_replacement = pred_df.loc[pred_df.position == 'TE',['preds']].sort_values('preds', ascending=False).iloc[10,0]

In [96]:
QB_replacement, RB_replacement, WR_replacement, TE_replacement

(18.569784, 10.024582, 11.775611, 8.7368)

In [97]:
pred_df.loc[pred_df.position == 'QB','VBD'] = pred_df.loc[pred_df.position == 'QB','preds'] - QB_replacement
pred_df.loc[pred_df.position == 'RB','VBD'] = pred_df.loc[pred_df.position == 'RB','preds'] - RB_replacement
pred_df.loc[pred_df.position == 'WR','VBD'] = pred_df.loc[pred_df.position == 'WR','preds'] - WR_replacement
pred_df.loc[pred_df.position == 'TE','VBD'] = pred_df.loc[pred_df.position == 'TE','preds'] - TE_replacement

In [98]:
pred_df.loc[:,['display_name','position','season','preds','VBD']].sort_values('VBD', ascending=False)

Unnamed: 0,display_name,position,season,preds,VBD
134,Christian McCaffrey,RB,2022,17.253036,7.228455
188,Cooper Kupp,WR,2022,18.944038,7.168427
390,Jonathan Taylor,RB,2022,16.425636,6.401054
105,Derrick Henry,RB,2022,15.941985,5.917403
187,Alvin Kamara,RB,2022,15.910965,5.886383
...,...,...,...,...,...
490,Sam Ehlinger,QB,2022,1.856993,-16.712791
482,Feleipe Franks,QB,2022,1.589596,-16.980188
206,Kurt Benkert,QB,2022,1.579033,-16.990751
238,Logan Woodside,QB,2022,1.442193,-17.127592
