In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import GradientBoostingRegressor

# Notebook display config: passing None removes the cap on how many columns
# pandas prints, so wide frames shown below are not truncated.
pd.set_option('display.max_columns', None)

In [59]:
# Raw running-back train/test tables, read from the project's data/final
# directory (paths are relative to this notebook's working directory).
train_csv = '../../data/final/rb_train_updated_2024.csv'
test_csv = '../../data/final/rb_test_updated_2024.csv'

rb_train_orig, rb_test_orig = (pd.read_csv(csv_path) for csv_path in (train_csv, test_csv))

In [60]:
### Keep one row per contract: the most recent season on record for it.
# (Presumably that is the season right before the contract -- confirm against
# how the CSVs were assembled.)


def _latest_complete_season(frame):
    """Return the latest-season row per (player_display_name, year_signed).

    Rows are ordered by 'season', the last row of every contract group is
    kept, and any row with a missing value in *any* column is then dropped.
    """
    contract_keys = ['player_display_name', 'year_signed']
    newest_rows = frame.sort_values('season').groupby(contract_keys).tail(1)
    return newest_rows.dropna()


rb_train = _latest_complete_season(rb_train_orig)
rb_test = _latest_complete_season(rb_test_orig)

In [65]:
# Identifier, target and deliberately skipped columns; everything else in
# rb_train is treated as a per-contract feature in this cell.
non_feature_cols = ['season', 'player_display_name', 'salary_per_year', 'smoothed_salary_per_year', 'year_signed', 'mean_adj_salary_per_year', 'med_adj_salary_per_year', 'passing_epa']
preds = [name for name in rb_train.columns if name not in non_feature_cols]


def _features_with_salary(season_rows, contract_rows, feature_cols):
    """Average the features per contract and attach the salary columns.

    season_rows   -- filtered frame holding the feature columns
    contract_rows -- original frame supplying the two salary columns
    feature_cols  -- names of the columns to average

    The merge is a right join, so every (player_display_name, year_signed)
    pair present in ``contract_rows`` comes back -- including pairs missing
    from ``season_rows``, whose features are then NaN. Repeated pairs are
    collapsed to their first occurrence.
    """
    keys = ['player_display_name', 'year_signed']
    salary_cols = ['salary_per_year', 'smoothed_salary_per_year']

    per_contract = season_rows.groupby(keys)[feature_cols].mean().reset_index()
    joined = per_contract.merge(contract_rows[keys + salary_cols], on=keys, how='right')
    return joined.drop_duplicates(subset=keys)


rb_train = _features_with_salary(rb_train, rb_train_orig, preds)
rb_test = _features_with_salary(rb_test, rb_test_orig, preds)

In [68]:
# The right-merge against the original tables can bring back contracts that
# have no feature values; discard every row that still has a missing value.
rb_train, rb_test = rb_train.dropna(), rb_test.dropna()

In [69]:
# Sanity check: (rows, columns) of the modelling frames, train first, then test.
print(rb_train.shape, rb_test.shape, sep='\n')

(635, 19)
(69, 19)


In [70]:
# Model inputs: every column except the identifiers and the two salary columns.
# 'year_signed' is not in the exclusion set, so it is used as a feature.
excluded_cols = {'season', 'player_display_name', 'salary_per_year', 'smoothed_salary_per_year'}
preds = [name for name in rb_train.columns if name not in excluded_cols]

X = rb_train[preds]
y = rb_train['salary_per_year']  # target: raw (unsmoothed) salary per year

# Gradient-boosting hyperparameters; random_state pins the row subsampling.
gbm_params = dict(
    n_estimators=200,
    learning_rate=.05,
    max_depth=2,
    random_state=40,
    subsample=.8,
    min_samples_leaf=5,
    min_samples_split=5,
)
gbm = GradientBoostingRegressor(**gbm_params)
gbm.fit(X, y)

In [71]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


def _regression_scores(actual, predicted):
    """Return (MSE, MAE, R²) for one set of actual vs. predicted values."""
    return (
        mean_squared_error(actual, predicted),
        mean_absolute_error(actual, predicted),
        r2_score(actual, predicted),
    )


def _print_scores(title, mse, mae, r2, trailing_blank=False):
    """Print one metrics section; optionally end it with a blank line."""
    print(title)
    print(f"  MSE: {mse:.4f}")
    print(f"  MAE: {mae:.4f}")
    print(f"  R²:  {r2:.4f}" + ("\n" if trailing_blank else ""))


X_test = rb_test[preds]
# Rows with missing values were already dropped from rb_test, exactly as for
# rb_train. Zero-filling here would be a no-op today and, if gaps ever
# reappeared, would score the model on imputed rows it was never trained on --
# so fail loudly instead of imputing silently.
assert not X_test.isna().any().any(), "rb_test has missing feature values; drop or impute them explicitly"

y_test = rb_test['salary_per_year'].to_numpy()
y_pred = gbm.predict(X_test)
y_train_pred = gbm.predict(X)

# In-sample scores (optimistic) next to held-out scores.
mse_train, mae_train, r2_train = _regression_scores(y, y_train_pred)
mse_test, mae_test, r2_test = _regression_scores(y_test, y_pred)

_print_scores("Training Set:", mse_train, mae_train, r2_train, trailing_blank=True)
_print_scores("Test Set:", mse_test, mae_test, r2_test)

Training Set:
  MSE: 1.5018
  MAE: 0.7771
  R²:  0.8000

Test Set:
  MSE: 4.4852
  MAE: 1.2282
  R²:  0.6765


In [72]:
# Display the (rows, columns) of the test frame before predictions are attached.
rb_test.shape

(69, 19)

In [75]:
# Attach the model's test-set predictions as a new column, then show the
# contracts whose predicted salary exceeds the actual salary_per_year by more than 1.
rb_test = rb_test.assign(predicted_salary=y_pred)

prediction_surplus = rb_test['predicted_salary'] - rb_test['salary_per_year']
rb_test[prediction_surplus > 1]

Unnamed: 0,player_display_name,year_signed,games,carries,rushing_yards,rushing_tds,rushing_fumbles,rushing_fumbles_lost,rushing_first_downs,rushing_epa,receptions,targets,receiving_yards_after_catch,receiving_first_downs,receiving_epa,target_share,fantasy_points,salary_per_year,smoothed_salary_per_year,predicted_salary
4,J.K. Dobbins,2024.0,1.0,8.0,22.0,1.0,0.0,0.0,1.0,-1.840595,2.0,3.0,15.0,0.0,0.5192,0.142857,9.7,1.61,0.91506,3.353612
62,Raheem Mostert,2024.0,16.0,217.0,1045.0,18.0,2.0,1.0,61.0,16.812971,26.0,34.0,216.0,6.0,-3.754444,0.070833,245.7,4.13,4.192311,7.037725
65,Ezekiel Elliott,2024.0,17.0,184.0,642.0,3.0,1.0,0.0,33.0,-27.282745,51.0,65.0,374.0,12.0,-10.1075,0.138004,123.5,2.0,1.422253,3.668581
69,Samaje Perine,2024.0,17.0,53.0,238.0,1.0,1.0,1.0,14.0,-9.491748,50.0,56.0,462.0,23.0,12.130848,0.1174,71.3,1.5,0.772005,3.053163
79,Gus Edwards,2024.0,19.0,211.0,870.0,13.0,2.0,2.0,53.0,-11.409124,14.0,16.0,160.0,6.0,8.979267,0.055363,182.5,3.25,3.047874,4.334043
122,Deon Jackson,2024.0,2.0,14.0,16.0,0.0,1.0,1.0,1.0,-11.443214,5.0,6.0,20.0,0.0,-8.510521,0.153846,-1.0,1.055,0.193284,2.812002


In [80]:
# Contracts whose actual salary_per_year exceeds the model's prediction by more than 4.
paid_above_prediction = (rb_test['salary_per_year'] - rb_test['predicted_salary']) > 4
rb_test[paid_above_prediction]

Unnamed: 0,player_display_name,year_signed,games,carries,rushing_yards,rushing_tds,rushing_fumbles,rushing_fumbles_lost,rushing_first_downs,rushing_epa,receptions,targets,receiving_yards_after_catch,receiving_first_downs,receiving_epa,target_share,fantasy_points,salary_per_year,smoothed_salary_per_year,predicted_salary
3,Alvin Kamara,2024.0,13.0,180.0,694.0,5.0,0.0,0.0,42.0,-3.14502,75.0,86.0,501.0,23.0,2.29824,0.189845,158.0,12.25,14.752343,6.164464
28,Chuba Hubbard,2024.0,17.0,238.0,902.0,5.0,0.0,0.0,59.0,-21.515964,39.0,44.0,269.0,9.0,-10.740265,0.086614,143.5,8.3,9.615382,3.23636
32,Rhamondre Stevenson,2024.0,12.0,156.0,619.0,4.0,1.0,1.0,36.0,-12.854979,38.0,51.0,261.0,8.0,-18.016631,0.1275,107.7,9.0,10.525729,3.650834
36,James Conner,2024.0,13.0,208.0,1040.0,7.0,0.0,0.0,57.0,13.943058,27.0,33.0,221.0,11.0,4.219234,0.09375,174.5,9.5,11.175977,5.237823
88,Saquon Barkley,2024.0,14.0,247.0,962.0,6.0,2.0,2.0,51.0,-50.373962,41.0,60.0,226.0,16.0,-6.571396,0.153453,182.2,12.583333,15.185841,5.063215
98,Josh Jacobs,2024.0,13.0,233.0,805.0,6.0,2.0,1.0,34.0,-44.319292,37.0,54.0,293.0,15.0,-0.396549,0.141732,144.1,12.0,14.427219,4.843614


In [81]:
# Rank the model's features from most to least important.
importances = gbm.feature_importances_
feature_names = X.columns
indices = np.argsort(importances)[::-1]  # positions sorted by descending importance

print("Feature ranking:")
for rank, position in enumerate(indices, start=1):
    print(f"{rank}. {feature_names[position]}: {importances[position]:.4f}")

Feature ranking:
1. fantasy_points: 0.4310
2. rushing_yards: 0.1686
3. target_share: 0.1254
4. receiving_yards_after_catch: 0.0820
5. year_signed: 0.0574
6. receiving_epa: 0.0264
7. rushing_epa: 0.0228
8. receptions: 0.0194
9. targets: 0.0163
10. carries: 0.0153
11. receiving_first_downs: 0.0106
12. games: 0.0084
13. rushing_tds: 0.0080
14. rushing_fumbles: 0.0041
15. rushing_fumbles_lost: 0.0023
16. rushing_first_downs: 0.0021


In [82]:
# Export the test frame (features, actual salaries and predicted_salary) for the
# dashboard. No index argument is passed, so the frame's index is written as the
# first column of the file.
dashboard_csv = '../../data/dashboard/rb.csv'
rb_test.to_csv(dashboard_csv)