In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import MinMaxScaler
from xgboost import XGBRegressor

In [2]:
# Read the raw player statistics table from the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='data.csv')
# Show the first five rows as a quick check of the column layout.
df.head(n=5)

Unnamed: 0,Player,Season,Passing Yds,Passing Tds,Rushing Yds,Rushing Tds,Receiving Yds,Receiving Tds,VBD
0,Ron Johnson,1970,0,0,1027,8,48,5,135
1,Gene Washington,1970,0,0,0,0,44,1,33
2,MacArthur Lane,1970,0,0,977,11,32,2,126
3,Warren Wells,1970,0,0,34,0,43,0,112
4,John Brodie,1970,2941,24,29,2,0,1,105


In [3]:
# Collapse `df` to one row per player name.
# The six yardage/touchdown columns are summed over all of a player's rows,
# while the regression target (VBD) is averaged over those same rows.
# NOTE(review): grouping is by name only, so two different players who share
# a name would be merged into one row -- confirm the data has no collisions.
sum_columns = [
    'Passing Yds', 'Passing Tds',
    'Rushing Yds', 'Rushing Tds',
    'Receiving Yds', 'Receiving Tds',
]
agg_spec = {column: 'sum' for column in sum_columns}
agg_spec['VBD'] = 'mean'

per_player = df.groupby("Player")
# reset_index() turns the "Player" group key back into an ordinary column.
aggregated = per_player.agg(agg_spec).reset_index()

In [4]:
# Stat columns that get min-max normalized.
features = [
    'Passing Yds', 'Passing Tds',
    'Rushing Yds', 'Rushing Tds',
    'Receiving Yds', 'Receiving Tds',
]

# Learn per-column min/max, then overwrite the feature columns of `aggregated`
# with their scaled values.
# NOTE(review): the scaler is fit on every row, before the train/test split
# done in a later cell, so held-out rows contribute to the scaling bounds.
scaler = MinMaxScaler()
scaler.fit(aggregated[features])
aggregated[features] = scaler.transform(aggregated[features])

In [5]:
# Build the design matrix from the explicit feature list instead of
# "everything except Player/VBD". A later cell adds an 'XGB Predicted Score'
# column to `aggregated`; with the drop-based selection, re-running this cell
# afterwards would feed the model's own predictions back in as a feature.
# On a fresh top-to-bottom run both selections give the same six columns in
# the same order.
X = aggregated[features]
y = aggregated['VBD']

# Hold out 20% of the players for evaluation; the fixed seed keeps the
# partition reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
# Gradient-boosted regressor settings: squared-error loss, 100 boosting
# rounds, and a fixed seed for reproducibility.
xgb_params = dict(
    objective='reg:squarederror',
    n_estimators=100,
    random_state=42,
)
model = XGBRegressor(**xgb_params)

# Fit on the training split only; as the cell's last expression, the fitted
# estimator is also what the notebook displays.
model.fit(X_train, y_train)

In [7]:
# Predict VBD for the held-out players.
y_pred = model.predict(X_test).flatten()

# Error metrics on the held-out split. RMSE is derived from MSE so it is in
# the same units as VBD; MAE is computed independently.
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
rmse = np.sqrt(mse)

# Report in the order MSE, RMSE, MAE with two decimals each.
print(f"MSE: {mse:.2f}")
print(f"RMSE: {rmse:.2f}")
print(f"MAE: {mae:.2f}")

MSE: 33.53
RMSE: 5.79
MAE: 2.06


In [8]:
# Score every player and store the prediction next to the actual VBD.
# The predictions come back as float32; rounding in float32 leaves values that
# display with representation noise (e.g. 120.209999 instead of 120.21), so
# widen to float64 first and round afterwards.
# Note: X contains all players, including the rows the model was trained on,
# so these scores are not out-of-sample estimates.
aggregated["XGB Predicted Score"] = model.predict(X).astype(np.float64).round(2)

# Rank by predicted score (highest first) and keep the ten best.
top10 = aggregated[['Player', 'XGB Predicted Score', 'VBD']].sort_values('XGB Predicted Score', ascending=False).head(10)
print('\nTop 10 Players by Predicted Score:')
print(top10)


Top 10 Players by Predicted Score:
                   Player  XGB Predicted Score         VBD
372         Barry Sanders           120.209999  120.700000
6643        Walter Payton            98.489998   98.846154
4450       Marshall Faulk            95.919998   96.083333
2172         Emmitt Smith            87.260002   86.933333
4024  LaDainian Tomlinson            84.070000  118.181818
6250          Todd Gurley            83.480003   85.166667
3576           Josh Allen            83.309998   84.857143
2788         Jahmyr Gibbs            83.120003   91.500000
6664         Warren Wells            81.930000  112.000000
1809        Derrick Henry            81.860001   82.222222


In [9]:
# Write only the player name and predicted score to disk, without the
# DataFrame's row index.
export_columns = ['Player', 'XGB Predicted Score']
aggregated[export_columns].to_csv('xgboost.csv', index=False)