In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso

In [2]:
# Read the raw stat table from the CSV in the working directory.
csv_path = 'data.csv'
df = pd.read_csv(csv_path)

# Preview the first rows as a sanity check on the parsed columns.
df.head()

Unnamed: 0,Player,Season,Passing Yds,Passing Tds,Rushing Yds,Rushing Tds,Receiving Yds,Receiving Tds,VBD
0,Ron Johnson,1970,0,0,1027,8,48,5,135
1,Gene Washington,1970,0,0,0,0,44,1,33
2,MacArthur Lane,1970,0,0,977,11,32,2,126
3,Warren Wells,1970,0,0,34,0,43,0,112
4,John Brodie,1970,2941,24,29,2,0,1,105


In [3]:
# Collapse the table to one row per player: the six stat columns are summed
# over all of a player's rows, while the regression target (VBD) is averaged.
summed_columns = [
    'Passing Yds',
    'Passing Tds',
    'Rushing Yds',
    'Rushing Tds',
    'Receiving Yds',
    'Receiving Tds',
]
agg_spec = {column: 'sum' for column in summed_columns}
agg_spec['VBD'] = 'mean'  # target stays a mean rather than a total

per_player = df.groupby("Player").agg(agg_spec)
# Move "Player" from the index back into an ordinary column.
aggregated = per_player.reset_index()

In [4]:
# Stat columns to rescale with min-max normalization (one min/max per column).
features = [
    'Passing Yds', 'Passing Tds',
    'Rushing Yds', 'Rushing Tds',
    'Receiving Yds', 'Receiving Tds',
]

# NOTE(review): the scaler is fit on every row of `aggregated`, i.e. before the
# train/test split performed later in the notebook -- confirm this is intended.
scaler = MinMaxScaler()
scaler.fit(aggregated[features])
# Overwrite the raw totals in place with their scaled values.
aggregated[features] = scaler.transform(aggregated[features])

In [5]:
# Select the model inputs by name instead of "everything except Player/VBD".
# A later cell appends prediction columns to `aggregated`; if this cell were
# re-run afterwards, dropping only Player/VBD would feed those predictions
# back in as features. On a fresh top-to-bottom run the result is unchanged.
X = aggregated[features]
y = aggregated['VBD']

# Hold out 20% of the players for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
# Three linear models: an unpenalized baseline plus Ridge and Lasso variants,
# both configured with the same penalty strength.
lr = LinearRegression()
ridge = Ridge(alpha=0.001)
lasso = Lasso(alpha=0.001)

# Fit every estimator on the same training split.
lr.fit(X_train, y_train)
ridge.fit(X_train, y_train)
lasso.fit(X_train, y_train)

In [7]:
def evaluate(model, y_pred, y_true=None):
    """Print MSE, RMSE and MAE for a set of predictions.

    Parameters
    ----------
    model : str
        Label printed as the heading of the report (callers in this notebook
        pass a display name such as 'Ridge', not an estimator object).
    y_pred : array-like
        Predicted target values.
    y_true : array-like, optional
        Ground-truth target values aligned with ``y_pred``. When omitted, the
        notebook-level ``y_test`` is used, which matches the original
        two-argument behavior.

    Returns
    -------
    None
        The metrics are only printed, each formatted to two decimals.
    """
    if y_true is None:
        # Backward-compatible default: fall back to the held-out targets
        # defined by the train/test split cell (looked up at call time).
        y_true = y_test

    print(f'{model}:')
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)  # RMSE is reported in the same units as the target
    mae = mean_absolute_error(y_true, y_pred)

    print(f"MSE: {mse:.2f}")
    print(f"RMSE: {rmse:.2f}")
    print(f"MAE: {mae:.2f}\n")

In [8]:
# Score the held-out rows with each fitted model.
lr_pred, ridge_pred, lasso_pred = (
    fitted.predict(X_test) for fitted in (lr, ridge, lasso)
)

# Report the error metrics model by model, in a fixed order.
reports = (
    ('Linear Regression', lr_pred),
    ('Ridge', ridge_pred),
    ('Lasso', lasso_pred),
)
for label, predictions in reports:
    evaluate(label, predictions)

Linear Regression:
MSE: 35.29
RMSE: 5.94
MAE: 2.80

Ridge:
MSE: 35.30
RMSE: 5.94
MAE: 2.80

Lasso:
MSE: 35.75
RMSE: 5.98
MAE: 2.79



In [9]:
# Attach each model's prediction for every player (all rows of X, not only the
# held-out ones) as a new column on the aggregated table.
score_columns = {
    "LR Predicted_Score": lr,
    "Ridge Predicted_Score": ridge,
    "Lasso Predicted_Score": lasso,
}
for column_name, fitted_model in score_columns.items():
    aggregated[column_name] = fitted_model.predict(X)

In [10]:
# Rank players by the linear-regression prediction, highest first, and keep
# the actual VBD alongside for comparison.
score_col = 'LR Predicted_Score'
ranked = aggregated[['Player', score_col, 'VBD']].sort_values(by=score_col, ascending=False)
top10 = ranked.head(10)

print('\nTop 10 Players by LR Predicted Score:')
print(top10)


Top 10 Players by LR Predicted Score:
                   Player  LR Predicted_Score         VBD
2172         Emmitt Smith           90.573333   86.933333
4024  LaDainian Tomlinson           84.421497  118.181818
4337         Marcus Allen           74.815341   54.937500
6643        Walter Payton           72.718902   98.846154
4450       Marshall Faulk           66.267371   96.083333
31          Aaron Rodgers           60.920933   48.600000
3446         John Riggins           59.702375   33.571429
2282        Franco Harris           58.649542   45.461538
372         Barry Sanders           58.138191  120.700000
2185       Eric Dickerson           57.322008   74.454545


In [11]:
# Rank players by the Ridge prediction, highest first, and keep the actual VBD
# alongside for comparison.
score_col = 'Ridge Predicted_Score'
ranked = aggregated[['Player', score_col, 'VBD']].sort_values(by=score_col, ascending=False)
top10 = ranked.head(10)

print('\nTop 10 Players by Ridge Predicted Score:')
print(top10)


Top 10 Players by Ridge Predicted Score:
                   Player  Ridge Predicted_Score         VBD
2172         Emmitt Smith              90.568023   86.933333
4024  LaDainian Tomlinson              84.397720  118.181818
4337         Marcus Allen              74.791537   54.937500
6643        Walter Payton              72.693830   98.846154
4450       Marshall Faulk              66.270856   96.083333
31          Aaron Rodgers              60.571692   48.600000
3446         John Riggins              59.690415   33.571429
2282        Franco Harris              58.637083   45.461538
372         Barry Sanders              58.144311  120.700000
2185       Eric Dickerson              57.311120   74.454545


In [12]:
# Rank players by the Lasso prediction, highest first, and keep the actual VBD
# alongside for comparison.
score_col = 'Lasso Predicted_Score'
ranked = aggregated[['Player', score_col, 'VBD']].sort_values(by=score_col, ascending=False)
top10 = ranked.head(10)

print('\nTop 10 Players by Lasso Predicted Score:')
print(top10)


Top 10 Players by Lasso Predicted Score:
                   Player  Lasso Predicted_Score         VBD
2172         Emmitt Smith              90.374760   86.933333
4024  LaDainian Tomlinson              83.704765  118.181818
4337         Marcus Allen              74.028314   54.937500
6643        Walter Payton              71.657299   98.846154
4450       Marshall Faulk              66.327166   96.083333
3446         John Riggins              59.259786   33.571429
372         Barry Sanders              58.183526  120.700000
2282        Franco Harris              58.087490   45.461538
2185       Eric Dickerson              56.781386   74.454545
1809        Derrick Henry              54.820736   82.222222
