In [330]:
import pandas as pd
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [331]:
stats = pd.read_csv("mvp-stats.csv")

del stats["Unnamed: 0"]

stats

Unnamed: 0,Player,Tm,Pos,G,GS,Cmp,PassAtt,PassYds,PassTD,Int,...,L,W-L%,PF,PA,PD,MoV,SoS,SRS,OSRS,DSRS
0,Chris Greisen,ARI,QB,3,0,6.0,10.0,65.0,1.0,0.0,...,13,0.188,210,443,-233,-14.6,-0.7,-15.2,-7.2,-8.1
1,Clarence Williams,ARI,RB,2,0,0.0,0.0,0.0,0.0,0.0,...,13,0.188,210,443,-233,-14.6,-0.7,-15.2,-7.2,-8.1
2,Dave Brown,ARI,QB,6,2,40.0,69.0,467.0,2.0,3.0,...,13,0.188,210,443,-233,-14.6,-0.7,-15.2,-7.2,-8.1
3,Jake Plummer,ARI,QB,14,14,270.0,475.0,2946.0,13.0,21.0,...,13,0.188,210,443,-233,-14.6,-0.7,-15.2,-7.2,-8.1
4,Michael Pittman,ARI,RB,16,12,0.0,0.0,0.0,0.0,0.0,...,13,0.188,210,443,-233,-14.6,-0.7,-15.2,-7.2,-8.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4972,Derrius Guice,WAS,RB,5,1,0.0,0.0,0.0,0.0,0.0,...,13,0.188,266,435,-169,-10.6,-0.2,-10.8,-6.3,-4.5
4973,Dwayne Haskins,WAS,QB,9,7,119.0,203.0,1365.0,7.0,7.0,...,13,0.188,266,435,-169,-10.6,-0.2,-10.8,-6.3,-4.5
4974,Josh Ferguson,WAS,RB,2,0,0.0,0.0,0.0,0.0,0.0,...,13,0.188,266,435,-169,-10.6,-0.2,-10.8,-6.3,-4.5
4975,Michael Burton,WAS,RB,10,2,0.0,0.0,0.0,0.0,0.0,...,13,0.188,266,435,-169,-10.6,-0.2,-10.8,-6.3,-4.5


In [332]:
predictors = ['G', 'GS', 'Cmp', 'PassAtt', 'PassYds', 'PassTD',
       'Int', 'Year', 'Rec', 'RecYds', 'RecTD', 'RushAtt', 'RushYds',
       'RushTD', 'W', 'L','W-L%', 'PF', 'PA', 'PD', 
        'MoV', 'SoS', 'SRS', 'OSRS', 'DSRS']

In [333]:
train = stats[stats["Year"] < 2023]
test = stats[stats["Year"] == 2023]

In [334]:
reg = Ridge(alpha=.1)

reg.fit(train[predictors], train["Share"])

In [335]:
predictions = reg.predict(test[predictors])
predictions = pd.DataFrame(predictions, columns=["prediction"], index=test.index)

In [336]:
predictions

Unnamed: 0,prediction
162,-0.665142
163,0.066089
164,0.055084
165,1.497143
166,-0.482342
...,...
4838,0.375907
4839,0.320824
4840,2.327704
4841,0.417487


In [337]:
combination = pd.concat([test[["Player", "Share"]], predictions], axis=1)
combination

Unnamed: 0,Player,Share,prediction
162,Clayton Tune,0.0,-0.665142
163,Damien Williams,0.0,0.066089
164,Emari Demercado,0.0,0.055084
165,James Conner,0.0,1.497143
166,Keaontay Ingram,0.0,-0.482342
...,...,...,...
4838,Chris Rodriguez,0.0,0.375907
4839,Derrick Gore,0.0,0.320824
4840,Jacoby Brissett,0.0,2.327704
4841,Jonathan Williams,0.0,0.417487


In [338]:
combination.sort_values("Share", ascending=False).head(10)

Unnamed: 0,Player,Share,prediction
468,Lamar Jackson,98.6,7.01055
619,Josh Allen,16.0,8.139226
162,Clayton Tune,0.0,-0.665142
2928,Alexander Mattison,0.0,-0.996447
2930,Joshua Dobbs,0.0,-1.756745
2931,Kene Nwangwu,0.0,-0.31399
2932,Kirk Cousins,0.0,5.639802
2933,Nick Mullens,0.0,-0.466771
2934,Ty Chandler,0.0,-0.285612
3091,Bailey Zappe,0.0,-2.383032


In [339]:
mean_squared_error(combination["Share"], combination["prediction"])

45.212153876118045

In [340]:
combination["Share"].value_counts()

Share
0.0     209
98.6      1
16.0      1
Name: count, dtype: int64

In [341]:
combination = combination.sort_values("Share", ascending=False)
combination["Rk"] = list(range(1, combination.shape[0] + 1))

combination.head(10)

Unnamed: 0,Player,Share,prediction,Rk
468,Lamar Jackson,98.6,7.01055,1
619,Josh Allen,16.0,8.139226,2
162,Clayton Tune,0.0,-0.665142,3
2928,Alexander Mattison,0.0,-0.996447,4
2930,Joshua Dobbs,0.0,-1.756745,5
2931,Kene Nwangwu,0.0,-0.31399,6
2932,Kirk Cousins,0.0,5.639802,7
2933,Nick Mullens,0.0,-0.466771,8
2934,Ty Chandler,0.0,-0.285612,9
3091,Bailey Zappe,0.0,-2.383032,10


In [372]:
# combination = combination.sort_values("prediction", ascending=False)
combination["Predicted_Rk"] = list(range(1, combination.shape[0] + 1))
combination = combination.sort_values("Predicted_Rk", ascending=True)

combination.head(10)

Unnamed: 0,Player,Share,prediction,Predicted_Rk
1370,Dak Prescott,0.0,12.510477,1
1866,Jordan Love,0.0,10.679482,2
4258,Brock Purdy,0.0,10.365428,3
1534,Russell Wilson,0.0,9.660628,4
619,Josh Allen,16.0,8.139226,5
468,Lamar Jackson,98.6,7.01055,6
1701,Jared Goff,0.0,6.800205,7
4666,Baker Mayfield,0.0,6.212856,8
2932,Kirk Cousins,0.0,5.639802,9
2783,Tua Tagovailoa,0.0,5.212639,10


In [343]:
def find_ap(combination):
    actual = combination.sort_values("Share", ascending=False).head(5)
    predicted = combination.sort_values("prediction", ascending=False)
    ps = []
    found = 0
    seen = 1

    for index, row in predicted.iterrows():
        if row["Player"] in actual["Player"].values:
            found += 1
            ps.append(found/seen)
        seen += 1
    return sum(ps) / len(ps)

In [344]:
find_ap(combination)

0.39268507402961184

In [345]:
years = list(range(2000, 2024))
aps = []
all_predictions = []

for year in years[5:]:
    train = stats[stats["Year"] < year]
    test = stats[stats["Year"] == year]
    reg.fit(train[predictors], train["Share"])

    predictions = reg.predict(test[predictors])
    predictions = pd.DataFrame(predictions, columns=["prediction"], index=test.index)
    combination = pd.concat([test[["Player", "Share"]], predictions], axis=1)

    all_predictions.append(combination)
    aps.append(find_ap(combination))

In [346]:
sum(aps) / len(aps)

0.42734465320310894

In [347]:
def add_ranks(combination):
    combination = combination.sort_values("Share", ascending=False)
    combination["Rk"] = list(range(1, combination.shape[0] + 1))

    combination = combination.sort_values("prediction", ascending=False)
    combination["Predicted_Rk"] = list(range(1, combination.shape[0] + 1))

    combination["Diff"] = combination["Rk"] - combination["Predicted_Rk"]
    return combination

In [348]:
ranking = add_ranks(all_predictions[1])
ranking[ranking["Rk"] < 6].sort_values("Diff", ascending=False)

Unnamed: 0,Player,Share,prediction,Rk,Predicted_Rk,Diff
2046,Peyton Manning,4.0,11.34702,3,1,2
4058,LaDainian Tomlinson,88.0,8.269595,1,2,-1
3135,Drew Brees,8.0,5.684863,2,4,-2
3291,Brandon Jacobs,0.0,1.134436,5,39,-34
36,Edgerrin James,0.0,-2.217257,4,189,-185


In [349]:
def backtest(stats, model, year, predictors):
    aps = []
    all_predictions = []
    
    for year in years[5:]:
        train = stats[stats["Year"] < year]
        test = stats[stats["Year"] == year]
        model.fit(train[predictors], train["Share"])
    
        predictions = reg.predict(test[predictors])
        predictions = pd.DataFrame(predictions, columns=["prediction"], index=test.index)
        combination = pd.concat([test[["Player", "Share"]], predictions], axis=1)
        combination = add_ranks(combination)
    
        all_predictions.append(combination)
        aps.append(find_ap(combination))
    return sum(aps)/len(aps), aps, pd.concat(all_predictions)

In [350]:
mean_ap, aps, all_predictions = backtest(stats, reg, years[5:], predictors)

In [351]:
mean_ap

0.42142504916278495

In [352]:
all_predictions[all_predictions["Rk"] < 5].sort_values("Diff").head(10)

Unnamed: 0,Player,Share,prediction,Rk,Predicted_Rk,Diff
153,Colt McCoy,0.0,-3.333488,4,217,-213
3482,Mark Sanchez,0.0,-5.441077,4,212,-208
36,Edgerrin James,0.0,-2.217257,4,189,-185
130,Chase Edmonds,0.0,-0.959946,3,186,-183
2928,Alexander Mattison,0.0,-0.996447,4,184,-180
62,Beanie Wells,0.0,-1.161514,2,180,-178
3369,Elijhaa Penny,0.0,-0.766928,4,177,-173
162,Clayton Tune,0.0,-0.665142,3,169,-166
3241,Ian Book,0.0,-0.877027,4,164,-160
840,Adrian Peterson,6.0,-0.990701,4,162,-158


In [364]:
pd.concat([pd.Series(reg.coef_), pd.Series(predictors)], axis=1).sort_values(0, ascending=False)

Unnamed: 0,0,1
22,1.306712,SRS
5,0.911455,PassTD
14,0.681173,W
13,0.246558,RushTD
10,0.219095,RecTD
23,0.035483,OSRS
1,0.01668,GS
18,0.005072,PA
12,0.003692,RushYds
9,-0.000569,RecYds


In [365]:
stats["NPos"] = stats["Pos"].astype("category").cat.codes

In [366]:
rf = RandomForestRegressor(n_estimators=50, random_state=1, min_samples_split=5)

mean_ap, aps, all_predictions = backtest(stats, rf, years[20:], predictors)

In [367]:
mean_ap

0.42467304517073334

In [368]:
mean_ap, aps, all_predictions = backtest(stats, reg, years[20:], predictors)

In [369]:
mean_ap

0.42142504916278495