In [1]:
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xgboost as xgb

from modeling.evaluate import Evaluate
# Silence every warning category for the whole session.
# NOTE: this also hides deprecation/future warnings raised by the libraries used below.
warnings.simplefilter("ignore")

# Use the seaborn-derived dark-grid style for all matplotlib figures.
plt.style.use("seaborn-v0_8-darkgrid")


In [2]:
# Player game log; later cells rely on its PLAYER, YEAR, OPPONENT and DATE columns.
df = pd.read_csv('database/DIM_PLAYER_GAMES.csv')

# Defensive stats. The 'Unnamed: 0' column is discarded
# (presumably a row index written out when the CSV was saved -- confirm).
defense_raw = pd.read_csv('database/DIM_DEFENSE.csv')
df_dfns = defense_raw.drop(columns=['Unnamed: 0'])

In [3]:
# Translation table applied to the defensive data: code as stored -> replacement code.
# Presumably aligns the defensive team codes with the abbreviations used in the
# player data's OPPONENT column -- confirm against both CSVs.
replace_dict = dict(
    RAV="BAL",
    RAM="LAR",
    OTI="TEN",
    HTX="HOU",
    SDG="LAC",
    RAI="LVR",
    CRD="ARI",
    CLT="IND",
)

In [4]:
# Swap every value listed in replace_dict for its replacement, across the whole frame.
# Reassigning (instead of inplace=True) keeps the step an explicit, chainable
# transformation; the mapping's values are not keys, so re-running is harmless.
df_dfns = df_dfns.replace(replace_dict)

In [5]:
# Display the column names of the defensive-stats frame.
df_dfns.columns

Index(['DEF_TEAM', 'YEAR', 'WEEK', 'SCORE_OPP', 'PASSING_CMP', 'PASSING_ATT',
       'PASSING_YDS', 'PASSING_TD', 'PASSING_INT', 'PASSING_SK', 'PASSING_Y_A',
       'PASSING_NY_A', 'PASSING_CMP_PCT', 'PASSING_RATE', 'RUSHING_ATT',
       'RUSHING_YDS', 'RUSHING_Y_A', 'RUSHING_TD'],
      dtype='object')

In [6]:
# Stat columns to average; WEEK is deliberately left out, and the two grouping
# keys (DEF_TEAM, YEAR) come back as columns via reset_index().
defense_stat_cols = [
    'SCORE_OPP', 'PASSING_CMP', 'PASSING_ATT',
    'PASSING_YDS', 'PASSING_TD', 'PASSING_INT', 'PASSING_SK', 'PASSING_Y_A',
    'PASSING_NY_A', 'PASSING_CMP_PCT', 'PASSING_RATE', 'RUSHING_ATT',
    'RUSHING_YDS', 'RUSHING_Y_A', 'RUSHING_TD',
]

# Mean of each stat per (team, season).
# The columns are selected with a *list*: indexing a GroupBy with a bare
# comma-separated tuple of names is deprecated in pandas 1.x and raises
# ValueError in pandas 2.x.
df_dfns_yearly = (
    df_dfns
    .groupby(['DEF_TEAM', 'YEAR'])[defense_stat_cols]
    .mean()
    .reset_index()
)


In [7]:
# Tag every aggregated stat with a _DEFST suffix; the two identifier columns keep their names.
identifier_cols = ['DEF_TEAM', 'YEAR']
df_dfns_yearly.columns = [
    name if name in identifier_cols else f"{name}_DEFST"
    for name in df_dfns_yearly.columns
]

# Year key used for merging: one season after the season the stats were recorded in.
df_dfns_yearly['JOIN_YEAR'] = df_dfns_yearly['YEAR'] + 1

In [8]:
# Weighted passing score allowed by each defense (looks like fantasy-style scoring):
# 6 per passing TD, 1 per 10 passing yards, 1 per completion.
pass_tds = df_dfns_yearly['PASSING_TD_DEFST']
pass_yds = df_dfns_yearly['PASSING_YDS_DEFST']
pass_cmp = df_dfns_yearly['PASSING_CMP_DEFST']
df_dfns_yearly['QB_WR_PTS_DEFST'] = pass_tds * 6 + pass_yds / 10 + pass_cmp

# Weighted rushing score allowed: 6 per rushing TD, 1 per 10 rushing yards.
rush_tds = df_dfns_yearly['RUSHING_TD_DEFST']
rush_yds = df_dfns_yearly['RUSHING_YDS_DEFST']
df_dfns_yearly['RB_PTS_DEFST'] = rush_tds * 6 + rush_yds / 10

In [9]:
# Rank column -> the stat it is derived from.
rank_sources = {
    'DEF_RANK': 'SCORE_OPP_DEFST',
    'QB_WR_DEF_RANK': 'QB_WR_PTS_DEFST',
    'RB_DEF_RANK': 'RB_PTS_DEFST',
}

# Rank teams within each season: rank 1 is the smallest value of the stat,
# and tied teams all receive the lowest rank of their group (method='min').
for rank_col, stat_col in rank_sources.items():
    df_dfns_yearly[rank_col] = df_dfns_yearly.groupby('YEAR')[stat_col].rank(method='min')

In [10]:
# Display the column names of the per-season defensive frame.
df_dfns_yearly.columns

Index(['DEF_TEAM', 'YEAR', 'SCORE_OPP_DEFST', 'PASSING_CMP_DEFST',
       'PASSING_ATT_DEFST', 'PASSING_YDS_DEFST', 'PASSING_TD_DEFST',
       'PASSING_INT_DEFST', 'PASSING_SK_DEFST', 'PASSING_Y_A_DEFST',
       'PASSING_NY_A_DEFST', 'PASSING_CMP_PCT_DEFST', 'PASSING_RATE_DEFST',
       'RUSHING_ATT_DEFST', 'RUSHING_YDS_DEFST', 'RUSHING_Y_A_DEFST',
       'RUSHING_TD_DEFST', 'JOIN_YEAR', 'QB_WR_PTS_DEFST', 'RB_PTS_DEFST',
       'DEF_RANK', 'QB_WR_DEF_RANK', 'RB_DEF_RANK'],
      dtype='object')

In [11]:
# Attach defensive stats to each player game by matching the game's YEAR/OPPONENT
# against JOIN_YEAR/DEF_TEAM. JOIN_YEAR is the stats season + 1, so a game is paired
# with the opponent's numbers from the season before. The inner join keeps only
# games that find a match.
defense_for_join = df_dfns_yearly.drop(columns=['YEAR'])
df_comb = df.merge(
    defense_for_join,
    how='inner',
    left_on=['YEAR', 'OPPONENT'],
    right_on=['JOIN_YEAR', 'DEF_TEAM'],
)

# Order each player's games chronologically.
df_comb = df_comb.sort_values(by=['PLAYER', 'DATE'])

In [61]:
# Players to evaluate, in order of first appearance in the game log.
# (Iterating a set of strings gives a different order on every kernel restart,
# which made the row order and the log output non-reproducible.)
player_names = df["PLAYER"].drop_duplicates().tolist()

# Run one evaluation per player on the merged frame, with the three defensive
# ranks passed as exogenous features. steps=17 and lags=9 are the requested
# values; judging by the log output, Evaluate adjusts them for players without
# enough data.
top_180_eval = [
    Evaluate(
        df=df_comb,
        player=name,
        steps=17,
        regressor=xgb.XGBRFRegressor(random_state=123),
        lags=9,
        exogs=["QB_WR_DEF_RANK", "RB_DEF_RANK", "DEF_RANK"],
    ).eval()
    for name in player_names
]

# Stack the per-player results into one frame. ignore_index=True gives every
# row its own label; without it each row keeps the label 0 from its
# per-player result.
df_top_180 = pd.concat(top_180_eval, ignore_index=True)

Not enough data. Adjusting lags for Sam LaPorta.
Not enough data. Adjusting steps for Jahan Dotson.
Not enough data. Adjusting lags for Zay Flowers.
Not enough data. Adjusting lags for Aidan O'Connell.
Not enough data. Adjusting steps for Christian Watson.
Not enough data. Adjusting steps for Ty Chandler.
Not enough data. Adjusting lags for Puka Nacua.
Not enough data. Adjusting steps for Isaiah Likely.
Not enough data. Adjusting steps for Brian Robinson Jr..
Not enough data. Adjusting lags for Josh Downs.
Not enough data. Adjusting lags for Jaleel McLaughlin.
Not enough data. Adjusting steps for Desmond Ridder.
Not enough data. Adjusting lags for Tommy DeVito.
Not enough data. Adjusting lags for Anthony Richardson.
Not enough data. Adjusting steps for Nick Mullens.
Not enough data. Adjusting steps for Jordan Love.
Not enough data. Adjusting lags for Rashee Rice.
Not enough data. Adjusting steps for Drake London.
Not enough data. Adjusting lags for Michael Wilson.
Not enough data. Adju

In [62]:
# Display the evaluation results, largest actual season total first.
df_top_180.sort_values(by='ACTUAL SEASON TOTAL', ascending=False)

Unnamed: 0,PLAYER,ACTUAL SEASON TOTAL,PREDICTED,SEASON MAPE,GAME MAPE,GAME RSME,GAMES PREDICTED,LAGS,REGRESSOR
0,Josh Allen,430.64,433.622313,0.006925,2.257320e-01,7.065988,17,9,"XGBRFRegressor(base_score=None, booster=None, ..."
0,Christian McCaffrey,412.20,360.181196,0.126198,2.426095e-01,9.532918,17,9,"XGBRFRegressor(base_score=None, booster=None, ..."
0,CeeDee Lamb,405.20,243.686300,0.398602,4.525395e-01,15.165559,17,9,"XGBRFRegressor(base_score=None, booster=None, ..."
0,Jalen Hurts,396.82,404.776775,0.020051,7.228299e-01,7.899268,17,9,"XGBRFRegressor(base_score=None, booster=None, ..."
0,Tyreek Hill,382.70,370.771509,0.031169,8.294861e-01,14.149188,17,9,"XGBRFRegressor(base_score=None, booster=None, ..."
...,...,...,...,...,...,...,...,...,...
0,Michael Wilson,35.40,38.496643,0.087476,1.442219e+16,6.686770,5,5,"XGBRFRegressor(base_score=None, booster=None, ..."
0,Jaleel McLaughlin,33.00,33.517697,0.015688,2.289359e+00,3.554813,7,7,"XGBRFRegressor(base_score=None, booster=None, ..."
0,Easton Stick,29.92,36.209267,0.210203,2.630472e-01,4.387746,2,2,"XGBRFRegressor(base_score=None, booster=None, ..."
0,Tommy DeVito,13.38,57.086472,3.266552,2.856607e+16,15.258107,3,3,"XGBRFRegressor(base_score=None, booster=None, ..."


In [63]:
# Average of each per-player error metric across all evaluated players,
# shown as a (season MAPE, game RMSE, game MAPE) tuple.
metric_columns = ('SEASON MAPE', 'GAME RSME', 'GAME MAPE')
tuple(np.mean(df_top_180[metric]) for metric in metric_columns)

(0.32316806051163754, 8.55091955412107, 1338069591844644.0)

In [64]:
# Root-mean-squared error between actual and predicted season totals.
# Computed directly with numpy: mean_squared_error is never imported in this
# notebook, so the original call fails on a fresh kernel.
# .to_numpy() keeps NaN propagation explicit instead of pandas silently skipping NaNs.
season_residuals = (df_top_180['ACTUAL SEASON TOTAL'] - df_top_180['PREDICTED']).to_numpy()
np.sqrt(np.mean(np.square(season_residuals)))

60.29954432909588

In [46]:
# np.sqrt(mean_squared_error(df_top_180['ACTUAL SEASON TOTAL'], df_top_180['PREDICTED']))

In [47]:
# top_180_pred = [
# Predict(
#     df=df, player=i, steps=17, regressor=GradientBoostingRegressor, lags=9
# ).predict_season() for i in list(set(df['PLAYER']))
# ]

# clean_data = [row for row in top_180_pred if row is not None]

In [48]:
# df_preds = pd.DataFrame(clean_data, columns=['PLAYER', 'PROJECTED POINTS'])

In [49]:
# df_2023 = df.loc[df['YEAR']==2023]
# df_players_2023 = df_2023.groupby(['PLAYER', 'TEAM'])['PPR'].sum().reset_index()
# df_players_2023.rename(columns={'PPR':'PPR_2023'}, inplace=True)

In [50]:
# df_adp = pd.read_csv('ADP_2024.csv')
# df_adp['Position'] = df_adp['Consensus Pos'].str[:2]
# df_adp['Target Round'] = (df_adp['Consensus'] // 10).astype(int) + 1

In [51]:
# df_merge1 = pd.merge(df_players_2023, df_preds,  how='inner', on='PLAYER')

In [52]:
# df_final = pd.merge(df_merge1, df_adp[['Player', 'Position', 'Consensus', 'Target Round']], left_on='PLAYER', right_on='Player', how='inner').drop(columns=['Player'])

In [53]:
# ranked_values = {
#     'QB': df_final[df_final['Position'] == 'QB'].nlargest(10, 'PROJECTED POINTS')['PROJECTED POINTS'].iloc[-1] if len(df_final[df_final['Position'] == 'QB']) >= 10 else df_final[df_final['Position'] == 'QB']['PROJECTED POINTS'].min(),
#     'TE': df_final[df_final['Position'] == 'TE'].nlargest(10, 'PROJECTED POINTS')['PROJECTED POINTS'].iloc[-1] if len(df_final[df_final['Position'] == 'TE']) >= 10 else df_final[df_final['Position'] == 'TE']['PROJECTED POINTS'].min(),
#     'WR': df_final[df_final['Position'] == 'WR'].nlargest(30, 'PROJECTED POINTS')['PROJECTED POINTS'].iloc[-1] if len(df_final[df_final['Position'] == 'WR']) >= 30 else df_final[df_final['Position'] == 'WR']['PROJECTED POINTS'].min(),
#     'RB': df_final[df_final['Position'] == 'RB'].nlargest(20, 'PROJECTED POINTS')['PROJECTED POINTS'].iloc[-1] if len(df_final[df_final['Position'] == 'RB']) >= 20 else df_final[df_final['Position'] == 'RB']['PROJECTED POINTS'].min()
# }

# # Print the ranked PPR values for debugging
# print("Ranked PPR values:")
# print(ranked_values)

In [54]:
# def VBD(row):
#     category = row['Position']
#     ranked_ppr = ranked_values.get(category, 0) 
#     return row['PROJECTED POINTS'] - ranked_ppr

# # Apply the function to each row
# df_final['VBD'] = df_final.apply(VBD, axis=1)


In [55]:
# df_final['My Ranking'] = df_final['VBD'].rank(ascending=False, method='min').astype(int)
# df_final.sort_values(by='VBD', ascending=False).head(25)

In [56]:
# df_final.to_csv('Draft Strategy 2024.csv')