In [1]:
import matplotlib.pyplot as plt
from modeling.evaluate import Evaluate
import xgboost as xgb
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression, BayesianRidge
from sklearn.preprocessing import MinMaxScaler
import warnings
import pandas as pd
import numpy as np
warnings.simplefilter(action="ignore")
plt.style.use("seaborn-v0_8-darkgrid")
pd.set_option('display.max_columns', 50)


In [2]:
df = pd.read_csv('database/DIM_PLAYER_GAMES.csv')
df_dfns = pd.read_csv('database/DIM_DEFENSE.csv').drop(columns=['Unnamed: 0', 'Unnamed: 0.1'])

In [3]:
df.columns

Index(['GAME_ORDER', 'YEAR', 'DATE', 'GAME_NUMBER', 'WEEK', 'PLAYER', 'AGE',
       'TEAM', 'OPPONENT', 'RESULT', 'RUSHING_ATT', 'RUSHING_YDS',
       'RUSHING_TD', 'RUSHING_1D', 'RUSHING_YBC', 'RUSHING_YBC_ATT',
       'RUSHING_YAC', 'RUSHING_YAC_ATT', 'RUSHING_BRKTKL', 'RUSHING_ATT_BR',
       'RECEIVING_TGT', 'RECEIVING_REC', 'RECEIVING_YDS', 'RECEIVING_TD',
       'RECEIVING_1D', 'RECEIVING_YBC', 'RECEIVING_YBC_R', 'RECEIVING_YAC',
       'RECEIVING_YAC_R', 'RECEIVING_ADOT', 'RECEIVING_BRKTKL',
       'RECEIVING_REC_BR', 'RECEIVING_DROP', 'RECEIVING_DROP_PCT',
       'RECEIVING_INT', 'RECEIVING_RAT', 'PASSING_CMP', 'PASSING_ATT',
       'PASSING_CMP_PCT', 'PASSING_YDS', 'PASSING_TD', 'PASSING_INT',
       'PASSING_RATE', 'PASSING_SK', 'PASSING_YDS1', 'PASSING_Y_A',
       'PASSING_AY_A', 'PPR'],
      dtype='object')

In [4]:
df_dfns.columns

Index(['Unnamed: 0.2', 'DEF_TEAM', 'OPP_CODE', 'YEAR', 'WEEK', 'SCORE_OPP',
       'PASSING_CMP', 'PASSING_ATT', 'PASSING_YDS', 'PASSING_TD',
       'PASSING_INT', 'PASSING_SK', 'PASSING_Y_A', 'PASSING_NY_A',
       'PASSING_CMP_PCT', 'PASSING_RATE', 'RUSHING_ATT', 'RUSHING_YDS',
       'RUSHING_Y_A', 'RUSHING_TD'],
      dtype='object')

In [5]:
interested = [
    "SCORE_OPP",
    "PASSING_CMP",
    "PASSING_ATT",
    "PASSING_YDS",
    "PASSING_TD",
    "PASSING_INT",
    "PASSING_SK",
    "PASSING_Y_A",
    "PASSING_NY_A",
    "PASSING_CMP_PCT",
    "PASSING_RATE",
    "RUSHING_ATT",
    "RUSHING_YDS",
    "RUSHING_Y_A",
    "RUSHING_TD"
]

for i in interested:
    # Calculate the rolling mean
    df_dfns.sort_values(["DEF_TEAM", "YEAR", "WEEK"], inplace=True)
    rolling_series = (
        df_dfns.sort_values(["DEF_TEAM", "YEAR", "WEEK"])
        .groupby("DEF_TEAM")[i]
        .rolling(window=8)
        .mean()
        .shift(periods=1)
    )

    # Reset the index to align with the original dataframe
    rolling_series = rolling_series.reset_index(level=0, drop=True)

    # scaler = MinMaxScaler()
    # scaled_values = scaler.fit_transform(rolling_series.values.reshape(-1, 1)).flatten()

    # Assign the values to the new column
    df_dfns[f"{i}_rolling"] = rolling_series

In [6]:
df_dfns_j = df_dfns[
    [
        "YEAR",
        "WEEK",
        "DEF_TEAM",
        "OPP_CODE",
        "SCORE_OPP_rolling",
        "PASSING_CMP_rolling",
        "PASSING_ATT_rolling",
        "PASSING_YDS_rolling",
        "PASSING_TD_rolling",
        "PASSING_INT_rolling",
        "PASSING_SK_rolling",
        "PASSING_Y_A_rolling",
        "PASSING_NY_A_rolling",
        "PASSING_CMP_PCT_rolling",
        "PASSING_RATE_rolling",
        "RUSHING_ATT_rolling",
        "RUSHING_YDS_rolling",
        "RUSHING_Y_A_rolling",
        "RUSHING_TD_rolling",
    ]
]

In [7]:
df_comb = pd.merge(df_dfns_j, df, how='left', left_on=['YEAR', 'WEEK', 'OPP_CODE'], right_on=['YEAR', 'WEEK', 'TEAM']).sort_values(by=['PLAYER', 'DATE'])

In [8]:
df_comb.corr().sort_values('PPR', ascending=False)['PPR']

PPR                        1.000000
RECEIVING_TD               0.470177
RECEIVING_YDS              0.455873
RECEIVING_1D               0.428108
RECEIVING_YAC              0.418852
PASSING_TD                 0.410625
RECEIVING_REC              0.403728
RUSHING_TD                 0.381005
PASSING_YDS                0.367587
PASSING_RATE               0.351509
RECEIVING_YBC              0.343764
RUSHING_1D                 0.338715
PASSING_CMP                0.338347
RUSHING_YBC                0.325893
PASSING_ATT                0.323442
RUSHING_YDS                0.314362
PASSING_Y_A                0.312998
PASSING_CMP_PCT            0.312954
RECEIVING_TGT              0.307347
PASSING_AY_A               0.305553
RECEIVING_REC_BR           0.249803
RUSHING_ATT                0.243809
RUSHING_YAC                0.233874
RECEIVING_RAT              0.216844
RUSHING_YBC_ATT            0.210380
PASSING_SK                 0.196936
RECEIVING_BRKTKL           0.182541
PASSING_YDS1               0

In [9]:
df_comb['DATE'] = pd.to_datetime(df_comb['DATE'])

In [10]:
df_comb2 = df_comb.loc[(df_comb['DATE']<pd.to_datetime('2023-10-31'))&(df_comb['PLAYER']!='Easton Stick')]


In [16]:
try:
    top_180_eval = [
        Evaluate(
            df=df_comb2,
            player=name,
            steps=1,
            target='PPR',
        # regressor=xgb.XGBRegressor(colsample_bytree=1, learning_rate=0.4, base_score=10),
        regressor = RandomForestRegressor(random_state=42),
            lags=17,
            exogs=[
            "SCORE_OPP_rolling",
            "PASSING_YDS_rolling",
            "PASSING_TD_rolling",
            "PASSING_INT_rolling",
            "PASSING_SK_rolling",
            "PASSING_Y_A_rolling",
            "PASSING_NY_A_rolling",
            "PASSING_CMP_PCT_rolling",
            "PASSING_RATE_rolling",
            "RUSHING_ATT_rolling",
            "RUSHING_YDS_rolling",
            "RUSHING_Y_A_rolling",
            "RUSHING_TD_rolling"],
        ).eval()
        for name in list(set(df_comb2["PLAYER"]))
    ]
except:
    pass

df_top_180 = pd.concat(top_180_eval)

Not enough data. Adjusting lags for Desmond Ridder.
Not enough data. Adjusting lags for Zach Wilson.
Not enough data. Adjusting lags for Tyler Allgeier.
Not enough data. Adjusting lags for Isaiah Likely.
Not enough data. Adjusting lags for Romeo Doubs.
Not enough data. Adjusting lags for Jahmyr Gibbs.
Not enough data. Adjusting lags for Jaxon Smith-Njigba.
Not enough data. Adjusting lags for Roschon Johnson.
Not enough data. Adjusting lags for Justin Fields.
Not enough data. Adjusting lags for Trey McBride.
Not enough data. Adjusting lags for Jaleel McLaughlin.
Not enough data. Adjusting lags for Nick Mullens.
Not enough data. Adjusting lags for Jordan Love.
Not enough data. Adjusting lags for Anthony Richardson.
Not enough data. Adjusting lags for Christian Watson.
Not enough data. Adjusting lags for Jahan Dotson.
Not enough data. Adjusting lags for Sam LaPorta.
Not enough data. Adjusting lags for Jordan Addison.
Not enough data. Adjusting lags for Tank Dell.
Not enough data. Adjustin

NameError: name 'top_180_eval' is not defined

In [15]:
# df_top_181 = df_top_180.loc[(df_top_180['ACTUAL SEASON TOTAL']>120)&(df_top_180['LAGS']>=10)]
df_top_181 = df_top_180.loc[(df_top_180['LAGS']>=5)&(df_top_180['ACTUAL SEASON TOTAL']>0)]

NameError: name 'df_top_180' is not defined

In [14]:
df_top_181.sort_values(by='SEASON MAPE').tail(25)

NameError: name 'df_top_181' is not defined

In [393]:
np.mean(df_top_181['SEASON MAPE']), np.mean(df_top_181['GAME RSME']), np.mean(df_top_181['GAME MAPE'])

(1.3121261819023533, 6.370892857142857, 1.3121261819023533)

In [394]:
df_top_181['ACTUAL SEASON TOTAL'].mean()

13.04

In [395]:
# np.sqrt(mean_squared_error(df_top_180['ACTUAL SEASON TOTAL'], df_top_180['PREDICTED']))

In [396]:
# np.sqrt(mean_squared_error(df_top_180['ACTUAL SEASON TOTAL'], df_top_180['PREDICTED']))

In [397]:
# top_180_pred = [
# Predict(
#     df=df, player=i, steps=17, regressor=GradientBoostingRegressor, lags=9
# ).predict_season() for i in list(set(df['PLAYER']))
# ]

# clean_data = [row for row in top_180_pred if row is not None]

In [398]:
# df_preds = pd.DataFrame(clean_data, columns=['PLAYER', 'PROJECTED POINTS'])

In [399]:
# df_2023 = df.loc[df['YEAR']==2023]
# df_players_2023 = df_2023.groupby(['PLAYER', 'TEAM'])['PPR'].sum().reset_index()
# df_players_2023.rename(columns={'PPR':'PPR_2023'}, inplace=True)

In [400]:
# df_adp = pd.read_csv('ADP_2024.csv')
# df_adp['Position'] = df_adp['Consensus Pos'].str[:2]
# df_adp['Target Round'] = (df_adp['Consensus'] // 10).astype(int) + 1

In [401]:
# df_merge1 = pd.merge(df_players_2023, df_preds,  how='inner', on='PLAYER')

In [402]:
# df_final = pd.merge(df_merge1, df_adp[['Player', 'Position', 'Consensus', 'Target Round']], left_on='PLAYER', right_on='Player', how='inner').drop(columns=['Player'])

In [403]:
# ranked_values = {
#     'QB': df_final[df_final['Position'] == 'QB'].nlargest(10, 'PROJECTED POINTS')['PROJECTED POINTS'].iloc[-1] if len(df_final[df_final['Position'] == 'QB']) >= 10 else df_final[df_final['Position'] == 'QB']['PROJECTED POINTS'].min(),
#     'TE': df_final[df_final['Position'] == 'TE'].nlargest(10, 'PROJECTED POINTS')['PROJECTED POINTS'].iloc[-1] if len(df_final[df_final['Position'] == 'TE']) >= 10 else df_final[df_final['Position'] == 'TE']['PROJECTED POINTS'].min(),
#     'WR': df_final[df_final['Position'] == 'WR'].nlargest(30, 'PROJECTED POINTS')['PROJECTED POINTS'].iloc[-1] if len(df_final[df_final['Position'] == 'WR']) >= 30 else df_final[df_final['Position'] == 'WR']['PROJECTED POINTS'].min(),
#     'RB': df_final[df_final['Position'] == 'RB'].nlargest(20, 'PROJECTED POINTS')['PROJECTED POINTS'].iloc[-1] if len(df_final[df_final['Position'] == 'RB']) >= 20 else df_final[df_final['Position'] == 'RB']['PPROJECTED POINTS'].min()
# }

# # Print the ranked PPR values for debugging
# print("Ranked PPR values:")
# print(ranked_values)

In [404]:
# def VBD(row):
#     category = row['Position']
#     ranked_ppr = ranked_values.get(category, 0) 
#     return row['PROJECTED POINTS'] - ranked_ppr

# # Apply the function to each row
# df_final['VBD'] = df_final.apply(VBD, axis=1)


In [405]:
# df_final['My Ranking'] = df_final['VBD'].rank(ascending=False, method='min').astype(int)
# df_final.sort_values(by='VBD', ascending=False).head(25)

In [406]:
# df_final.to_csv('Draft Strategy 2024.csv')