# NFL Fantasy Football Projection Model Using an RNN

This notebook presents a comprehensive workflow for building an NFL fantasy football projection model using an RNN. Data sources include Yahoo, Pro-Football Reference, SportsDataIO, and NFLVerse. The goal is to leverage advanced machine learning techniques and rich datasets to generate accurate player projections for fantasy football analysis and decision-making.

In [2]:
import pandas as pd
import numpy as np
import os
import sys

project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
print(f"Project Root: {project_root}")
print("Sys Path Before:", sys.path)
if project_root not in sys.path:
    print("Inserting project root to sys.path")
    sys.path.insert(0, project_root)

# Now import
from data_api import SportsDataIO, Yahoo, PFR, POSITION_PLAYER_STAT_PROJECTION_DATA_DICT
import utils
import yahoo_helpers

from dotenv import load_dotenv
load_dotenv()

Project Root: c:\Users\bengu\Documents\NFL Data Project\clairvoyent-raven-sports-analysis\src
Sys Path Before: ['C:\\Users\\bengu\\AppData\\Local\\Programs\\Python\\Python310\\python310.zip', 'C:\\Users\\bengu\\AppData\\Local\\Programs\\Python\\Python310\\DLLs', 'C:\\Users\\bengu\\AppData\\Local\\Programs\\Python\\Python310\\lib', 'C:\\Users\\bengu\\AppData\\Local\\Programs\\Python\\Python310', 'c:\\Users\\bengu\\.virtualenvs\\cfeproj-oIABPDjj', '', 'c:\\Users\\bengu\\.virtualenvs\\cfeproj-oIABPDjj\\lib\\site-packages', 'c:\\Users\\bengu\\.virtualenvs\\cfeproj-oIABPDjj\\lib\\site-packages\\win32', 'c:\\Users\\bengu\\.virtualenvs\\cfeproj-oIABPDjj\\lib\\site-packages\\win32\\lib', 'c:\\Users\\bengu\\.virtualenvs\\cfeproj-oIABPDjj\\lib\\site-packages\\Pythonwin']
Inserting project root to sys.path


True

## Import and clean data from nflverse source

In [3]:
NFLVERSE_DATA_PATH = r"C:\Users\bengu\Documents\Sports Analysis Project\clairvoyent-raven-sports-analysis\data\nfl_player_stats_cleaned.csv"
NFLVERSE_TEAMS_DATA_PATH = r"C:\Users\bengu\Documents\Sports Analysis Project\clairvoyent-raven-sports-analysis\data\nfl_team_stats.xlsx"
NFLVERSE_INJURIES_PATH = r"C:\Users\bengu\Documents\Sports Analysis Project\clairvoyent-raven-sports-analysis\data\nfl_injuries.xlsx"
NFLVERSE_DEPTH_CHART_PATH = r"C:\Users\bengu\Documents\NFL Data Project\clairvoyent-raven-sports-analysis\data\nfl_depth_charts.xlsx"

all_players_df = pd.read_csv(NFLVERSE_DATA_PATH)
all_teams_df = pd.read_excel(NFLVERSE_TEAMS_DATA_PATH, engine="openpyxl")
injuries_df = pd.read_excel(NFLVERSE_INJURIES_PATH, engine="openpyxl")
depth_df = pd.read_excel(NFLVERSE_DEPTH_CHART_PATH, engine="openpyxl")

In [4]:
from sklearn.preprocessing import OneHotEncoder

if "gsis_id" in injuries_df.columns:
   injuries_df = injuries_df.rename({"gsis_id": "player_id"}, axis=1)

filtered_injuries_df = injuries_df[["season", "week", "player_id", "report_status", "practice_status"]]

encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)

X_categorical_injury = filtered_injuries_df[["report_status", "practice_status"]]

encoded_data = encoder.fit_transform(X_categorical_injury)

encoded_feature_names = encoder.get_feature_names_out(X_categorical_injury.columns)

encoded_df = pd.DataFrame(encoded_data, columns=encoded_feature_names)

filtered_injuries_df = pd.concat([filtered_injuries_df[["season", "week", "player_id"]], encoded_df], axis=1)

In [5]:
if "gsis_id" in depth_df.columns:
    filtered_depth_df = depth_df.rename({"gsis_id": "player_id"}, axis=1)

filtered_depth_df = filtered_depth_df[["season", "week", "player_id", "depth_team"]]
filtered_depth_df = filtered_depth_df.dropna(subset=["season", "week"])
filtered_depth_df["week"] = filtered_depth_df["week"].astype(int)
filtered_depth_df["season"] = filtered_depth_df["season"].astype(int)

In [6]:
"""Merge depth chart information and injury information onto the all players data frame.
"""

all_players_df = all_players_df.merge(filtered_injuries_df, how="left", on=["player_id", "season", "week"])

all_players_df = all_players_df.merge(filtered_depth_df, how="left", on=["player_id", "season", "week"])

In [7]:
# Fill none values for depth chart with the average value.
# Fill the injury encoded values with 0 because it's safe to assume that no injury occurred

depth_mean = all_players_df["depth_team"].mean()
all_players_df["depth_team"] = all_players_df["depth_team"].fillna(depth_mean)
all_players_df[encoded_feature_names] = all_players_df[encoded_feature_names].fillna(0)

In [8]:
column_categories = {
    "standard": {
        "player_id",
        "player_name",
        "position",
        "position_group",
        "season",
        "week",
        "season_type",
        "team",
        "opponent_team",
        "pacr",
        "report_status_Doubtful",
        "report_status_Note",
        "report_status_Out",
        "report_status_Probable",
        "report_status_Questionable",
        "report_status_nan",
        "practice_status_\n    ",
        "practice_status_Did Not Participate In Practice",
        "practice_status_Full Participation in Practice",
        "practice_status_Limited Participation in Practice",
        "practice_status_Note",
        "practice_status_Out (Definitely Will Not Play)",
        "depth_team", 
    },
    "passing": {
        "completions",
        "attempts",
        "passing_yards",
        "passing_tds",
        "passing_interceptions",
        "sacks_suffered",
        "sack_yards_lost",
        "sack_fumbles",
        "sack_fumbles_lost",
        "passing_air_yards",
        "passing_yards_after_catch",
        "passing_first_downs",
        "passing_epa",
        "passing_cpoe",
        "passing_2pt_conversions",
        "carries", # Include rushing stats for QBs
        "rushing_yards",
        "rushing_tds",
        "rushing_fumbles",
        "rushing_fumbles_lost",
        "rushing_first_downs",
        "rushing_epa",
        "rushing_2pt_conversions",
    },
    "rushing_and_receiving": {
        "carries",
        "rushing_yards",
        "rushing_tds",
        "rushing_fumbles",
        "rushing_fumbles_lost",
        "rushing_first_downs",
        "rushing_epa",
        "rushing_2pt_conversions",
        "receptions",
        "targets",
        "receiving_yards",
        "receiving_tds",
        "receiving_fumbles",
        "receiving_fumbles_lost",
        "receiving_air_yards",
        "receiving_yards_after_catch",
        "receiving_first_downs",
        "rushing_epa",
        "rushing_2pt_conversions",
        "receptions",
        "targets",
        "receiving_yards",
        "receiving_tds",
        "receiving_fumbles",
        "receiving_fumbles_lost",
        "receiving_air_yards",
        "receiving_yards_after_catch",
        "receiving_first_downs",
        "receiving_epa",
        "receiving_2pt_conversions",
        "racr",
        "target_share",
        "air_yards_share",
        "wopr",
    },
    "defense": {
        "special_teams_tds",
        "def_tackles_solo",
        "def_tackles_with_assist",
        "def_tackle_assists",
        "def_tackles_for_loss",
        "def_tackles_for_loss_yards",
        "def_fumbles_forced",
        "def_sacks",
        "def_sack_yards",
        "def_qb_hits",
        "def_interceptions",
        "def_interception_yards",
        "def_pass_defended",
        "def_tds",
        "def_fumbles",
        "def_safeties",
        "misc_yards",
        "fumble_recovery_own",
        "fumble_recovery_yards_own",
        "fumble_recovery_opp",
        "fumble_recovery_yards_opp",
        "fumble_recovery_tds",
        "penalties",
    },
    "kicking": {
        "fg_made",
        "fg_att",
        "fg_missed",
        "fg_blocked",
        "fg_long",
        "fg_pct",
        "fg_made_0_19",
        "fg_made_20_29",
        "fg_made_30_39",
        "fg_made_40_49",
        "fg_made_50_59",
        "fg_made_60_",
        "fg_missed_0_19",
        "fg_missed_20_29",
        "fg_missed_30_39",
        "fg_missed_40_49",
        "fg_missed_50_59",
        "fg_missed_60_",
        "fg_made_list",
        "fg_missed_list",
        "fg_blocked_list",
        "fg_made_distance",
        "fg_missed_distance",
        "fg_blocked_distance",
        "pat_made",
        "pat_att",
        "pat_missed",
        "pat_blocked",
        "pat_pct",
        "gwfg_made",
        "gwfg_att",
        "gwfg_missed",
        "gwfg_blocked",
        "gwfg_distance",
    },
    "special_teams": {
        "punt_returns",
        "punt_return_yards",
        "kickoff_returns",
        "kickoff_return_yards",
    },
}

categories_positions = {
    "passing": ["QB"],
    "rushing_and_receiving": ["RB", "WR"],
    # Not available "kicking": ["K"] 
}


In [9]:
# Establish dataframes for each positional group, filter by position value
def filter_by_positional_group(df, category_key_a, category_key_b="standard") -> pd.DataFrame:
    new_df = df.loc[:, list(column_categories[category_key_a] | column_categories[category_key_b])]
    new_df = new_df[new_df["position"].isin(categories_positions[category_key_a])]
    return new_df.reset_index(drop=True)

# Data of interest
passing_df = filter_by_positional_group(all_players_df, "passing")
rushing_and_receiving_df = filter_by_positional_group(all_players_df, "rushing_and_receiving")

# Save in case
defense_df = all_players_df.loc[:, list(column_categories["standard"] | column_categories["defense"])]
kicking_df = all_players_df.loc[:, list(column_categories["standard"] | column_categories["kicking"])]
specials_teams_df = all_players_df.loc[:, list(column_categories["standard"] | column_categories["special_teams"])]

# Give the dataframes each an alias
passing_df.alias = "passing_df"
rushing_and_receiving_df.alias = "rushing_and_receiving_df"
defense_df.alias = "defense_df"
kicking_df.alias = "kicking_df"
specials_teams_df.alias = "specials_teams_df"

In [10]:
for df in [passing_df, rushing_and_receiving_df, defense_df, kicking_df, specials_teams_df]:
    null_values = 0
    print(f"Checking {df.alias} for null values")
    null_counts = df.isna().sum()

    for column, count in zip(null_counts.index, null_counts):
        if count != 0:
            null_values += 1
            print(f"Column: {column} contains {count} null values.")
    
    print(f"{df.alias} contains {null_values} null values.")

df = None

Checking passing_df for null values
passing_df contains 0 null values.
Checking rushing_and_receiving_df for null values
rushing_and_receiving_df contains 0 null values.
Checking defense_df for null values
defense_df contains 0 null values.
Checking kicking_df for null values
kicking_df contains 0 null values.
Checking specials_teams_df for null values
specials_teams_df contains 0 null values.


### Implement and Apply Model

Implement multiple linear regression models to predict multiple statistics for rushers and receivers. 

* Define the targets: rushing_yards, rushing_tds, receiving_yards, receiving_tds

In [6]:
rushing_and_receiving_df.columns

Index(['season_type', 'receiving_yards', 'receiving_tds', 'week',
       'receiving_epa', 'rushing_2pt_conversions', 'rushing_first_downs',
       'player_name', 'receiving_fumbles_lost', 'season', 'target_share',
       'air_yards_share', 'receiving_air_yards', 'rushing_epa',
       'receiving_first_downs', 'racr', 'opponent_team', 'receptions',
       'position_group', 'rushing_fumbles_lost', 'player_id',
       'receiving_2pt_conversions', 'rushing_tds', 'carries', 'team',
       'rushing_fumbles', 'pacr', 'targets', 'receiving_fumbles', 'wopr',
       'rushing_yards', 'position', 'receiving_yards_after_catch'],
      dtype='object')

In [11]:
"""
Collect xs, and ys.
Normalize data using Z-score normalization
(val - mean_val) / standard_deviation

Create 4 buckets, one for each statistical category.

Prefix   | Stat Category
-------------------------
rsh_yd_ -> rushing_yards
rsh_td_ -> rushing_tds
rc_yd_  -> receiving_yards
rc_td_  -> receiving_tds
"""

inputs = {
    "rsh_yd": ["rushing_tds", "rushing_yards", "rushing_fumbles_lost", "racr", "rushing_epa", "rushing_fumbles", "carries", "pacr", "rushing_first_downs", "rushing_2pt_conversions"],
    "rsh_td": ["rushing_tds", "rushing_yards", "rushing_fumbles_lost", "racr", "rushing_epa", "rushing_fumbles", "carries", "pacr", "rushing_first_downs", "rushing_2pt_conversions"],
    "rc_yd" : ["receiving_epa", "receiving_2pt_conversions", "racr", "air_yards_share", "receiving_tds", "receiving_first_downs", "wopr", "receiving_yards_after_catch", "receiving_air_yards", "target_share", "pacr", "targets", "receiving_yards", "receiving_fumbles_lost", "receptions"],
    "rc_td" : ["receiving_epa", "receiving_2pt_conversions", "racr", "air_yards_share", "receiving_tds", "receiving_first_downs", "wopr", "receiving_yards_after_catch", "receiving_air_yards", "target_share", "pacr", "targets", "receiving_yards", "receiving_fumbles_lost", "receptions"],
    "rc": ["receiving_epa", "receiving_2pt_conversions", "racr", "air_yards_share", "receiving_tds", "receiving_first_downs", "wopr", "receiving_yards_after_catch", "receiving_air_yards", "target_share", "pacr", "targets", "receiving_yards", "receiving_fumbles_lost", "receptions"],
    "p_yd": ["completions", "attempts", "passing_yards", "passing_tds", "passing_interceptions", "sacks_suffered", "sack_yards_lost", "sack_fumbles", "sack_fumbles_lost", "passing_air_yards", "passing_yards_after_catch", "passing_first_downs", "passing_epa", "passing_cpoe", "passing_2pt_conversions", "pacr"],
    "p_td": ["completions", "attempts", "passing_yards", "passing_tds", "passing_interceptions", "sacks_suffered", "sack_yards_lost", "sack_fumbles", "sack_fumbles_lost", "passing_air_yards", "passing_yards_after_catch", "passing_first_downs", "passing_epa", "passing_cpoe", "passing_2pt_conversions", "pacr"],
    "intcpt": ["completions", "attempts", "passing_yards", "passing_tds", "passing_interceptions", "sacks_suffered", "sack_yards_lost", "sack_fumbles", "sack_fumbles_lost", "passing_air_yards", "passing_yards_after_catch", "passing_first_downs", "passing_epa", "passing_cpoe", "passing_2pt_conversions", "pacr"],
    "rsh_fmbls": ["rushing_tds", "rushing_yards", "rushing_fumbles_lost", "racr", "rushing_epa", "rushing_fumbles", "carries", "pacr", "rushing_first_downs", "rushing_2pt_conversions"],
    "rc_fmbls": ["receiving_epa", "receiving_2pt_conversions", "racr", "air_yards_share", "receiving_tds", "receiving_first_downs", "wopr", "receiving_yards_after_catch", "receiving_air_yards", "target_share", "pacr", "targets", "receiving_yards", "receiving_fumbles_lost", "receptions"],
    "def": ["def_tackles_solo", "def_tackles_with_assist", "def_tackle_assists", "def_tackles_for_loss", "def_tackles_for_loss_yards", "def_fumbles_forced", "def_sacks", "def_sack_yards", "def_qb_hits", "def_interceptions", "def_interception_yards", "def_pass_defended", "def_tds", "def_fumbles", "def_safeties"]
}

# Stat 1 rushing_yards
rsh_yd_X = rushing_and_receiving_df[inputs["rsh_yd"] + ["season", "week", "player_id", "player_name", "opponent_team", "depth_team"] + list(encoded_feature_names)]
# Stat 2 rushing_tds
rsh_td_X = rushing_and_receiving_df[inputs["rsh_td"] + ["season", "week", "player_id", "player_name", "opponent_team", "depth_team"] + list(encoded_feature_names)]
# Stat 3 receiving_yards
rc_yd_X = rushing_and_receiving_df[inputs["rc_yd"] + ["season", "week", "player_id", "player_name", "opponent_team", "depth_team"] + list(encoded_feature_names)]
# Stat 4 receiving_tds
rc_td_X = rushing_and_receiving_df[inputs["rc_td"] + ["season", "week", "player_id", "player_name", "opponent_team", "depth_team"] + list(encoded_feature_names)]
# Stat 5 receptions
rc_X = rushing_and_receiving_df[inputs["rc"] + ["season", "week", "player_id", "player_name", "opponent_team", "depth_team"] + list(encoded_feature_names)]
# Stat 6 passing_yards
p_yd_X = passing_df[inputs["p_yd"] + ["season", "week", "player_id", "player_name", "opponent_team", "depth_team"] + list(encoded_feature_names)]
# Stat 7 passing_tds
p_td_X = passing_df[inputs["p_td"] + ["season", "week", "player_id", "player_name", "opponent_team", "depth_team"] + list(encoded_feature_names)]
# Stat 8 interceptions
intcpt_X = passing_df[inputs["intcpt"] + ["season", "week", "player_id", "player_name", "opponent_team", "depth_team"] + list(encoded_feature_names)]
# Stat 9-10 fmbls
rsh_fmbls_X = pd.concat([rushing_and_receiving_df, passing_df]).reset_index(drop=True)[inputs["rsh_fmbls"] + ["season", "week", "player_id", "player_name", "opponent_team", "depth_team"] + list(encoded_feature_names)]
rc_fmbls_X = pd.concat([rushing_and_receiving_df, passing_df]).reset_index(drop=True)[inputs["rc_fmbls"] + ["season", "week", "player_id", "player_name", "opponent_team", "depth_team"] + list(encoded_feature_names)]

# Defensive stats for opponent
defense_stats_df = all_teams_df[inputs["def"] + ["season", "week", "team"]]

In [12]:
def calculate_rolling_data(df: pd.DataFrame, sort_values: list, input_ref: str, groupby: list, rolling_period: int=3, min_periods: int=1, shift: int=1):
    df = df.sort_values(sort_values)

    df[[f"{c}_roll{rolling_period}_shift" for c in inputs[input_ref]]] = (
        df.groupby(groupby)[inputs[input_ref]]
         .transform(lambda x: x.rolling(rolling_period, min_periods=min_periods).mean().shift(shift))
    )

    return df

rolling_period = 4

rsh_yd_X = calculate_rolling_data(rsh_yd_X, ["season", "week", "player_id"], "rsh_yd", ["season", "player_id"], rolling_period=rolling_period)
rsh_td_X = calculate_rolling_data(rsh_td_X, ["season", "week", "player_id"], "rsh_td", ["season", "player_id"], rolling_period=rolling_period)
rc_yd_X = calculate_rolling_data(rc_yd_X, ["season", "week", "player_id"], "rc_yd", ["season", "player_id"], rolling_period=rolling_period)
rc_td_X = calculate_rolling_data(rc_td_X, ["season", "week", "player_id"], "rc_td", ["season", "player_id"], rolling_period=rolling_period)
rc_X = calculate_rolling_data(rc_X, ["season", "week", "player_id"], "rc", ["season", "player_id"], rolling_period=rolling_period)
p_yd_X = calculate_rolling_data(p_yd_X, ["season", "week", "player_id"], "p_yd", ["season", "player_id"], rolling_period=rolling_period)
p_td_X = calculate_rolling_data(p_td_X, ["season", "week", "player_id"], "p_td", ["season", "player_id"], rolling_period=rolling_period)
intcpt_X = calculate_rolling_data(intcpt_X, ["season", "week", "player_id"], "intcpt", ["season", "player_id"], rolling_period=rolling_period)
rsh_fmbls_X = calculate_rolling_data(rsh_fmbls_X, ["season", "week", "player_id"], "rsh_fmbls", ["season", "player_id"], rolling_period=rolling_period)
rc_fmbls_X = calculate_rolling_data(rc_fmbls_X, ["season", "week", "player_id"], "rc_fmbls", ["season", "player_id"], rolling_period=rolling_period)
defense_stats_df = calculate_rolling_data(defense_stats_df, ["season", "week", "team"], "def", ["season", "team"], rolling_period=rolling_period)

In [13]:
# Normalize the data using Z-Score Standardization (sklearn StandardScaler)
from sklearn.preprocessing import StandardScaler

rsh_yd_input_cols = [col + f"_roll{rolling_period}_shift" for col in inputs["rsh_yd"]] + ["depth_team"] + list(encoded_feature_names) 
rsh_td_input_cols = [col + f"_roll{rolling_period}_shift" for col in inputs["rsh_td"]] + ["depth_team"] + list(encoded_feature_names)
rc_yd_input_cols = [col + f"_roll{rolling_period}_shift" for col in inputs["rc_yd"]] + ["depth_team"] + list(encoded_feature_names)
rc_td_input_cols = [col + f"_roll{rolling_period}_shift" for col in inputs["rc_td"]] + ["depth_team"] + list(encoded_feature_names)
rc_input_cols = [col + f"_roll{rolling_period}_shift" for col in inputs["rc"]] + ["depth_team"] + list(encoded_feature_names)
p_yd_input_cols = [col + f"_roll{rolling_period}_shift" for col in inputs["p_yd"]] + ["depth_team"] + list(encoded_feature_names)
p_td_input_cols = [col + f"_roll{rolling_period}_shift" for col in inputs["p_td"]] + ["depth_team"] + list(encoded_feature_names)
intcpt_input_cols = [col + f"_roll{rolling_period}_shift" for col in inputs["intcpt"]] + ["depth_team"] + list(encoded_feature_names)
rsh_fmbls_input_cols = [col + f"_roll{rolling_period}_shift" for col in inputs["rsh_fmbls"]] + ["depth_team"] + list(encoded_feature_names)
rc_fmbls_input_cols = [col + f"_roll{rolling_period}_shift" for col in inputs["rc_fmbls"]] + ["depth_team"] + list(encoded_feature_names)
def_input_cols = [col + f"_roll{rolling_period}_shift" for col in inputs["def"]]

scalers = {}

def scale_inplace(df, cols, name):
    # optionally: df = df.copy()  # if you want to avoid mutating the original
    scaler = StandardScaler()
    df.loc[:, cols] = scaler.fit_transform(df[cols])
    scalers[name] = scaler
    return df

rsh_yd_X = scale_inplace(rsh_yd_X, rsh_yd_input_cols, "rsh_yd")
rsh_td_X = scale_inplace(rsh_td_X, rsh_td_input_cols, "rsh_td")
rc_yd_X = scale_inplace(rc_yd_X,  rc_yd_input_cols,  "rc_yd")
rc_td_X = scale_inplace(rc_td_X,  rc_td_input_cols,  "rc_td")
p_yd_X = scale_inplace(p_yd_X,  p_yd_input_cols,  "p_yd")
p_td_X = scale_inplace(p_td_X,  p_td_input_cols,  "p_td")
intcpt_X = scale_inplace(intcpt_X,  intcpt_input_cols,  "intcpt")
rsh_fmbls_X = scale_inplace(rsh_fmbls_X, rsh_fmbls_input_cols, "rsh_fmbls")
rc_fmbls_X = scale_inplace(rc_fmbls_X, rc_fmbls_input_cols, "rc_fmbls")
defense_stats_df = scale_inplace(defense_stats_df, def_input_cols, "def")

In [14]:
# Merge the defensive dataframe to the offensive one.
rsh_yd_X = rsh_yd_X.merge(defense_stats_df, how="left", left_on=["opponent_team", "season", "week"], right_on=["team", "season", "week"])
rsh_td_X = rsh_td_X.merge(defense_stats_df, how="left", left_on=["opponent_team", "season", "week"], right_on=["team", "season", "week"])
rc_yd_X = rc_yd_X.merge(defense_stats_df, how="left", left_on=["opponent_team", "season", "week"], right_on=["team", "season", "week"])
rc_td_X = rc_td_X.merge(defense_stats_df, how="left", left_on=["opponent_team", "season", "week"], right_on=["team", "season", "week"])
rc_X = rc_X.merge(defense_stats_df, how="left", left_on=["opponent_team", "season", "week"], right_on=["team", "season", "week"])
p_yd_X = p_yd_X.merge(defense_stats_df, how="left", left_on=["opponent_team", "season", "week"], right_on=["team", "season", "week"])
p_td_X = p_td_X.merge(defense_stats_df, how="left", left_on=["opponent_team", "season", "week"], right_on=["team", "season", "week"])
intcpt_X = intcpt_X.merge(defense_stats_df, how="left", left_on=["opponent_team", "season", "week"], right_on=["team", "season", "week"])
rsh_fmbls_X = rsh_fmbls_X.merge(defense_stats_df, how="left", left_on=["opponent_team", "season", "week"], right_on=["team", "season", "week"])
rc_fmbls_X = rc_fmbls_X.merge(defense_stats_df, how="left", left_on=["opponent_team", "season", "week"], right_on=["team", "season", "week"])

### Apply the RNN Model

In [28]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import root_mean_squared_error
from sklearn.metrics import r2_score
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow as tf

model_results = {}
models = {}

input_dataframes = [rsh_yd_X, rsh_td_X, rc_yd_X, rc_td_X, rc_X, p_yd_X, p_td_X, intcpt_X, rsh_fmbls_X, rc_fmbls_X]  
inputs = [rsh_yd_input_cols, rsh_td_input_cols, rc_yd_input_cols, rc_td_input_cols, rc_input_cols, p_yd_input_cols, p_td_input_cols, intcpt_input_cols, rsh_fmbls_input_cols, rc_fmbls_input_cols]
targets = ["rushing_yards", "rushing_tds", "receiving_yards", "receiving_tds", "receptions", "passing_yards", "passing_tds", "passing_interceptions", "rushing_fumbles_lost", "receiving_fumbles_lost"]

window = 64      # past steps to look at 
horizon = 1      # predict 1 step ahead

def rmse(y_true, y_pred):
    return tf.sqrt(tf.reduce_mean(tf.square(y_pred - y_true)))

for df, input, target in zip(input_dataframes, inputs, targets):
    X = df.dropna()[input + def_input_cols].reset_index(drop=True)
    y = df.dropna()[target].reset_index(drop=True)

    X_train, X_valid, y_train, y_valid = train_test_split(
        X, y, test_size=0.2, random_state=42, shuffle=False
    )

    y_train = y_train.astype("float32")
    y_valid = y_valid.astype("float32")

    rnn = keras.Sequential([
        layers.Input(shape=(window, 1)),
        # Swap LSTM -> GRU by using layers.GRU(...)
        layers.LSTM(64, return_sequences=True),
        layers.LSTM(32),
        layers.Dense(horizon)
    ])

    rnn.compile(optimizer=keras.optimizers.Adam(1e-3), loss="mse", metrics=["mae", rmse])
    rnn.summary()

    callbacks = [
        keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=3),
        keras.callbacks.EarlyStopping(patience=8, restore_best_weights=True)
    ]
    
    history = rnn.fit(
        X_train, y_train,
        validation_data=(X_valid, y_valid),
        epochs=20,
        batch_size=256,
        callbacks=callbacks,
        verbose=1
    )
    
    # Evaluate
    preds = rnn.predict(X_valid, batch_size=256)
    rmse = root_mean_squared_error(y_valid, preds)

    r2 = r2_score(y_valid, preds)

    model_results[target] = {"validation_rmse": f"{rmse:.4f}", "r2": f"{r2:.3f}"}
    models[target] = rnn


Epoch 1/20
[1m389/389[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 74ms/step - loss: 801.8892 - mae: 16.7490 - rmse: 28.1268 - val_loss: 662.9083 - val_mae: 17.0031 - val_rmse: 25.6561 - learning_rate: 0.0010
Epoch 2/20
[1m389/389[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 76ms/step - loss: 787.0677 - mae: 18.0396 - rmse: 27.9106 - val_loss: 663.1049 - val_mae: 17.1273 - val_rmse: 25.6608 - learning_rate: 0.0010
Epoch 3/20
[1m389/389[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 75ms/step - loss: 787.0494 - mae: 18.0325 - rmse: 27.9030 - val_loss: 663.0603 - val_mae: 17.1159 - val_rmse: 25.6599 - learning_rate: 0.0010
Epoch 4/20
[1m389/389[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 75ms/step - loss: 787.0637 - mae: 18.0658 - rmse: 27.8690 - val_loss: 663.3377 - val_mae: 17.2268 - val_rmse: 25.6662 - learning_rate: 0.0010
Epoch 5/20
[1m389/389[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 76ms/step - loss: 787.0538 - mae: 18.1019 

Epoch 1/20


ValueError: Expected all entries in the `metrics` list to be metric objects. Received instead:
metrics=['mae', 25.74700927734375]

In [18]:
print(model_results)

{'rushing_yards': {'validation_rmse': '18.8798', 'r2': '0.462'}, 'rushing_tds': {'validation_rmse': '0.2902', 'r2': '0.223'}, 'receiving_yards': {'validation_rmse': '31.8813', 'r2': '-0.000'}, 'receiving_tds': {'validation_rmse': '0.3757', 'r2': '0.114'}, 'receptions': {'validation_rmse': '1.7227', 'r2': '0.457'}, 'passing_yards': {'validation_rmse': '197.2082', 'r2': '-2.812'}, 'passing_tds': {'validation_rmse': '1.0726', 'r2': '0.141'}, 'passing_interceptions': {'validation_rmse': '0.8427', 'r2': '0.018'}, 'rushing_fumbles_lost': {'validation_rmse': '0.1051', 'r2': '0.019'}, 'receiving_fumbles_lost': {'validation_rmse': '0.1070', 'r2': '0.017'}}


### Check Sample Predictions

In [19]:
# Params
SEASON = 2025
WEEK = 4

# Map dataframe names to their corresponding input-cols variable names
datasets = {
    "rsh_yd_X": "rsh_yd_input_cols",
    "rsh_td_X": "rsh_td_input_cols",
    "rc_yd_X":  "rc_yd_input_cols",
    "rc_td_X":  "rc_td_input_cols",
    "rc_X":     "rc_input_cols",
    "p_yd_X":   "p_yd_input_cols",
    "p_td_X":   "p_td_input_cols",
    "intcpt_X": "intcpt_input_cols",
    "rsh_fmbls_X": "rsh_fmbls_input_cols",
    "rc_fmbls_X":  "rc_fmbls_input_cols",
}

for df_name, cols_name in datasets.items():
    df = globals()[df_name]
    # 1) filter to current week
    cw_filtered = (
        df[(df["season"] == SEASON) & (df["week"] == WEEK)]
        .dropna()
        .reset_index(drop=True)
    )
    globals()[f"cw_{df_name}"] = cw_filtered


In [20]:
from collections import defaultdict
input_dataframes = [cw_rsh_yd_X, cw_rsh_td_X, cw_rc_yd_X, cw_rc_td_X, cw_rc_X, cw_p_yd_X, cw_p_td_X, cw_intcpt_X, cw_rsh_fmbls_X, cw_rc_fmbls_X]
inputs = [rsh_yd_input_cols, rsh_td_input_cols, rc_yd_input_cols, rc_td_input_cols, rc_input_cols, p_yd_input_cols, p_td_input_cols, intcpt_input_cols, rsh_fmbls_input_cols, rc_fmbls_input_cols]
targets = ["rushing_yards", "rushing_tds", "receiving_yards", "receiving_tds", "receptions", "passing_yards", "passing_tds", "passing_interceptions", "rushing_fumbles_lost", "receiving_fumbles_lost"]

player_projections = defaultdict(dict)

for df, input, target in zip(input_dataframes, inputs, targets):
    X = df.dropna()[input + def_input_cols].reset_index(drop=True)
    y = df.dropna()[target].reset_index(drop=True)

    # Get the model from saved
    reg = models[target]

    # Evaluate
    preds = reg.predict(X)

    for i, pred in enumerate(preds):
        player_id, player_name = df.loc[i, "player_id"], df.loc[i, "player_name"]
        if player_id not in player_projections:
            player_projections[player_id]["player_name"] = player_name
        
        player_data = POSITION_PLAYER_STAT_PROJECTION_DATA_DICT[target].copy()
        player_data["projection"] = pred
        player_data["true"] = y.iloc[i]

        player_projections[player_id][target] = player_data

[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step


In [21]:
week_4_projections = []
 
for player_id, data in player_projections.items():
    projected_points = 0
    true_points = 0
    for target in targets:
        if target in data:
            projected_points += data[target]["projection"] * data[target]["weight"]
            true_points += data[target]["true"] * data[target]["weight"]

    week_4_projections.append((data["player_name"], projected_points, true_points))

In [22]:
true = [x[2] for x in week_4_projections]
preds = [x[1] for x in week_4_projections]

error = root_mean_squared_error(true, preds)
r2 = r2_score(true, preds)
print(error, r2)

6.863065929306464 0.1942275666387382
