<a href="https://colab.research.google.com/github/bradymiller2310/FantasyFootballDashboard/blob/main/ESPN_FF_Scraper.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [106]:
!pip install espn-api
!pip install pandas
!pip install openpyxl

#pip uninstall -y numpy catboost
!pip install numpy==1.24.4  # Safe version compatible with catboost
!pip install catboost --no-cache-dir



In [107]:
from espn_api.football import League
import pandas as pd
import numpy as np
from catboost import CatBoostRegressor, Pool

In [108]:
league = League(league_id=1292514, year=2024, espn_s2="AECPNWyLvfnlcFfE0Xb73O%2BP5ZU9n1G%2FSnP3ixxvuqSwQRcQe7bEQfBuz3YBNeRluTYofdhX2aTajj0jZSF%2B8i9ewdk8Dwf6p9G215F2FjmI06wPKyhjleL4WzaT3PCiP15FDXe55YNMkQeJXgbsCuK1VHbdQ1Nxidq0JxAHezRn2zAt6WwNtSqrF0k2nNHV33VpJqPmMk4msFmaoCaz0UAp95xxy1YmXyEUnZWgA60HJkbsgYkZ8AaESwFENsPYdUvKYqCwnZLjvgI5varhOTDYekjvokdtaCoxJfzOdGLJ3g%3D%3D", swid="{D68FE5F2-46CE-421C-B248-DDA01360BB34}")
print(league.teams)  #Teams in league

[Team(Fightin' Furries), Team(Quon Solo), Team(Jen-eral ⚔️), Team(Captain Sweatpants), Team(Salmon LipBalm !!), Team(Graham’s Groupie), Team(pop-pop's bible study), Team(Bucktown Bandits), Team(bird gang), Team(bungalicious  💅)]


## **Testing out api functionalities**
*This section contains code chunks that load in various data - used to see what we had access to.*

In [109]:
for week in range(1, 15):
    print(f"Week {week} Schedule:")
    for matchup in league.scoreboard(week=week):
        print(f"{matchup.home_team} vs {matchup.away_team}")

Week 1 Schedule:
Team(Bucktown Bandits) vs Team(Graham’s Groupie)
Team(bungalicious  💅) vs Team(Fightin' Furries)
Team(bird gang) vs Team(Salmon LipBalm !!)
Team(pop-pop's bible study) vs Team(Quon Solo)
Team(Jen-eral ⚔️) vs Team(Captain Sweatpants)
Week 2 Schedule:
Team(Fightin' Furries) vs Team(Salmon LipBalm !!)
Team(Quon Solo) vs Team(Bucktown Bandits)
Team(Captain Sweatpants) vs Team(Graham’s Groupie)
Team(pop-pop's bible study) vs Team(bungalicious  💅)
Team(Jen-eral ⚔️) vs Team(bird gang)
Week 3 Schedule:
Team(Quon Solo) vs Team(Captain Sweatpants)
Team(Fightin' Furries) vs Team(pop-pop's bible study)
Team(Salmon LipBalm !!) vs Team(Jen-eral ⚔️)
Team(Bucktown Bandits) vs Team(bungalicious  💅)
Team(Graham’s Groupie) vs Team(bird gang)
Week 4 Schedule:
Team(pop-pop's bible study) vs Team(Jen-eral ⚔️)
Team(bungalicious  💅) vs Team(Quon Solo)
Team(bird gang) vs Team(Captain Sweatpants)
Team(Bucktown Bandits) vs Team(Fightin' Furries)
Team(Graham’s Groupie) vs Team(Salmon LipBalm !!)


In [110]:
free_agents = league.free_agents(size=20, position='QB')  # Get top 20 available QBs

print("Available Free Agents (QB):")
for player in free_agents:
    print(f"  - {player.name} ({player.proTeam})")

Available Free Agents (QB):
  - C.J. Stroud (HOU)
  - Tua Tagovailoa (MIA)
  - Anthony Richardson (IND)
  - Aaron Rodgers (None)
  - Kirk Cousins (ATL)
  - Bryce Young (CAR)
  - Michael Penix Jr. (ATL)
  - Trevor Lawrence (JAX)
  - Jameis Winston (CLE)
  - Derek Carr (NO)
  - Justin Fields (NYJ)
  - Joe Flacco (IND)
  - Deshaun Watson (CLE)
  - Cooper Rush (BAL)
  - Daniel Jones (IND)
  - Tom Brady (TB)
  - Mason Rudolph (PIT)
  - Mac Jones (SF)
  - Tommy DeVito (NYG)
  - Aidan O'Connell (LV)


In [111]:
for team in league.teams:
    print(f"\nTeam: {team.team_name}")
    print("Roster:")
    for player in team.roster:
        print(f"  - {player.name} ({player.position})")


Team: Fightin' Furries
Roster:
  - Tyreek Hill (WR)
  - Isiah Pacheco (RB)
  - Joe Mixon (RB)
  - Patrick Mahomes (QB)
  - Dalton Kincaid (TE)
  - Zay Flowers (WR)
  - Jaxon Smith-Njigba (WR)
  - Gus Edwards (RB)
  - Matthew Stafford (QB)
  - Kareem Hunt (RB)
  - Broncos D/ST (D/ST)
  - David Njoku (TE)
  - Caleb Williams (QB)
  - DeAndre Hopkins (WR)
  - Bills D/ST (D/ST)
  - Matthew Wright (K)

Team: Quon Solo
Roster:
  - Justin Jefferson (WR)
  - Kyren Williams (RB)
  - Marvin Harrison Jr. (WR)
  - Mike Evans (WR)
  - D'Andre Swift (RB)
  - Brian Robinson Jr. (RB)
  - Kyler Murray (QB)
  - J.K. Dobbins (RB)
  - Geno Smith (QB)
  - Chris Boswell (K)
  - Romeo Doubs (WR)
  - Mark Andrews (TE)
  - Bears D/ST (D/ST)
  - Courtland Sutton (WR)
  - Josh Downs (WR)
  - Evan Engram (TE)

Team: Jen-eral ⚔️
Roster:
  - Derrick Henry (RB)
  - A.J. Brown (WR)
  - Alvin Kamara (RB)
  - Jalen Hurts (QB)
  - Calvin Ridley (WR)
  - Raheem Mostert (RB)
  - Jets D/ST (D/ST)
  - Pat Freiermuth (TE)
  

In [112]:
week_number = 1
box_scores = league.box_scores(week=week_number)

for matchup in box_scores:
    print(f"\nMatchup: {matchup.home_team} vs {matchup.away_team}")
    print(f"  Home Score: {matchup.home_score}")
    print(f"  Away Score: {matchup.away_score}")


Matchup: Team(Bucktown Bandits) vs Team(Graham’s Groupie)
  Home Score: 73.0
  Away Score: 96.0

Matchup: Team(bungalicious  💅) vs Team(Fightin' Furries)
  Home Score: 101.0
  Away Score: 94.0

Matchup: Team(bird gang) vs Team(Salmon LipBalm !!)
  Home Score: 59.0
  Away Score: 101.0

Matchup: Team(pop-pop's bible study) vs Team(Quon Solo)
  Home Score: 79.0
  Away Score: 97.0

Matchup: Team(Jen-eral ⚔️) vs Team(Captain Sweatpants)
  Home Score: 82.0
  Away Score: 110.0


In [113]:
# getting injury status of free agents (not on rosters)

free_agents = league.free_agents()
i=0
for player in free_agents:
  player_obj = free_agents[i]
  print(f"Player: {player_obj.name}, Injury Status: {player_obj.injuryStatus}, Injured: {player_obj.injured}")
  i+=1

Player: Ladd McConkey, Injury Status: ACTIVE, Injured: False
Player: Keenan Allen, Injury Status: ACTIVE, Injured: False
Player: Michael Pittman Jr., Injury Status: QUESTIONABLE, Injured: False
Player: Steelers D/ST, Injury Status: [], Injured: False
Player: C.J. Stroud, Injury Status: ACTIVE, Injured: False
Player: Kyle Pitts, Injury Status: ACTIVE, Injured: False
Player: Adam Thielen, Injury Status: ACTIVE, Injured: False
Player: Chris Olave, Injury Status: QUESTIONABLE, Injured: False
Player: Tucker Kraft, Injury Status: ACTIVE, Injured: False
Player: 49ers D/ST, Injury Status: [], Injured: False
Player: Jerome Ford, Injury Status: QUESTIONABLE, Injured: False
Player: Texans D/ST, Injury Status: [], Injured: False
Player: Quentin Johnston, Injury Status: ACTIVE, Injured: False
Player: Tua Tagovailoa, Injury Status: QUESTIONABLE, Injured: False
Player: Nick Chubb, Injury Status: QUESTIONABLE, Injured: False
Player: Anthony Richardson, Injury Status: QUESTIONABLE, Injured: False
Playe

In [114]:
# getting injury status of players on rosters
for team in league.teams:
    print(f"\nTeam: {team.team_name}")
    for player in team.roster:
        print(f"Player: {player.name}, Injury Status: {player.injuryStatus}, Injured: {player.injured}")


Team: Fightin' Furries
Player: Tyreek Hill, Injury Status: QUESTIONABLE, Injured: False
Player: Isiah Pacheco, Injury Status: ACTIVE, Injured: False
Player: Joe Mixon, Injury Status: ACTIVE, Injured: False
Player: Patrick Mahomes, Injury Status: ACTIVE, Injured: False
Player: Dalton Kincaid, Injury Status: ACTIVE, Injured: False
Player: Zay Flowers, Injury Status: QUESTIONABLE, Injured: False
Player: Jaxon Smith-Njigba, Injury Status: ACTIVE, Injured: False
Player: Gus Edwards, Injury Status: ACTIVE, Injured: False
Player: Matthew Stafford, Injury Status: ACTIVE, Injured: False
Player: Kareem Hunt, Injury Status: ACTIVE, Injured: False
Player: Broncos D/ST, Injury Status: NORMAL, Injured: False
Player: David Njoku, Injury Status: QUESTIONABLE, Injured: False
Player: Caleb Williams, Injury Status: QUESTIONABLE, Injured: False
Player: DeAndre Hopkins, Injury Status: ACTIVE, Injured: False
Player: Bills D/ST, Injury Status: NORMAL, Injured: False
Player: Matthew Wright, Injury Status: AC

In [115]:
# getting starting roster for each week

for team in league.teams:
    print(f"\nTeam: {team.team_name} (Week {week})")

    # Get the team's roster for the given week
    for player in team.roster:
        #if player.lineupSlot != "BE" and player.lineupSlot != "IR":  # Exclude Bench and IR players
            print(f"  - {player.name} ({player.lineupSlot})")



Team: Fightin' Furries (Week 14)
  - Tyreek Hill (WR)
  - Isiah Pacheco (RB)
  - Joe Mixon (BE)
  - Patrick Mahomes (QB)
  - Dalton Kincaid (BE)
  - Zay Flowers (BE)
  - Jaxon Smith-Njigba (RB/WR/TE)
  - Gus Edwards (BE)
  - Matthew Stafford (BE)
  - Kareem Hunt (RB)
  - Broncos D/ST (BE)
  - David Njoku (TE)
  - Caleb Williams (BE)
  - DeAndre Hopkins (WR)
  - Bills D/ST (D/ST)
  - Matthew Wright (K)

Team: Quon Solo (Week 14)
  - Justin Jefferson (WR)
  - Kyren Williams (RB)
  - Marvin Harrison Jr. (BE)
  - Mike Evans (BE)
  - D'Andre Swift (RB)
  - Brian Robinson Jr. (RB/WR/TE)
  - Kyler Murray (QB)
  - J.K. Dobbins (BE)
  - Geno Smith (BE)
  - Chris Boswell (K)
  - Romeo Doubs (BE)
  - Mark Andrews (TE)
  - Bears D/ST (D/ST)
  - Courtland Sutton (WR)
  - Josh Downs (BE)
  - Evan Engram (BE)

Team: Jen-eral ⚔️ (Week 14)
  - Derrick Henry (RB)
  - A.J. Brown (WR)
  - Alvin Kamara (RB)
  - Jalen Hurts (QB)
  - Calvin Ridley (WR)
  - Raheem Mostert (BE)
  - Jets D/ST (D/ST)
  - Pat Fr

## **Getting the fantasy team rosters**
*Will use to join to player data to maintain team assigments*

In [116]:
# Define the range of weeks
weeks = range(1, 19)  # Weeks 1 to 18

# Create an empty list to store data
all_weeks_roster = []

# Loop through each week and get the roster data
for week in weeks:
    week_data = []  # Temporary storage for current week's data

    for team in league.teams:
        team_name = team.team_name  # Fantasy Team Name

        for player in team.roster:
            week_data.append({
                "cur_Fteam": team_name,
                "week": week,
                "espn_id": player.playerId,  # ESPN Unique Player ID
                "cur_roster_slot": player.position if player.lineupSlot != "BE" else "Bench"
            })

    # If no data is returned for this week, stop iterating
    if not week_data:
        print(f"No data found for Week {week}. Stopping iteration.")
        break  # Stop looping when a week has no data

    # Add this week's data to the master list
    all_weeks_roster.extend(week_data)

# Convert to DataFrame
weekly_rosters = pd.DataFrame(all_weeks_roster)

cur_roster = weekly_rosters[weekly_rosters['week'] == weekly_rosters['week'].max()].drop('week', axis=1)
print(cur_roster)

             cur_Fteam  espn_id cur_roster_slot
2720  Fightin' Furries  3116406              WR
2721  Fightin' Furries  4361529              RB
2722  Fightin' Furries  3116385           Bench
2723  Fightin' Furries  3139477              QB
2724  Fightin' Furries  4385690           Bench
...                ...      ...             ...
2875   bungalicious  💅  4243537           Bench
2876   bungalicious  💅  4689936               K
2877   bungalicious  💅  3912547              QB
2878   bungalicious  💅   -16011            D/ST
2879   bungalicious  💅  4426385              RB

[160 rows x 3 columns]


### **Getting statistical data from xlsx files**

In [117]:
from google.colab import drive
drive.mount('/content/drive', force_remount = True)

Mounted at /content/drive


In [118]:
file_path = "/content/drive/My Drive/ff_data.xlsx"

# Load specific sheets
season_stats_all = pd.read_excel(file_path, sheet_name="Season", engine="openpyxl")
weekly_stats_all = pd.read_excel(file_path, sheet_name="Weekly", engine="openpyxl")

# Display first few rows
print(season_stats_all.head())
print(weekly_stats_all.head())

   season   player_id player_display_name team position  games_played  \
0    2021  00-0019596           Tom Brady   TB       QB          20.0   
1    2021  00-0022924  Ben Roethlisberger  PIT       QB          18.0   
2    2021  00-0023459       Aaron Rodgers   GB       QB          17.0   
3    2021  00-0023682    Ryan Fitzpatrick  WAS       QB           1.0   
4    2021  00-0024243      Marcedes Lewis   GB       TE          14.0   

   completions  attempts  comp%  passing_yards  ...  total_st_tds  st_tds_pg  \
0        573.0     847.0  67.65         6187.0  ...           NaN        NaN   
1        448.0     693.0  64.65         4170.0  ...           NaN        NaN   
2        386.0     560.0  68.93         4340.0  ...           NaN        NaN   
3          3.0       6.0  50.00           13.0  ...           NaN        NaN   
4          0.0       0.0    NaN            0.0  ...           NaN        NaN   

   total_fp  fp_pg  espn_id                name   age  height  weight  \
0      

## **Joining current fantasy team roster data with weekly and season data**

### *Season data*

In [119]:
season_stats_all = pd.merge(season_stats_all, cur_roster, on="espn_id", how="left")

season_stats_all.loc[
    (season_stats_all['season'] == 2024) &
    (season_stats_all['cur_Fteam'] != 'FA') &
    (season_stats_all['cur_roster_slot'] != 'FA'),
    ['cur_Fteam', 'cur_roster_slot']
] = 'FA'

In [120]:
#print(season_stats_all.head(10))

### *Weekly data*

In [121]:
weekly_stats_all = pd.merge(weekly_stats_all, weekly_rosters, on=["espn_id", "week"], how="left")

weekly_stats_all.loc[
    (weekly_stats_all['season'] == 2024) &
    (weekly_stats_all['cur_Fteam'] != 'FA') &
    (weekly_stats_all['cur_roster_slot'] != 'FA'),
    ['cur_Fteam', 'cur_roster_slot']
] = 'FA'

In [122]:
#print(weekly_stats_all.head(10))

# **Getting matchup data**

In [123]:
total_weeks = league.settings.reg_season_count  # Regular season length

# Initialize records dict
team_records = {team.team_name: {'W': 0, 'L': 0} for team in league.teams}

# Store all weekly matchups and records
matchups_all_weeks = []

# Loop through each week
for week in range(1, total_weeks + 1):
    scoreboard = league.scoreboard(week=week)

    for matchup in scoreboard:
        home = matchup.home_team.team_name
        away = matchup.away_team.team_name
        home_score = matchup.home_score
        away_score = matchup.away_score

        # Determine winner and update records
        if home_score > away_score:
            team_records[home]['W'] += 1
            team_records[away]['L'] += 1
        elif home_score < away_score:
            team_records[away]['W'] += 1
            team_records[home]['L'] += 1
        else:
            # Tie case (optional)
            pass

        # Record after this week
        home_record = f"{team_records[home]['W']}-{team_records[home]['L']}"
        away_record = f"{team_records[away]['W']}-{team_records[away]['L']}"

        matchups_all_weeks.append({
            'week': week,
            'home_team': home,
            'away_team': away,
            'home_score': home_score,
            'away_score': away_score,
            'home_record': home_record,
            'away_record': away_record
        })

# Convert to DataFrame
weekly_matchup_data = pd.DataFrame(matchups_all_weeks)

# Display the result
print(weekly_matchup_data)

    week              home_team              away_team  home_score  \
0      1       Bucktown Bandits       Graham’s Groupie        73.0   
1      1        bungalicious  💅       Fightin' Furries       101.0   
2      1              bird gang      Salmon LipBalm !!        59.0   
3      1  pop-pop's bible study              Quon Solo        79.0   
4      1            Jen-eral ⚔️     Captain Sweatpants        82.0   
..   ...                    ...                    ...         ...   
65    14       Graham’s Groupie       Bucktown Bandits       143.0   
66    14      Salmon LipBalm !!       Fightin' Furries       115.0   
67    14     Captain Sweatpants              Quon Solo        88.0   
68    14            Jen-eral ⚔️  pop-pop's bible study        68.0   
69    14              bird gang        bungalicious  💅        81.0   

    away_score home_record away_record  
0         96.0         0-1         1-0  
1         94.0         1-0         0-1  
2        101.0         0-1         1

# **Getting injury status**
PROBABLY CAN'T USE B/C ONLY HAVE 2024 DATA

In [124]:
all_players = []
current_week = league.current_week

# 1. Rostered players from each team
for team in league.teams:
    for player in team.roster:
        all_players.append({
            "week": current_week,
            "team_owner": team.team_name,
            "name": player.name,
            "position": player.position,
            "espn_id": player.playerId,
            "injured": player.injured,
            "injury_status": player.injuryStatus,
            "rostered": True
        })

# 2. Free agents (unrostered players)
for player in league.free_agents(size=1000):  # you can adjust the size to pull more players
    all_players.append({
        "week": current_week,
        "team_owner": "Free Agent",
        "name": player.name,
        "position": player.position,
        "espn_id": player.playerId,
        "injured": player.injured,
        "injury_status": player.injuryStatus,
        "rostered": False
    })

# Optional: Convert to pandas DataFrame
import pandas as pd
injury_status = pd.DataFrame(all_players)

# Display or export
print(injury_status.head(10))

print(injury_status[injury_status["team_owner"] == "Free Agent"].head(10))

   week        team_owner                name position  espn_id  injured  \
0    17  Fightin' Furries         Tyreek Hill       WR  3116406    False   
1    17  Fightin' Furries       Isiah Pacheco       RB  4361529    False   
2    17  Fightin' Furries           Joe Mixon       RB  3116385    False   
3    17  Fightin' Furries     Patrick Mahomes       QB  3139477    False   
4    17  Fightin' Furries      Dalton Kincaid       TE  4385690    False   
5    17  Fightin' Furries         Zay Flowers       WR  4429615    False   
6    17  Fightin' Furries  Jaxon Smith-Njigba       WR  4430878    False   
7    17  Fightin' Furries         Gus Edwards       RB  3051926    False   
8    17  Fightin' Furries    Matthew Stafford       QB    12483    False   
9    17  Fightin' Furries         Kareem Hunt       RB  3059915    False   

  injury_status  rostered  
0  QUESTIONABLE      True  
1        ACTIVE      True  
2        ACTIVE      True  
3        ACTIVE      True  
4        ACTIVE      Tr

# **QB CatBoost Model**

## **Feature Engineering**
*Need to create features that can be used in both train and test sets since we can't use rush yards, pass yards (any game specific stats) because they have not occurred and need to use like rolling features or averages*

In [125]:
df_qb = weekly_stats_all[weekly_stats_all['position'] == 'QB'].copy()

# Sorting data for time-series purposes
df_qb = df_qb.sort_values(by=['player_id', 'season', 'week'])

# Creating lag/rolling features for model
# Takes into account past 3 games (or less, depending on the data available that week)
rolling_cols = [
    'passing_yards', 'passing_tds', 'interceptions', 'passing_epa',
    'rushing_yards', 'rushing_tds', 'completions', 'attempts',
    'fantasy_points_ppr', 'passing_air_yards', 'passing_yards_after_catch',
    'rushing_fumbles_lost', 'rushing_epa', 'rushing_2pt_conversions', 'passing_2pt_conversions'
]

for col in rolling_cols:
    df_qb[f'{col}_last3'] = df_qb.groupby('player_id')[col].shift(1).rolling(3, min_periods = 1).mean()

# Season averages
expanding_cols = ['passing_yards', 'passing_tds', 'interceptions', 'rushing_yards', 'rushing_tds', 'fantasy_points_ppr'
]

for col in expanding_cols:
    df_qb[col] = pd.to_numeric(df_qb[col], errors='coerce')

for col in expanding_cols:
    df_qb[f'{col}_season_avg'] = (
        df_qb.groupby(['player_id', 'season'])[col]
        .transform(lambda x: x.shift(1).expanding().mean())
    )

df_qb['completions'] = pd.to_numeric(df_qb['completions'], errors='coerce')
df_qb['attempts'] = pd.to_numeric(df_qb['attempts'], errors='coerce')

# creating yards per attempt feature
df_qb['yards_per_attempt'] = df_qb['passing_yards'] / df_qb['attempts'].replace(0, pd.NA)

# Creating completion percentage feature
df_qb['comp%'] = (
    (df_qb['completions'] / df_qb['attempts']) * 100
).replace([pd.NA, np.inf, -np.inf], np.nan)

df_qb['comp%'] = pd.to_numeric(df_qb['comp%'], errors='coerce')

df_qb['yards_per_attempt'] = pd.to_numeric(df_qb['yards_per_attempt'], errors='coerce')

print(df_qb.head(10))

    player_id player_name player_display_name position position_group  \
0  00-0019596     T.Brady           Tom Brady       QB             QB   
1  00-0019596     T.Brady           Tom Brady       QB             QB   
2  00-0019596     T.Brady           Tom Brady       QB             QB   
3  00-0019596     T.Brady           Tom Brady       QB             QB   
4  00-0019596     T.Brady           Tom Brady       QB             QB   
5  00-0019596     T.Brady           Tom Brady       QB             QB   
6  00-0019596     T.Brady           Tom Brady       QB             QB   
7  00-0019596     T.Brady           Tom Brady       QB             QB   
8  00-0019596     T.Brady           Tom Brady       QB             QB   
9  00-0019596     T.Brady           Tom Brady       QB             QB   

                                        headshot_url recent_team  season  \
0  https://static.www.nfl.com/image/private/f_aut...          TB    2021   
1  https://static.www.nfl.com/image/private/

In [126]:
#Creating some efficiency metrics
df_qb['pass_attempts_shifted'] = df_qb.groupby('player_id')['attempts'].shift(1)

df_qb['comp_pct_last3'] = df_qb.groupby('player_id')['comp%'].shift(1).rolling(3,min_periods = 1).mean()
df_qb['yards_per_attempt_last3'] = df_qb.groupby('player_id')['yards_per_attempt'].shift(1).rolling(3,min_periods = 1).mean()
df_qb['td_rate_last3'] = df_qb.groupby('player_id')['passing_tds'].shift(1).rolling(3,min_periods = 1).sum() / df_qb['pass_attempts_shifted']
df_qb['int_rate_last3'] = df_qb.groupby('player_id')['interceptions'].shift(1).rolling(3,min_periods = 1).sum() / df_qb['pass_attempts_shifted']

# Creating rolling/lag features for advanced QB metrics
advanced = ['pacr', 'dakota']
for col in advanced:
    df_qb[f'{col}_trend'] = df_qb.groupby('player_id')[col].shift(1).rolling(3,min_periods = 1).mean()

# Creating defensive lag features
opponent_cols = ['opp_avg_ypg_allowed', 'opp_avg_ppg_allowed', 'opp_int_pg', 'opp_sacks_pg', 'opp_fumbles_pg']
for col in opponent_cols:
    df_qb[f'{col}_trend'] = df_qb.groupby('player_id')[col].shift(1).rolling(3,min_periods = 1).mean()


In [127]:
#print(df_qb.head(10))

***Getting list of columns to select from***

In [128]:
#print(df_qb.columns.tolist())

In [129]:
TARGET_COL = 'fantasy_points_ppr'

# Creating test and train sets
train_df = df_qb[
    (df_qb[TARGET_COL].notna()) &
    (
        (df_qb['season'] < 2024) |
        ((df_qb['season'] == 2024) & (df_qb['week'] < 14))
    )
]

test_df = df_qb[(df_qb['season'] == 2024) & (df_qb['week'] == 14)]


feature_cols = [
    col for col in df_qb.columns
    if col.endswith('_last3') or col.endswith('_trend') or col.endswith('_season_avg')
] + ['player_display_name','season', 'week', 'opponent_team']


X_train = train_df[feature_cols]
y_train = train_df[TARGET_COL]

X_test = test_df[feature_cols]

# for RMSE testing purposes on the test set
y_test = test_df[TARGET_COL]

In [130]:
categorical_cols = ['opponent_team', 'season', 'week', 'player_display_name']

# Create Pool objects
train_pool = Pool(data=X_train, label=y_train, cat_features=categorical_cols)
test_pool = Pool(data=X_test, cat_features=categorical_cols)

# Initialize the CatBoost model
model = CatBoostRegressor(
    iterations = 1000,
    learning_rate = 0.01,
    depth=6,
    loss_function='RMSE',
    random_seed=42,
    verbose=50  # or 0 to suppress output
)

# Train
model.fit(train_pool)

0:	learn: 8.9456128	total: 9.53ms	remaining: 9.52s
50:	learn: 8.1178320	total: 905ms	remaining: 16.8s
100:	learn: 7.6410595	total: 1.79s	remaining: 16s
150:	learn: 7.3802963	total: 2.99s	remaining: 16.8s
200:	learn: 7.2176080	total: 4.13s	remaining: 16.4s
250:	learn: 7.0782452	total: 5.79s	remaining: 17.3s
300:	learn: 6.9539157	total: 7.27s	remaining: 16.9s
350:	learn: 6.8611232	total: 8.56s	remaining: 15.8s
400:	learn: 6.7691747	total: 9.62s	remaining: 14.4s
450:	learn: 6.6989539	total: 10.3s	remaining: 12.5s
500:	learn: 6.6265265	total: 10.7s	remaining: 10.6s
550:	learn: 6.5734129	total: 11s	remaining: 9s
600:	learn: 6.5214721	total: 11.4s	remaining: 7.58s
650:	learn: 6.4766232	total: 11.8s	remaining: 6.33s
700:	learn: 6.4189386	total: 12.2s	remaining: 5.21s
750:	learn: 6.3705137	total: 12.6s	remaining: 4.18s
800:	learn: 6.3211150	total: 13s	remaining: 3.23s
850:	learn: 6.2796464	total: 13.4s	remaining: 2.34s
900:	learn: 6.2402344	total: 13.8s	remaining: 1.51s
950:	learn: 6.1951748	t

<catboost.core.CatBoostRegressor at 0x7aa19d172810>

In [131]:
y_pred = model.predict(test_pool)

# Attach predictions to test set for review
QB_test_pred = X_test.copy()
QB_test_pred['predicted_fantasy_points'] = y_pred

print(QB_test_pred[['season', 'week', 'opponent_team', 'predicted_fantasy_points', 'player_display_name']])

       season  week opponent_team  predicted_fantasy_points  \
16904    2024    14           MIA                 15.398341   
16933    2024    14           BUF                 16.767097   
16953    2024    14           CLE                 16.336262   
16971    2024    14           MIN                 14.764804   
17095    2024    14           ARI                 17.328012   
17141    2024    14           NYG                 16.465678   
17192    2024    14           PIT                 13.248461   
17272    2024    14            KC                  9.573386   
17368    2024    14           CHI                  9.949140   
17510    2024    14            GB                 17.270025   
17624    2024    14           ATL                  8.258414   
17781    2024    14           CIN                 13.767175   
17869    2024    14           LAC                 20.055098   
18439    2024    14            LV                 17.792246   
18457    2024    14            LA                 21.19

### Adding boom/bust prediction interval

In [132]:
# Finding standard deviation residuals from trian data
train_preds = model.predict(train_pool)
residuals = y_train - train_preds
std_resid = residuals.std()

# Creating boom/bust interval
ci_upper = y_pred + 1.645 * std_resid
ci_lower = y_pred - 1.645 * std_resid

# setting minimum to 0
ci_lower = np.maximum(ci_lower, 0)


QB_test_pred['upper_bound'] = ci_upper
QB_test_pred['lower_bound'] = ci_lower

print(QB_test_pred[['season', 'week', 'opponent_team', 'lower_bound', 'predicted_fantasy_points', 'upper_bound', 'player_display_name']])

       season  week opponent_team  lower_bound  predicted_fantasy_points  \
16904    2024    14           MIA     5.165464                 15.398341   
16933    2024    14           BUF     6.534220                 16.767097   
16953    2024    14           CLE     6.103385                 16.336262   
16971    2024    14           MIN     4.531927                 14.764804   
17095    2024    14           ARI     7.095135                 17.328012   
17141    2024    14           NYG     6.232800                 16.465678   
17192    2024    14           PIT     3.015584                 13.248461   
17272    2024    14            KC     0.000000                  9.573386   
17368    2024    14           CHI     0.000000                  9.949140   
17510    2024    14            GB     7.037148                 17.270025   
17624    2024    14           ATL     0.000000                  8.258414   
17781    2024    14           CIN     3.534297                 13.767175   
17869    202

### Evaluating QB CatBoost model predictions

In [133]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Actuals and predictions
y_true = y_test
y_pred = QB_test_pred['predicted_fantasy_points']

# RMSE and MAE
rmse = mean_squared_error(y_true, y_pred)
mae = mean_absolute_error(y_true, y_pred)

print(f"✅ RMSE: {rmse:.2f}")
print(f"✅ MAE: {mae:.2f}")

✅ RMSE: 66.43
✅ MAE: 5.82


# **RB CatBoost Model**

In [134]:
df_rb = weekly_stats_all[weekly_stats_all['position'] == 'RB'].copy()

# Sorting for time-series purposes
df_rb = df_rb.sort_values(by=['player_id', 'season', 'week'])

# Creating lag/rolling features for model
# Takes into account past 3 games (or less, depending on the data available that week)
rolling_cols = [
    'rushing_yards', 'rushing_tds',
    'fantasy_points_ppr', 'carries',
    'rushing_fumbles_lost', 'rushing_epa', 'rushing_2pt_conversions', 'passing_2pt_conversions', 'receptions', 'targets', 'receiving_yards', 'receiving_tds', 'receiving_fumbles', 'receiving_fumbles_lost', 'receiving_air_yards', 'receiving_yards_after_catch', 'receiving_epa', 'receiving_2pt_conversions', 'target_share', 'offense_snaps', 'offense_pct', 'air_yards_share'
    ]

for col in rolling_cols:
    df_rb[f'{col}_last3'] = df_rb.groupby('player_id')[col].shift(1).rolling(3, min_periods = 1).mean()

# Creating season averages for certain stats
expanding_cols = ['rushing_yards', 'rushing_tds', 'fantasy_points_ppr'
]

for col in expanding_cols:
    df_rb[col] = pd.to_numeric(df_rb[col], errors='coerce')

for col in expanding_cols:
    df_rb[f'{col}_season_avg'] = (
        df_rb.groupby(['player_id', 'season'])[col]
        .transform(lambda x: x.shift(1).expanding().mean())
    )


df_rb['carries'] = pd.to_numeric(df_rb['carries'], errors='coerce')

# Creating yards per carry feature
df_rb['yards_per_attempt'] = df_rb['rushing_yards'] / df_rb['carries'].replace(0, pd.NA)
df_rb['yards_per_attempt'] = pd.to_numeric(df_rb['yards_per_attempt'], errors='coerce')

print(df_rb.head(10))

      player_id player_name player_display_name position position_group  \
68   00-0025394  A.Peterson     Adrian Peterson       RB             RB   
69   00-0025394  A.Peterson     Adrian Peterson       RB             RB   
70   00-0025394  A.Peterson     Adrian Peterson       RB             RB   
71   00-0025394  A.Peterson     Adrian Peterson       RB             RB   
266  00-0027966    M.Ingram         Mark Ingram       RB             RB   
267  00-0027966    M.Ingram         Mark Ingram       RB             RB   
268  00-0027966    M.Ingram         Mark Ingram       RB             RB   
269  00-0027966    M.Ingram         Mark Ingram       RB             RB   
270  00-0027966    M.Ingram         Mark Ingram       RB             RB   
271  00-0027966    M.Ingram         Mark Ingram       RB             RB   

                                          headshot_url recent_team  season  \
68   https://static.www.nfl.com/image/private/f_aut...         TEN    2021   
69   https://stati

In [135]:
# Creating efficiency metrics for RBs
df_rb['carries_shifted'] = df_rb.groupby('player_id')['carries'].shift(1)
df_rb['receptions_shifted'] = df_rb.groupby('player_id')['receptions'].shift(1)


df_rb['yards_per_attempt_last3'] = df_rb.groupby('player_id')['yards_per_attempt'].shift(1).rolling(3,min_periods = 1).mean()
df_rb['rush_td_rate_last3'] = df_rb.groupby('player_id')['rushing_tds'].shift(1).rolling(3,min_periods = 1).sum() / df_rb['carries_shifted']
df_rb['rec_td_rate_last3'] = df_rb.groupby('player_id')['receiving_tds'].shift(1).rolling(3,min_periods = 1).sum() / df_rb['receptions_shifted']

# Crewating lag/rolling features for RB advanced stats
advanced = ['wopr', 'racr']
for col in advanced:
    df_rb[f'{col}_trend'] = df_rb.groupby('player_id')[col].shift(1).rolling(3,min_periods = 1).mean()

# Create lag features for opposing defensive stats
opponent_cols = ['opp_avg_ypg_allowed', 'opp_avg_ppg_allowed', 'opp_int_pg', 'opp_sacks_pg', 'opp_fumbles_pg']
for col in opponent_cols:
    df_rb[f'{col}_trend'] = df_rb.groupby('player_id')[col].shift(1).rolling(3,min_periods = 1).mean()


In [136]:
TARGET_COL = 'fantasy_points_ppr'

# Creating train and test sets
train_df = df_rb[
    (df_rb[TARGET_COL].notna()) &
    (
        (df_rb['season'] < 2024) |
        ((df_rb['season'] == 2024) & (df_rb['week'] < 14))
    )
]

test_df = df_rb[(df_rb['season'] == 2024) & (df_rb['week'] == 14)]


feature_cols = [
    col for col in df_rb.columns
    if col.endswith('_last3') or col.endswith('_trend') or col.endswith('_season_avg')
] + ['player_display_name','season', 'week', 'opponent_team']


X_train = train_df[feature_cols]
y_train = train_df[TARGET_COL]

X_test = test_df[feature_cols]

# for RMSE testing purposes on the test set
y_test = test_df[TARGET_COL]

In [137]:
categorical_cols = ['opponent_team', 'season', 'week', 'player_display_name']

# Create Pool objects
train_pool = Pool(data=X_train, label=y_train, cat_features=categorical_cols)
test_pool = Pool(data=X_test, cat_features=categorical_cols)

# Initialize the CatBoost model
model = CatBoostRegressor(
    #iterations=200,
    iterations = 1000,
    learning_rate = 0.01,
    depth=6,
    loss_function='RMSE',
    random_seed=42,
    verbose=50  # or 0 to suppress output
)

# Train
model.fit(train_pool)

0:	learn: 7.8844507	total: 13.4ms	remaining: 13.4s
50:	learn: 7.1091038	total: 523ms	remaining: 9.74s
100:	learn: 6.7186174	total: 996ms	remaining: 8.87s
150:	learn: 6.5112589	total: 1.51s	remaining: 8.47s
200:	learn: 6.3966107	total: 1.98s	remaining: 7.88s
250:	learn: 6.3243741	total: 2.46s	remaining: 7.35s
300:	learn: 6.2673514	total: 3.32s	remaining: 7.72s
350:	learn: 6.2252575	total: 4.27s	remaining: 7.9s
400:	learn: 6.1873968	total: 5.24s	remaining: 7.82s
450:	learn: 6.1521589	total: 6s	remaining: 7.3s
500:	learn: 6.1211165	total: 6.51s	remaining: 6.49s
550:	learn: 6.0919244	total: 7.01s	remaining: 5.71s
600:	learn: 6.0590259	total: 7.5s	remaining: 4.98s
650:	learn: 6.0330075	total: 7.97s	remaining: 4.27s
700:	learn: 6.0075611	total: 8.46s	remaining: 3.61s
750:	learn: 5.9851419	total: 8.93s	remaining: 2.96s
800:	learn: 5.9593712	total: 9.44s	remaining: 2.35s
850:	learn: 5.9339092	total: 9.93s	remaining: 1.74s
900:	learn: 5.9055668	total: 10.4s	remaining: 1.15s
950:	learn: 5.880843

<catboost.core.CatBoostRegressor at 0x7aa1a0ed4410>

In [138]:
y_pred = model.predict(test_pool)

# Attach predictions to test set for review
RB_test_pred = X_test.copy()
RB_test_pred['predicted_fantasy_points'] = y_pred

print(RB_test_pred[['season', 'week', 'opponent_team', 'predicted_fantasy_points', 'player_display_name']])

       season  week opponent_team  predicted_fantasy_points  \
17108    2024    14           CLE                  5.243407   
17290    2024    14            TB                  9.328712   
17470    2024    14           CIN                  5.558331   
17603    2024    14           ATL                 14.405033   
17675    2024    14           LAC                  5.799112   
...       ...   ...           ...                       ...   
22051    2024    14           BUF                  4.237266   
22104    2024    14           MIA                  5.381880   
22117    2024    14           MIA                  4.171702   
22222    2024    14           NYJ                  3.950106   
22444    2024    14           SEA                  4.711971   

         player_display_name  
17108  Cordarrelle Patterson  
17290         Ameer Abdullah  
17470        Ezekiel Elliott  
17603            Aaron Jones  
17675          Samaje Perine  
...                      ...  
22051            Blake Cor

### Adding boom/bust prediction interval

In [139]:
train_preds = model.predict(train_pool)
residuals = y_train - train_preds
std_resid = residuals.std()

# Creating boom/bust intervals
ci_upper = y_pred + 1.645 * std_resid
ci_lower = y_pred - 1.645 * std_resid

# setting minimum to 0
ci_lower = np.maximum(ci_lower, 0)


RB_test_pred['upper_bound'] = ci_upper
RB_test_pred['lower_bound'] = ci_lower

print(RB_test_pred[['season', 'week', 'opponent_team', 'lower_bound', 'predicted_fantasy_points', 'upper_bound', 'player_display_name']])

       season  week opponent_team  lower_bound  predicted_fantasy_points  \
17108    2024    14           CLE     0.000000                  5.243407   
17290    2024    14            TB     0.000000                  9.328712   
17470    2024    14           CIN     0.000000                  5.558331   
17603    2024    14           ATL     4.700067                 14.405033   
17675    2024    14           LAC     0.000000                  5.799112   
...       ...   ...           ...          ...                       ...   
22051    2024    14           BUF     0.000000                  4.237266   
22104    2024    14           MIA     0.000000                  5.381880   
22117    2024    14           MIA     0.000000                  4.171702   
22222    2024    14           NYJ     0.000000                  3.950106   
22444    2024    14           SEA     0.000000                  4.711971   

       upper_bound    player_display_name  
17108    14.948374  Cordarrelle Patterson  

### Evaluating RB CatBoost model predictions

In [140]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Actuals and predictions
y_true = y_test
y_pred = RB_test_pred['predicted_fantasy_points']

# RMSE and MAE
rmse = mean_squared_error(y_true, y_pred)
mae = mean_absolute_error(y_true, y_pred)

print(f"✅ RMSE: {rmse:.2f}")
print(f"✅ MAE: {mae:.2f}")

✅ RMSE: 51.45
✅ MAE: 5.08


# **WR CatBoost Model**

In [141]:
df_wr = weekly_stats_all[weekly_stats_all['position'] == 'WR'].copy()

# Sorting for time series purposes
df_wr = df_wr.sort_values(by=['player_id', 'season', 'week'])

# Creating lag/rolling features to be used as input for the model
rolling_cols = [
    'rushing_yards', 'rushing_tds',
    'fantasy_points_ppr', 'carries',
    'rushing_epa', 'rushing_2pt_conversions', 'receptions', 'targets', 'receiving_yards', 'receiving_tds', 'receiving_fumbles', 'receiving_fumbles_lost', 'receiving_air_yards', 'receiving_yards_after_catch', 'receiving_epa', 'receiving_2pt_conversions', 'target_share', 'offense_snaps', 'offense_pct', 'air_yards_share'
    ]

for col in rolling_cols:
    df_wr[f'{col}_last3'] = df_wr.groupby('player_id')[col].shift(1).rolling(3, min_periods = 1).mean()

# Creating season averages
expanding_cols = ['receiving_yards', 'receiving_tds', 'fantasy_points_ppr'
]

for col in expanding_cols:
    df_wr[col] = pd.to_numeric(df_wr[col], errors='coerce')

for col in expanding_cols:
    df_wr[f'{col}_season_avg'] = (
        df_wr.groupby(['player_id', 'season'])[col]
        .transform(lambda x: x.shift(1).expanding().mean())
    )


df_wr['receiving_yards'] = pd.to_numeric(df_wr['receiving_yards'], errors='coerce')
df_wr['receptions'] = pd.to_numeric(df_wr['receptions'], errors='coerce')

# Creating yards per catch data
df_wr['yards_per_catch'] = df_wr['receiving_yards'] / df_wr['receptions'].replace(0, pd.NA)
df_wr['yards_per_catch'] = pd.to_numeric(df_wr['yards_per_catch'], errors='coerce')



In [142]:
# Creating WR efficiency metrics
df_wr['carries_shifted'] = df_wr.groupby('player_id')['carries'].shift(1)
df_wr['receptions_shifted'] = df_wr.groupby('player_id')['receptions'].shift(1)


df_wr['yards_per_reception_last3'] = df_wr.groupby('player_id')['yards_per_catch'].shift(1).rolling(3,min_periods = 1).mean()
df_wr['rec_td_rate_last3'] = df_wr.groupby('player_id')['receiving_tds'].shift(1).rolling(3,min_periods = 1).sum() / df_rb['receptions_shifted']

# Creating rolling/lag features for advanced WR stats
advanced = ['wopr', 'racr']
for col in advanced:
    df_wr[f'{col}_trend'] = df_wr.groupby('player_id')[col].shift(1).rolling(3,min_periods = 1).mean()

# Creating lag/rolling features for opposing defense stats
opponent_cols = ['opp_avg_ypg_allowed', 'opp_avg_ppg_allowed', 'opp_int_pg', 'opp_sacks_pg', 'opp_fumbles_pg']
for col in opponent_cols:
    df_wr[f'{col}_trend'] = df_wr.groupby('player_id')[col].shift(1).rolling(3,min_periods = 1).mean()

In [143]:
TARGET_COL = 'fantasy_points_ppr'

# Creating test & trian sets
train_df = df_wr[
    (df_wr[TARGET_COL].notna()) &
    (
        (df_wr['season'] < 2024) |
        ((df_wr['season'] == 2024) & (df_wr['week'] < 14))
    )
]

test_df = df_wr[(df_wr['season'] == 2024) & (df_wr['week'] == 14)]


feature_cols = [
    col for col in df_wr.columns
    if col.endswith('_last3') or col.endswith('_trend') or col.endswith('_season_avg')
] + ['player_display_name','season', 'week', 'opponent_team']


X_train = train_df[feature_cols]
y_train = train_df[TARGET_COL]

X_test = test_df[feature_cols]

# for RMSE testing purposes on the test set
y_test = test_df[TARGET_COL]

In [144]:
categorical_cols = ['opponent_team', 'season', 'week', 'player_display_name']

# Create Pool objects
train_pool = Pool(data=X_train, label=y_train, cat_features=categorical_cols)
test_pool = Pool(data=X_test, cat_features=categorical_cols)

# Initialize the CatBoost model
model = CatBoostRegressor(
    iterations = 1000,
    learning_rate = 0.01,
    depth=6,
    loss_function='RMSE',
    random_seed=42,
    verbose=50  # or 0 to suppress output
)

# Train
model.fit(train_pool)

0:	learn: 7.7354804	total: 14ms	remaining: 14s
50:	learn: 7.0373176	total: 588ms	remaining: 10.9s
100:	learn: 6.7128962	total: 1.16s	remaining: 10.3s
150:	learn: 6.5449715	total: 1.7s	remaining: 9.57s
200:	learn: 6.4472930	total: 2.29s	remaining: 9.09s
250:	learn: 6.3861382	total: 2.87s	remaining: 8.56s
300:	learn: 6.3393508	total: 3.52s	remaining: 8.18s
350:	learn: 6.3074649	total: 4.71s	remaining: 8.71s
400:	learn: 6.2821407	total: 5.96s	remaining: 8.91s
450:	learn: 6.2567554	total: 6.93s	remaining: 8.43s
500:	learn: 6.2340853	total: 7.51s	remaining: 7.48s
550:	learn: 6.2107628	total: 8.09s	remaining: 6.59s
600:	learn: 6.1924229	total: 8.68s	remaining: 5.76s
650:	learn: 6.1720275	total: 9.29s	remaining: 4.98s
700:	learn: 6.1550782	total: 9.87s	remaining: 4.21s
750:	learn: 6.1342741	total: 10.5s	remaining: 3.48s
800:	learn: 6.1121091	total: 11.1s	remaining: 2.76s
850:	learn: 6.0932859	total: 11.7s	remaining: 2.05s
900:	learn: 6.0732707	total: 12.3s	remaining: 1.35s
950:	learn: 6.05259

<catboost.core.CatBoostRegressor at 0x7aa1a0ef77d0>

In [145]:
y_pred = model.predict(test_pool)

# Attach predictions to test set for review
WR_test_pred = X_test.copy()
WR_test_pred['predicted_fantasy_points'] = y_pred

print(WR_test_pred[['season', 'week', 'opponent_team', 'predicted_fantasy_points', 'player_display_name']])

       season  week opponent_team  predicted_fantasy_points  \
16992    2024    14           PHI                 12.697022   
17027    2024    14            SF                 13.145877   
17076    2024    14           LAC                 10.829618   
17120    2024    14           NYJ                  4.354308   
17126    2024    14           CIN                  9.168697   
...       ...   ...           ...                       ...   
22251    2024    14           NYJ                  3.544078   
22285    2024    14           TEN                 14.310319   
22302    2024    14           LAC                  9.110364   
22385    2024    14           CHI                  4.902839   
22424    2024    14            SF                  8.677350   

      player_display_name  
16992        Adam Thielen  
17027        Keenan Allen  
17076     DeAndre Hopkins  
17120       Odell Beckham  
17126       Brandin Cooks  
...                   ...  
22251    Malik Washington  
22285        Brian 

### Adding boom/bust prediction interval

In [146]:
train_preds = model.predict(train_pool)
residuals = y_train - train_preds
std_resid = residuals.std()

# Creating boom/bust interval
ci_upper = y_pred + 1.645 * std_resid
ci_lower = y_pred - 1.645 * std_resid

# setting minimum to 0
ci_lower = np.maximum(ci_lower, 0)


WR_test_pred['upper_bound'] = ci_upper
WR_test_pred['lower_bound'] = ci_lower

print(WR_test_pred[['season', 'week', 'opponent_team', 'lower_bound', 'predicted_fantasy_points', 'upper_bound', 'player_display_name']])

       season  week opponent_team  lower_bound  predicted_fantasy_points  \
16992    2024    14           PHI     2.831348                 12.697022   
17027    2024    14            SF     3.280202                 13.145877   
17076    2024    14           LAC     0.963944                 10.829618   
17120    2024    14           NYJ     0.000000                  4.354308   
17126    2024    14           CIN     0.000000                  9.168697   
...       ...   ...           ...          ...                       ...   
22251    2024    14           NYJ     0.000000                  3.544078   
22285    2024    14           TEN     4.444645                 14.310319   
22302    2024    14           LAC     0.000000                  9.110364   
22385    2024    14           CHI     0.000000                  4.902839   
22424    2024    14            SF     0.000000                  8.677350   

       upper_bound player_display_name  
16992    22.562697        Adam Thielen  
17027

### Evaluating the WR CatBoost model preidctions

In [147]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Actuals and predictions
y_true = y_test
y_pred = WR_test_pred['predicted_fantasy_points']

# RMSE and MAE
rmse = mean_squared_error(y_true, y_pred)
mae = mean_absolute_error(y_true, y_pred)

print(f"✅ RMSE: {rmse:.2f}")
print(f"✅ MAE: {mae:.2f}")

✅ RMSE: 57.90
✅ MAE: 5.54


# **TE CatBoost Model**

In [148]:
df_te= weekly_stats_all[weekly_stats_all['position'] == 'TE'].copy()

# Sorting for time series purposes
df_te = df_te.sort_values(by=['player_id', 'season', 'week'])

# Creating lag/rolling features to be used as model input
rolling_cols = [
    'fantasy_points_ppr', 'receptions', 'targets', 'receiving_yards', 'receiving_tds', 'receiving_fumbles', 'receiving_fumbles_lost', 'receiving_air_yards', 'receiving_yards_after_catch', 'receiving_epa', 'receiving_2pt_conversions', 'target_share', 'offense_snaps', 'offense_pct', 'air_yards_share'
    ]

for col in rolling_cols:
    df_te[f'{col}_last3'] = df_te.groupby('player_id')[col].shift(1).rolling(3, min_periods = 1).mean()

# Creating season averages
expanding_cols = ['receiving_yards', 'receiving_tds', 'fantasy_points_ppr'
]

for col in expanding_cols:
    df_te[col] = pd.to_numeric(df_te[col], errors='coerce')

for col in expanding_cols:
    df_te[f'{col}_season_avg'] = (
        df_te.groupby(['player_id', 'season'])[col]
        .transform(lambda x: x.shift(1).expanding().mean())
    )


df_te['receiving_yards'] = pd.to_numeric(df_te['receiving_yards'], errors='coerce')
df_te['receptions'] = pd.to_numeric(df_te['receptions'], errors='coerce')

# Creating yards per catch feature
df_te['yards_per_catch'] = df_te['receiving_yards'] / df_te['receptions'].replace(0, pd.NA)
df_te['yards_per_catch'] = pd.to_numeric(df_te['yards_per_catch'], errors='coerce')



print(df_te.head(10))

     player_id player_name player_display_name position position_group  \
54  00-0024243     M.Lewis      Marcedes Lewis       TE             TE   
55  00-0024243     M.Lewis      Marcedes Lewis       TE             TE   
56  00-0024243     M.Lewis      Marcedes Lewis       TE             TE   
57  00-0024243     M.Lewis      Marcedes Lewis       TE             TE   
58  00-0024243     M.Lewis      Marcedes Lewis       TE             TE   
59  00-0024243     M.Lewis      Marcedes Lewis       TE             TE   
60  00-0024243     M.Lewis      Marcedes Lewis       TE             TE   
61  00-0024243     M.Lewis      Marcedes Lewis       TE             TE   
62  00-0024243     M.Lewis      Marcedes Lewis       TE             TE   
63  00-0024243     M.Lewis      Marcedes Lewis       TE             TE   

                                         headshot_url recent_team  season  \
54  https://static.www.nfl.com/image/private/f_aut...          GB    2021   
55  https://static.www.nfl.com/

In [149]:
# Creating efficiency metrics (rolling)
df_te['carries_shifted'] = df_te.groupby('player_id')['carries'].shift(1)
df_te['receptions_shifted'] = df_te.groupby('player_id')['receptions'].shift(1)


df_te['yards_per_reception_last3'] = df_te.groupby('player_id')['yards_per_catch'].shift(1).rolling(3,min_periods = 1).mean()
df_te['rec_td_rate_last3'] = df_te.groupby('player_id')['receiving_tds'].shift(1).rolling(3,min_periods = 1).sum() / df_rb['receptions_shifted']

# Creating rolling features for advanced features
advanced = ['wopr', 'racr']
for col in advanced:
    df_te[f'{col}_trend'] = df_te.groupby('player_id')[col].shift(1).rolling(3,min_periods = 1).mean()

# -------------------------------
# STEP 7: Opponent Defense (already lagged by design)
# -------------------------------
opponent_cols = ['opp_avg_ypg_allowed', 'opp_avg_ppg_allowed', 'opp_int_pg', 'opp_sacks_pg', 'opp_fumbles_pg']
for col in opponent_cols:
    df_te[f'{col}_trend'] = df_te.groupby('player_id')[col].shift(1).rolling(3,min_periods = 1).mean()

In [150]:
TARGET_COL = 'fantasy_points_ppr'

# test and train splits
train_df = df_te[
    (df_te[TARGET_COL].notna()) &
    (
        (df_te['season'] < 2024) |
        ((df_te['season'] == 2024) & (df_te['week'] < 14))
    )
]

test_df = df_te[(df_te['season'] == 2024) & (df_te['week'] == 14)]


feature_cols = [
    col for col in df_te.columns
    if col.endswith('_last3') or col.endswith('_trend') or col.endswith('_season_avg')
] + ['player_display_name','season', 'week', 'opponent_team']


X_train = train_df[feature_cols]
y_train = train_df[TARGET_COL]

X_test = test_df[feature_cols]

# for RMSE testing purposes on the test set
y_test = test_df[TARGET_COL]

In [151]:
categorical_cols = ['opponent_team', 'season', 'week', 'player_display_name']

# Create Pool objects
train_pool = Pool(data=X_train, label=y_train, cat_features=categorical_cols)
test_pool = Pool(data=X_test, cat_features=categorical_cols)

# Initialize the CatBoost model
model = CatBoostRegressor(
    iterations = 1000,
    learning_rate = 0.01,
    depth=6,
    loss_function='RMSE',
    random_seed=42,
    verbose=50  # or 0 to suppress output
)

# Train
model.fit(train_pool)

0:	learn: 6.0043360	total: 10.2ms	remaining: 10.2s
50:	learn: 5.5139502	total: 460ms	remaining: 8.56s
100:	learn: 5.2612506	total: 846ms	remaining: 7.53s
150:	learn: 5.1284839	total: 1.23s	remaining: 6.91s
200:	learn: 5.0491599	total: 1.62s	remaining: 6.44s
250:	learn: 4.9901773	total: 2.01s	remaining: 6s
300:	learn: 4.9456479	total: 2.42s	remaining: 5.62s
350:	learn: 4.9109847	total: 3.19s	remaining: 5.9s
400:	learn: 4.8800247	total: 3.95s	remaining: 5.9s
450:	learn: 4.8487963	total: 4.81s	remaining: 5.85s
500:	learn: 4.8237270	total: 5.65s	remaining: 5.63s
550:	learn: 4.7986172	total: 6.09s	remaining: 4.96s
600:	learn: 4.7725064	total: 6.51s	remaining: 4.32s
650:	learn: 4.7482421	total: 6.91s	remaining: 3.71s
700:	learn: 4.7267429	total: 7.32s	remaining: 3.12s
750:	learn: 4.7011546	total: 7.75s	remaining: 2.57s
800:	learn: 4.6761008	total: 8.17s	remaining: 2.03s
850:	learn: 4.6526628	total: 8.59s	remaining: 1.5s
900:	learn: 4.6258754	total: 9s	remaining: 989ms
950:	learn: 4.5970454	t

<catboost.core.CatBoostRegressor at 0x7aa1a0ef6610>

In [152]:
y_pred = model.predict(test_pool)

# Attach predictions to test set for review
TE_test_pred = X_test.copy()
TE_test_pred['predicted_fantasy_points'] = y_pred

print(TE_test_pred[['season', 'week', 'opponent_team', 'predicted_fantasy_points', 'player_display_name']])

       season  week opponent_team  predicted_fantasy_points  \
17057    2024    14           LAC                 14.593815   
17229    2024    14           CLE                  2.987043   
17362    2024    14           JAX                  3.234373   
17585    2024    14           CHI                 11.279746   
17659    2024    14           ARI                  2.971119   
17744    2024    14           CHI                  3.839349   
17843    2024    14           NYJ                 11.708264   
17884    2024    14           TEN                 10.905046   
17894    2024    14           PIT                 12.753433   
18053    2024    14            KC                  6.312394   
18089    2024    14           MIA                  7.436870   
18151    2024    14           PIT                  4.685981   
18254    2024    14           DAL                  5.483110   
18340    2024    14           NYJ                  3.332110   
18372    2024    14           DAL                  8.05

### Adding boom/bust interval using the standard deviation

In [153]:
train_preds = model.predict(train_pool)
residuals = y_train - train_preds
std_resid = residuals.std()

# Creating boom/bust interval
ci_upper = y_pred + 1.645 * std_resid
ci_lower = y_pred - 1.645 * std_resid

# setting minimum to 0
ci_lower = np.maximum(ci_lower, 0)


TE_test_pred['upper_bound'] = ci_upper
TE_test_pred['lower_bound'] = ci_lower

print(TE_test_pred[['season', 'week', 'opponent_team', 'lower_bound', 'predicted_fantasy_points', 'upper_bound', 'player_display_name']])

       season  week opponent_team  lower_bound  predicted_fantasy_points  \
17057    2024    14           LAC     7.016120                 14.593815   
17229    2024    14           CLE     0.000000                  2.987043   
17362    2024    14           JAX     0.000000                  3.234373   
17585    2024    14           CHI     3.702051                 11.279746   
17659    2024    14           ARI     0.000000                  2.971119   
17744    2024    14           CHI     0.000000                  3.839349   
17843    2024    14           NYJ     4.130568                 11.708264   
17884    2024    14           TEN     3.327350                 10.905046   
17894    2024    14           PIT     5.175737                 12.753433   
18053    2024    14            KC     0.000000                  6.312394   
18089    2024    14           MIA     0.000000                  7.436870   
18151    2024    14           PIT     0.000000                  4.685981   
18254    202

### Evaluating predictions

In [154]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Actuals and predictions
y_true = y_test
y_pred = TE_test_pred['predicted_fantasy_points']

# RMSE and MAE
rmse = mean_squared_error(y_true, y_pred)
mae = mean_absolute_error(y_true, y_pred)

print(f"✅ RMSE: {rmse:.2f}")
print(f"✅ MAE: {mae:.2f}")

✅ RMSE: 13.65
✅ MAE: 2.93


# **Putting together all the prediction data**

In [155]:
QB_test_pred = QB_test_pred[['season', 'week', 'opponent_team', 'lower_bound', 'predicted_fantasy_points', 'upper_bound', 'player_display_name']]
RB_test_pred = RB_test_pred[['season', 'week', 'opponent_team', 'lower_bound', 'predicted_fantasy_points', 'upper_bound', 'player_display_name']]
WR_test_pred = WR_test_pred[['season', 'week', 'opponent_team', 'lower_bound', 'predicted_fantasy_points', 'upper_bound', 'player_display_name']]
TE_test_pred = TE_test_pred[['season', 'week', 'opponent_team', 'lower_bound', 'predicted_fantasy_points', 'upper_bound', 'player_display_name']]


all_predictions = pd.concat([QB_test_pred, RB_test_pred, WR_test_pred, TE_test_pred], axis= 0, ignore_index = True)

In [156]:
print(all_predictions)

     season  week opponent_team  lower_bound  predicted_fantasy_points  \
0      2024    14           MIA     5.165464                 15.398341   
1      2024    14           BUF     6.534220                 16.767097   
2      2024    14           CLE     6.103385                 16.336262   
3      2024    14           MIN     4.531927                 14.764804   
4      2024    14           ARI     7.095135                 17.328012   
..      ...   ...           ...          ...                       ...   
241    2024    14           PHI     0.000000                  5.931194   
242    2024    14           NYG     0.000000                  2.910450   
243    2024    14           JAX     0.000000                  2.433625   
244    2024    14           SEA     0.000000                  2.307815   
245    2024    14           ARI     0.000000                  4.575119   

     upper_bound    player_display_name  
0      25.631218          Aaron Rodgers  
1      26.999974       Matt

# Putting dataframes back in excel/google drive
*Saving the season & weekly stats data frames, the data frame containing all the player fantasy point predictions, and the data frame containing the schedule & team record*

In [157]:
import os

output_path = '/content/drive/MyDrive/all_fantasy_data.xlsx'

if os.path.exists(output_path):
    os.remove(output_path)

with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
    weekly_stats_all.to_excel(writer, sheet_name='Weekly_Stats', index=False)
    season_stats_all.to_excel(writer, sheet_name='Season_Stats', index=False)
    weekly_matchup_data.to_excel(writer, sheet_name='Team_Schedules', index=False)
    all_predictions.to_excel(writer, sheet_name='Player_Predictions', index=False)