<a href="https://colab.research.google.com/github/bradymiller2310/FantasyFootballDashboard/blob/main/ESPN_FF_Scraper.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install espn-api
!pip install pandas
!pip install openpyxl

#!pip uninstall -y numpy catboost
!pip install numpy==1.24.4  # Safe version compatible with catboost
!pip install catboost --no-cache-dir

Collecting catboost
  Downloading catboost-1.2.8-cp311-cp311-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.8-cp311-cp311-manylinux2014_x86_64.whl (99.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.2/99.2 MB[0m [31m335.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.8


In [None]:
from espn_api.football import League
import pandas as pd
import numpy as np
from catboost import CatBoostRegressor, Pool

In [None]:
try:
    league = League(league_id=1292514, year=2024, espn_s2="AECPNWyLvfnlcFfE0Xb73O%2BP5ZU9n1G%2FSnP3ixxvuqSwQRcQe7bEQfBuz3YBNeRluTYofdhX2aTajj0jZSF%2B8i9ewdk8Dwf6p9G215F2FjmI06wPKyhjleL4WzaT3PCiP15FDXe55YNMkQeJXgbsCuK1VHbdQ1Nxidq0JxAHezRn2zAt6WwNtSqrF0k2nNHV33VpJqPmMk4msFmaoCaz0UAp95xxy1YmXyEUnZWgA60HJkbsgYkZ8AaESwFENsPYdUvKYqCwnZLjvgI5varhOTDYekjvokdtaCoxJfzOdGLJ3g%3D%3D", swid="{D68FE5F2-46CE-421C-B248-DDA01360BB34}")
    print(league.teams)
except Exception as e:
    print("API Parameters incorrect.")

[Team(Fightin' Furries), Team(Quon Solo), Team(Jen-eral ⚔️), Team(Captain Sweatpants), Team(Salmon LipBalm !!), Team(Graham’s Groupie), Team(pop-pop's bible study), Team(Bucktown Bandits), Team(bird gang), Team(bungalicious  💅)]


## **Getting the fantasy team rosters**
*Will use to join to player data to maintain team assigments*

In [None]:
# Define the range of weeks
weeks = range(1, 23)  # Weeks 1 to 18

# Create an empty list to store data
all_weeks_roster = []

# Loop through each week and get the roster data
for week in weeks:
    week_data = []  # Temporary storage for current week's data

    for team in league.teams:
        team_name = team.team_name  # Fantasy Team Name

        for player in team.roster:
            week_data.append({
                "cur_Fteam": team_name,
                "week": week,
                "espn_id": player.playerId,  # ESPN Unique Player ID
                "cur_roster_slot": player.position if player.lineupSlot != "BE" else "Bench"
            })

    # If no data is returned for this week, stop iterating
    if not week_data:
        print(f"No data found for Week {week}. Stopping iteration.")
        break  # Stop looping when a week has no data

    # Add this week's data to the master list
    all_weeks_roster.extend(week_data)

# Convert to DataFrame
weekly_rosters = pd.DataFrame(all_weeks_roster)

#cur_roster = weekly_rosters[weekly_rosters['week'] == weekly_rosters['week'].max()]
print(weekly_rosters)

             cur_Fteam  week  espn_id cur_roster_slot
0     Fightin' Furries     1  3116406              WR
1     Fightin' Furries     1  4361529              RB
2     Fightin' Furries     1  3116385           Bench
3     Fightin' Furries     1  3139477              QB
4     Fightin' Furries     1  4385690           Bench
...                ...   ...      ...             ...
3515   bungalicious  💅    22  4243537           Bench
3516   bungalicious  💅    22  4689936               K
3517   bungalicious  💅    22  3912547              QB
3518   bungalicious  💅    22   -16011            D/ST
3519   bungalicious  💅    22  4426385              RB

[3520 rows x 4 columns]


### **Getting statistical data from xlsx files**

In [None]:
from google.colab import drive
drive.mount('/content/drive', force_remount = True)

Mounted at /content/drive


In [None]:
file_path = "/content/drive/My Drive/ff_data.xlsx"

# Load specific sheets
season_stats_all = pd.read_excel(file_path, sheet_name="Season", engine="openpyxl")
weekly_stats_all = pd.read_excel(file_path, sheet_name="Weekly", engine="openpyxl")

# Display first few rows
print(season_stats_all.head())
print(weekly_stats_all.head())

   season   player_id player_display_name team position  games_played  \
0    2021  00-0019596           Tom Brady   TB       QB          20.0   
1    2021  00-0022924  Ben Roethlisberger  PIT       QB          18.0   
2    2021  00-0023459       Aaron Rodgers   GB       QB          17.0   
3    2021  00-0023682    Ryan Fitzpatrick  WAS       QB           1.0   
4    2021  00-0024243      Marcedes Lewis   GB       TE          14.0   

   completions  attempts  comp%  passing_yards  ...  total_st_tds  st_tds_pg  \
0        573.0     847.0  67.65         6187.0  ...           NaN        NaN   
1        448.0     693.0  64.65         4170.0  ...           NaN        NaN   
2        386.0     560.0  68.93         4340.0  ...           NaN        NaN   
3          3.0       6.0  50.00           13.0  ...           NaN        NaN   
4          0.0       0.0    NaN            0.0  ...           NaN        NaN   

   total_fp  fp_pg  espn_id                name   age  height  weight  \
0      

## **Joining current fantasy team roster data with weekly and season data**

### *Season data*

In [None]:
cur_roster = weekly_rosters[weekly_rosters['week'] == weekly_rosters['week'].max()]

season_stats_all = pd.merge(season_stats_all, cur_roster, on="espn_id", how="left")

In [None]:
season_stats_all.loc[
    season_stats_all['season'] == 2024 &
    season_stats_all['cur_Fteam'].isna() &
    season_stats_all['cur_roster_slot'].isna(),
    ['cur_Fteam', 'cur_roster_slot']
] = 'FA'

### *Weekly data*

In [None]:
weekly_stats_all = pd.merge(weekly_stats_all, weekly_rosters, on=["espn_id", "week"], how="left")

weekly_stats_all.loc[
    (weekly_stats_all['season'] == 2024) &
    (weekly_stats_all['cur_Fteam'].isna()) &
    (weekly_stats_all['cur_roster_slot'].isna()),
    ['cur_Fteam', 'cur_roster_slot']
] = 'FA'

In [None]:
#print(weekly_stats_all)

# **Getting matchup data**

In [None]:
total_weeks = league.settings.reg_season_count  # Regular season length

# Initialize records dict
team_records = {team.team_name: {'W': 0, 'L': 0} for team in league.teams}

# Store all weekly matchups and records
matchups_all_weeks = []

# Loop through each week
for week in range(1, total_weeks + 1):
    scoreboard = league.scoreboard(week=week)

    for matchup in scoreboard:
        home = matchup.home_team.team_name
        away = matchup.away_team.team_name
        home_score = matchup.home_score
        away_score = matchup.away_score

        # Determine winner and update records
        if home_score > away_score:
            team_records[home]['W'] += 1
            team_records[away]['L'] += 1
        elif home_score < away_score:
            team_records[away]['W'] += 1
            team_records[home]['L'] += 1
        else:
            # Tie case (optional)
            pass

        # Record after this week
        home_record = f"{team_records[home]['W']}-{team_records[home]['L']}"
        away_record = f"{team_records[away]['W']}-{team_records[away]['L']}"

        matchups_all_weeks.append({
            'week': week,
            'home_team': home,
            'away_team': away,
            'home_score': home_score,
            'away_score': away_score,
            'home_record': home_record,
            'away_record': away_record
        })

# Convert to DataFrame
weekly_matchup_data = pd.DataFrame(matchups_all_weeks)

# Display the result
print(weekly_matchup_data)

    week              home_team              away_team  home_score  \
0      1       Bucktown Bandits       Graham’s Groupie        73.0   
1      1        bungalicious  💅       Fightin' Furries       101.0   
2      1              bird gang      Salmon LipBalm !!        59.0   
3      1  pop-pop's bible study              Quon Solo        79.0   
4      1            Jen-eral ⚔️     Captain Sweatpants        82.0   
..   ...                    ...                    ...         ...   
65    14       Graham’s Groupie       Bucktown Bandits       143.0   
66    14      Salmon LipBalm !!       Fightin' Furries       115.0   
67    14     Captain Sweatpants              Quon Solo        88.0   
68    14            Jen-eral ⚔️  pop-pop's bible study        68.0   
69    14              bird gang        bungalicious  💅        81.0   

    away_score home_record away_record  
0         96.0         0-1         1-0  
1         94.0         1-0         0-1  
2        101.0         0-1         1

# **QB CatBoost Model**

## **Feature Engineering**
*Need to create features that can be used in both train and test sets since we can't use rush yards, pass yards (any game specific stats) because they have not occurred and need to use like rolling features or averages*

In [None]:
df_qb = weekly_stats_all[weekly_stats_all['position'] == 'QB'].copy()

# Sorting data for time-series purposes
df_qb = df_qb.sort_values(by=['player_id', 'season', 'week'])

# Creating lag/rolling features for model
# Takes into account past 3 games (or less, depending on the data available that week)
rolling_cols = [
    'passing_yards', 'passing_tds', 'interceptions', 'passing_epa',
    'rushing_yards', 'rushing_tds', 'completions', 'attempts',
    'fantasy_points_ppr', 'passing_air_yards', 'passing_yards_after_catch',
    'rushing_fumbles_lost', 'rushing_epa', 'rushing_2pt_conversions', 'passing_2pt_conversions'
]

for col in rolling_cols:
    df_qb[f'{col}_last3'] = df_qb.groupby('player_id')[col].shift(1).rolling(3, min_periods = 1).mean()

# Season averages
expanding_cols = ['passing_yards', 'passing_tds', 'interceptions', 'rushing_yards', 'rushing_tds', 'fantasy_points_ppr'
]

for col in expanding_cols:
    df_qb[col] = pd.to_numeric(df_qb[col], errors='coerce')

for col in expanding_cols:
    df_qb[f'{col}_season_avg'] = (
        df_qb.groupby(['player_id', 'season'])[col]
        .transform(lambda x: x.shift(1).expanding().mean())
    )

df_qb['completions'] = pd.to_numeric(df_qb['completions'], errors='coerce')
df_qb['attempts'] = pd.to_numeric(df_qb['attempts'], errors='coerce')

# creating yards per attempt feature
df_qb['yards_per_attempt'] = df_qb['passing_yards'] / df_qb['attempts'].replace(0, pd.NA)

# Creating completion percentage feature
df_qb['comp%'] = (
    (df_qb['completions'] / df_qb['attempts']) * 100
).replace([pd.NA, np.inf, -np.inf], np.nan)

df_qb['comp%'] = pd.to_numeric(df_qb['comp%'], errors='coerce')

df_qb['yards_per_attempt'] = pd.to_numeric(df_qb['yards_per_attempt'], errors='coerce')

print(df_qb.head(10))

    player_id player_name player_display_name position position_group  \
0  00-0019596     T.Brady           Tom Brady       QB             QB   
1  00-0019596     T.Brady           Tom Brady       QB             QB   
2  00-0019596     T.Brady           Tom Brady       QB             QB   
3  00-0019596     T.Brady           Tom Brady       QB             QB   
4  00-0019596     T.Brady           Tom Brady       QB             QB   
5  00-0019596     T.Brady           Tom Brady       QB             QB   
6  00-0019596     T.Brady           Tom Brady       QB             QB   
7  00-0019596     T.Brady           Tom Brady       QB             QB   
8  00-0019596     T.Brady           Tom Brady       QB             QB   
9  00-0019596     T.Brady           Tom Brady       QB             QB   

                                        headshot_url recent_team  season  \
0  https://static.www.nfl.com/image/private/f_aut...          TB    2021   
1  https://static.www.nfl.com/image/private/

In [None]:
#Creating some efficiency metrics
df_qb['pass_attempts_shifted'] = df_qb.groupby('player_id')['attempts'].shift(1)

df_qb['comp_pct_last3'] = df_qb.groupby('player_id')['comp%'].shift(1).rolling(3,min_periods = 1).mean()
df_qb['yards_per_attempt_last3'] = df_qb.groupby('player_id')['yards_per_attempt'].shift(1).rolling(3,min_periods = 1).mean()
df_qb['td_rate_last3'] = df_qb.groupby('player_id')['passing_tds'].shift(1).rolling(3,min_periods = 1).sum() / df_qb['pass_attempts_shifted']
df_qb['int_rate_last3'] = df_qb.groupby('player_id')['interceptions'].shift(1).rolling(3,min_periods = 1).sum() / df_qb['pass_attempts_shifted']

# Creating rolling/lag features for advanced QB metrics
advanced = ['pacr', 'dakota']
for col in advanced:
    df_qb[f'{col}_trend'] = df_qb.groupby('player_id')[col].shift(1).rolling(3,min_periods = 1).mean()

# Creating defensive lag features
opponent_cols = ['opp_avg_ypg_allowed', 'opp_avg_ppg_allowed', 'opp_int_pg', 'opp_sacks_pg', 'opp_fumbles_pg']
for col in opponent_cols:
    df_qb[f'{col}_trend'] = df_qb.groupby('player_id')[col].shift(1).rolling(3,min_periods = 1).mean()


In [None]:
#print(df_qb.head(10))

***Getting list of columns to select from***

In [None]:
#print(df_qb.columns.tolist())

In [None]:
TARGET_COL = 'fantasy_points_ppr'

# Creating test and train sets
train_df = df_qb[
    (df_qb[TARGET_COL].notna()) &
    (
        (df_qb['season'] < 2024) |
        ((df_qb['season'] == 2024) & (df_qb['week'] < 14))
    )
]

test_df = df_qb[(df_qb['season'] == 2024) & (df_qb['week'] == 14)]


feature_cols = [
    col for col in df_qb.columns
    if col.endswith('_last3') or col.endswith('_trend') or col.endswith('_season_avg')
] + ['player_display_name','season', 'week', 'opponent_team']


X_train = train_df[feature_cols]
y_train = train_df[TARGET_COL]

X_test = test_df[feature_cols]

# for RMSE testing purposes on the test set
y_test = test_df[TARGET_COL]

In [None]:
categorical_cols = ['opponent_team', 'season', 'week', 'player_display_name']

# Create Pool objects
train_pool = Pool(data=X_train, label=y_train, cat_features=categorical_cols)
test_pool = Pool(data=X_test, cat_features=categorical_cols)

# Initialize the CatBoost model
model = CatBoostRegressor(
    iterations = 1000,
    learning_rate = 0.01,
    depth=6,
    loss_function='RMSE',
    random_seed=42,
    verbose=50  # or 0 to suppress output
)

# Train
model.fit(train_pool)

0:	learn: 8.9456128	total: 48.9ms	remaining: 48.8s
50:	learn: 8.1178320	total: 402ms	remaining: 7.48s
100:	learn: 7.6410595	total: 760ms	remaining: 6.77s
150:	learn: 7.3802963	total: 1.13s	remaining: 6.35s
200:	learn: 7.2176080	total: 1.5s	remaining: 5.96s
250:	learn: 7.0782452	total: 1.87s	remaining: 5.58s
300:	learn: 6.9539157	total: 2.25s	remaining: 5.23s
350:	learn: 6.8611232	total: 2.64s	remaining: 4.87s
400:	learn: 6.7691747	total: 3.02s	remaining: 4.52s
450:	learn: 6.6989539	total: 3.41s	remaining: 4.15s
500:	learn: 6.6265265	total: 3.77s	remaining: 3.75s
550:	learn: 6.5734129	total: 4.16s	remaining: 3.39s
600:	learn: 6.5214721	total: 4.52s	remaining: 3s
650:	learn: 6.4766232	total: 4.92s	remaining: 2.63s
700:	learn: 6.4189386	total: 5.33s	remaining: 2.27s
750:	learn: 6.3705137	total: 5.72s	remaining: 1.9s
800:	learn: 6.3211150	total: 6.45s	remaining: 1.6s
850:	learn: 6.2796464	total: 7.24s	remaining: 1.27s
900:	learn: 6.2402344	total: 8.12s	remaining: 892ms
950:	learn: 6.195174

<catboost.core.CatBoostRegressor at 0x7ed00ba4edd0>

In [None]:
y_pred = model.predict(test_pool)

# Attach predictions to test set for review
QB_test_pred = X_test.copy()
QB_test_pred['predicted_fantasy_points'] = y_pred
#QB_test_pred['espn_id'] = test_df['espn_id']
QB_test_pred['espn_id'] = test_df['espn_id'].astype(int)
QB_test_pred['position'] = "QB"
#print(QB_test_pred.head(10))
#print(QB_test_pred.columns.tolist())

print(QB_test_pred[['season', 'week', 'opponent_team', 'predicted_fantasy_points', 'player_display_name', 'espn_id', 'position']])

       season  week opponent_team  predicted_fantasy_points  \
20305    2024    14           MIA                 15.398341   
20334    2024    14           BUF                 16.767097   
20354    2024    14           CLE                 16.336262   
20372    2024    14           MIN                 14.764804   
20496    2024    14           ARI                 17.328012   
20542    2024    14           NYG                 16.465678   
20593    2024    14           PIT                 13.248461   
20673    2024    14            KC                  9.573386   
20769    2024    14           CHI                  9.949140   
20911    2024    14            GB                 17.270025   
21025    2024    14           ATL                  8.258414   
21182    2024    14           CIN                 13.767175   
21270    2024    14           LAC                 20.055098   
21840    2024    14            LV                 17.792246   
21858    2024    14            LA                 21.19

### Adding boom/bust prediction interval

In [None]:
# Finding standard deviation residuals from trian data
train_preds = model.predict(train_pool)
residuals = y_train - train_preds
std_resid = residuals.std()

# Creating boom/bust interval
ci_upper = y_pred + 1.645 * std_resid
ci_lower = y_pred - 1.645 * std_resid

# setting minimum to 0
ci_lower = np.maximum(ci_lower, 0)


QB_test_pred['upper_bound'] = ci_upper
QB_test_pred['lower_bound'] = ci_lower

print(QB_test_pred[['season', 'week', 'opponent_team', 'lower_bound', 'predicted_fantasy_points', 'upper_bound', 'player_display_name', 'espn_id']])

       season  week opponent_team  lower_bound  predicted_fantasy_points  \
20305    2024    14           MIA     5.165464                 15.398341   
20334    2024    14           BUF     6.534220                 16.767097   
20354    2024    14           CLE     6.103385                 16.336262   
20372    2024    14           MIN     4.531927                 14.764804   
20496    2024    14           ARI     7.095135                 17.328012   
20542    2024    14           NYG     6.232800                 16.465678   
20593    2024    14           PIT     3.015584                 13.248461   
20673    2024    14            KC     0.000000                  9.573386   
20769    2024    14           CHI     0.000000                  9.949140   
20911    2024    14            GB     7.037148                 17.270025   
21025    2024    14           ATL     0.000000                  8.258414   
21182    2024    14           CIN     3.534297                 13.767175   
21270    202

### Evaluating QB CatBoost model predictions

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Actuals and predictions
y_true = y_test
y_pred = QB_test_pred['predicted_fantasy_points']

# RMSE and MAE
rmse = mean_squared_error(y_true, y_pred)
mae = mean_absolute_error(y_true, y_pred)

print(f"✅ RMSE: {rmse:.2f}")
print(f"✅ MAE: {mae:.2f}")

✅ RMSE: 66.43
✅ MAE: 5.82


# **RB CatBoost Model**

In [None]:
df_rb = weekly_stats_all[weekly_stats_all['position'] == 'RB'].copy()

# Sorting for time-series purposes
df_rb = df_rb.sort_values(by=['player_id', 'season', 'week'])

# Creating lag/rolling features for model
# Takes into account past 3 games (or less, depending on the data available that week)
rolling_cols = [
    'rushing_yards', 'rushing_tds',
    'fantasy_points_ppr', 'carries',
    'rushing_fumbles_lost', 'rushing_epa', 'rushing_2pt_conversions', 'passing_2pt_conversions', 'receptions', 'targets', 'receiving_yards', 'receiving_tds', 'receiving_fumbles', 'receiving_fumbles_lost', 'receiving_air_yards', 'receiving_yards_after_catch', 'receiving_epa', 'receiving_2pt_conversions', 'target_share', 'offense_snaps', 'offense_pct', 'air_yards_share'
    ]

for col in rolling_cols:
    df_rb[f'{col}_last3'] = df_rb.groupby('player_id')[col].shift(1).rolling(3, min_periods = 1).mean()

# Creating season averages for certain stats
expanding_cols = ['rushing_yards', 'rushing_tds', 'fantasy_points_ppr'
]

for col in expanding_cols:
    df_rb[col] = pd.to_numeric(df_rb[col], errors='coerce')

for col in expanding_cols:
    df_rb[f'{col}_season_avg'] = (
        df_rb.groupby(['player_id', 'season'])[col]
        .transform(lambda x: x.shift(1).expanding().mean())
    )


df_rb['carries'] = pd.to_numeric(df_rb['carries'], errors='coerce')

# Creating yards per carry feature
df_rb['yards_per_attempt'] = df_rb['rushing_yards'] / df_rb['carries'].replace(0, pd.NA)
df_rb['yards_per_attempt'] = pd.to_numeric(df_rb['yards_per_attempt'], errors='coerce')

print(df_rb.head(10))

      player_id player_name player_display_name position position_group  \
68   00-0025394  A.Peterson     Adrian Peterson       RB             RB   
69   00-0025394  A.Peterson     Adrian Peterson       RB             RB   
70   00-0025394  A.Peterson     Adrian Peterson       RB             RB   
71   00-0025394  A.Peterson     Adrian Peterson       RB             RB   
266  00-0027966    M.Ingram         Mark Ingram       RB             RB   
267  00-0027966    M.Ingram         Mark Ingram       RB             RB   
268  00-0027966    M.Ingram         Mark Ingram       RB             RB   
269  00-0027966    M.Ingram         Mark Ingram       RB             RB   
270  00-0027966    M.Ingram         Mark Ingram       RB             RB   
271  00-0027966    M.Ingram         Mark Ingram       RB             RB   

                                          headshot_url recent_team  season  \
68   https://static.www.nfl.com/image/private/f_aut...         TEN    2021   
69   https://stati

In [None]:
# Creating efficiency metrics for RBs
df_rb['carries_shifted'] = df_rb.groupby('player_id')['carries'].shift(1)
df_rb['receptions_shifted'] = df_rb.groupby('player_id')['receptions'].shift(1)


df_rb['yards_per_attempt_last3'] = df_rb.groupby('player_id')['yards_per_attempt'].shift(1).rolling(3,min_periods = 1).mean()
df_rb['rush_td_rate_last3'] = df_rb.groupby('player_id')['rushing_tds'].shift(1).rolling(3,min_periods = 1).sum() / df_rb['carries_shifted']
df_rb['rec_td_rate_last3'] = df_rb.groupby('player_id')['receiving_tds'].shift(1).rolling(3,min_periods = 1).sum() / df_rb['receptions_shifted']

# Crewating lag/rolling features for RB advanced stats
advanced = ['wopr', 'racr']
for col in advanced:
    df_rb[f'{col}_trend'] = df_rb.groupby('player_id')[col].shift(1).rolling(3,min_periods = 1).mean()

# Create lag features for opposing defensive stats
opponent_cols = ['opp_avg_ypg_allowed', 'opp_avg_ppg_allowed', 'opp_int_pg', 'opp_sacks_pg', 'opp_fumbles_pg']
for col in opponent_cols:
    df_rb[f'{col}_trend'] = df_rb.groupby('player_id')[col].shift(1).rolling(3,min_periods = 1).mean()


In [None]:
TARGET_COL = 'fantasy_points_ppr'

# Creating train and test sets
train_df = df_rb[
    (df_rb[TARGET_COL].notna()) &
    (
        (df_rb['season'] < 2024) |
        ((df_rb['season'] == 2024) & (df_rb['week'] < 14))
    )
]

test_df = df_rb[(df_rb['season'] == 2024) & (df_rb['week'] == 14)]


feature_cols = [
    col for col in df_rb.columns
    if col.endswith('_last3') or col.endswith('_trend') or col.endswith('_season_avg')
] + ['player_display_name','season', 'week', 'opponent_team']


X_train = train_df[feature_cols]
y_train = train_df[TARGET_COL]

X_test = test_df[feature_cols]

# for RMSE testing purposes on the test set
y_test = test_df[TARGET_COL]

In [None]:
categorical_cols = ['opponent_team', 'season', 'week', 'player_display_name']

# Create Pool objects
train_pool = Pool(data=X_train, label=y_train, cat_features=categorical_cols)
test_pool = Pool(data=X_test, cat_features=categorical_cols)

# Initialize the CatBoost model
model = CatBoostRegressor(
    #iterations=200,
    iterations = 1000,
    learning_rate = 0.01,
    depth=6,
    loss_function='RMSE',
    random_seed=42,
    verbose=50  # or 0 to suppress output
)

# Train
model.fit(train_pool)

0:	learn: 7.8844507	total: 13ms	remaining: 13s
50:	learn: 7.1091038	total: 515ms	remaining: 9.57s
100:	learn: 6.7186174	total: 970ms	remaining: 8.63s
150:	learn: 6.5112589	total: 1.43s	remaining: 8.06s
200:	learn: 6.3966107	total: 2s	remaining: 7.96s
250:	learn: 6.3243741	total: 3.21s	remaining: 9.59s
300:	learn: 6.2673514	total: 4.74s	remaining: 11s
350:	learn: 6.2252575	total: 6.31s	remaining: 11.7s
400:	learn: 6.1873968	total: 8.19s	remaining: 12.2s
450:	learn: 6.1521589	total: 9.73s	remaining: 11.8s
500:	learn: 6.1211165	total: 11s	remaining: 11s
550:	learn: 6.0919244	total: 12.5s	remaining: 10.2s
600:	learn: 6.0590259	total: 13s	remaining: 8.61s
650:	learn: 6.0330075	total: 13.4s	remaining: 7.2s
700:	learn: 6.0075611	total: 13.9s	remaining: 5.92s
750:	learn: 5.9851419	total: 14.3s	remaining: 4.76s
800:	learn: 5.9593712	total: 14.8s	remaining: 3.68s
850:	learn: 5.9339092	total: 15.3s	remaining: 2.68s
900:	learn: 5.9055668	total: 15.8s	remaining: 1.74s
950:	learn: 5.8808430	total: 1

<catboost.core.CatBoostRegressor at 0x7ed00aa53510>

In [None]:
y_pred = model.predict(test_pool)

# Attach predictions to test set for review
RB_test_pred = X_test.copy()
RB_test_pred['predicted_fantasy_points'] = y_pred
RB_test_pred['espn_id'] = test_df['espn_id'].apply(lambda x: str(int(x)) if pd.notna(x) else np.nan)
RB_test_pred['position'] = "RB"

print(RB_test_pred[['season', 'week', 'opponent_team', 'predicted_fantasy_points', 'player_display_name', 'espn_id', 'position']])

       season  week opponent_team  predicted_fantasy_points  \
20509    2024    14           CLE                  5.243407   
20691    2024    14            TB                  9.328712   
20871    2024    14           CIN                  5.558331   
21004    2024    14           ATL                 14.405033   
21076    2024    14           LAC                  5.799112   
...       ...   ...           ...                       ...   
25453    2024    14           BUF                  4.237266   
25506    2024    14           MIA                  5.381880   
25519    2024    14           MIA                  4.171702   
25624    2024    14           NYJ                  3.950106   
25846    2024    14           SEA                  4.711971   

         player_display_name  espn_id position  
20509  Cordarrelle Patterson    15807       RB  
20691         Ameer Abdullah  2576336       RB  
20871        Ezekiel Elliott  3051392       RB  
21004            Aaron Jones  3042519       RB 

### Adding boom/bust prediction interval

In [None]:
train_preds = model.predict(train_pool)
residuals = y_train - train_preds
std_resid = residuals.std()

# Creating boom/bust intervals
ci_upper = y_pred + 1.645 * std_resid
ci_lower = y_pred - 1.645 * std_resid

# setting minimum to 0
ci_lower = np.maximum(ci_lower, 0)


RB_test_pred['upper_bound'] = ci_upper
RB_test_pred['lower_bound'] = ci_lower

print(RB_test_pred[['season', 'week', 'opponent_team', 'lower_bound', 'predicted_fantasy_points', 'upper_bound', 'player_display_name', 'espn_id', 'position']])

       season  week opponent_team  lower_bound  predicted_fantasy_points  \
20509    2024    14           CLE     0.000000                  5.243407   
20691    2024    14            TB     0.000000                  9.328712   
20871    2024    14           CIN     0.000000                  5.558331   
21004    2024    14           ATL     4.700067                 14.405033   
21076    2024    14           LAC     0.000000                  5.799112   
...       ...   ...           ...          ...                       ...   
25453    2024    14           BUF     0.000000                  4.237266   
25506    2024    14           MIA     0.000000                  5.381880   
25519    2024    14           MIA     0.000000                  4.171702   
25624    2024    14           NYJ     0.000000                  3.950106   
25846    2024    14           SEA     0.000000                  4.711971   

       upper_bound    player_display_name  espn_id position  
20509    14.948374  Corda

### Evaluating RB CatBoost model predictions

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Actuals and predictions
y_true = y_test
y_pred = RB_test_pred['predicted_fantasy_points']

# RMSE and MAE
rmse = mean_squared_error(y_true, y_pred)
mae = mean_absolute_error(y_true, y_pred)

print(f"✅ RMSE: {rmse:.2f}")
print(f"✅ MAE: {mae:.2f}")

✅ RMSE: 51.45
✅ MAE: 5.08


# **WR CatBoost Model**

In [None]:
df_wr = weekly_stats_all[weekly_stats_all['position'] == 'WR'].copy()

# Sorting for time series purposes
df_wr = df_wr.sort_values(by=['player_id', 'season', 'week'])

# Creating lag/rolling features to be used as input for the model
rolling_cols = [
    'rushing_yards', 'rushing_tds',
    'fantasy_points_ppr', 'carries',
    'rushing_epa', 'rushing_2pt_conversions', 'receptions', 'targets', 'receiving_yards', 'receiving_tds', 'receiving_fumbles', 'receiving_fumbles_lost', 'receiving_air_yards', 'receiving_yards_after_catch', 'receiving_epa', 'receiving_2pt_conversions', 'target_share', 'offense_snaps', 'offense_pct', 'air_yards_share'
    ]

for col in rolling_cols:
    df_wr[f'{col}_last3'] = df_wr.groupby('player_id')[col].shift(1).rolling(3, min_periods = 1).mean()

# Creating season averages
expanding_cols = ['receiving_yards', 'receiving_tds', 'fantasy_points_ppr'
]

for col in expanding_cols:
    df_wr[col] = pd.to_numeric(df_wr[col], errors='coerce')

for col in expanding_cols:
    df_wr[f'{col}_season_avg'] = (
        df_wr.groupby(['player_id', 'season'])[col]
        .transform(lambda x: x.shift(1).expanding().mean())
    )


df_wr['receiving_yards'] = pd.to_numeric(df_wr['receiving_yards'], errors='coerce')
df_wr['receptions'] = pd.to_numeric(df_wr['receptions'], errors='coerce')

# Creating yards per catch data
df_wr['yards_per_catch'] = df_wr['receiving_yards'] / df_wr['receptions'].replace(0, pd.NA)
df_wr['yards_per_catch'] = pd.to_numeric(df_wr['yards_per_catch'], errors='coerce')



In [None]:
# Creating WR efficiency metrics
df_wr['carries_shifted'] = df_wr.groupby('player_id')['carries'].shift(1)
df_wr['receptions_shifted'] = df_wr.groupby('player_id')['receptions'].shift(1)


df_wr['yards_per_reception_last3'] = df_wr.groupby('player_id')['yards_per_catch'].shift(1).rolling(3,min_periods = 1).mean()
df_wr['rec_td_rate_last3'] = df_wr.groupby('player_id')['receiving_tds'].shift(1).rolling(3,min_periods = 1).sum() / df_rb['receptions_shifted']

# Creating rolling/lag features for advanced WR stats
advanced = ['wopr', 'racr']
for col in advanced:
    df_wr[f'{col}_trend'] = df_wr.groupby('player_id')[col].shift(1).rolling(3,min_periods = 1).mean()

# Creating lag/rolling features for opposing defense stats
opponent_cols = ['opp_avg_ypg_allowed', 'opp_avg_ppg_allowed', 'opp_int_pg', 'opp_sacks_pg', 'opp_fumbles_pg']
for col in opponent_cols:
    df_wr[f'{col}_trend'] = df_wr.groupby('player_id')[col].shift(1).rolling(3,min_periods = 1).mean()

In [None]:
TARGET_COL = 'fantasy_points_ppr'

# Creating test & trian sets
train_df = df_wr[
    (df_wr[TARGET_COL].notna()) &
    (
        (df_wr['season'] < 2024) |
        ((df_wr['season'] == 2024) & (df_wr['week'] < 14))
    )
]

test_df = df_wr[(df_wr['season'] == 2024) & (df_wr['week'] == 14)]


feature_cols = [
    col for col in df_wr.columns
    if col.endswith('_last3') or col.endswith('_trend') or col.endswith('_season_avg')
] + ['player_display_name','season', 'week', 'opponent_team']


X_train = train_df[feature_cols]
y_train = train_df[TARGET_COL]

X_test = test_df[feature_cols]

# for RMSE testing purposes on the test set
y_test = test_df[TARGET_COL]

In [None]:
categorical_cols = ['opponent_team', 'season', 'week', 'player_display_name']

# Create Pool objects
train_pool = Pool(data=X_train, label=y_train, cat_features=categorical_cols)
test_pool = Pool(data=X_test, cat_features=categorical_cols)

# Initialize the CatBoost model
model = CatBoostRegressor(
    iterations = 1000,
    learning_rate = 0.01,
    depth=6,
    loss_function='RMSE',
    random_seed=42,
    verbose=50  # or 0 to suppress output
)

# Train
model.fit(train_pool)

0:	learn: 7.7354804	total: 22.3ms	remaining: 22.3s
50:	learn: 7.0373176	total: 638ms	remaining: 11.9s
100:	learn: 6.7128962	total: 1.21s	remaining: 10.8s
150:	learn: 6.5449715	total: 1.76s	remaining: 9.89s
200:	learn: 6.4472930	total: 2.33s	remaining: 9.26s
250:	learn: 6.3861382	total: 2.92s	remaining: 8.7s
300:	learn: 6.3393508	total: 3.51s	remaining: 8.15s
350:	learn: 6.3074649	total: 4.31s	remaining: 7.97s
400:	learn: 6.2821407	total: 5.57s	remaining: 8.31s
450:	learn: 6.2567554	total: 6.81s	remaining: 8.29s
500:	learn: 6.2340853	total: 7.45s	remaining: 7.42s
550:	learn: 6.2107628	total: 8.02s	remaining: 6.54s
600:	learn: 6.1924229	total: 8.6s	remaining: 5.71s
650:	learn: 6.1720275	total: 9.18s	remaining: 4.92s
700:	learn: 6.1550782	total: 9.73s	remaining: 4.15s
750:	learn: 6.1342741	total: 10.3s	remaining: 3.43s
800:	learn: 6.1121091	total: 10.9s	remaining: 2.72s
850:	learn: 6.0932859	total: 11.8s	remaining: 2.07s
900:	learn: 6.0732707	total: 12.4s	remaining: 1.37s
950:	learn: 6.05

<catboost.core.CatBoostRegressor at 0x7ed00a7e2c90>

In [None]:
y_pred = model.predict(test_pool)

# Attach predictions to test set for review
WR_test_pred = X_test.copy()
WR_test_pred['predicted_fantasy_points'] = y_pred
WR_test_pred['espn_id'] = test_df['espn_id'].apply(lambda x: str(int(x)) if pd.notna(x) else np.nan)
WR_test_pred['position'] = "WR"

print(WR_test_pred[['season', 'week', 'opponent_team', 'predicted_fantasy_points', 'player_display_name', 'espn_id', 'position']])

       season  week opponent_team  predicted_fantasy_points  \
20393    2024    14           PHI                 12.697022   
20428    2024    14            SF                 13.145877   
20477    2024    14           LAC                 10.829618   
20521    2024    14           NYJ                  4.354308   
20527    2024    14           CIN                  9.168697   
...       ...   ...           ...                       ...   
25653    2024    14           NYJ                  3.544078   
25687    2024    14           TEN                 14.310319   
25704    2024    14           LAC                  9.110364   
25787    2024    14           CHI                  4.902839   
25826    2024    14            SF                  8.677350   

      player_display_name  espn_id position  
20393        Adam Thielen    16460       WR  
20428        Keenan Allen    15818       WR  
20477     DeAndre Hopkins    15795       WR  
20521       Odell Beckham    16733       WR  
20527       B

### Adding boom/bust prediction interval

In [None]:
train_preds = model.predict(train_pool)
residuals = y_train - train_preds
std_resid = residuals.std()

# Creating boom/bust interval
ci_upper = y_pred + 1.645 * std_resid
ci_lower = y_pred - 1.645 * std_resid

# setting minimum to 0
ci_lower = np.maximum(ci_lower, 0)


WR_test_pred['upper_bound'] = ci_upper
WR_test_pred['lower_bound'] = ci_lower

print(WR_test_pred[['season', 'week', 'opponent_team', 'lower_bound', 'predicted_fantasy_points', 'upper_bound', 'player_display_name', 'espn_id', 'position']])

       season  week opponent_team  lower_bound  predicted_fantasy_points  \
20393    2024    14           PHI     2.831348                 12.697022   
20428    2024    14            SF     3.280202                 13.145877   
20477    2024    14           LAC     0.963944                 10.829618   
20521    2024    14           NYJ     0.000000                  4.354308   
20527    2024    14           CIN     0.000000                  9.168697   
...       ...   ...           ...          ...                       ...   
25653    2024    14           NYJ     0.000000                  3.544078   
25687    2024    14           TEN     4.444645                 14.310319   
25704    2024    14           LAC     0.000000                  9.110364   
25787    2024    14           CHI     0.000000                  4.902839   
25826    2024    14            SF     0.000000                  8.677350   

       upper_bound player_display_name  espn_id position  
20393    22.562697        Ad

### Evaluating the WR CatBoost model preidctions

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Actuals and predictions
y_true = y_test
y_pred = WR_test_pred['predicted_fantasy_points']

# RMSE and MAE
rmse = mean_squared_error(y_true, y_pred)
mae = mean_absolute_error(y_true, y_pred)

print(f"✅ RMSE: {rmse:.2f}")
print(f"✅ MAE: {mae:.2f}")

✅ RMSE: 57.90
✅ MAE: 5.54


# **TE CatBoost Model**

In [None]:
df_te= weekly_stats_all[weekly_stats_all['position'] == 'TE'].copy()

# Sorting for time series purposes
df_te = df_te.sort_values(by=['player_id', 'season', 'week'])

# Creating lag/rolling features to be used as model input
rolling_cols = [
    'fantasy_points_ppr', 'receptions', 'targets', 'receiving_yards', 'receiving_tds', 'receiving_fumbles', 'receiving_fumbles_lost', 'receiving_air_yards', 'receiving_yards_after_catch', 'receiving_epa', 'receiving_2pt_conversions', 'target_share', 'offense_snaps', 'offense_pct', 'air_yards_share'
    ]

for col in rolling_cols:
    df_te[f'{col}_last3'] = df_te.groupby('player_id')[col].shift(1).rolling(3, min_periods = 1).mean()

# Creating season averages
expanding_cols = ['receiving_yards', 'receiving_tds', 'fantasy_points_ppr'
]

for col in expanding_cols:
    df_te[col] = pd.to_numeric(df_te[col], errors='coerce')

for col in expanding_cols:
    df_te[f'{col}_season_avg'] = (
        df_te.groupby(['player_id', 'season'])[col]
        .transform(lambda x: x.shift(1).expanding().mean())
    )


df_te['receiving_yards'] = pd.to_numeric(df_te['receiving_yards'], errors='coerce')
df_te['receptions'] = pd.to_numeric(df_te['receptions'], errors='coerce')

# Creating yards per catch feature
df_te['yards_per_catch'] = df_te['receiving_yards'] / df_te['receptions'].replace(0, pd.NA)
df_te['yards_per_catch'] = pd.to_numeric(df_te['yards_per_catch'], errors='coerce')



print(df_te.head(10))

     player_id player_name player_display_name position position_group  \
54  00-0024243     M.Lewis      Marcedes Lewis       TE             TE   
55  00-0024243     M.Lewis      Marcedes Lewis       TE             TE   
56  00-0024243     M.Lewis      Marcedes Lewis       TE             TE   
57  00-0024243     M.Lewis      Marcedes Lewis       TE             TE   
58  00-0024243     M.Lewis      Marcedes Lewis       TE             TE   
59  00-0024243     M.Lewis      Marcedes Lewis       TE             TE   
60  00-0024243     M.Lewis      Marcedes Lewis       TE             TE   
61  00-0024243     M.Lewis      Marcedes Lewis       TE             TE   
62  00-0024243     M.Lewis      Marcedes Lewis       TE             TE   
63  00-0024243     M.Lewis      Marcedes Lewis       TE             TE   

                                         headshot_url recent_team  season  \
54  https://static.www.nfl.com/image/private/f_aut...          GB    2021   
55  https://static.www.nfl.com/

In [None]:
# Creating efficiency metrics (rolling)
df_te['carries_shifted'] = df_te.groupby('player_id')['carries'].shift(1)
df_te['receptions_shifted'] = df_te.groupby('player_id')['receptions'].shift(1)


df_te['yards_per_reception_last3'] = df_te.groupby('player_id')['yards_per_catch'].shift(1).rolling(3,min_periods = 1).mean()
df_te['rec_td_rate_last3'] = df_te.groupby('player_id')['receiving_tds'].shift(1).rolling(3,min_periods = 1).sum() / df_rb['receptions_shifted']

# Creating rolling features for advanced features
advanced = ['wopr', 'racr']
for col in advanced:
    df_te[f'{col}_trend'] = df_te.groupby('player_id')[col].shift(1).rolling(3,min_periods = 1).mean()

# -------------------------------
# STEP 7: Opponent Defense (already lagged by design)
# -------------------------------
opponent_cols = ['opp_avg_ypg_allowed', 'opp_avg_ppg_allowed', 'opp_int_pg', 'opp_sacks_pg', 'opp_fumbles_pg']
for col in opponent_cols:
    df_te[f'{col}_trend'] = df_te.groupby('player_id')[col].shift(1).rolling(3,min_periods = 1).mean()

In [None]:
TARGET_COL = 'fantasy_points_ppr'

# test and train splits
train_df = df_te[
    (df_te[TARGET_COL].notna()) &
    (
        (df_te['season'] < 2024) |
        ((df_te['season'] == 2024) & (df_te['week'] < 14))
    )
]

test_df = df_te[(df_te['season'] == 2024) & (df_te['week'] == 14)]


feature_cols = [
    col for col in df_te.columns
    if col.endswith('_last3') or col.endswith('_trend') or col.endswith('_season_avg')
] + ['player_display_name','season', 'week', 'opponent_team']


X_train = train_df[feature_cols]
y_train = train_df[TARGET_COL]

X_test = test_df[feature_cols]

# for RMSE testing purposes on the test set
y_test = test_df[TARGET_COL]

In [None]:
categorical_cols = ['opponent_team', 'season', 'week', 'player_display_name']

# Create Pool objects
train_pool = Pool(data=X_train, label=y_train, cat_features=categorical_cols)
test_pool = Pool(data=X_test, cat_features=categorical_cols)

# Initialize the CatBoost model
model = CatBoostRegressor(
    iterations = 1000,
    learning_rate = 0.01,
    depth=6,
    loss_function='RMSE',
    random_seed=42,
    verbose=50  # or 0 to suppress output
)

# Train
model.fit(train_pool)

0:	learn: 6.0043360	total: 9.82ms	remaining: 9.81s
50:	learn: 5.5139502	total: 410ms	remaining: 7.63s
100:	learn: 5.2612506	total: 812ms	remaining: 7.22s
150:	learn: 5.1284839	total: 1.17s	remaining: 6.59s
200:	learn: 5.0491599	total: 1.53s	remaining: 6.1s
250:	learn: 4.9901773	total: 1.93s	remaining: 5.76s
300:	learn: 4.9456479	total: 2.31s	remaining: 5.37s
350:	learn: 4.9109847	total: 3.01s	remaining: 5.57s
400:	learn: 4.8800247	total: 3.41s	remaining: 5.1s
450:	learn: 4.8487963	total: 3.82s	remaining: 4.65s
500:	learn: 4.8237270	total: 4.21s	remaining: 4.19s
550:	learn: 4.7986172	total: 4.59s	remaining: 3.75s
600:	learn: 4.7725064	total: 5.02s	remaining: 3.33s
650:	learn: 4.7482421	total: 5.41s	remaining: 2.9s
700:	learn: 4.7267429	total: 5.83s	remaining: 2.49s
750:	learn: 4.7011546	total: 6.24s	remaining: 2.07s
800:	learn: 4.6761008	total: 6.66s	remaining: 1.65s
850:	learn: 4.6526628	total: 7.1s	remaining: 1.24s
900:	learn: 4.6258754	total: 7.51s	remaining: 825ms
950:	learn: 4.5970

<catboost.core.CatBoostRegressor at 0x7ed00a78a190>

In [None]:
y_pred = model.predict(test_pool)

# Attach predictions to test set for review
TE_test_pred = X_test.copy()
TE_test_pred['predicted_fantasy_points'] = y_pred
TE_test_pred['espn_id'] = test_df['espn_id'].apply(lambda x: str(int(x)) if pd.notna(x) else np.nan)
TE_test_pred['position'] = "TE"


print(TE_test_pred[['season', 'week', 'opponent_team', 'predicted_fantasy_points', 'player_display_name', 'espn_id', 'position']])

       season  week opponent_team  predicted_fantasy_points  \
20458    2024    14           LAC                 14.593815   
20630    2024    14           CLE                  2.987043   
20763    2024    14           JAX                  3.234373   
20986    2024    14           CHI                 11.279746   
21060    2024    14           ARI                  2.971119   
21145    2024    14           CHI                  3.839349   
21244    2024    14           NYJ                 11.708264   
21285    2024    14           TEN                 10.905046   
21295    2024    14           PIT                 12.753433   
21454    2024    14            KC                  6.312394   
21490    2024    14           MIA                  7.436870   
21552    2024    14           PIT                  4.685981   
21655    2024    14           DAL                  5.483110   
21741    2024    14           NYJ                  3.332110   
21773    2024    14           DAL                  8.05

### Adding boom/bust interval using the standard deviation

In [None]:
train_preds = model.predict(train_pool)
residuals = y_train - train_preds
std_resid = residuals.std()

# Creating boom/bust interval
ci_upper = y_pred + 1.645 * std_resid
ci_lower = y_pred - 1.645 * std_resid

# setting minimum to 0
ci_lower = np.maximum(ci_lower, 0)


TE_test_pred['upper_bound'] = ci_upper
TE_test_pred['lower_bound'] = ci_lower

print(TE_test_pred[['season', 'week', 'opponent_team', 'lower_bound', 'predicted_fantasy_points', 'upper_bound', 'player_display_name', 'espn_id', 'position']])

       season  week opponent_team  lower_bound  predicted_fantasy_points  \
20458    2024    14           LAC     7.016120                 14.593815   
20630    2024    14           CLE     0.000000                  2.987043   
20763    2024    14           JAX     0.000000                  3.234373   
20986    2024    14           CHI     3.702051                 11.279746   
21060    2024    14           ARI     0.000000                  2.971119   
21145    2024    14           CHI     0.000000                  3.839349   
21244    2024    14           NYJ     4.130568                 11.708264   
21285    2024    14           TEN     3.327350                 10.905046   
21295    2024    14           PIT     5.175737                 12.753433   
21454    2024    14            KC     0.000000                  6.312394   
21490    2024    14           MIA     0.000000                  7.436870   
21552    2024    14           PIT     0.000000                  4.685981   
21655    202

### Evaluating predictions

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Actuals and predictions
y_true = y_test
y_pred = TE_test_pred['predicted_fantasy_points']

# RMSE and MAE
rmse = mean_squared_error(y_true, y_pred)
mae = mean_absolute_error(y_true, y_pred)

print(f"✅ RMSE: {rmse:.2f}")
print(f"✅ MAE: {mae:.2f}")

✅ RMSE: 13.65
✅ MAE: 2.93


# **K CatBoost Model**

In [None]:
df_k = weekly_stats_all[weekly_stats_all['position'] == 'K'].copy()

# Sorting data for time-series purposes
df_k = df_k.sort_values(by=['player_id', 'season', 'week'])

# Creating lag/rolling features for model
# Takes into account past 3 games (or less, depending on the data available that week)
rolling_cols = [
    'fg_made','fg_att','fg_pct','fg_blocked','fg_long','fg_made_0_19','fg_made_20_29','fg_made_30_39','fg_made_40_49','fg_made_50_59','fg_made_60_',
    'fg_missed_0_19','fg_missed_20_29','fg_missed_30_39','fg_missed_40_49','fg_missed_50_59','fg_missed_60_','pat_made','pat_att','pat_blocked','pat_pct'
]

for col in rolling_cols:
    df_k[f'{col}_last3'] = df_k.groupby('player_id')[col].shift(1).rolling(3, min_periods = 1).mean()

# Season averages
expanding_cols = ['fg_made', 'fg_pct', 'pat_made', 'fantasy_points_ppr'
]

for col in expanding_cols:
    df_k[col] = pd.to_numeric(df_k[col], errors='coerce')

for col in expanding_cols:
    df_k[f'{col}_season_avg'] = (
        df_k.groupby(['player_id', 'season'])[col]
        .transform(lambda x: x.shift(1).expanding().mean())
    )

print(df_k.head(10))

        player_id player_name player_display_name position position_group  \
16892  00-0023252     R.Gould        Robbie Gould        K           SPEC   
16893  00-0023252     R.Gould        Robbie Gould        K           SPEC   
16894  00-0023252     R.Gould        Robbie Gould        K           SPEC   
16895  00-0023252     R.Gould        Robbie Gould        K           SPEC   
16896  00-0023252     R.Gould        Robbie Gould        K           SPEC   
16897  00-0023252     R.Gould        Robbie Gould        K           SPEC   
16898  00-0023252     R.Gould        Robbie Gould        K           SPEC   
16899  00-0023252     R.Gould        Robbie Gould        K           SPEC   
16900  00-0023252     R.Gould        Robbie Gould        K           SPEC   
16901  00-0023252     R.Gould        Robbie Gould        K           SPEC   

                                            headshot_url recent_team  season  \
16892  https://static.www.nfl.com/image/private/f_aut...         NaN    

In [None]:
TARGET_COL = 'fantasy_points_ppr'

# Creating test and train sets
train_df = df_k[
    (df_k[TARGET_COL].notna()) &
    (
        (df_k['season'] < 2024) |
        ((df_k['season'] == 2024) & (df_k['week'] < 14))
    )
]

test_df = df_k[(df_k['season'] == 2024) & (df_k['week'] == 14)]


feature_cols = [
    col for col in df_k.columns
    if col.endswith('_last3') or col.endswith('_trend') or col.endswith('_season_avg')
] + ['player_display_name','season', 'week', 'opponent_team']


X_train = train_df[feature_cols]
y_train = train_df[TARGET_COL]

X_test = test_df[feature_cols]

# for RMSE testing purposes on the test set
y_test = test_df[TARGET_COL]

In [None]:
categorical_cols = ['opponent_team', 'season', 'week', 'player_display_name']

for col in categorical_cols:
    X_train[col] = X_train[col].fillna('Unknown').astype(str)  # Fill NaN with 'Unknown' and convert to string
    X_test[col] = X_test[col].fillna('Unknown').astype(str)  # Fill NaN with 'Unknown' and convert to string

# Create Pool objects
train_pool = Pool(data=X_train, label=y_train, cat_features=categorical_cols)
test_pool = Pool(data=X_test, cat_features=categorical_cols)

# Initialize the CatBoost model
model = CatBoostRegressor(
    iterations = 1000,
    learning_rate = 0.01,
    depth=6,
    loss_function='RMSE',
    random_seed=42,
    verbose=50  # or 0 to suppress output
)

# Train
model.fit(train_pool)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train[col] = X_train[col].fillna('Unknown').astype(str)  # Fill NaN with 'Unknown' and convert to string
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test[col] = X_test[col].fillna('Unknown').astype(str)  # Fill NaN with 'Unknown' and convert to string
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-

0:	learn: 4.3116635	total: 4.13ms	remaining: 4.13s
50:	learn: 4.2747010	total: 169ms	remaining: 3.15s
100:	learn: 4.2467617	total: 311ms	remaining: 2.77s
150:	learn: 4.2172608	total: 451ms	remaining: 2.53s
200:	learn: 4.1921491	total: 597ms	remaining: 2.37s
250:	learn: 4.1682211	total: 746ms	remaining: 2.23s
300:	learn: 4.1392459	total: 885ms	remaining: 2.05s
350:	learn: 4.1148018	total: 1.03s	remaining: 1.9s
400:	learn: 4.0889904	total: 1.21s	remaining: 1.8s
450:	learn: 4.0670395	total: 1.35s	remaining: 1.64s
500:	learn: 4.0416744	total: 1.5s	remaining: 1.49s
550:	learn: 4.0176939	total: 1.65s	remaining: 1.34s
600:	learn: 3.9958751	total: 1.79s	remaining: 1.19s
650:	learn: 3.9701747	total: 1.94s	remaining: 1.04s
700:	learn: 3.9464132	total: 2.12s	remaining: 905ms
750:	learn: 3.9199825	total: 2.27s	remaining: 752ms
800:	learn: 3.8937295	total: 2.42s	remaining: 600ms
850:	learn: 3.8637280	total: 2.56s	remaining: 449ms
900:	learn: 3.8400007	total: 2.71s	remaining: 298ms
950:	learn: 3.814

<catboost.core.CatBoostRegressor at 0x7ed009f87b90>

In [None]:
y_pred = model.predict(test_pool)

# Attach predictions to test set for review
K_test_pred = X_test.copy()
K_test_pred['predicted_fantasy_points'] = y_pred
K_test_pred['espn_id'] = test_df['espn_id'].apply(lambda x: str(int(x)) if pd.notna(x) else np.nan)

K_test_pred['position'] = "K"

print(K_test_pred[['season', 'week', 'opponent_team', 'predicted_fantasy_points', 'player_display_name', 'espn_id', 'position']])

      season week opponent_team  predicted_fantasy_points player_display_name  \
25864   2024   14       Unknown                  8.484485           Nick Folk   
25870   2024   14       Unknown                  8.266133         Graham Gano   
25908   2024   14       Unknown                  8.917875     Brandon McManus   
25926   2024   14       Unknown                  8.336803      Dustin Hopkins   
25942   2024   14       Unknown                  8.539275       Chris Boswell   
25960   2024   14       Unknown                  8.258203        Cairo Santos   
25977   2024   14       Unknown                  8.876205         Jason Myers   
26045   2024   14       Unknown                  8.088940        Younghoe Koo   
26059   2024   14       Unknown                  8.788159        Jake Elliott   
26089   2024   14       Unknown                  8.678935      Daniel Carlson   
26106   2024   14       Unknown                  8.119678        Eddy Pineiro   
26131   2024   14       Unkn

In [None]:
# Finding standard deviation residuals from trian data
train_preds = model.predict(train_pool)
residuals = y_train - train_preds
std_resid = residuals.std()

# Creating boom/bust interval
ci_upper = y_pred + 1.645 * std_resid
ci_lower = y_pred - 1.645 * std_resid

# setting minimum to 0
ci_lower = np.maximum(ci_lower, 0)


K_test_pred['upper_bound'] = ci_upper
K_test_pred['lower_bound'] = ci_lower

print(K_test_pred[['season', 'week', 'opponent_team', 'lower_bound', 'predicted_fantasy_points', 'upper_bound', 'player_display_name', 'espn_id']])

      season week opponent_team  lower_bound  predicted_fantasy_points  \
25864   2024   14       Unknown     2.006322                  8.484485   
25870   2024   14       Unknown     1.787970                  8.266133   
25908   2024   14       Unknown     2.439712                  8.917875   
25926   2024   14       Unknown     1.858640                  8.336803   
25942   2024   14       Unknown     2.061112                  8.539275   
25960   2024   14       Unknown     1.780041                  8.258203   
25977   2024   14       Unknown     2.398042                  8.876205   
26045   2024   14       Unknown     1.610777                  8.088940   
26059   2024   14       Unknown     2.309996                  8.788159   
26089   2024   14       Unknown     2.200772                  8.678935   
26106   2024   14       Unknown     1.641515                  8.119678   
26131   2024   14       Unknown     1.914198                  8.392361   
26149   2024   14       Unknown     1.

In [None]:
# Actuals and predictions
y_true = y_test.dropna()  # Drop NaN values from y_true
y_pred = K_test_pred['predicted_fantasy_points'][y_true.index]

# RMSE and MAE
rmse = mean_squared_error(y_true, y_pred)
mae = mean_absolute_error(y_true, y_pred)

print(f"✅ RMSE: {rmse:.2f}")
print(f"✅ MAE: {mae:.2f}")

✅ RMSE: 23.45
✅ MAE: 3.99


# **DEF CatBoost Model**

In [None]:
df_def = weekly_stats_all[weekly_stats_all['player_id'] == 'defense'].copy()

# Sorting data for time-series purposes
df_def = df_def.sort_values(by=['team', 'season', 'week'])

# Creating lag/rolling features for model
# Takes into account past 3 games (or less, depending on the data available that week)
rolling_cols = [
    'interceptions', 'sacks', 'df_tds', 'total_points_allowed', 'blocked_kicks'
]

for col in rolling_cols:
    df_def[f'{col}_last3'] = df_def.groupby('team')[col].shift(1).rolling(3, min_periods = 1).mean()

# Season averages
expanding_cols = [
    'interceptions', 'sacks', 'total_points_allowed', 'fantasy_points'
]

for col in expanding_cols:
    df_def[col] = pd.to_numeric(df_def[col], errors='coerce')

for col in expanding_cols:
    df_def[f'{col}_season_avg'] = (
        df_def.groupby(['team', 'season'])[col]
        .transform(lambda x: x.shift(1).expanding().mean())
    )

print(df_def.head(10))

      player_id player_name player_display_name position position_group  \
18611   defense         NaN                 NaN      NaN            NaN   
18633   defense         NaN                 NaN      NaN            NaN   
18659   defense         NaN                 NaN      NaN            NaN   
18693   defense         NaN                 NaN      NaN            NaN   
18738   defense         NaN                 NaN      NaN            NaN   
18748   defense         NaN                 NaN      NaN            NaN   
18780   defense         NaN                 NaN      NaN            NaN   
18806   defense         NaN                 NaN      NaN            NaN   
18852   defense         NaN                 NaN      NaN            NaN   
18858   defense         NaN                 NaN      NaN            NaN   

      headshot_url recent_team  season  week season_type  ... cur_roster_slot  \
18611          NaN         NaN    2021     1         NaN  ...             NaN   
18633       

In [None]:
TARGET_COL = 'fantasy_points'

# Creating test and train sets
train_df = df_def[
    (df_def[TARGET_COL].notna()) &
    (
        (df_def['season'] < 2024) |
        ((df_def['season'] == 2024) & (df_def['week'] < 14))
    )
]

test_df = df_def[(df_def['season'] == 2024) & (df_def['week'] == 14)]


feature_cols = [
    col for col in df_def.columns
    if col.endswith('_last3') or col.endswith('_trend') or col.endswith('_season_avg')
] + ['team','season', 'week', 'opponent_team']


X_train = train_df[feature_cols]
y_train = train_df[TARGET_COL]

X_test = test_df[feature_cols]

# for RMSE testing purposes on the test set
y_test = test_df[TARGET_COL]

In [None]:
categorical_cols = ['season', 'week', 'team']

for col in categorical_cols:
    X_train[col] = X_train[col].fillna('Unknown').astype(str)
    X_test[col] = X_test[col].fillna('Unknown').astype(str)

# Create Pool objects
train_pool = Pool(data=X_train, label=y_train, cat_features=categorical_cols)
test_pool = Pool(data=X_test, cat_features=categorical_cols)

# Initialize the CatBoost model
model = CatBoostRegressor(
    iterations = 1000,
    learning_rate = 0.01,
    depth=6,
    loss_function='RMSE',
    random_seed=42,
    verbose=50  # or 0 to suppress output
)

# Train
model.fit(train_pool)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train[col] = X_train[col].fillna('Unknown').astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test[col] = X_test[col].fillna('Unknown').astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train[col] = X_train[col].fillna('Unknown').astype(str)
A value is trying to be set on a 

0:	learn: 5.7263801	total: 3.49ms	remaining: 3.49s
50:	learn: 5.6594540	total: 130ms	remaining: 2.41s
100:	learn: 5.6037054	total: 273ms	remaining: 2.43s
150:	learn: 5.5576553	total: 411ms	remaining: 2.31s
200:	learn: 5.5193042	total: 541ms	remaining: 2.15s
250:	learn: 5.4935801	total: 853ms	remaining: 2.54s
300:	learn: 5.4562785	total: 1.11s	remaining: 2.58s
350:	learn: 5.4272227	total: 1.61s	remaining: 2.98s
400:	learn: 5.4059136	total: 2.19s	remaining: 3.27s
450:	learn: 5.3835135	total: 2.45s	remaining: 2.98s
500:	learn: 5.3648388	total: 2.69s	remaining: 2.68s
550:	learn: 5.3383442	total: 3.04s	remaining: 2.48s
600:	learn: 5.3131021	total: 3.37s	remaining: 2.23s
650:	learn: 5.2860897	total: 3.76s	remaining: 2.02s
700:	learn: 5.2634305	total: 3.91s	remaining: 1.67s
750:	learn: 5.2369504	total: 4.04s	remaining: 1.34s
800:	learn: 5.2149027	total: 4.18s	remaining: 1.04s
850:	learn: 5.1916358	total: 4.32s	remaining: 756ms
900:	learn: 5.1677579	total: 4.48s	remaining: 492ms
950:	learn: 5.

<catboost.core.CatBoostRegressor at 0x7ed00a7e1f50>

In [None]:
y_pred = model.predict(test_pool)

# Attach predictions to test set for review
DEF_test_pred = X_test.copy()
DEF_test_pred['predicted_fantasy_points'] = y_pred
DEF_test_pred['espn_id'] = test_df['espn_id'].apply(lambda x: str(int(x)) if pd.notna(x) else np.nan)
DEF_test_pred['position'] = "DEF"

print(DEF_test_pred[['season', 'week', 'predicted_fantasy_points', 'team', 'espn_id', 'position']])

      season week  predicted_fantasy_points team espn_id position
26831   2024   14                  4.491918  ARI  -16022      DEF
26825   2024   14                  4.646357  ATL  -16001      DEF
26821   2024   14                  6.261044  BUF  -16002      DEF
26829   2024   14                  3.982936  CAR  -16029      DEF
26832   2024   14                  5.516650  CHI  -16003      DEF
26816   2024   14                  4.107146  CIN  -16004      DEF
26830   2024   14                  4.599554  CLE  -16005      DEF
26814   2024   14                  5.626609  DAL  -16015      DEF
26818   2024   14                  4.760775  DET  -16008      DEF
26817   2024   14                  5.585654   GB  -16009      DEF
26834   2024   14                  3.765487  JAX  -16030      DEF
26822   2024   14                  4.565247   KC  -16012      DEF
26811   2024   14                  4.524818   LA  -16014      DEF
26820   2024   14                  5.078927  LAC  -16024      DEF
26833   20

In [None]:
# Finding standard deviation residuals from train data
train_preds = model.predict(train_pool)
residuals = y_train - train_preds
std_resid = residuals.std()

# Creating boom/bust interval
ci_upper = y_pred + 1.645 * std_resid
ci_lower = y_pred - 1.645 * std_resid

# setting minimum to 0
#ci_lower = np.maximum(ci_lower, 0)


DEF_test_pred['upper_bound'] = ci_upper
DEF_test_pred['lower_bound'] = ci_lower

print(DEF_test_pred[['season', 'week', 'lower_bound', 'predicted_fantasy_points', 'upper_bound', 'team', 'espn_id']])
print(DEF_test_pred.columns.tolist())

      season week  lower_bound  predicted_fantasy_points  upper_bound team  \
26831   2024   14    -4.216260                  4.491918    13.200095  ARI   
26825   2024   14    -4.061820                  4.646357    13.354535  ATL   
26821   2024   14    -2.447133                  6.261044    14.969222  BUF   
26829   2024   14    -4.725242                  3.982936    12.691113  CAR   
26832   2024   14    -3.191528                  5.516650    14.224827  CHI   
26816   2024   14    -4.601032                  4.107146    12.815324  CIN   
26830   2024   14    -4.108624                  4.599554    13.307731  CLE   
26814   2024   14    -3.081568                  5.626609    14.334787  DAL   
26818   2024   14    -3.947403                  4.760775    13.468952  DET   
26817   2024   14    -3.122524                  5.585654    14.293831   GB   
26834   2024   14    -4.942690                  3.765487    12.473665  JAX   
26822   2024   14    -4.142930                  4.565247    13.2

In [None]:
# Actuals and predictions
y_true = y_test.dropna()  # Drop NaN values from y_true
y_pred = DEF_test_pred['predicted_fantasy_points'][y_true.index]

# RMSE and MAE
rmse = mean_squared_error(y_true, y_pred)
mae = mean_absolute_error(y_true, y_pred)

print(f"✅ RMSE: {rmse:.2f}")
print(f"✅ MAE: {mae:.2f}")

✅ RMSE: 32.36
✅ MAE: 4.49


In [None]:
DEF_test_pred['player_display_name'] = DEF_test_pred['team']
print(DEF_test_pred.head())

print(DEF_test_pred.columns.tolist())

       interceptions_last3  sacks_last3  df_tds_last3  \
26831             0.333333     4.333333      0.000000   
26825             0.000000     2.000000      0.000000   
26821             1.666667     2.333333      0.333333   
26829             1.333333     3.666667      0.000000   
26832             0.333333     2.000000      0.000000   

       total_points_allowed_last3  blocked_kicks_last3  \
26831                   15.666667             0.000000   
26825                   20.000000             0.333333   
26821                   19.666667             0.000000   
26829                   24.666667             0.000000   
26832                   21.333333             0.666667   

       interceptions_season_avg  sacks_season_avg  \
26831                  0.500000          2.833333   
26825                  0.583333          1.250000   
26821                  1.083333          2.333333   
26829                  0.583333          1.750000   
26832                  0.750000          2.

# **Putting together all the prediction data**

In [None]:
QB_test_pred = QB_test_pred[['season', 'week', 'opponent_team', 'lower_bound', 'predicted_fantasy_points', 'upper_bound', 'player_display_name', 'espn_id', 'position']]
RB_test_pred = RB_test_pred[['season', 'week', 'opponent_team', 'lower_bound', 'predicted_fantasy_points', 'upper_bound', 'player_display_name', 'espn_id', 'position']]
WR_test_pred = WR_test_pred[['season', 'week', 'opponent_team', 'lower_bound', 'predicted_fantasy_points', 'upper_bound', 'player_display_name', 'espn_id', 'position']]
TE_test_pred = TE_test_pred[['season', 'week', 'opponent_team', 'lower_bound', 'predicted_fantasy_points', 'upper_bound', 'player_display_name', 'espn_id', 'position']]
K_test_pred = K_test_pred[['season', 'week', 'opponent_team', 'lower_bound', 'predicted_fantasy_points', 'upper_bound', 'player_display_name', 'espn_id', 'position']]
DEF_test_pred = DEF_test_pred[['season', 'week', 'team', 'lower_bound', 'predicted_fantasy_points', 'upper_bound', 'player_display_name', 'espn_id', 'position']]

all_predictions = pd.concat([QB_test_pred, RB_test_pred, WR_test_pred, TE_test_pred, K_test_pred, DEF_test_pred], axis= 0, ignore_index = True)

#all_predictions['espn_id'] = all_predictions['espn_id'].apply(lambda x: int(x) if pd.notna(x) else np.nan)
#all_predictions['espn_id'] = all_predictions['espn_id'].astype('Int64')

all_predictions['espn_id'] = pd.to_numeric(all_predictions['espn_id'], errors='coerce').astype('Int64')

In [None]:
print(all_predictions)

    season week opponent_team  lower_bound  predicted_fantasy_points  \
0     2024   14           MIA     5.165464                 15.398341   
1     2024   14           BUF     6.534220                 16.767097   
2     2024   14           CLE     6.103385                 16.336262   
3     2024   14           MIN     4.531927                 14.764804   
4     2024   14           ARI     7.095135                 17.328012   
..     ...  ...           ...          ...                       ...   
293   2024   14           NaN    -3.282867                  5.425311   
294   2024   14           NaN    -3.731129                  4.977049   
295   2024   14           NaN    -3.539009                  5.169169   
296   2024   14           NaN    -4.667551                  4.040627   
297   2024   14           NaN    -4.562051                  4.146126   

     upper_bound player_display_name  espn_id position team  
0      25.631218       Aaron Rodgers     8439       QB  NaN  
1      26.9

### Data Cleaning Before Uploading

In [None]:
season_stats_all.loc[season_stats_all['player_id'] == 'defense', 'position'] = 'DEF'
weekly_stats_all.loc[weekly_stats_all['player_id'] == 'defense', 'position'] = 'DEF'

season_stats_all.loc[season_stats_all['player_id'] == 'defense', 'player_display_name'] = season_stats_all.loc[season_stats_all['player_id'] == 'defense', 'team']
weekly_stats_all.loc[weekly_stats_all['player_id'] == 'defense', 'player_display_name'] = weekly_stats_all.loc[weekly_stats_all['player_id'] == 'defense', 'team']

season_stats_all.loc[season_stats_all['player_id'] == 'defense', 'PPR_ppg'] = season_stats_all.loc[season_stats_all['player_id'] == 'defense', 'fp_pg']
season_stats_all.loc[season_stats_all['player_id'] == 'defense', 'PPR_points'] = season_stats_all.loc[season_stats_all['player_id'] == 'defense', 'total_fp']

In [None]:
home = weekly_matchup_data[['week', 'home_team', 'home_score', 'away_score']].copy()
home.columns = ['week', 'fantasy_team', 'points_scored', 'points_allowed']

# Create away team rows
away = weekly_matchup_data[['week', 'away_team', 'away_score', 'home_score']].copy()
away.columns = ['week', 'fantasy_team', 'points_scored', 'points_allowed']

# Combine
team_scoring = pd.concat([home, away], ignore_index=True)

# Optional: sort it
team_scoring = team_scoring.sort_values(by=['fantasy_team', 'week']).reset_index(drop=True)

team_scoring['weekly_avg'] = team_scoring.groupby('week')['points_scored'].transform('mean')

# Show the result
print(team_scoring.head())

   week      fantasy_team  points_scored  points_allowed  weekly_avg
0     1  Bucktown Bandits           73.0            96.0        89.2
1     2  Bucktown Bandits           86.0           109.0        95.3
2     3  Bucktown Bandits           86.0           102.0        83.3
3     4  Bucktown Bandits          106.0            47.0        80.5
4     5  Bucktown Bandits           65.0            99.0        86.5


### Getting Schedule Swap Data

In [None]:
# Adding
from itertools import product

teams = sorted(team_scoring['fantasy_team'].unique())
results = []

for team_a, team_b in product(teams, repeat=2):
    if team_a == team_b:
        continue

    # Getting points scored by 'selected' team
    a_data = team_scoring[team_scoring['fantasy_team'] == team_a].sort_values('week').reset_index(drop=True)
    a_scores = a_data['points_scored']

    # Getting points allowed all season for opposing team selected
    b_data = team_scoring[team_scoring['fantasy_team'] == team_b].sort_values('week').reset_index(drop=True)
    b_opponent_allowed = b_data['points_allowed']


    wins = 0
    losses = 0

    for week in range(len(a_scores)):
        a_pts = a_scores[week]
        b_allwd = b_opponent_allowed[week]

        if a_pts > b_allwd:
            wins += 1
        else:
            losses += 1

        results.append({
            'team': team_a,
            'schedule_team': team_b,
            'week': week + 1,
            'points_scored': a_pts,
            'hypothetical_opponent_points': b_allwd,
            'wins': wins,
            'losses': losses,
            'record': f"{wins}-{losses}"
        })

# Create the DataFrame
week_by_week_records = pd.DataFrame(results)

In [None]:
home_df = weekly_matchup_data[['week', 'home_team', 'home_record']].copy()
home_df.columns = ['week', 'team', 'record']

# Create a new DataFrame for away teams
away_df = weekly_matchup_data[['week', 'away_team', 'away_record']].copy()
away_df.columns = ['week', 'team', 'record']

# Combine both into one tidy table
team_week_records = pd.concat([home_df, away_df], ignore_index=True)

# Optional: sort by team and week
team_week_records = team_week_records.sort_values(['team', 'week']).reset_index(drop=True)

# View the result
print(team_week_records)

     week                   team record
0       1       Bucktown Bandits    0-1
1       2       Bucktown Bandits    0-2
2       3       Bucktown Bandits    0-3
3       4       Bucktown Bandits    1-3
4       5       Bucktown Bandits    1-4
..    ...                    ...    ...
135    10  pop-pop's bible study    5-5
136    11  pop-pop's bible study    5-6
137    12  pop-pop's bible study    6-6
138    13  pop-pop's bible study    7-6
139    14  pop-pop's bible study    8-6

[140 rows x 3 columns]


# Putting dataframes back in excel/google drive
*Saving the season & weekly stats data frames, the data frame containing all the player fantasy point predictions, and the data frame containing the schedule & team record, and schedule swap data*

In [None]:
import os

output_path = '/content/drive/MyDrive/all_fantasy_data.xlsx'

if os.path.exists(output_path):
    os.remove(output_path)

with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
    weekly_stats_all.to_excel(writer, sheet_name='Weekly_Stats', index=False)
    season_stats_all.to_excel(writer, sheet_name='Season_Stats', index=False)
    all_predictions.to_excel(writer, sheet_name='Player_Predictions', index=False)
    weekly_matchup_data.to_excel(writer, sheet_name='Team_Schedules', index=False)
    team_scoring.to_excel(writer, sheet_name='Team_Scoring', index=False)
    week_by_week_records.to_excel(writer, sheet_name='Schedule_Swap_Records', index=False)
    team_week_records.to_excel(writer, sheet_name='Team_Records', index=False)

