In [1]:
import build_nfl_model as nfl_mod
from utils.nfl_data_loader import load_data
from utils.verify_requirements import verify_requirements

In [7]:
# Development aid: re-import build_nfl_model so that edits made to the module
# on disk are picked up without restarting the kernel.
from importlib import reload

reload(nfl_mod)

<module 'build_nfl_model' from '/Users/carloseduardo/Documents/GitHub/Team-134-CSE-6242-Project/build_nfl_model.py'>

In [3]:
# Run the project's requirements check before loading any data.
verify_requirements()

Verifying requirements from requirements.txt...
Name: numpy
Version: 1.26.4
Summary: Fundamental package for array computing in Python
Home-page: https://numpy.org
Author: Travis E. Oliphant et al.
Author-email: 
License: Copyright (c) 2005-2023, NumPy Developers.
        All rights reserved.
        
        Redistribution and use in source and binary forms, with or without
        modification, are permitted provided that the following conditions are
        met:
        
            * Redistributions of source code must retain the above copyright
               notice, this list of conditions and the following disclaimer.
        
            * Redistributions in binary form must reproduce the above
               copyright notice, this list of conditions and the following
               disclaimer in the documentation and/or other materials provided
               with the distribution.
        
            * Neither the name of the NumPy Developers nor the names of any
           

# Purpose

The purpose of this notebook is to demonstrate the use of the build_nfl_model.py module. 

It walks through the process step by step and also demonstrates the method that runs the whole process in its entirety.

## Load NFL Data

In [4]:
# Fetch the roster, play-by-play and schedule frames for 1999 through 2024.
roster_data, pbp_df, schedules_df = load_data(
    start_year=1999,
    end_year=2024,
)

Fetching data from source...
1999 done.
2000 done.
2001 done.
2002 done.
2003 done.
2004 done.
2005 done.
2006 done.
2007 done.
2008 done.
2009 done.
2010 done.
2011 done.
2012 done.
2013 done.
2014 done.
2015 done.
2016 done.
2017 done.
2018 done.
2019 done.
2020 done.
2021 done.
2022 done.
2023 done.
2024 done.
Downcasting floats.


## Kicker Defense

In [8]:
# Build the kicker model object from the shared source frames.
kicker_obj = nfl_mod.NFLModel(
    position='Kicker',
    roster_data=roster_data,
    pbp_df=pbp_df,
    schedules_df=schedules_df,
)

Swapping 'posteam' and 'defteam' for 68390 kickoff plays...
Swap complete.
Data processing for 'df_kicker_pbp' completed.
Kicker play-by-play data processing completed successfully.
DataFrames merged successfully into 'df_combined'.


In [9]:
# Step 1: prepare the kicker data for modelling
kicker_obj.preprocess_data()

# Step 2: fit and score the two scikit-style regressors, in this order
for model_type in ('LinearRegression', 'RandomForest'):
    kicker_obj.train_evaluate_model(model_type=model_type)

# Step 3: fit and score the LSTM, then score the combined ensemble
kicker_obj.build_and_train_lstm()
kicker_obj.evaluate_lstm()
kicker_obj.evaluate_ensemble()

No non-numerical columns to convert.
Training LinearRegression model...
LinearRegression model trained successfully.
Evaluating LinearRegression model...
LinearRegression Evaluation Results - MAE: 3.05, MSE: 16.00, R2: 0.31
Training RandomForest model...
RandomForest model trained successfully.
Evaluating RandomForest model...
RandomForest Evaluation Results - MAE: 2.95, MSE: 15.87, R2: 0.31
LSTM model built and compiled.


  super().__init__(**kwargs)


LSTM training completed.
LSTM Test MAE: 3.01
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Ensemble Test MAE: 2.96


In [10]:
# Show the per-model metrics table. Leaving it as the cell's last expression
# lets the notebook use the object's rich display instead of the plain-text
# rendering that print() forces.
kicker_obj.get_results()

      LinearRegression  RandomForest       LSTM   Ensemble
MAE           3.045083      2.950408   3.009468   2.963906
MSE          16.004663     15.866019        NaN  15.528268
R2            0.307957      0.313952        NaN   0.328556
Loss               NaN           NaN  15.646025        NaN


In [25]:
# Produce ensemble predictions for kickers and write them out as CSV
predictions_kicker = kicker_obj.process_predictions(
    ensemble=True,
    save_to_file='predictions/kicker_predictions.csv',
)

Generating predictions...
Generating ensemble predictions...
[1m513/513[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 282us/step
Saving predictions to file: predictions/kicker_predictions.csv...
Predictions successfully saved to predictions/kicker_predictions.csv.


In [26]:
# Display the kicker predictions returned by process_predictions
# (last expression in the cell, so the notebook renders it).
predictions_kicker

Unnamed: 0,game_id,game_date,week,season,posteam,defteam,kicker_player_name,kicker_player_id,n_games_career,n_games_season_k,...,xp_made_19y_mean_last5_def,xp_attempt_33y_mean_season_def,xp_attempt_33y_mean_last5_def,xp_made_33y_mean_season_def,xp_made_33y_mean_last5_def,roof,temp,wind,fantasy_points,predicted_fantasy
0,1999_01_ARI_PHI,1999-09-12,1,1999,ARI,PHI,C.Jacke,00-0008080,0,0,...,0.0,0.00,0.0,0.00,0.0,0,0.0,0.0,13,4.257269
1,1999_01_ARI_PHI,1999-09-12,1,1999,PHI,ARI,D.Akers,00-0000108,0,0,...,0.0,0.00,0.0,0.00,0.0,0,0.0,0.0,0,4.257269
2,1999_01_ARI_PHI,1999-09-12,1,1999,PHI,ARI,N.Johnson,00-0008593,0,0,...,0.0,0.00,0.0,0.00,0.0,0,0.0,0.0,6,4.257269
3,1999_01_BUF_IND,1999-09-12,1,1999,BUF,IND,S.Christie,00-0002975,0,0,...,0.0,0.00,0.0,0.00,0.0,0,0.0,0.0,6,4.257269
4,1999_01_BUF_IND,1999-09-12,1,1999,IND,BUF,M.Vanderjagt,00-0016830,0,0,...,0.0,0.00,0.0,0.00,0.0,0,0.0,0.0,7,4.257269
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16384,2024_11_SEA_SF,2024-11-17,11,2024,SEA,SF,J.Myers,00-0031492,150,9,...,0.0,2.22,2.6,2.22,2.6,outdoors,59.0,4.0,12,9.081707
16385,2024_11_SEA_SF,2024-11-17,11,2024,SF,SEA,J.Moody,00-0038562,26,6,...,0.0,2.44,2.6,2.44,2.6,outdoors,59.0,4.0,5,7.361098
16386,2024_11_WAS_PHI,2024-11-14,11,2024,PHI,WAS,B.Mann,00-0036313,30,9,...,0.0,2.10,2.0,2.00,2.0,outdoors,46.0,9.0,0,0.233662
16387,2024_11_WAS_PHI,2024-11-14,11,2024,PHI,WAS,J.Elliott,00-0033787,133,9,...,0.0,2.10,2.0,2.00,2.0,outdoors,46.0,9.0,7,7.098882


# RB_WR

In [13]:
# Build the 'RW' position model object from the shared source frames.
rw_obj = nfl_mod.NFLModel(
    position='RW',
    roster_data=roster_data,
    pbp_df=pbp_df,
    schedules_df=schedules_df,
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  receiver_rusher_stats['two_points'] = np.where(receiver_rusher_stats['two_point_conv_result'] == 'success',1,0)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  receiver_rusher_stats.rename(columns = {'complete_pass':'reception'},inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  team['team'] = team['team'].replace({'OAK':'LV', 'STL':'LA', 'SD'

In [14]:
# Step 1: prepare the RW data for modelling
rw_obj.preprocess_data()

# Step 2: fit and score the two scikit-style regressors, in this order
for model_type in ('LinearRegression', 'RandomForest'):
    rw_obj.train_evaluate_model(model_type=model_type)

# Step 3: fit and score the LSTM, then score the combined ensemble
rw_obj.build_and_train_lstm()
rw_obj.evaluate_lstm()
rw_obj.evaluate_ensemble()

No non-numerical columns to convert.
Training LinearRegression model...
LinearRegression model trained successfully.
Evaluating LinearRegression model...
LinearRegression Evaluation Results - MAE: 5.27, MSE: 54.57, R2: 0.31
Training RandomForest model...
RandomForest model trained successfully.
Evaluating RandomForest model...
RandomForest Evaluation Results - MAE: 5.39, MSE: 56.55, R2: 0.28
LSTM model built and compiled.


  super().__init__(**kwargs)


LSTM training completed.
LSTM Test MAE: 5.25
[1m495/495[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 445us/step
Ensemble Test MAE: 5.28


In [16]:
# Show the per-model metrics table. Leaving it as the cell's last expression
# lets the notebook use the object's rich display instead of the plain-text
# rendering that print() forces.
rw_obj.get_results()

      LinearRegression  RandomForest       LSTM   Ensemble
MAE           5.270718      5.388234   5.247021   5.278201
MSE          54.565621     56.548108        NaN  54.443840
R2            0.306753      0.281566        NaN   0.308300
Loss               NaN           NaN  54.037052        NaN


In [17]:
# Produce ensemble predictions for the RW group and write them out as CSV
predictions_rw = rw_obj.process_predictions(
    ensemble=True,
    save_to_file='predictions/rw_predictions.csv',
)

Generating predictions...
Generating ensemble predictions...
[1m2475/2475[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 282us/step
Saving predictions to file: predictions/rw_predictions.csv...
Predictions successfully saved to predictions/rw_predictions.csv.


In [18]:
# Display the RW predictions returned by process_predictions
# (last expression in the cell, so the notebook renders it).
predictions_rw

Unnamed: 0,game_id,game_date,week,div_game,posteam,opponent_team,home_team,away_team,weather,stadium,...,two_points_mean_career,two_points_total_career,two_points_mean_season,two_points_total_season,two_points_mean_last5,two_points_total_last5,two_points_last,points_allowed_mean_season,points_allowed_mean_last5,predicted_fantasy
0,2001_01_ATL_SF,2001-09-09,1,1,ATL,SF,SF,ATL,"partly cloudy Temp: 68° F, Humidity: 63%, Wind...",3COM Park,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,6.682596
1,2001_01_ATL_SF,2001-09-09,1,1,ATL,SF,SF,ATL,"partly cloudy Temp: 68° F, Humidity: 63%, Wind...",3COM Park,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,6.682596
2,2001_01_ATL_SF,2001-09-09,1,1,ATL,SF,SF,ATL,"partly cloudy Temp: 68° F, Humidity: 63%, Wind...",3COM Park,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,6.682596
3,2001_01_ATL_SF,2001-09-09,1,1,ATL,SF,SF,ATL,"partly cloudy Temp: 68° F, Humidity: 63%, Wind...",3COM Park,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,6.682596
4,2001_01_ATL_SF,2001-09-09,1,1,ATL,SF,SF,ATL,"partly cloudy Temp: 68° F, Humidity: 63%, Wind...",3COM Park,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,6.682596
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79169,2024_11_WAS_PHI,2024-11-14,11,1,WAS,PHI,PHI,WAS,"Clear Temp: 46° F, Humidity: 63%, Wind: W 9 mph",Lincoln Financial Field,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,17.888889,13.0,4.227677
79170,2024_11_WAS_PHI,2024-11-14,11,1,WAS,PHI,PHI,WAS,"Clear Temp: 46° F, Humidity: 63%, Wind: W 9 mph",Lincoln Financial Field,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,17.888889,13.0,4.003532
79171,2024_11_WAS_PHI,2024-11-14,11,1,WAS,PHI,PHI,WAS,"Clear Temp: 46° F, Humidity: 63%, Wind: W 9 mph",Lincoln Financial Field,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,17.888889,13.0,6.572102
79172,2024_11_WAS_PHI,2024-11-14,11,1,WAS,PHI,PHI,WAS,"Clear Temp: 46° F, Humidity: 63%, Wind: W 9 mph",Lincoln Financial Field,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,17.888889,13.0,4.148638


# QB

In [19]:
# Build the quarterback model object from the shared source frames.
qb_obj = nfl_mod.NFLModel(
    position='QB',
    roster_data=roster_data,
    pbp_df=pbp_df,
    schedules_df=schedules_df,
)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  schedules_df_copy.rename(columns = {'gameday':'game_date'}, inplace = True)


In [20]:
# Step 1: prepare the QB data for modelling
qb_obj.preprocess_data()

# Step 2: fit and score the two scikit-style regressors, in this order
for model_type in ('LinearRegression', 'RandomForest'):
    qb_obj.train_evaluate_model(model_type=model_type)

# Step 3: fit and score the LSTM, then score the combined ensemble
qb_obj.build_and_train_lstm()
qb_obj.evaluate_lstm()
qb_obj.evaluate_ensemble()

No non-numerical columns to convert.
Training LinearRegression model...
LinearRegression model trained successfully.
Evaluating LinearRegression model...
LinearRegression Evaluation Results - MAE: 15.17, MSE: 373.82, R2: 0.32
Training RandomForest model...
RandomForest model trained successfully.
Evaluating RandomForest model...
RandomForest Evaluation Results - MAE: 15.03, MSE: 369.69, R2: 0.33
LSTM model built and compiled.


  super().__init__(**kwargs)


LSTM training completed.
LSTM Test MAE: 14.77
[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Ensemble Test MAE: 14.78


In [22]:
# Show the per-model metrics table. Leaving it as the cell's last expression
# lets the notebook use the object's rich display instead of the plain-text
# rendering that print() forces.
qb_obj.get_results()

      LinearRegression  RandomForest        LSTM    Ensemble
MAE          15.173099     15.029875   14.773015   14.778619
MSE         373.824252    369.692863         NaN  359.728941
R2            0.322431      0.329919         NaN    0.347979
Loss               NaN           NaN  364.114075         NaN


In [23]:
# Produce ensemble predictions for quarterbacks and write them out as CSV
predictions_qb = qb_obj.process_predictions(
    ensemble=True,
    save_to_file='predictions/qb_predictions.csv',
)

Generating predictions...
Generating ensemble predictions...
[1m324/324[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 293us/step
Saving predictions to file: predictions/qb_predictions.csv...
Predictions successfully saved to predictions/qb_predictions.csv.


In [24]:
# Display the QB predictions returned by process_predictions
# (last expression in the cell, so the notebook renders it).
predictions_qb

Unnamed: 0,game_id,game_date,season,week,div_game,home_team,away_team,posteam,opponent_team,weather,...,two_points_mean_career,two_points_total_career,two_points_mean_season,two_points_total_season,two_points_mean_last5,two_points_total_last5,two_points_last,points_allowed_mean_season,points_allowed_mean_last5,predicted_fantasy
0,2001_01_ATL_SF,2001-09-09,2001,1,1,SF,ATL,ATL,SF,"partly cloudy Temp: 68° F, Humidity: 63%, Wind...",...,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,25.768029
1,2001_01_ATL_SF,2001-09-09,2001,1,1,SF,ATL,ATL,SF,"partly cloudy Temp: 68° F, Humidity: 63%, Wind...",...,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,25.768029
2,2001_01_ATL_SF,2001-09-09,2001,1,1,SF,ATL,SF,ATL,"partly cloudy Temp: 68° F, Humidity: 63%, Wind...",...,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,32.256890
3,2001_01_CHI_BAL,2001-09-09,2001,1,0,BAL,CHI,BAL,CHI,"Mostly cloudy, highs in mid 80's Temp: 83° F, ...",...,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,11.283925
4,2001_01_CHI_BAL,2001-09-09,2001,1,0,BAL,CHI,CHI,BAL,"Mostly cloudy, highs in mid 80's Temp: 83° F, ...",...,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,20.735244
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10345,2024_11_MIN_TEN,2024-11-17,2024,11,0,TEN,MIN,TEN,MIN,"Sunny Temp: 66° F, Humidity: 59%, Wind: SSW 7 mph",...,0.076923,1.0,0.0,0.0,0.0,0.0,0.0,17.444444,19.6,38.538515
10346,2024_11_SEA_SF,2024-11-17,2024,11,1,SF,SEA,SEA,SF,"Cloudy Temp: 59° F, Humidity: 45%, Wind: S 4 mph",...,0.015873,1.0,0.0,0.0,0.0,0.0,0.0,22.444444,24.0,37.322333
10347,2024_11_SEA_SF,2024-11-17,2024,11,1,SF,SEA,SF,SEA,"Cloudy Temp: 59° F, Humidity: 45%, Wind: S 4 mph",...,0.035714,1.0,0.0,0.0,0.0,0.0,0.0,24.555556,27.2,42.584781
10348,2024_11_WAS_PHI,2024-11-14,2024,11,1,PHI,WAS,PHI,WAS,"Clear Temp: 46° F, Humidity: 63%, Wind: W 9 mph",...,0.065217,3.0,0.0,0.0,0.0,0.0,0.0,21.700000,20.4,37.246144
