## Project 4 - NFL Wide Receiver Performance

In [1]:
# import all necessary dependencies
import pandas as pd
import psycopg2
import sklearn as skl
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from sklearn.preprocessing import StandardScaler
import numpy as np

In [2]:
# load one raw wide-receiver workbook per season (2018 through 2022)
season_2018_df = pd.read_excel(io='wr_season_2018.xlsx')
season_2019_df = pd.read_excel(io='wr_season_2019.xlsx')
season_2020_df = pd.read_excel(io='wr_season_2020.xlsx')
season_2021_df = pd.read_excel(io='wr_season_2021.xlsx')
season_2022_df = pd.read_excel(io='wr_season_2022.xlsx')

## Data Cleaning

In [3]:
# show the first five rows of the raw 2018 season to inspect its columns
season_2018_df.head(n=5)

Unnamed: 0,Rank,Name,Team,Pos,GMS,TGTS,REC,PCT,YDS,TD,...,Y/T,Y/R,ATT,YDS.1,AVG,TD.1,FUM,LST,FPTS/G,FPTS
0,1,Tyreek Hill,KC,WR,16,137,87,63.5,1479,12,...,10.8,17.0,22,151,6.9,1,0,0,15.1,241.0
1,2,Antonio Brown,PIT,WR,15,168,104,61.9,1297,15,...,7.7,12.5,0,0,0.0,0,0,0,14.6,219.7
2,3,Davante Adams,GB,WR,15,169,111,65.7,1386,13,...,8.2,12.5,0,0,0.0,0,0,0,14.6,218.6
3,4,DeAndre Hopkins,HOU,WR,16,163,115,70.6,1572,11,...,9.6,13.7,1,-7,-7.0,0,2,2,13.7,218.5
4,5,Julio Jones,ATL,WR,16,170,113,66.5,1677,8,...,9.9,14.8,2,12,6.0,0,2,2,13.3,212.9


In [4]:
# the Rank column is not used by the model, so remove it from every season
season_2018_df = season_2018_df.drop(labels=['Rank'], axis='columns')
season_2019_df = season_2019_df.drop(labels=['Rank'], axis='columns')
season_2020_df = season_2020_df.drop(labels=['Rank'], axis='columns')
season_2021_df = season_2021_df.drop(labels=['Rank'], axis='columns')
season_2022_df = season_2022_df.drop(labels=['Rank'], axis='columns')

In [5]:
# map the abbreviated source headers of the 2018 season to descriptive names
# NOTE(review): 'LST' sits next to 'FUM' and likely means fumbles lost rather than
# lost yards -- confirm; the label is kept because it must match across seasons
column_names_2018 = {
    'Pos': 'Position',
    'GMS': 'Games',
    'REC': 'Receptions',
    'TGTS': 'Targets',
    'PCT': 'Percentage',
    'YDS': 'Yards',
    'TD': 'Touchdowns',
    'LNG': 'Long',
    'Y/T': 'Yards_per_target',
    'Y/R': 'Yards_per_reception',
    'ATT': 'Attempts',
    'TD.1': 'Rushing_touchdown',
    'YDS.1': 'Rushing_yards',
    'AVG': 'Average_rushing_yards',
    'FUM': 'Fumbles',
    'LST': 'Lost_yards',
    'FPTS/G': 'Fantasy_points_per_game',
    'FPTS': 'Fantasy_points',
}
season_2018_df = season_2018_df.rename(columns=column_names_2018)

In [6]:
# map the abbreviated source headers of the 2019 season to descriptive names
# NOTE(review): 'LST' sits next to 'FUM' and likely means fumbles lost rather than
# lost yards -- confirm; the label is kept because it must match across seasons
column_names_2019 = {
    'Pos': 'Position',
    'GMS': 'Games',
    'REC': 'Receptions',
    'TGTS': 'Targets',
    'PCT': 'Percentage',
    'YDS': 'Yards',
    'TD': 'Touchdowns',
    'LNG': 'Long',
    'Y/T': 'Yards_per_target',
    'Y/R': 'Yards_per_reception',
    'ATT': 'Attempts',
    'TD.1': 'Rushing_touchdown',
    'YDS.1': 'Rushing_yards',
    'AVG': 'Average_rushing_yards',
    'FUM': 'Fumbles',
    'LST': 'Lost_yards',
    'FPTS/G': 'Fantasy_points_per_game',
    'FPTS': 'Fantasy_points',
}
season_2019_df = season_2019_df.rename(columns=column_names_2019)

In [7]:
# map the abbreviated source headers of the 2020 season to descriptive names
# NOTE(review): 'LST' sits next to 'FUM' and likely means fumbles lost rather than
# lost yards -- confirm; the label is kept because it must match across seasons
column_names_2020 = {
    'Pos': 'Position',
    'GMS': 'Games',
    'TGTS': 'Targets',
    'REC': 'Receptions',
    'PCT': 'Percentage',
    'YDS': 'Yards',
    'TD': 'Touchdowns',
    'LNG': 'Long',
    'Y/T': 'Yards_per_target',
    'Y/R': 'Yards_per_reception',
    'ATT': 'Attempts',
    'TD.1': 'Rushing_touchdown',
    'YDS.1': 'Rushing_yards',
    'AVG': 'Average_rushing_yards',
    'FUM': 'Fumbles',
    'LST': 'Lost_yards',
    'FPTS/G': 'Fantasy_points_per_game',
    'FPTS': 'Fantasy_points',
}
season_2020_df = season_2020_df.rename(columns=column_names_2020)

In [8]:
# map the abbreviated source headers of the 2021 season to descriptive names
# NOTE(review): 'LST' sits next to 'FUM' and likely means fumbles lost rather than
# lost yards -- confirm; the label is kept because it must match across seasons
column_names_2021 = {
    'Pos': 'Position',
    'GMS': 'Games',
    'TGTS': 'Targets',
    'REC': 'Receptions',
    'PCT': 'Percentage',
    'YDS': 'Yards',
    'TD': 'Touchdowns',
    'LNG': 'Long',
    'Y/T': 'Yards_per_target',
    'Y/R': 'Yards_per_reception',
    'ATT': 'Attempts',
    'TD.1': 'Rushing_touchdown',
    'YDS.1': 'Rushing_yards',
    'AVG': 'Average_rushing_yards',
    'FUM': 'Fumbles',
    'LST': 'Lost_yards',
    'FPTS/G': 'Fantasy_points_per_game',
    'FPTS': 'Fantasy_points',
}
season_2021_df = season_2021_df.rename(columns=column_names_2021)

In [9]:
# map the abbreviated source headers of the 2022 season to descriptive names
# (unlike the earlier seasons this mapping has no 'Pos' / 'GMS' entries --
# presumably the 2022 sheet does not carry those columns; confirm against the file)
# NOTE(review): 'LST' sits next to 'FUM' and likely means fumbles lost rather than
# lost yards -- confirm; the label is kept because it must match across seasons
column_names_2022 = {
    'REC': 'Receptions',
    'TGTS': 'Targets',
    'PCT': 'Percentage',
    'YDS': 'Yards',
    'TD': 'Touchdowns',
    'LNG': 'Long',
    'Y/T': 'Yards_per_target',
    'Y/R': 'Yards_per_reception',
    'ATT': 'Attempts',
    'TD.1': 'Rushing_touchdown',
    'YDS.1': 'Rushing_yards',
    'AVG': 'Average_rushing_yards',
    'FUM': 'Fumbles',
    'LST': 'Lost_yards',
    'FPTS/G': 'Fantasy_points_per_game',
    'FPTS': 'Fantasy_points',
}
season_2022_df = season_2022_df.rename(columns=column_names_2022)

In [10]:
# confirm the descriptive column names are in place on the 2018 season
season_2018_df.head(n=5)

Unnamed: 0,Name,Team,Position,Games,Targets,Receptions,Percentage,Yards,Touchdowns,Long,Yards_per_target,Yards_per_reception,Attempts,Rushing_yards,Average_rushing_yards,Rushing_touchdown,Fumbles,Lost_yards,Fantasy_points_per_game,Fantasy_points
0,Tyreek Hill,KC,WR,16,137,87,63.5,1479,12,75,10.8,17.0,22,151,6.9,1,0,0,15.1,241.0
1,Antonio Brown,PIT,WR,15,168,104,61.9,1297,15,78,7.7,12.5,0,0,0.0,0,0,0,14.6,219.7
2,Davante Adams,GB,WR,15,169,111,65.7,1386,13,57,8.2,12.5,0,0,0.0,0,0,0,14.6,218.6
3,DeAndre Hopkins,HOU,WR,16,163,115,70.6,1572,11,49,9.6,13.7,1,-7,-7.0,0,2,2,13.7,218.5
4,Julio Jones,ATL,WR,16,170,113,66.5,1677,8,58,9.9,14.8,2,12,6.0,0,2,2,13.3,212.9


In [11]:
# tag every training season (2018-2021) with its year of play, stored as a string;
# the 2022 frame is not tagged here
season_2018_df = season_2018_df.assign(Year='2018')
season_2019_df = season_2019_df.assign(Year='2019')
season_2020_df = season_2020_df.assign(Year='2020')
season_2021_df = season_2021_df.assign(Year='2021')

In [12]:
# stack the 2018-2021 seasons into one training table (2022 is kept out of it)
all_dfs = [
    season_2018_df,
    season_2019_df,
    season_2020_df,
    season_2021_df,
]
results = pd.concat(objs=all_dfs)

In [13]:
# persist the cleaned 2018 season, without the row index, for later reuse
season_2018_df.to_csv(path_or_buf='wr_season_2018_clean.csv', index=False)

In [14]:
# persist the cleaned 2019 season, without the row index, for later reuse
season_2019_df.to_csv(path_or_buf='wr_season_2019_clean.csv', index=False)

In [15]:
# persist the cleaned 2020 season, without the row index, for later reuse
season_2020_df.to_csv(path_or_buf='wr_season_2020_clean.csv', index=False)

In [16]:
# persist the cleaned 2021 season, without the row index, for later reuse
season_2021_df.to_csv(path_or_buf='wr_season_2021_clean.csv', index=False)

In [17]:
# persist the cleaned 2022 season, without the row index, for later reuse
season_2022_df.to_csv(path_or_buf='wr_season_2022_clean.csv', index=False)

## Data Preprocessing

In [18]:
# remove the columns that are not fed to the model before fitting
results = results.drop(labels=['Team', 'Position', 'Year', 'Games'], axis='columns')

In [19]:
# use the player name as the row index so it is not treated as a feature
results = results.set_index(keys='Name')

In [20]:
# display the combined 2018-2021 table, indexed by player name, after the
# non-feature columns were dropped
results

Unnamed: 0_level_0,Targets,Receptions,Percentage,Yards,Touchdowns,Long,Yards_per_target,Yards_per_reception,Attempts,Rushing_yards,Average_rushing_yards,Rushing_touchdown,Fumbles,Lost_yards,Fantasy_points_per_game,Fantasy_points
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Tyreek Hill,137,87,63.5,1479,12,75,10.8,17.0,22,151,6.9,1,0,0,15.1,241.0
Antonio Brown,168,104,61.9,1297,15,78,7.7,12.5,0,0,0.0,0,0,0,14.6,219.7
Davante Adams,169,111,65.7,1386,13,57,8.2,12.5,0,0,0.0,0,0,0,14.6,218.6
DeAndre Hopkins,163,115,70.6,1572,11,49,9.6,13.7,1,-7,-7.0,0,2,2,13.7,218.5
Julio Jones,170,113,66.5,1677,8,58,9.9,14.8,2,12,6.0,0,2,2,13.3,212.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Malik Taylor,3,2,66.7,14,0,7,4.7,7.0,0,0,0.0,0,1,1,-0.1,-0.6
Racey McMath,6,2,33.3,8,0,9,1.3,4.0,0,0,0.0,0,1,1,-0.1,-1.2
Travis Benjamin,5,0,0.0,0,0,0,0.0,0.0,0,0,0.0,0,1,1,-0.2,-2.0
J.J. Koski,0,0,0.0,0,0,0,0.0,0.0,0,0,0.0,0,1,1,-0.4,-2.0


In [21]:
# target: receiving touchdowns; features: every remaining column of `results`
# NOTE(review): Fantasy_points and Fantasy_points_per_game stay in the features and
# are presumably derived from touchdowns -- if so they leak the target; confirm
y = results['Touchdowns']
X = results.drop(labels='Touchdowns', axis='columns')

# hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=1,
)

In [22]:
# build the scaler and learn its statistics from the training rows only
scaler = StandardScaler()
scaler.fit(X_train)

# fit() returns the scaler itself, so X_scaler is the same fitted object
X_scaler = scaler

# apply the training-set scaling to both splits
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [23]:
# (rows, columns) of the held-out feature set; the column count is the
# number of inputs the network has to accept
X_test.shape

(198, 15)

## Model Creation

In [24]:
# fix the Python, NumPy and TensorFlow seeds so that weight initialisation and batch
# shuffling -- and therefore the reported loss / MAE -- are repeatable on a fresh
# Restart & Run All (same seed value as the train/test split)
tf.keras.utils.set_random_seed(1)

# define the model
nn = Sequential()

# first hidden layer; the input width is taken from the matrix the model is trained on
nn.add(Dense(units=70, activation='relu', input_dim=X_train_scaled.shape[1]))

# second hidden layer
nn.add(Dense(units=40, activation='relu'))

# third hidden layer
nn.add(Dense(units=20, activation='relu'))

# output layer: a single unit for the touchdown estimate; relu clamps it at zero
nn.add(Dense(units=1, activation='relu'))

# check the structure of the model
nn.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 70)                1120      
                                                                 
 dense_1 (Dense)             (None, 40)                2840      
                                                                 
 dense_2 (Dense)             (None, 20)                820       
                                                                 
 dense_3 (Dense)             (None, 1)                 21        
                                                                 
Total params: 4801 (18.75 KB)
Trainable params: 4801 (18.75 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [25]:
# configure training: Adam optimiser, mean squared error as the loss,
# and mean absolute error reported as an additional metric
nn.compile(
    optimizer="adam",
    loss="mse",
    metrics=["mae"],
)

In [26]:
# fit the network on the scaled training rows for 60 epochs and keep the returned fit result
fit_model = nn.fit(x=X_train_scaled, y=y_train, epochs=60)

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60


In [27]:
# score the trained network on the held-out rows; evaluate() yields the loss
# followed by the compiled metric (MAE)
test_scores = nn.evaluate(x=X_test_scaled, y=y_test, verbose=2)
model_loss, mae = test_scores
print(f"Loss: {model_loss}, Mean Absolute Error: {mae}")

7/7 - 0s - loss: 0.1400 - mae: 0.2001 - 48ms/epoch - 7ms/step
Loss: 0.14004144072532654, Mean Absolute Error: 0.20012250542640686


## Predictions

In [28]:
# index the 2022 season by player name so rows can be looked up by name
season_2022_df = season_2022_df.set_index(keys='Name')

In [29]:
# remove the actual touchdown counts so they are not passed to the model as an input
season_2022_df = season_2022_df.drop(labels='Touchdowns', axis='columns')

In [30]:
# show the first five rows to verify the 2022 frame has the expected feature columns
season_2022_df.head(n=5)

Unnamed: 0_level_0,Targets,Receptions,Percentage,Yards,Long,Yards_per_target,Yards_per_reception,Attempts,Rushing_yards,Average_rushing_yards,Rushing_touchdown,Fumbles,Lost_yards,Fantasy_points_per_game,Fantasy_points
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Justin Jefferson,184,128,69.6,1809,64,9.8,14.1,4,24,6.0,1,0,0,14.2,240.66
Davante Adams,180,100,55.6,1516,60,8.4,15.2,3,-1,-0.3,0,1,0,13.9,235.5
Tyreek Hill,170,119,70.0,1710,64,10.1,14.4,7,32,4.6,2,1,0,13.4,228.2
A.J. Brown,146,88,60.3,1496,78,10.2,17.0,0,0,0.0,0,2,2,12.4,211.6
Stefon Diggs,154,108,70.1,1429,53,9.3,13.2,1,-3,-3.0,0,1,0,13.0,208.6


In [74]:
# pick one player's 2022 stat line by name; change the name to predict for someone else
selected_player = season_2022_df.loc['Davante Adams', :]

# show the selected player's stats
selected_player

Targets                     180.0
Receptions                  100.0
Percentage                   55.6
Yards                      1516.0
Long                         60.0
Yards_per_target              8.4
Yards_per_reception          15.2
Attempts                      3.0
Rushing_yards                -1.0
Average_rushing_yards        -0.3
Rushing_touchdown             0.0
Fumbles                       1.0
Lost_yards                    0.0
Fantasy_points_per_game      13.9
Fantasy_points              235.5
Name: Davante Adams, dtype: float64

In [75]:
# copy the player's feature values into a plain NumPy array;
# these values are what the model will base its prediction on
player_array = selected_player.to_numpy(copy=True)

# show the array
player_array

array([ 1.800e+02,  1.000e+02,  5.560e+01,  1.516e+03,  6.000e+01,
        8.400e+00,  1.520e+01,  3.000e+00, -1.000e+00, -3.000e-01,
        0.000e+00,  1.000e+00,  0.000e+00,  1.390e+01,  2.355e+02])

In [76]:
# turn the flat feature vector into a single-row 2-D array, the layout X_scaler expects
converted_array = np.reshape(player_array, (1, -1))

In [77]:
# scale the data using same X_scaler as used for the model
# The stats are passed as a one-row DataFrame selected by the training column names
# instead of a bare positional array: a missing 2022 column now raises a KeyError, a
# reordered one is realigned, and the scaler's feature-name check sees matching names.
record_to_predict = X_scaler.transform(selected_player.to_frame().T[X_train.columns])



In [78]:
# run the scaled player record through the trained network
y_pred = nn.predict(x=record_to_predict)



In [79]:
# print the model's predicted touchdown value for the selected player
print(y_pred)

[[14.219835]]


In [37]:
# copy every 2022 player's feature values into one 2-D NumPy array (one row per player)
whole_season_array = season_2022_df.to_numpy(copy=True)

# show the array
whole_season_array

array([[ 1.8400e+02,  1.2800e+02,  6.9600e+01, ...,  0.0000e+00,
         1.4200e+01,  2.4066e+02],
       [ 1.8000e+02,  1.0000e+02,  5.5600e+01, ...,  0.0000e+00,
         1.3900e+01,  2.3550e+02],
       [ 1.7000e+02,  1.1900e+02,  7.0000e+01, ...,  0.0000e+00,
         1.3400e+01,  2.2820e+02],
       ...,
       [ 2.0000e+00,  2.0000e+00,  1.0000e+02, ...,  1.0000e+00,
        -2.0000e-01, -7.0000e-01],
       [ 2.0000e+00,  1.0000e+00,  5.0000e+01, ...,  2.0000e+00,
        -1.0000e-01, -1.4000e+00],
       [ 0.0000e+00,  0.0000e+00,  0.0000e+00, ...,  1.0000e+00,
        -7.0000e-01, -2.0000e+00]])

In [38]:
# scale the data using same X_scaler as used for the model
# The 2022 features are selected by the training column names rather than passed as a
# bare positional array: a missing column now raises a KeyError, a reordered one is
# realigned, and the scaler's feature-name check sees matching names.
season_to_predict = X_scaler.transform(season_2022_df[X_train.columns])



In [39]:
# run every scaled 2022 record through the trained network
y_pred_whole_season = nn.predict(x=season_to_predict)

# print the predicted touchdown value for each player
print(y_pred_whole_season)

[[ 9.23223   ]
 [14.219835  ]
 [ 7.8401904 ]
 [10.723847  ]
 [11.106193  ]
 [ 8.66509   ]
 [ 8.443152  ]
 [ 8.5587435 ]
 [ 5.715875  ]
 [ 6.475435  ]
 [ 7.3176537 ]
 [ 7.642876  ]
 [ 8.102401  ]
 [ 5.0834336 ]
 [ 7.2511206 ]
 [ 5.924616  ]
 [ 7.0966225 ]
 [ 5.475593  ]
 [ 6.2776093 ]
 [ 6.783983  ]
 [ 3.845008  ]
 [ 5.8027573 ]
 [ 4.86895   ]
 [ 7.064618  ]
 [ 6.6183176 ]
 [ 3.681797  ]
 [ 3.5953894 ]
 [ 4.9348    ]
 [ 6.016469  ]
 [ 3.9385319 ]
 [ 3.8406005 ]
 [ 5.725636  ]
 [ 1.2376425 ]
 [ 2.6864488 ]
 [ 5.8875365 ]
 [ 5.350651  ]
 [ 4.1035876 ]
 [ 4.293883  ]
 [ 2.661629  ]
 [ 3.6822548 ]
 [ 2.700815  ]
 [ 3.8352628 ]
 [ 7.2472887 ]
 [ 4.5609803 ]
 [ 1.6817781 ]
 [ 0.7518611 ]
 [ 2.808231  ]
 [ 2.7672267 ]
 [ 3.4731812 ]
 [ 3.0796804 ]
 [ 2.4797032 ]
 [ 2.0243087 ]
 [ 4.1208806 ]
 [ 3.5521705 ]
 [ 2.945683  ]
 [ 4.177954  ]
 [ 1.0147225 ]
 [ 3.1801734 ]
 [ 3.0006778 ]
 [ 1.5933142 ]
 [ 2.9557352 ]
 [ 2.5595932 ]
 [ 2.2453017 ]
 [ 3.116411  ]
 [ 1.8286594 ]
 [ 3.8719463 ]
 [ 2.04530

In [40]:
# flatten the (n, 1) prediction array to one value per player and wrap it in a DataFrame
season_2022_predictions = pd.DataFrame(
    y_pred_whole_season[:, 0],
    columns=['Predicted_touchdowns'],
)

In [41]:
# label each prediction with its player, using the names held in the 2022 frame's index
season_2022_predictions = season_2022_predictions.assign(Name=season_2022_df.index)

In [42]:
# display the predictions table: one predicted touchdown value per 2022 player name
season_2022_predictions

Unnamed: 0,Predicted_touchdowns,Name
0,9.232230,Justin Jefferson
1,14.219835,Davante Adams
2,7.840190,Tyreek Hill
3,10.723847,A.J. Brown
4,11.106193,Stefon Diggs
...,...,...
249,0.000000,Braylon Sanders
250,0.000000,Ihmir Smith-Marsette
251,0.324544,Deonte Harty
252,0.000000,KaVontae Turpin


In [43]:
# write dataframe to csv for future use
# index=False matches the other CSV exports in this notebook: the default integer
# index carries no information and would otherwise be written as an unnamed first column
season_2022_predictions.to_csv('season_2022_predictions.csv', index=False)