In [1]:
# import all necessary dependencies
import os
import pathlib
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import psycopg2
import sklearn as skl
import tensorflow as tf
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sqlalchemy import create_engine, inspect, text
from sqlalchemy.orm import Session
from sqlalchemy.types import String, Float, Integer
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

In [2]:
# load each season's raw wide receiver workbook into its own dataframe
raw_workbooks = ('wr_season_2019.xlsx', 'wr_season_2020.xlsx', 'wr_season_2021.xlsx')
season_2019_df, season_2020_df, season_2021_df = (
    pd.read_excel(workbook) for workbook in raw_workbooks
)

## Data Cleaning

In [3]:
# show the first five rows of the raw 2019 season data
season_2019_df.head(n=5)

Unnamed: 0,Rank,Name,Team,Pos,GMS,TGTS,REC,PCT,YDS,TD,...,Y/T,Y/R,ATT,YDS.1,AVG,TD.1,FUM,LST,FPTS/G,FPTS
0,1,Michael Thomas,NO,WR,16,185,149,80.5,1725,9,...,9.3,11.6,1,-9,-9.0,0,1,0,14.1,225.6
1,2,Chris Godwin,TB,WR,14,120,86,71.7,1333,9,...,11.1,15.5,1,8,8.0,0,0,0,13.6,190.1
2,3,Kenny Golladay,DET,WR,16,116,65,56.0,1190,11,...,10.3,18.3,0,0,0.0,0,1,1,11.4,183.0
3,4,Cooper Kupp,LAR,WR,16,134,94,70.1,1161,10,...,8.7,12.4,2,4,2.0,0,3,0,11.0,176.5
4,5,Julio Jones,ATL,WR,15,157,99,63.1,1394,6,...,8.9,14.1,2,-3,-1.5,0,1,0,11.7,175.1


In [4]:
# show the first five rows of the raw 2020 season data
season_2020_df.head(n=5)

Unnamed: 0,Rank,Name,Team,Pos,GMS,TGTS,REC,PCT,YDS,TD,...,Y/T,Y/R,ATT,YDS.1,AVG,TD.1,FUM,LST,FPTS/G,FPTS
0,1,Davante Adams,GB,WR,14,148,115,77.7,1374,18,...,9.3,11.9,0,0,0.0,0,1,1,17.4,243.4
1,2,Tyreek Hill,KC,WR,15,135,87,64.4,1276,15,...,9.5,14.7,13,123,9.5,2,1,0,16.1,241.9
2,3,Stefon Diggs,BUF,WR,16,166,127,76.5,1535,8,...,9.2,12.1,1,1,1.0,0,0,0,12.6,201.6
3,4,Calvin Ridley,ATL,WR,15,143,90,62.9,1374,9,...,9.6,15.3,5,1,0.2,0,1,1,12.8,191.5
4,5,DK Metcalf,SEA,WR,16,129,83,64.3,1303,10,...,10.1,15.7,0,0,0.0,0,1,1,11.8,188.3


In [5]:
# show the first five rows of the raw 2021 season data
season_2021_df.head(n=5)

Unnamed: 0,Rank,Name,Team,Pos,GMS,TGTS,REC,PCT,YDS,TD,...,Y/T,Y/R,ATT,YDS.1,AVG,TD.1,FUM,LST,FPTS/G,FPTS
0,1,Cooper Kupp,LAR,WR,17,191,145,75.9,1947,16,...,10.2,13.4,4,18,4.5,0,0,0,17.3,294.5
1,2,Deebo Samuel,SF,WR,16,121,77,63.6,1405,6,...,11.6,18.2,59,365,6.2,8,4,2,16.4,261.96
2,3,Ja'Marr Chase,CIN,WR,17,128,81,63.3,1455,13,...,11.4,18.0,7,21,3.0,0,2,1,13.2,223.6
3,4,Justin Jefferson,MIN,WR,17,167,108,64.7,1616,10,...,9.7,15.0,6,14,2.3,0,1,1,13.1,222.4
4,5,Davante Adams,GB,WR,16,169,123,72.8,1553,11,...,9.2,12.6,0,0,0.0,0,0,0,13.8,221.3


In [6]:
# the model does not need the ranking, so remove the 'Rank' column from every season
season_2019_df = season_2019_df.drop('Rank', axis=1)
season_2020_df = season_2020_df.drop('Rank', axis=1)
season_2021_df = season_2021_df.drop('Rank', axis=1)

In [7]:
# swap the abbreviated 2019 stat headers for descriptive column names
# NOTE(review): 'LST' may stand for fumbles lost rather than lost yards — confirm
# against the data source before relying on the 'Lost_yards' label
readable_names_2019 = {
    'Pos': 'Position',
    'GMS': 'Games',
    'REC': 'Receptions',
    'TGTS': 'Targets',
    'PCT': 'Percentage',
    'YDS': 'Yards',
    'TD': 'Touchdowns',
    'LNG': 'Long',
    'Y/T': 'Yards_per_target',
    'Y/R': 'Yards_per_reception',
    'ATT': 'Attempts',
    'TD.1': 'Rushing_touchdown',
    'YDS.1': 'Rushing_yards',
    'AVG': 'Average_rushing_yards',
    'FUM': 'Fumbles',
    'LST': 'Lost_yards',
    'FPTS/G': 'Fantasy_points_per_game',
    'FPTS': 'Fantasy_points',
}
season_2019_df = season_2019_df.rename(columns=readable_names_2019)

In [8]:
# swap the abbreviated 2020 stat headers for descriptive column names
# NOTE(review): 'LST' may stand for fumbles lost rather than lost yards — confirm
# against the data source before relying on the 'Lost_yards' label
readable_names_2020 = {
    'Pos': 'Position',
    'GMS': 'Games',
    'TGTS': 'Targets',
    'REC': 'Receptions',
    'PCT': 'Percentage',
    'YDS': 'Yards',
    'TD': 'Touchdowns',
    'LNG': 'Long',
    'Y/T': 'Yards_per_target',
    'Y/R': 'Yards_per_reception',
    'ATT': 'Attempts',
    'TD.1': 'Rushing_touchdown',
    'YDS.1': 'Rushing_yards',
    'AVG': 'Average_rushing_yards',
    'FUM': 'Fumbles',
    'LST': 'Lost_yards',
    'FPTS/G': 'Fantasy_points_per_game',
    'FPTS': 'Fantasy_points',
}
season_2020_df = season_2020_df.rename(columns=readable_names_2020)

In [9]:
# swap the abbreviated 2021 stat headers for descriptive column names
# NOTE(review): 'LST' may stand for fumbles lost rather than lost yards — confirm
# against the data source before relying on the 'Lost_yards' label
readable_names_2021 = {
    'Pos': 'Position',
    'GMS': 'Games',
    'TGTS': 'Targets',
    'REC': 'Receptions',
    'PCT': 'Percentage',
    'YDS': 'Yards',
    'TD': 'Touchdowns',
    'LNG': 'Long',
    'Y/T': 'Yards_per_target',
    'Y/R': 'Yards_per_reception',
    'ATT': 'Attempts',
    'TD.1': 'Rushing_touchdown',
    'YDS.1': 'Rushing_yards',
    'AVG': 'Average_rushing_yards',
    'FUM': 'Fumbles',
    'LST': 'Lost_yards',
    'FPTS/G': 'Fantasy_points_per_game',
    'FPTS': 'Fantasy_points',
}
season_2021_df = season_2021_df.rename(columns=readable_names_2021)

In [10]:
# confirm the rename took effect by viewing the first five rows of the 2021 data
season_2021_df.head(n=5)

Unnamed: 0,Name,Team,Position,Games,Targets,Receptions,Percentage,Yards,Touchdowns,Long,Yards_per_target,Yards_per_reception,Attempts,Rushing_yards,Average_rushing_yards,Rushing_touchdown,Fumbles,Lost_yards,Fantasy_points_per_game,Fantasy_points
0,Cooper Kupp,LAR,WR,17,191,145,75.9,1947,16,59,10.2,13.4,4,18,4.5,0,0,0,17.3,294.5
1,Deebo Samuel,SF,WR,16,121,77,63.6,1405,6,83,11.6,18.2,59,365,6.2,8,4,2,16.4,261.96
2,Ja'Marr Chase,CIN,WR,17,128,81,63.3,1455,13,82,11.4,18.0,7,21,3.0,0,2,1,13.2,223.6
3,Justin Jefferson,MIN,WR,17,167,108,64.7,1616,10,56,9.7,15.0,6,14,2.3,0,1,1,13.1,222.4
4,Davante Adams,GB,WR,16,169,123,72.8,1553,11,59,9.2,12.6,0,0,0.0,0,0,0,13.8,221.3


In [11]:
# tag every row of each season's dataframe with the year of play (stored as text)
for frame, season in (
    (season_2019_df, '2019'),
    (season_2020_df, '2020'),
    (season_2021_df, '2021'),
):
    frame['Year'] = season

In [62]:
# save the cleaned 2019 season to csv (without the index) for future use
season_2019_df.to_csv(path_or_buf='wr_season_2019_clean.csv', index=False)

In [63]:
# save the cleaned 2020 season to csv (without the index) for future use
season_2020_df.to_csv(path_or_buf='wr_season_2020_clean.csv', index=False)

In [64]:
# save the cleaned 2021 season to csv (without the index) for future use
season_2021_df.to_csv(path_or_buf='wr_season_2021_clean.csv', index=False)

In [12]:
# check datatypes for creating sql table
# (lists the pandas dtype of every column in the cleaned 2019 dataframe)
season_2019_df.dtypes

Name                        object
Team                        object
Position                    object
Games                        int64
Targets                      int64
Receptions                   int64
Percentage                 float64
Yards                        int64
Touchdowns                   int64
Long                         int64
Yards_per_target           float64
Yards_per_reception        float64
Attempts                     int64
Rushing_yards                int64
Average_rushing_yards      float64
Rushing_touchdown            int64
Fumbles                      int64
Lost_yards                   int64
Fantasy_points_per_game    float64
Fantasy_points             float64
Year                        object
dtype: object

In [39]:
# create engine to connect to postgresql database
# the connection URL is taken from the NFL_DB_URL environment variable so that real
# credentials do not have to be stored in the notebook; when the variable is unset the
# original local development URL is used
engine = create_engine(
    os.environ.get('NFL_DB_URL', 'postgresql://postgres:postgres@localhost:5432/nfl_db')
)

# write season_2019_df to a postgres table, replacing the table if it already exists
# dtype keys have to match the dataframe's column names; the earlier
# 'Yards_per_recption' spelling matched no column, so that column's type was never
# explicitly set
season_2019_df.to_sql('season_2019', engine, index= False, if_exists='replace', chunksize = 500,
                 dtype = {'Name': String,
                  'Team': String,
                  'Position': String,
                  'Games': Integer,
                  'Targets': Integer,
                  'Receptions': Integer,
                  'Percentage': Float,
                  'Yards': Integer,
                  'Touchdowns': Integer,
                  'Long': Integer,
                  'Yards_per_target': Float,
                  'Yards_per_reception': Float,
                  'Attempts': Integer,
                  'Rushing_yards': Integer,
                  'Average_rushing_yards': Float,
                  'Rushing_touchdown': Integer,
                  'Fumbles': Integer,
                  'Lost_yards': Integer,
                  'Fantasy_points_per_game': Float,
                  'Fantasy_points': Float,
                  'Year': String})

236

In [40]:
# write season_2020_df to a postgres table, replacing the table if it already exists
# dtype keys have to match the dataframe's column names; the earlier
# 'Yards_per_recption' spelling matched no column, so that column's type was never
# explicitly set
season_2020_df.to_sql('season_2020', engine, index= False, if_exists='replace', chunksize = 500,
                 dtype = {'Name': String,
                  'Team': String,
                  'Position': String,
                  'Games': Integer,
                  'Targets': Integer,
                  'Receptions': Integer,
                  'Percentage': Float,
                  'Yards': Integer,
                  'Touchdowns': Integer,
                  'Long': Integer,
                  'Yards_per_target': Float,
                  'Yards_per_reception': Float,
                  'Attempts': Integer,
                  'Rushing_yards': Integer,
                  'Average_rushing_yards': Float,
                  'Rushing_touchdown': Integer,
                  'Fumbles': Integer,
                  'Lost_yards': Integer,
                  'Fantasy_points_per_game': Float,
                  'Fantasy_points': Float,
                  'Year': String})

248

In [41]:
# write season_2021_df to a postgres table, replacing the table if it already exists
# dtype keys have to match the dataframe's column names; the earlier
# 'Yards_per_recption' spelling matched no column, so that column's type was never
# explicitly set
season_2021_df.to_sql('season_2021', engine, index= False, if_exists='replace', chunksize = 500,
                 dtype = {'Name': String,
                  'Team': String,
                  'Position': String,
                  'Games': Integer,
                  'Targets': Integer,
                  'Receptions': Integer,
                  'Percentage': Float,
                  'Yards': Integer,
                  'Touchdowns': Integer,
                  'Long': Integer,
                  'Yards_per_target': Float,
                  'Yards_per_reception': Float,
                  'Attempts': Integer,
                  'Rushing_yards': Integer,
                  'Average_rushing_yards': Float,
                  'Rushing_touchdown': Integer,
                  'Fumbles': Integer,
                  'Lost_yards': Integer,
                  'Fantasy_points_per_game': Float,
                  'Fantasy_points': Float,
                  'Year': String})

269

In [33]:
# disabled experiment: add a primary key to season_2019 and read back ten rows
# NOTE(review): the table's columns reflect with mixed-case names (e.g. Name), and
# PostgreSQL folds unquoted identifiers to lowercase, so the key column would presumably
# have to be written as "Name" if this is ever re-enabled — confirm before uncommenting
# with engine.connect() as con:
#     con.execute(text("ALTER TABLE season_2019 ADD PRIMARY KEY (Name);"))
#     con.execute(text('Select * from season_2019 limit(10)')).fetchall()

In [37]:
# build an inspector for the engine and list the tables in the connected database
insp = inspect(engine)
table_names = insp.get_table_names()
print(table_names)

['nfl', 'people', 'fauna_vertebrate', 'season_2019', 'road_accidents', 'accidents_by_state']


In [38]:
# print the name and SQL type of every column in the season_2019 table
columns = insp.get_columns('season_2019')
for col_meta in columns:
    col_name, col_type = col_meta['name'], col_meta['type']
    print(col_name, col_type)

Name VARCHAR
Team VARCHAR
Position VARCHAR
Games INTEGER
Targets INTEGER
Receptions INTEGER
Percentage DOUBLE PRECISION
Yards INTEGER
Touchdowns INTEGER
Long INTEGER
Yards_per_target DOUBLE PRECISION
Yards_per_reception DOUBLE PRECISION
Attempts INTEGER
Rushing_yards INTEGER
Average_rushing_yards DOUBLE PRECISION
Rushing_touchdown INTEGER
Fumbles INTEGER
Lost_yards INTEGER
Fantasy_points_per_game DOUBLE PRECISION
Fantasy_points DOUBLE PRECISION
Year VARCHAR


## Data Preprocessing

In [None]:
# the name column is not a categorical feature, so leave it out of the preprocessing frame
pre_2019_df = season_2019_df.drop('Name', axis=1)

In [None]:
# one-hot encode the team and position categorical values into dummy columns
pre_2019_df = pd.get_dummies(pre_2019_df, columns=['Team', 'Position'])

In [None]:
# show the first five rows of the encoded dataframe
pre_2019_df.head(n=5)

In [None]:
# (rows, columns) of the dataframe after the dummy columns were added
pre_2019_df.shape

In [None]:
# touchdown performance is the first thing we train on:
# the 'Touchdowns' column is the target (y) and every other column is a feature (X)
target_column = 'Touchdowns'
X = pre_2019_df.drop(target_column, axis=1)
y = pre_2019_df[target_column]

# divide features and target into training and testing sets with a fixed random_state
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [None]:
# build a StandardScaler and fit it on the training features only
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# apply the fitted scaler to both the training and the testing features
X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)

In [None]:
# (rows, columns) of the test feature set
X_test.shape

## Model Creation

In [None]:
# define the model
nn = Sequential()

# first hidden layer
nn.add(Dense(units=90, activation = 'relu', input_dim = X_test.shape[1]))

# second hidden layer
nn.add(Dense(units=20, activation='relu'))

# output layer
nn.add(Dense(units=1, activation='sigmoid'))

# check the structure of the model
nn.summary()

In [None]:
# compile the model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [None]:
# train the model
fit_model = nn.fit(X_train_scaled, y_train, epochs=40)

In [None]:
# evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")