In [1]:
# Import Dependencies
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler

In [2]:
# Read the NFL CSV (path is relative to the current working directory)
file_path = "./NFL.csv"
nfl_df = pd.read_csv(filepath_or_buffer=file_path)

# Preview the first rows to confirm the columns were parsed as expected
nfl_df.head()

Unnamed: 0,Year,Player,Age,School,Height,Weight,Sprint_40yd,Vertical_Jump,Bench_Press_Reps,Broad_Jump,Agility_3cone,Shuttle,Drafted..tm.rnd.yr.,BMI,Player_Type,Position_Type,Position,Drafted
0,2009,Beanie Wells\WellCh00,20.0,Ohio St.,1.8542,106.594207,4.38,85.09,25.0,325.12,,,Arizona Cardinals / 1st / 31st pick / 2009,31.004194,offense,backs_receivers,RB,Yes
1,2009,Will Davis\DaviWi99,22.0,Illinois,1.8796,118.387609,4.84,83.82,27.0,292.1,7.38,4.45,Arizona Cardinals / 6th / 204th pick / 2009,33.510073,defense,defensive_lineman,DE,Yes
2,2009,Herman Johnson\JohnHe23,24.0,LSU,2.0066,165.107623,5.5,,21.0,,,,Arizona Cardinals / 5th / 167th pick / 2009,41.005821,offense,offensive_lineman,OG,Yes
3,2009,Rashad Johnson\JohnRa98,23.0,Alabama,1.8034,92.079251,4.49,93.98,15.0,304.8,7.09,4.23,Arizona Cardinals / 3rd / 95th pick / 2009,28.312463,defense,defensive_back,FS,Yes
4,2009,Cody Brown\BrowCo96,22.0,Connecticut,1.8796,110.676538,4.76,92.71,26.0,304.8,7.1,4.4,Arizona Cardinals / 2nd / 63rd pick / 2009,31.327425,defense,line_backer,OLB,Yes


In [3]:
# Columns that the rest of this notebook never reads
bad_columns = [
    'Year',
    'Player',
    'Age',
    'School',
    'Drafted..tm.rnd.yr.',
    'Player_Type',
    'Position',
]

# NOTE: nfl_df is reassigned, so running this cell a second time raises a
# KeyError (the labels are already gone) -- re-run the load cell first.
nfl_df = nfl_df.drop(columns=bad_columns)
nfl_df.head()

Unnamed: 0,Height,Weight,Sprint_40yd,Vertical_Jump,Bench_Press_Reps,Broad_Jump,Agility_3cone,Shuttle,BMI,Position_Type,Drafted
0,1.8542,106.594207,4.38,85.09,25.0,325.12,,,31.004194,backs_receivers,Yes
1,1.8796,118.387609,4.84,83.82,27.0,292.1,7.38,4.45,33.510073,defensive_lineman,Yes
2,2.0066,165.107623,5.5,,21.0,,,,41.005821,offensive_lineman,Yes
3,1.8034,92.079251,4.49,93.98,15.0,304.8,7.09,4.23,28.312463,defensive_back,Yes
4,1.8796,110.676538,4.76,92.71,26.0,304.8,7.1,4.4,31.327425,line_backer,Yes


In [4]:
# ENCODE THE POSITION_TYPE AND DRAFTED COLUMNS
categorical_columns = ['Position_Type', 'Drafted']

# Create the OneHotEncoder instance with dense (ndarray) output.
# The keyword was renamed from `sparse` to `sparse_output` in scikit-learn 1.2
# and the old spelling was later removed, so try the current name first.
try:
    enc = OneHotEncoder(sparse_output=False)
except TypeError:
    enc = OneHotEncoder(sparse=False)

# Fit the encoder. The two-column selection is already 2-D, so it can be
# passed as-is -- no reshape is required.
encoded_values = enc.fit_transform(nfl_df[categorical_columns].values)

# Build the "<column>_<category>" labels. `get_feature_names` was replaced by
# `get_feature_names_out`; fall back only when the new method is missing.
if hasattr(enc, "get_feature_names_out"):
    encoded_names = enc.get_feature_names_out(categorical_columns)
else:
    encoded_names = enc.get_feature_names(categorical_columns)

# Produce the encoded DataFrame. Reusing nfl_df's index keeps the rows aligned
# for the index-based merge in the next cell.
nfl_df_encoded = pd.DataFrame(encoded_values, columns=encoded_names, index=nfl_df.index)
nfl_df_encoded.head()



Unnamed: 0,Position_Type_backs_receivers,Position_Type_defensive_back,Position_Type_defensive_lineman,Position_Type_kicking_specialist,Position_Type_line_backer,Position_Type_offensive_lineman,Position_Type_other_special,Drafted_No,Drafted_Yes
0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
2,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0
3,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0


In [5]:
# Merge the two DataFrames together on their index, then drop the original
# Position_Type and Drafted columns now that their one-hot versions exist.
# `columns=` is used because pandas 2.0 no longer accepts the axis as a
# positional argument to drop().
nfl_df = (
    nfl_df
    .merge(nfl_df_encoded, left_index=True, right_index=True)
    .drop(columns=["Position_Type", "Drafted"])
)
nfl_df.head()

  


Unnamed: 0,Height,Weight,Sprint_40yd,Vertical_Jump,Bench_Press_Reps,Broad_Jump,Agility_3cone,Shuttle,BMI,Position_Type_backs_receivers,Position_Type_defensive_back,Position_Type_defensive_lineman,Position_Type_kicking_specialist,Position_Type_line_backer,Position_Type_offensive_lineman,Position_Type_other_special,Drafted_No,Drafted_Yes
0,1.8542,106.594207,4.38,85.09,25.0,325.12,,,31.004194,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,1.8796,118.387609,4.84,83.82,27.0,292.1,7.38,4.45,33.510073,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
2,2.0066,165.107623,5.5,,21.0,,,,41.005821,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0
3,1.8034,92.079251,4.49,93.98,15.0,304.8,7.09,4.23,28.312463,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,1.8796,110.676538,4.76,92.71,26.0,304.8,7.1,4.4,31.327425,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0


In [6]:
# Keep only rows with no missing values, then remove Drafted_No so that
# Drafted_Yes is the single remaining draft indicator column.
# NOTE: this rebinds nfl_df_encoded from the encoder-only frame to the full,
# cleaned feature/target frame used by the cells below.
nfl_df_encoded = (
    nfl_df
    .dropna()
    .drop(columns="Drafted_No")
)
nfl_df_encoded

Unnamed: 0,Height,Weight,Sprint_40yd,Vertical_Jump,Bench_Press_Reps,Broad_Jump,Agility_3cone,Shuttle,BMI,Position_Type_backs_receivers,Position_Type_defensive_back,Position_Type_defensive_lineman,Position_Type_kicking_specialist,Position_Type_line_backer,Position_Type_offensive_lineman,Position_Type_other_special,Drafted_Yes
1,1.8796,118.387609,4.84,83.82,27.0,292.10,7.38,4.45,33.510073,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
3,1.8034,92.079251,4.49,93.98,15.0,304.80,7.09,4.23,28.312463,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
4,1.8796,110.676538,4.76,92.71,26.0,304.80,7.10,4.40,31.327425,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0
7,2.0320,140.160042,5.32,55.88,19.0,238.76,7.87,4.88,33.945078,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0
8,1.8796,120.655570,4.53,88.90,28.0,304.80,7.46,4.43,34.152029,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3468,1.9558,146.056743,5.05,74.93,21.0,261.62,8.34,4.87,38.183236,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
3469,1.8034,95.254398,4.48,91.44,17.0,297.18,7.03,4.20,29.288755,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
3470,1.8542,97.522360,4.43,96.52,13.0,307.34,7.00,4.22,28.365539,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3474,1.8034,88.904105,4.47,100.33,15.0,342.90,6.82,4.14,27.336171,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
# Target: the Drafted_Yes indicator as a NumPy array
y = nfl_df_encoded["Drafted_Yes"].values

# Features: every remaining column
X = nfl_df_encoded.drop(columns="Drafted_Yes")

# Separate into training and testing sets, stratified on the target so both
# splits keep the same drafted / not-drafted proportions
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=5,
    stratify=y,
)

# Fit a StandardScaler on the training split only; fit() returns the scaler
# itself, so X_scaler and scaler refer to the same fitted object
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Apply the training-set scaling to both splits
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [8]:
# Display the scaled training features produced by X_scaler.transform
X_train_scaled

array([[ 1.52230405,  1.42831588,  2.23158403, ..., -0.36283299,
         2.00773697, -0.03928371],
       [-0.71673352, -0.91576638, -0.63126226, ..., -0.36283299,
        -0.49807321, -0.03928371],
       [-0.71673352, -0.33512215,  0.45025745, ...,  2.75608898,
        -0.49807321, -0.03928371],
       ...,
       [ 0.40278526, -0.31361681, -0.34497763, ...,  2.75608898,
        -0.49807321, -0.03928371],
       [ 0.77595819, -0.89426104, -0.72669047, ..., -0.36283299,
        -0.49807321, -0.03928371],
       [-1.08990645, -0.91576638, -1.07659391, ..., -0.36283299,
        -0.49807321, -0.03928371]])

In [9]:
# Take the input width from the scaled training data instead of hard-coding
# it, so the model stays in sync if the feature columns ever change.
n_features = X_train_scaled.shape[1]

# Create the Keras Sequential model
nn_model = tf.keras.models.Sequential()

# Declare the input shape explicitly: one value per feature column
nn_model.add(tf.keras.Input(shape=(n_features,)))

# Add Dense Layers: two tanh hidden layers, then a single sigmoid unit that
# outputs a value between 0 and 1 for the binary Drafted_Yes target
nn_model.add(tf.keras.layers.Dense(units=10, activation='tanh'))
nn_model.add(tf.keras.layers.Dense(units=31, activation='tanh'))
nn_model.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

In [10]:
# Check the structure of the Sequential model:
# prints each layer with its output shape and parameter count, plus totals
nn_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 10)                170       
                                                                 
 dense_1 (Dense)             (None, 31)                341       
                                                                 
 dense_2 (Dense)             (None, 1)                 32        
                                                                 
Total params: 543
Trainable params: 543
Non-trainable params: 0
_________________________________________________________________


In [11]:
# Configure training: binary cross-entropy loss, the Adam optimizer,
# and accuracy as the reported metric
nn_model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [12]:
# Train on the scaled training split for 50 epochs and keep whatever
# fit() returns in fit_model
fit_model = nn_model.fit(
    x=X_train_scaled,
    y=y_train,
    epochs=50,
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [13]:
# Score the trained network on the held-out, scaled test split;
# evaluate() yields the loss followed by the accuracy metric
model_loss, model_accuracy = nn_model.evaluate(
    x=X_test_scaled,
    y=y_test,
    verbose=2,
)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

14/14 - 1s - loss: 0.5681 - accuracy: 0.7367 - 645ms/epoch - 46ms/step
Loss: 0.5681436657905579, Accuracy: 0.7367205619812012
