## Building The Model
-----

### Setup:
---

In [260]:
# Import dependencies
from pathlib import Path
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
import pandas as pd
import tensorflow as tf

In [261]:
# Mount Drive
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [262]:
# Import the Combined Data from /Wrangling_Data_Exports

# For PC
# wrangled_df = pd.read_csv("Wrangling_Data_Exports/Wrangled_Data.csv", index_col=0)

# For Colab
wrangled_df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/Wrangling_Data_Exports/Wrangled_Data.csv", index_col=0)

# Display Dataframe
wrangled_df.head(5)

Unnamed: 0,Year,Month,Day,Time (EST),Season Week,Venue,City,State,Weather Condition,Temperature (F),...,Home Team Pre-Game Season W/L Ratio,Home Team Pre-Game Season Avg Points For,Home Team Pre-Game Season Avg Points Against,Outcome,Away Team,Away Team Pre-Game Season W Streak,Away Team Pre-Game Season L Streak,Away Team Pre-Game Season W/L Ratio,Away Team Pre-Game Season Avg Points For,Away Team Pre-Game Season Avg Points Against
0,2022.0,September,25,14:25,3.0,State Farm Stadium,Glendale,Arizona,None (retractable roof closed),72.0,...,0.5,25.0,33.5,2.0,Los Angeles Rams,1.0,0.0,0.5,20.5,29.0
1,2022.0,November,6,16:05,9.0,State Farm Stadium,Glendale,Arizona,Sunny,76.0,...,0.38,22.75,26.25,2.0,Seattle Seahawks,3.0,0.0,0.62,26.25,24.88
2,2022.0,November,27,16:05,12.0,State Farm Stadium,Glendale,Arizona,None (retractable roof closed),72.0,...,0.36,21.82,26.91,2.0,Los Angeles Chargers,0.0,2.0,0.5,22.7,25.8
3,2022.0,December,12,20:15,14.0,State Farm Stadium,Glendale,Arizona,None (retractable roof closed),72.0,...,0.33,22.0,26.75,2.0,New England Patriots,0.0,2.0,0.5,20.75,18.83
4,2022.0,December,25,20:20,16.0,State Farm Stadium,Glendale,Arizona,Fair,69.0,...,0.29,20.86,26.57,2.0,Tampa Bay Buccaneers,0.0,2.0,0.43,17.64,20.57


### Prepare the Features and Target Arrays:
---

In [263]:
# Create Target Array
targets= wrangled_df["Outcome"].astype(int).values

# Display Array
targets

array([2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 1, 1, 1, 2, 1, 1,
       1, 2, 1, 1, 1, 2, 1, 2, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2,
       1, 2, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, 2, 1,
       2, 2, 1, 2, 1, 1, 1, 2, 2, 1, 3, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 2,
       2, 2, 2, 2, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2,
       1, 1, 2, 1, 1, 2, 2, 1, 1, 1, 2, 1, 2, 1, 2, 2, 1, 1, 1, 1, 2, 1,
       1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2, 2, 1, 2, 1, 2,
       1, 2, 1, 1, 2, 1, 2, 1, 1, 2, 3, 2, 1, 2, 2, 1, 2, 1, 2, 2, 1, 1,
       1, 1, 2, 1, 1, 2, 1, 2, 2, 1, 1, 2, 2, 1, 1, 1, 2, 1, 2, 2, 2, 2,
       1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 2, 2, 2, 2, 1, 2, 2, 1, 2, 1, 2, 1])

In [264]:
# Create Features Dataframe
features_df = wrangled_df.drop(columns=["Outcome"])

# Testing Dropping More Columns
features_df = wrangled_df.drop(columns=["Year"])
features_df = wrangled_df.drop(columns=["Day"])

# Display Weather Condition Options (Now Parsed Down)
features_df["Weather Condition"].value_counts()

Mostly cloudy                     52
Sunny                             43
None (retractable roof closed)    33
None (indoor stadium)             30
Fair                              20
Partly cloudy                     16
Raining                            8
Showers                            3
Cold                               1
Name: Weather Condition, dtype: int64

In [265]:
# Git Dummies
dummies_features_df = pd.get_dummies(features_df)

# Display Dataframe
dummies_features_df.columns

Index(['Year', 'Season Week', 'Temperature (F)',
       'Home Team Pre-Game Season W Streak',
       'Home Team Pre-Game Season L Streak',
       'Home Team Pre-Game Season W/L Ratio',
       'Home Team Pre-Game Season Avg Points For',
       'Home Team Pre-Game Season Avg Points Against', 'Outcome',
       'Away Team Pre-Game Season W Streak',
       ...
       'Away Team_New Orleans Saints', 'Away Team_New York Giants',
       'Away Team_New York Jets', 'Away Team_Philadelphia Eagles',
       'Away Team_Pittsburgh Steelers', 'Away Team_San Francisco 49ers',
       'Away Team_Seattle Seahawks', 'Away Team_Tampa Bay Buccaneers',
       'Away Team_Tennessee Titans', 'Away Team_Washington Commanders'],
      dtype='object', length=197)

In [267]:
# Grab Dummies Column Names
dummies_columns_array =[]

for column in dummies_features_df.columns:
  dummies_columns_array.append(column)

dummies_columns_array


['Year',
 'Season Week',
 'Temperature (F)',
 'Home Team Pre-Game Season W Streak',
 'Home Team Pre-Game Season L Streak',
 'Home Team Pre-Game Season W/L Ratio',
 'Home Team Pre-Game Season Avg Points For',
 'Home Team Pre-Game Season Avg Points Against',
 'Outcome',
 'Away Team Pre-Game Season W Streak',
 'Away Team Pre-Game Season L Streak',
 'Away Team Pre-Game Season W/L Ratio',
 'Away Team Pre-Game Season Avg Points For',
 'Away Team Pre-Game Season Avg Points Against',
 'Month_December',
 'Month_January',
 'Month_November',
 'Month_October',
 'Month_September',
 'Time (EST)_12:00',
 'Time (EST)_12:30',
 'Time (EST)_13:00',
 'Time (EST)_13:25',
 'Time (EST)_13:30',
 'Time (EST)_14:00',
 'Time (EST)_14:25',
 'Time (EST)_15:05',
 'Time (EST)_15:25',
 'Time (EST)_15:30',
 'Time (EST)_16:05',
 'Time (EST)_16:25',
 'Time (EST)_16:30',
 'Time (EST)_18:15',
 'Time (EST)_19:15',
 'Time (EST)_19:20',
 'Time (EST)_19:30',
 'Time (EST)_20:15',
 'Time (EST)_20:20',
 'Venue_AT&T Stadium',
 'V

In [268]:
# Export List of Columns
with open("/content/drive/MyDrive/Colab Notebooks/Features_Names.txt","w") as outfile:
  outfile.write(f'{dummies_columns_array}')

In [269]:
# Create an Array from the Features Dataframe with Dummies
features_array = dummies_features_df.values

features_array

array([[2022.,    3.,   72., ...,    0.,    0.,    0.],
       [2022.,    9.,   76., ...,    0.,    0.,    0.],
       [2022.,   12.,   72., ...,    0.,    0.,    0.],
       ...,
       [2022.,   12.,   49., ...,    0.,    0.,    0.],
       [2022.,   17.,   62., ...,    0.,    0.,    0.],
       [2022.,   18.,   42., ...,    0.,    0.,    0.]])

In [270]:
# Scale the Data

# Create a Scaler
scaler = StandardScaler()

# Fit the StandardScaler
features_scaler = scaler.fit(features_array)

# Scale the Features
scaled_features = features_scaler.transform(features_array)

scaled_features

array([[ 0.        , -1.19258603,  0.57341892, ..., -0.18128389,
        -0.19425717, -0.16744367],
       [ 0.        , -0.05019593,  0.82995611, ..., -0.18128389,
        -0.19425717, -0.16744367],
       [ 0.        ,  0.52099912,  0.57341892, ..., -0.18128389,
        -0.19425717, -0.16744367],
       ...,
       [ 0.        ,  0.52099912, -0.90166991, ..., -0.18128389,
        -0.19425717, -0.16744367],
       [ 0.        ,  1.47299087, -0.06792405, ..., -0.18128389,
        -0.19425717, -0.16744367],
       [ 0.        ,  1.66338922, -1.35060999, ..., -0.18128389,
        -0.19425717, -0.16744367]])

### Prepare the PCA Model:
---

In [271]:
# Create Primary Component Analysis Model With 95% Explainability
pca_model=PCA(n_components=0.95)

In [272]:
# FIt PCA Model
pca_features = pca_model.fit_transform(scaled_features)

pca_features

array([[ 6.96995310e-02, -5.49283542e+00, -2.23541183e+00, ...,
         2.27157894e+00, -4.42448745e-01,  3.71510758e-01],
       [-8.93413437e-01, -5.32021245e+00, -1.73092975e+00, ...,
        -1.02575071e+00,  1.73783292e+00,  5.62636521e-03],
       [-7.70212781e-02, -5.89780912e+00, -2.50877422e+00, ...,
        -4.17875091e-02,  1.08849865e-01, -1.16981781e+00],
       ...,
       [-8.15594439e-01,  1.86457473e+00, -1.43181331e-01, ...,
        -3.64834098e-01,  8.96451220e-02, -3.55491853e-01],
       [-6.52497474e-01,  7.82278653e-01, -1.05481586e+00, ...,
        -9.34557396e-01, -2.54032242e-01,  5.31289442e-01],
       [-1.60686492e+00,  9.31904453e-01, -8.15847374e-01, ...,
         1.59455320e-01,  1.95624727e+00, -5.02752949e-01]])

In [273]:
# Check Number of PCA Features
pca_features_count = len(pca_features[0])

print(f'PCA Features: {pca_features_count}')

PCA Features: 71


### Prepare the Data for Training Models:
---

In [274]:
# Train/Test Split The Data
X_train, X_test, y_train, y_test = train_test_split(pca_features ,targets,random_state=1)

### Prepare the Random Forest Model:
---

In [275]:
# Create The Model/Models
rf_model = RandomForestClassifier(random_state=1, n_estimators=1000).fit(X_train, y_train)

In [276]:
# Check Training and Testing Scores

print(f'Training Score: {rf_model.score(X_train, y_train)}')
print(f'Testing Score: {rf_model.score(X_test, y_test)}')

Training Score: 1.0
Testing Score: 0.7818181818181819


### Prepare the Nural Network Model:
---

In [277]:
# Count features
features_count = len(X_train[0])
print(f'Total Features: {features_count}')

Total Features: 71


In [278]:
# Define Nural Network Model

nn = tf.keras.models.Sequential()

# Input layer
nn.add(tf.keras.layers.Dense(units=4, activation="relu", input_dim=features_count))

# Second layer
nn.add(tf.keras.layers.Dense(units=2, activation="relu"))

# Third layer
nn.add(tf.keras.layers.Dense(units=2, activation="relu"))

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()

Model: "sequential_9"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_30 (Dense)            (None, 4)                 288       
                                                                 
 dense_31 (Dense)            (None, 2)                 10        
                                                                 
 dense_32 (Dense)            (None, 2)                 6         
                                                                 
 dense_33 (Dense)            (None, 1)                 3         
                                                                 
Total params: 307 (1.20 KB)
Trainable params: 307 (1.20 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [279]:
# Compile the model
nn.compile(loss="binary_crossentropy", optimizer="adam",metrics=["accuracy"])

In [281]:
# Train the model
fit_nn = nn.fit(X_train, y_train, epochs=500)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

In [282]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

2/2 - 0s - loss: -6.1325e+03 - accuracy: 0.5818 - 36ms/epoch - 18ms/step
Loss: -6132.478515625, Accuracy: 0.581818163394928


### Export the Trained Model:
---

In [283]:
# Export the Trained Model to ../04-Creating_Applicaton/Building_Model_Exports as a HDF5 file
rf_model.save("/content/drive/MyDrive/Colab Notebooks/NFL_Random_Forest.HDF5")


AttributeError: 'RandomForestClassifier' object has no attribute 'save'