### Import Dependencies

In [1]:
import pandas as pd
import numpy as np
import os
import re
import matplotlib as plt
import sklearn as skl
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from tensorflow.keras.callbacks import ModelCheckpoint

### Formulas

In [2]:
# Generate our categorical variable list
def cat_columns(df):
    cats = df.dtypes[df.dtypes == "object"].index.tolist()
    return cats

In [3]:
# Encode categorical columns and merge with primary dataframe
def encode_merge(df, cat_list):
    for i in cat_list:
        encode_df = pd.DataFrame(enc.fit_transform(df[i].values.reshape(-1,1)))
        encode_df.columns = enc.get_feature_names([i])
        df = df.merge(encode_df,left_index=True,right_index=True).drop(i,1)
    return df

### Transform/Scale/Model

In [53]:
allteam_df = pd.read_csv('Resources/allteam_csvs/allteam_df.csv')
allteam_df = allteam_df.drop(columns='Unnamed: 0')
allteam_df = allteam_df.drop(columns=['Year', "Day"])
allteam_df

Unnamed: 0,Week,Base_Team,Opp,Home,After_Bye,Wins Tally,PF Tally,PA Tally,OPassY Tally,ORushY Tally,...,TO_lost Tally,DPassY Tally,DRushY Tally,DTotYd Tally,TO_won Tally,Head_Coach,OC,DC,QB,Results
0,1,49ers,Buccaneers,0.0,0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,Kyle Shanahan,Kyle Shanahan,Robert Saleh,Jimmy Garoppolo,1.0
1,2,49ers,Bengals,0.0,0,1.0,31.0,17.0,158.0,98.0,...,2.0,174.0,121.0,295.0,4.0,Kyle Shanahan,Kyle Shanahan,Robert Saleh,Jimmy Garoppolo,1.0
2,3,49ers,Steelers,1.0,0,2.0,72.0,34.0,470.0,357.0,...,3.0,465.0,146.0,611.0,5.0,Kyle Shanahan,Kyle Shanahan,Robert Saleh,Jimmy Garoppolo,1.0
3,5,49ers,Browns,1.0,1,3.0,96.0,54.0,738.0,525.0,...,8.0,625.0,225.0,850.0,7.0,Kyle Shanahan,Kyle Shanahan,Robert Saleh,Jimmy Garoppolo,1.0
4,6,49ers,Rams,0.0,0,4.0,127.0,57.0,909.0,800.0,...,8.0,703.0,327.0,1030.0,11.0,Kyle Shanahan,Kyle Shanahan,Robert Saleh,Jimmy Garoppolo,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5115,13,Titans,Jaguars,1.0,0,5.0,257.0,218.0,1985.0,1294.0,...,21.0,2745.0,1223.0,3968.0,21.0,Jeff Fisher,Mike Heimerdinger,Chuck Cecil,Vince Young,0.0
5116,14,Titans,Colts,1.0,0,5.0,263.0,235.0,2148.0,1351.0,...,23.0,2864.0,1481.0,4345.0,21.0,Jeff Fisher,Mike Heimerdinger,Chuck Cecil,Vince Young,0.0
5117,15,Titans,Texans,1.0,0,5.0,291.0,265.0,2392.0,1472.0,...,25.0,3176.0,1568.0,4744.0,21.0,Jeff Fisher,Mike Heimerdinger,Chuck Cecil,Vince Young,1.0
5118,16,Titans,Chiefs,0.0,0,6.0,322.0,282.0,2604.0,1619.0,...,26.0,3469.0,1598.0,5067.0,22.0,Jeff Fisher,Mike Heimerdinger,Chuck Cecil,Vince Young,0.0


In [54]:
enc = OneHotEncoder(sparse=False)

In [55]:
encoded_df = encode_merge(allteam_df, cat_list=cat_columns(allteam_df))

In [56]:
list(encoded_df.columns.values)

['Week',
 'Home',
 'After_Bye',
 'Wins Tally',
 'PF Tally',
 'PA Tally',
 'OPassY Tally',
 'ORushY Tally',
 'OTotYd Tally',
 'TO_lost Tally',
 'DPassY Tally',
 'DRushY Tally',
 'DTotYd Tally',
 'TO_won Tally',
 'Results',
 'Base_Team_49ers',
 'Base_Team_Bears',
 'Base_Team_Bengals',
 'Base_Team_Bills',
 'Base_Team_Broncos',
 'Base_Team_Browns',
 'Base_Team_Buccaneers',
 'Base_Team_Cardinals',
 'Base_Team_Chargers',
 'Base_Team_Chiefs',
 'Base_Team_Colts',
 'Base_Team_Cowboys',
 'Base_Team_Dolphins',
 'Base_Team_Eagles',
 'Base_Team_Falcons',
 'Base_Team_Giants',
 'Base_Team_Jaguars',
 'Base_Team_Jets',
 'Base_Team_Lions',
 'Base_Team_Packers',
 'Base_Team_Panthers',
 'Base_Team_Patriots',
 'Base_Team_Raiders',
 'Base_Team_Rams',
 'Base_Team_Ravens',
 'Base_Team_Saints',
 'Base_Team_Seahawks',
 'Base_Team_Steelers',
 'Base_Team_Texans',
 'Base_Team_Titans',
 'Base_Team_Vikings',
 'Base_Team_Washington',
 'Opp_49ers',
 'Opp_Bears',
 'Opp_Bengals',
 'Opp_Bills',
 'Opp_Broncos',
 'Opp_Brow

In [57]:
# Fit the StandardScaler
scaler = StandardScaler()

scaler.fit(encoded_df)
scaled_data = scaler.transform(encoded_df)

In [60]:
# Create a DataFrame with the scaled data
transformed_scaled_data = pd.DataFrame(scaled_data, columns=encoded_df.columns)
transformed_scaled_data = transformed_scaled_data.drop(columns=['Results', 'Week', 'Home', 'After_Bye'])
transformed_scaled_data['Results'] = encoded_df['Results']
transformed_scaled_data['Week'] = encoded_df['Week']
transformed_scaled_data['Home'] = encoded_df['Home']
transformed_scaled_data['After_Bye'] = encoded_df['After_Bye']
transformed_scaled_data.head()

Unnamed: 0,Wins Tally,PF Tally,PA Tally,OPassY Tally,ORushY Tally,OTotYd Tally,TO_lost Tally,DPassY Tally,DRushY Tally,DTotYd Tally,...,QB_Tom Savage,QB_Tony Romo,QB_Trevor Siemian,QB_Tyrod Taylor,QB_Vince Young,QB_Zach Mettenberger,Results,Week,Home,After_Bye
0,-1.273555,-1.515705,-1.544165,-1.571823,-1.498599,-1.595272,-1.415325,-1.601472,-1.523287,-1.602728,...,-0.055989,-0.110715,-0.074154,-0.094165,-0.052363,-0.054206,1.0,1,0.0,0
1,-0.933444,-1.240405,-1.39018,-1.432808,-1.322319,-1.439375,-1.162818,-1.445319,-1.301703,-1.422016,...,-0.055989,-0.110715,-0.074154,-0.094165,-0.052363,-0.054206,1.0,2,0.0,0
2,-0.593334,-0.876299,-1.236196,-1.158298,-0.856437,-1.091651,-1.036564,-1.184168,-1.255921,-1.228438,...,-0.055989,-0.110715,-0.074154,-0.094165,-0.052363,-0.054206,1.0,3,1.0,0
3,-0.253223,-0.663164,-1.055038,-0.922501,-0.554242,-0.826139,-0.405295,-1.04058,-1.11125,-1.08203,...,-0.055989,-0.110715,-0.074154,-0.094165,-0.052363,-0.054206,1.0,5,1.0,1
4,0.086888,-0.387864,-1.027864,-0.772048,-0.059579,-0.554536,-0.405295,-0.970581,-0.92446,-0.971765,...,-0.055989,-0.110715,-0.074154,-0.094165,-0.052363,-0.054206,1.0,6,0.0,0


In [61]:
transformed_scaled_data = transformed_scaled_data.dropna()

In [62]:
# Split our preprocessed data into our features and target arrays
y = transformed_scaled_data["Results"].values
X = transformed_scaled_data.drop(["Results"],1).values

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Define the model - deep neural net
number_input_features = len(X_train[0])
hidden_nodes_layer1 =  100
hidden_nodes_layer2 =  75
hidden_nodes_layer3 =  50
hidden_nodes_layer4 =  25
hidden_nodes_layer5 =  10
hidden_nodes_layer6 =  5

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu"))
# Add hidden layers
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer3, activation="relu"))
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer4, activation="relu"))
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer5, activation="relu"))
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer6, activation="relu"))
# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))
# Check the structure of the model
nn.summary()

# Compile the model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model
fit_model = nn.fit(X_train,y_train,epochs=100)

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_17 (Dense)             (None, 100)               44700     
_________________________________________________________________
dense_18 (Dense)             (None, 75)                7575      
_________________________________________________________________
dense_19 (Dense)             (None, 50)                3800      
_________________________________________________________________
dense_20 (Dense)             (None, 25)                1275      
_________________________________________________________________
dense_21 (Dense)             (None, 10)                260       
_________________________________________________________________
dense_22 (Dense)             (None, 5)                 55        
_________________________________________________________________
dense_23 (Dense)             (None, 1)                

In [63]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

40/40 - 0s - loss: 2.5101 - accuracy: 0.5656
Loss: 2.510105848312378, Accuracy: 0.565625011920929


In [65]:
# Split our preprocessed data into our features and target arrays
y = transformed_scaled_data["Results"].values
X = transformed_scaled_data.drop(["Results"],1).values

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Define the logistic regression model
log_classifier = LogisticRegression(solver="lbfgs",max_iter=200)

# Train the model
log_classifier.fit(X_train,y_train)

# Evaluate the model
y_pred = log_classifier.predict(X_test)
print(f" Logistic regression model accuracy: {accuracy_score(y_test,y_pred):.3f}")

 Logistic regression model accuracy: 0.601


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [66]:
# Split our preprocessed data into our features and target arrays
y = transformed_scaled_data["Results"].values
X = transformed_scaled_data.drop(["Results"],1).values

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Define the model - deep neural net
number_input_features = len(X_train[0])
hidden_nodes_layer1 =  8
hidden_nodes_layer2 =  5

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu"))
# Add hidden layers
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))
# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))
# Check the structure of the model
nn.summary()

# Compile the model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model
fit_model = nn.fit(X_train,y_train,epochs=100)

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_24 (Dense)             (None, 8)                 3576      
_________________________________________________________________
dense_25 (Dense)             (None, 5)                 45        
_________________________________________________________________
dense_26 (Dense)             (None, 1)                 6         
Total params: 3,627
Trainable params: 3,627
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
E

Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [67]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

40/40 - 0s - loss: 1.3151 - accuracy: 0.5805
Loss: 1.315056562423706, Accuracy: 0.5804687738418579


In [68]:
# Split our preprocessed data into our features and target arrays
y = transformed_scaled_data["Results"].values
X = transformed_scaled_data.drop(["Results"],1).values

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y)

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Create the SVM model
svm = SVC(kernel='linear')

# Train the model
svm.fit(X_train, y_train)

# Evaluate the model
y_pred = svm.predict(X_test_scaled)
print(f" SVM model accuracy: {accuracy_score(y_test,y_pred):.3f}")

 SVM model accuracy: 0.565


In [25]:
# Split our preprocessed data into our features and target arrays
y = transformed_scaled_data["Results"].values
X = transformed_scaled_data.drop(["Results"],1).values

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Create a random forest classifier.
rf_model = RandomForestClassifier(n_estimators=128, random_state=78)

# Fitting the model
rf_model = rf_model.fit(X_train_scaled, y_train)

# Evaluate the model
y_pred = rf_model.predict(X_test_scaled)
print(f" Random forest predictive accuracy: {accuracy_score(y_test,y_pred):.3f}")

 Random forest predictive accuracy: 0.613
 Random forest predictive accuracy: 0.613


## Weekly Modeling

In [4]:
week1 = pd.read_csv('Resources/weekly_csvs/week1.csv')
week2 = pd.read_csv('Resources/weekly_csvs/week2.csv')
week3 = pd.read_csv('Resources/weekly_csvs/week3.csv')
week4 = pd.read_csv('Resources/weekly_csvs/week4.csv')
week5 = pd.read_csv('Resources/weekly_csvs/week5.csv')
week6 = pd.read_csv('Resources/weekly_csvs/week6.csv')
week7 = pd.read_csv('Resources/weekly_csvs/week7.csv')
week8 = pd.read_csv('Resources/weekly_csvs/week8.csv')
week9 = pd.read_csv('Resources/weekly_csvs/week9.csv')
week10 = pd.read_csv('Resources/weekly_csvs/week10.csv')
week11 = pd.read_csv('Resources/weekly_csvs/week11.csv')
week12 = pd.read_csv('Resources/weekly_csvs/week12.csv')
week13 = pd.read_csv('Resources/weekly_csvs/week13.csv')
week14 = pd.read_csv('Resources/weekly_csvs/week14.csv')
week15 = pd.read_csv('Resources/weekly_csvs/week15.csv')
week16 = pd.read_csv('Resources/weekly_csvs/week16.csv')
week17 = pd.read_csv('Resources/weekly_csvs/week17.csv')

In [5]:
week1 = week1.drop(columns='Unnamed: 0')
week2 = week2.drop(columns='Unnamed: 0')
week3 = week3.drop(columns='Unnamed: 0')
week4 = week4.drop(columns='Unnamed: 0')
week5 = week5.drop(columns='Unnamed: 0')
week6 = week6.drop(columns='Unnamed: 0')
week7 = week7.drop(columns='Unnamed: 0')
week8 = week8.drop(columns='Unnamed: 0')
week9 = week9.drop(columns='Unnamed: 0')
week10 = week10.drop(columns='Unnamed: 0')
week11 = week11.drop(columns='Unnamed: 0')
week12 = week12.drop(columns='Unnamed: 0')
week13 = week13.drop(columns='Unnamed: 0')
week14 = week14.drop(columns='Unnamed: 0')
week15 = week15.drop(columns='Unnamed: 0')
week16 = week16.drop(columns='Unnamed: 0')
week17 = week17.drop(columns='Unnamed: 0')

### Week 1 - 58.7%

In [9]:
enc = OneHotEncoder(sparse=False)
encoded_df = encode_merge(week1, cat_list=cat_columns(week1))

# Fit the StandardScaler
scaler = StandardScaler()

scaler.fit(encoded_df)
scaled_data = scaler.transform(encoded_df)

# Create a DataFrame with the scaled data
transformed_scaled_data = pd.DataFrame(scaled_data, columns=encoded_df.columns)
transformed_scaled_data = transformed_scaled_data.drop(columns=['Results', 'Week', 'Home', 'After_Bye', 'Year'])
transformed_scaled_data['Year'] = encoded_df['Year']
transformed_scaled_data['Results'] = encoded_df['Results']
transformed_scaled_data['Week'] = encoded_df['Week']
transformed_scaled_data['Home'] = encoded_df['Home']
transformed_scaled_data['After_Bye'] = encoded_df['After_Bye']
transformed_scaled_data.head()

Unnamed: 0,Wins Tally,PF Tally,PA Tally,OPassY Tally,ORushY Tally,OTotYd Tally,TO_lost Tally,DPassY Tally,DRushY Tally,DTotYd Tally,...,QB_Tony Romo,QB_Trevor Siemian,QB_Tyrod Taylor,QB_Vince Young,QB_Zach Mettenberger,Year,Results,Week,Home,After_Bye
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.112867,-0.09759,-0.09759,-0.056166,-0.056166,2019,1.0,1,0.0,0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.112867,-0.09759,-0.09759,-0.056166,-0.056166,2018,0.0,1,0.0,0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.112867,-0.09759,-0.09759,-0.056166,-0.056166,2017,0.0,1,1.0,0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.112867,-0.09759,-0.09759,-0.056166,-0.056166,2016,1.0,1,1.0,0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.112867,-0.09759,-0.09759,-0.056166,-0.056166,2015,1.0,1,1.0,0


In [10]:
transformed_scaled_data = transformed_scaled_data.dropna()

In [14]:
# Split our preprocessed data into our features and target arrays
y = transformed_scaled_data["Results"].values
X = transformed_scaled_data.drop(["Results"],1).values

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Define the model - deep neural net
number_input_features = len(X_train[0])
hidden_nodes_layer1 =  300
hidden_nodes_layer2 =  225
hidden_nodes_layer3 =  150
hidden_nodes_layer4 =  100
hidden_nodes_layer5 =  75
hidden_nodes_layer6 =  25

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu"))
# Add hidden layers
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer3, activation="relu"))
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer4, activation="relu"))
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer5, activation="relu"))
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer6, activation="relu"))
# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))
# Check the structure of the model
nn.summary()

# Compile the model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model
fit_model = nn.fit(X_train,y_train,epochs=100)

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_7 (Dense)              (None, 300)               134700    
_________________________________________________________________
dense_8 (Dense)              (None, 225)               67725     
_________________________________________________________________
dense_9 (Dense)              (None, 150)               33900     
_________________________________________________________________
dense_10 (Dense)             (None, 100)               15100     
_________________________________________________________________
dense_11 (Dense)             (None, 75)                7575      
_________________________________________________________________
dense_12 (Dense)             (None, 25)                1900      
_________________________________________________________________
dense_13 (Dense)             (None, 1)                

In [15]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

3/3 - 0s - loss: 1.4851 - accuracy: 0.5875
Loss: 1.4850810766220093, Accuracy: 0.5874999761581421


### Week 17 - 61.3%

In [16]:
enc = OneHotEncoder(sparse=False)
encoded_df = encode_merge(week17, cat_list=cat_columns(week17))

# Fit the StandardScaler
scaler = StandardScaler()

scaler.fit(encoded_df)
scaled_data = scaler.transform(encoded_df)

# Create a DataFrame with the scaled data
transformed_scaled_data = pd.DataFrame(scaled_data, columns=encoded_df.columns)
transformed_scaled_data = transformed_scaled_data.drop(columns=['Results', 'Week', 'Home', 'After_Bye', 'Year'])
transformed_scaled_data['Year'] = encoded_df['Year']
transformed_scaled_data['Results'] = encoded_df['Results']
transformed_scaled_data['Week'] = encoded_df['Week']
transformed_scaled_data['Home'] = encoded_df['Home']
transformed_scaled_data['After_Bye'] = encoded_df['After_Bye']

transformed_scaled_data = transformed_scaled_data.dropna()

In [22]:
# Split our preprocessed data into our features and target arrays
y = transformed_scaled_data["Results"].values
X = transformed_scaled_data.drop(["Results"],1).values

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Define the model - deep neural net
number_input_features = len(X_train[0])
hidden_nodes_layer1 =  300
hidden_nodes_layer2 =  225
hidden_nodes_layer3 =  150
hidden_nodes_layer4 =  100
hidden_nodes_layer5 =  75
hidden_nodes_layer6 =  25

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu"))
# Add hidden layers
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer3, activation="relu"))
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer4, activation="relu"))
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer5, activation="relu"))
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer6, activation="relu"))
# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))
# Check the structure of the model
nn.summary()

# Compile the model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model
fit_model = nn.fit(X_train,y_train,epochs=100)

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_28 (Dense)             (None, 300)               132600    
_________________________________________________________________
dense_29 (Dense)             (None, 225)               67725     
_________________________________________________________________
dense_30 (Dense)             (None, 150)               33900     
_________________________________________________________________
dense_31 (Dense)             (None, 100)               15100     
_________________________________________________________________
dense_32 (Dense)             (None, 75)                7575      
_________________________________________________________________
dense_33 (Dense)             (None, 25)                1900      
_________________________________________________________________
dense_34 (Dense)             (None, 1)                

In [23]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

3/3 - 0s - loss: 1.4408 - accuracy: 0.6000
Loss: 1.4408079385757446, Accuracy: 0.6000000238418579


In [26]:
# Split our preprocessed data into our features and target arrays
y = transformed_scaled_data["Results"].values
X = transformed_scaled_data.drop(["Results"],1).values

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Create a random forest classifier.
rf_model = RandomForestClassifier(n_estimators=128, random_state=78)

# Fitting the model
rf_model = rf_model.fit(X_train_scaled, y_train)

# Evaluate the model
y_pred = rf_model.predict(X_test_scaled)
print(f" Random forest predictive accuracy: {accuracy_score(y_test,y_pred):.3f}")

 Random forest predictive accuracy: 0.613
