### Import Dependencies

In [1]:
import pandas as pd
import numpy as np
import os
import re
import matplotlib as plt
import sklearn as skl
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from tensorflow.keras.callbacks import ModelCheckpoint

### Helper Functions

In [2]:
# Encode categorical columns and merge with primary dataframe
def encode_merge(df, cat_list, encoder=None):
    """One-hot encode each column in ``cat_list`` and swap it for its indicator columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the columns to encode. It is not modified; a new frame
        is returned.
    cat_list : iterable of str
        Names of the columns in ``df`` to encode.
    encoder : object, optional
        Encoder exposing ``fit_transform`` and ``get_feature_names_out`` (or
        the older ``get_feature_names``). When omitted, the notebook-level
        ``enc`` is used, so existing two-argument calls keep working.

    Returns
    -------
    pandas.DataFrame
        ``df`` with every listed column removed and its encoded columns
        appended on the right, in the order the columns were processed.
    """
    # Fall back to the global encoder only when the caller did not supply one.
    encoder = enc if encoder is None else encoder

    for col in cat_list:
        encoded = encoder.fit_transform(df[col].values.reshape(-1, 1))
        # A sparse result cannot be wrapped in a DataFrame directly; densify it.
        if hasattr(encoded, "toarray"):
            encoded = encoded.toarray()

        # scikit-learn 1.2 removed get_feature_names in favour of get_feature_names_out.
        name_getter = getattr(encoder, "get_feature_names_out", None) or encoder.get_feature_names

        # Reuse df's index: with a default RangeIndex here, the index-on-index
        # merge below would silently drop every row whose label is not 0..n-1.
        encode_df = pd.DataFrame(encoded, columns=name_getter([col]), index=df.index)

        df = df.merge(encode_df, left_index=True, right_index=True).drop(columns=col)
    return df

### Transform/Scale/Model

In [5]:
# Read the all-team CSV from the Resources folder; the bare name on the
# last line makes the frame the cell's displayed output.
all_team_csv = 'Resources/all_team_data.csv'
allteam_df = pd.read_csv(all_team_csv)
allteam_df

Unnamed: 0.1,Unnamed: 0,Week,Day,Results,Entering Wins,Home,Opp,PF,PA,OPassY,ORushY,TO_lost,DPassY,DRushY,TO_won,week_after_bye,year,Home_Team
0,0,1.0,Sun,1.0,0.0,0.0,Buccaneers,31.0,17.0,158.0,98.0,2.0,174.0,121.0,4.0,0.0,2019,49ers
1,1,2.0,Sun,1.0,1.0,0.0,Bengals,41.0,17.0,312.0,259.0,1.0,291.0,25.0,1.0,0.0,2019,49ers
2,2,3.0,Sun,1.0,2.0,1.0,Steelers,24.0,20.0,268.0,168.0,5.0,160.0,79.0,2.0,0.0,2019,49ers
3,4,5.0,Other,1.0,3.0,1.0,Browns,31.0,3.0,171.0,275.0,0.0,78.0,102.0,4.0,1.0,2019,49ers
4,5,6.0,Sun,1.0,4.0,0.0,Rams,20.0,7.0,232.0,99.0,2.0,48.0,109.0,1.0,0.0,2019,49ers
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
166,178,13.0,Sun,0.0,4.0,0.0,Packers,16.0,34.0,172.0,97.0,1.0,274.0,136.0,0.0,0.0,2010,49ers
167,179,14.0,Sun,1.0,4.0,1.0,Seahawks,40.0,21.0,241.0,95.0,0.0,277.0,84.0,5.0,0.0,2010,49ers
168,180,15.0,Other,0.0,5.0,0.0,Chargers,7.0,34.0,131.0,61.0,1.0,266.0,108.0,0.0,0.0,2010,49ers
169,181,16.0,Sun,0.0,5.0,0.0,Rams,17.0,25.0,246.0,85.0,2.0,275.0,60.0,1.0,0.0,2010,49ers


In [None]:
# The `sparse` keyword was renamed to `sparse_output` in scikit-learn 1.2 and
# later removed; try the current name first and fall back for older installs.
try:
    enc = OneHotEncoder(sparse_output=False)
except TypeError:
    enc = OneHotEncoder(sparse=False)

# NOTE(review): `standard_df` and `cat_columns` are not defined in the cells
# shown above — confirm the cells that create them run before this one.
encoded_df = encode_merge(standard_df, cat_list=cat_columns(standard_df))

In [None]:
# Columns that must keep their original values: the target and the two
# calendar identifiers. Everything else is standardised.
passthrough_cols = ['Results', 'Week', 'year']
feature_cols = [col for col in encoded_df.columns if col not in passthrough_cols]

# Fit the StandardScaler on the feature columns only
# NOTE(review): this scaler sees every row, including rows that the later
# train_test_split calls place in the test set — consider scaling only
# inside the modelling cells.
scaler = StandardScaler()
scaled_data = scaler.fit_transform(encoded_df[feature_cols])

# Create a DataFrame with the scaled data. Reusing encoded_df's index keeps
# the column assignments below aligned row-for-row; with a default index any
# non-matching label would become NaN and be dropped by the next cell.
transformed_scaled_data = pd.DataFrame(
    scaled_data, columns=feature_cols, index=encoded_df.index
)
transformed_scaled_data['Results'] = encoded_df['Results']
transformed_scaled_data['Week'] = encoded_df['Week']
transformed_scaled_data['Year'] = encoded_df['year']
transformed_scaled_data.head()

In [None]:
# Keep only the rows in which every column holds a value
rows_complete = transformed_scaled_data.notna().all(axis=1)
transformed_scaled_data = transformed_scaled_data[rows_complete]

In [None]:
# Split our preprocessed data into our features and target arrays
y = transformed_scaled_data["Results"].values
X = transformed_scaled_data.drop(["Results", "Year"],1).values

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Define the model - deep neural net
number_input_features = len(X_train[0])
hidden_nodes_layer1 =  100
hidden_nodes_layer2 =  75
hidden_nodes_layer3 =  50
hidden_nodes_layer4 =  25
hidden_nodes_layer5 =  10
hidden_nodes_layer6 =  5

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu"))
# Add hidden layers
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer3, activation="relu"))
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer4, activation="relu"))
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer5, activation="relu"))
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer6, activation="relu"))
# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))
# Check the structure of the model
nn.summary()

# Compile the model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model
fit_model = nn.fit(X_train,y_train,epochs=100)

In [None]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
# Split our preprocessed data into our features and target arrays
y = transformed_scaled_data["Results"].values
# `columns=` replaces the positional axis argument, which pandas 2.0 rejects
X = transformed_scaled_data.drop(columns=["Results", "Year"]).values

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

# Fit a fresh StandardScaler on the training rows only, rather than
# re-fitting the notebook-level `scaler` in place
X_scaler = StandardScaler().fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Define the logistic regression model
log_classifier = LogisticRegression(solver="lbfgs",max_iter=200)

# Train the model on the scaled features (the scaled arrays were previously
# computed but never used)
log_classifier.fit(X_train_scaled,y_train)

# Evaluate the model on test data transformed by the same scaler
y_pred = log_classifier.predict(X_test_scaled)
print(f" Logistic regression model accuracy: {accuracy_score(y_test,y_pred):.3f}")

In [None]:
# Split our preprocessed data into our features and target arrays
y = transformed_scaled_data["Results"].values
X = transformed_scaled_data.drop(["Results", "Year"],1).values

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Define the model - deep neural net
number_input_features = len(X_train[0])
hidden_nodes_layer1 =  8
hidden_nodes_layer2 =  5

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu"))
# Add hidden layers
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))
# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))
# Check the structure of the model
nn.summary()

# Compile the model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model
fit_model = nn.fit(X_train,y_train,epochs=100)

In [None]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
# Split our preprocessed data into our features and target arrays
y = transformed_scaled_data["Results"].values
# `columns=` replaces the positional axis argument, which pandas 2.0 rejects
X = transformed_scaled_data.drop(columns=["Results", "Year"]).values

# Split the preprocessed data into a training and testing dataset,
# keeping the class proportions of y the same in both parts
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y)

# Fit a fresh StandardScaler on the training rows only, rather than
# re-fitting the notebook-level `scaler` in place
X_scaler = StandardScaler().fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Create the SVM model
svm = SVC(kernel='linear')

# Train the model on the scaled features. It was previously fitted on the
# unscaled X_train while predicting on X_test_scaled, so the model saw
# differently transformed inputs at train and test time.
svm.fit(X_train_scaled, y_train)

# Evaluate the model
y_pred = svm.predict(X_test_scaled)
print(f" SVM model accuracy: {accuracy_score(y_test,y_pred):.3f}")

In [None]:
# Split our preprocessed data into our features and target arrays
y = transformed_scaled_data["Results"].values
# `columns=` replaces the positional axis argument, which pandas 2.0 rejects
X = transformed_scaled_data.drop(columns=["Results", "Year"]).values

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

# Fit a fresh StandardScaler on the training rows only, rather than
# re-fitting the notebook-level `scaler` in place
X_scaler = StandardScaler().fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Create a random forest classifier.
rf_model = RandomForestClassifier(n_estimators=128, random_state=78)

# Fitting the model
rf_model = rf_model.fit(X_train_scaled, y_train)

# Evaluate the model
y_pred = rf_model.predict(X_test_scaled)
print(f" Random forest predictive accuracy: {accuracy_score(y_test,y_pred):.3f}")