# Predicting Win/Loss with Logistic Regression

Load Data
-----

In [1]:
import pandas as pd
cleaned_data = pd.read_csv("data/nfl_filtered.csv")

Fit scikit-learn model
----

In [2]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

headers = cleaned_data.columns
# set up X and Y
y = cleaned_data[["home_wn"]]
X = cleaned_data[['Spread Favorite', 'Over Under','hmhalfsc', 'hpyd',
                          'hpatt', 'hcomp', 'hypa', 'hcomppct', 'hint',
                          'hryd', 'hratt', 'hypr', 'awhalfsc', 'apyd',
                          'apatt', 'acomp', 'aypa', 'acomppct', 'aint',
                          'aryd', 'aratt', 'aypr']]
# split data into training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
# fit model to training data
log_reg = LogisticRegression(solver="lbfgs", max_iter=750).fit(X_train,y_train.values.ravel())
pred = log_reg.predict(X_test)
print(f"{accuracy_score(y_test, pred):.4f}")

0.8174


In [3]:
conf_mtx = confusion_matrix(y_test, pred)
pd.DataFrame(conf_mtx, columns=["Predicted L", "Predicted W"], index=["Actual L", "Actual W"])

Unnamed: 0,Predicted L,Predicted W
Actual L,184,57
Actual W,48,286


Evaluation Metric
----

In [4]:
print(f'Precision: {conf_mtx[1,1] / (conf_mtx[1,1] + conf_mtx[0,1]):.3f}')
print(f'Recall: {conf_mtx[1,1] / (conf_mtx[1,0] + conf_mtx[1,1]):.3f}')
print(f'Accuracy: {(conf_mtx[0,0] + conf_mtx[1,1]) / conf_mtx.sum():.3f}')

Precision: 0.834
Recall: 0.856
Accuracy: 0.817


In [5]:
f'This model correctly predicted the winner and loser of 575 NFL games with {(conf_mtx[0,0] + conf_mtx[1,1]) / conf_mtx.sum():.3f}% accuracy using statistics from the first half of play'

'This model correctly predicted the winner and loser of 575 NFL games with 0.817% accuracy using statistics from the first half of play'