# Logistic Regression Baseline

This notebook fits a Logistic Regression model to establish a baseline value of our loss function (log loss), against which the models we develop later can be compared.

In [0]:
%run /Workspace/Repos/anthony.m.quagliata@vanderbilt.edu/NFL-Capstone/03-Models/Model_Evaluation_Functions

## Read Data and Import Libraries

In [0]:
#Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import warnings
from pyspark.sql import SparkSession
# Silence every Python warning for the rest of the session so cell output stays
# readable. NOTE: this is a blanket filter, so library warnings (deprecations,
# solver convergence notices, ...) are hidden as well.
warnings.filterwarnings("ignore")

# Show all columns when a DataFrame is displayed instead of truncating with "...".
pd.set_option("display.max_columns", None)

In [0]:
# Folder that holds every CSV input used by this notebook.
directory = "/dbfs/mnt/nfl/"

# Read each CSV into a DataFrame of the same name; files are read in the order listed.
games, players, plays, tackles, train, val, test = (
    pd.read_csv(f"{directory}{stem}.csv")
    for stem in ("games", "players", "plays", "tackles", "train", "val", "test")
)

In [0]:
# Peek at the first five rows of the training split as a sanity check on the load.
train.head(n=5)

## Logistic Regression Model

In [0]:
#import libraries
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np


# Column the baseline model is trained to predict.
target_col = "tackle_multiple"

# Columns excluded from the feature matrix: judging by their names these are
# ID-like keys plus both tackle label columns (verify against the data dictionary).
cols_to_remove = ["Week", "gameId", "playId", "nflId", "frameId", "gamePlayId", "tackle_single", "tackle_multiple"]


def _features_and_target(frame):
    """Split one dataset into (feature DataFrame, label Series).

    The features are `frame` without `cols_to_remove`; the label is the
    `target_col` column of the same frame.
    """
    return frame.drop(columns=cols_to_remove), frame[target_col]


x_train, y_train = _features_and_target(train)
x_val, y_val = _features_and_target(val)
x_test, y_test = _features_and_target(test)

In [0]:
# Baseline model: rescale the features with MinMaxScaler, then fit a
# default-configured LogisticRegression on the scaled values.
baseline_steps = [
    ("scaler", MinMaxScaler()),
    ("classifier", LogisticRegression()),
]
lr_pipeline = Pipeline(steps=baseline_steps)

# Fit on the training split only. fit() is left as the last expression so the
# cell still displays the fitted pipeline.
lr_pipeline.fit(x_train, y_train)

## Evaluation Metrics

In [0]:
# predict_proba returns one column per class; column 1 is the second entry of the
# classifier's class ordering (the positive class if tackle_multiple is 0/1 — confirm).
train_proba_matrix = lr_pipeline.predict_proba(x_train)
probabilities_train = train_proba_matrix[:, 1]

# Log loss on the training split (in-sample).
log_loss(y_train, probabilities_train)

In [0]:
# predict_proba returns one column per class; column 1 is the second entry of the
# classifier's class ordering (the positive class if tackle_multiple is 0/1 — confirm).
val_proba_matrix = lr_pipeline.predict_proba(x_val)
probabilities_val = val_proba_matrix[:, 1]

# Log loss on the validation split.
log_loss(y_val, probabilities_val)

In [0]:
# predict_proba returns one column per class; column 1 is the second entry of the
# classifier's class ordering (the positive class if tackle_multiple is 0/1 — confirm).
test_proba_matrix = lr_pipeline.predict_proba(x_test)
probabilities_test = test_proba_matrix[:, 1]

# Log loss on the held-out test split.
log_loss(y_test, probabilities_test)

In [0]:
# ROC plot for the training split (going by the helper's name). plotROC is not
# defined in this notebook — presumably it comes from the %run'd
# Model_Evaluation_Functions notebook; verify.
plotROC(y_train, probabilities_train)

In [0]:
# ROC plot for the validation split (going by the helper's name). plotROC is not
# defined in this notebook — presumably it comes from the %run'd
# Model_Evaluation_Functions notebook; verify.
plotROC(y_val, probabilities_val)

In [0]:
# ROC plot for the test split (going by the helper's name). plotROC is not
# defined in this notebook — presumably it comes from the %run'd
# Model_Evaluation_Functions notebook; verify.
plotROC(y_test, probabilities_test)

In [0]:
# One youdens_j value per split; each is later passed as `threshold=` to
# binaryClfMetrics for the same split. youdens_j is not defined in this notebook —
# presumably it comes from the %run'd Model_Evaluation_Functions notebook; verify.
# NOTE(review): the val and test values are derived from the val/test labels
# themselves, so metrics computed with them are optimistic. Consider choosing the
# threshold on train (or val) only and reusing it for the held-out splits.
youdens_j_value_train = youdens_j(y_train, probabilities_train)
youdens_j_value_val = youdens_j(y_val, probabilities_val)
youdens_j_value_test = youdens_j(y_test, probabilities_test)

In [0]:
# Thresholded classification metrics on the training split, using the threshold
# computed from this same split (in-sample). binaryClfMetrics is an external
# helper, presumably from the %run'd Model_Evaluation_Functions notebook; verify.
binaryClfMetrics(y_train, probabilities_train, threshold=youdens_j_value_train)

In [0]:
# Thresholded classification metrics on the validation split. binaryClfMetrics is
# an external helper, presumably from the %run'd Model_Evaluation_Functions
# notebook; verify.
# NOTE(review): the threshold was computed from the validation labels themselves.
binaryClfMetrics(y_val, probabilities_val, threshold=youdens_j_value_val)

In [0]:
# Thresholded classification metrics on the test split. binaryClfMetrics is an
# external helper, presumably from the %run'd Model_Evaluation_Functions notebook;
# verify.
# NOTE(review): youdens_j_value_test is computed from y_test, so the test labels
# leak into the threshold choice; youdens_j_value_train or youdens_j_value_val
# would give an unbiased held-out estimate.
binaryClfMetrics(y_test, probabilities_test, threshold=youdens_j_value_test)

In [0]:
# Play-level accuracy metric on the training split (going by the helper's name).
# The full `train` frame is passed rather than x_train, presumably because the
# helper needs the play/game ID columns that were dropped from the features —
# verify against Model_Evaluation_Functions.
highest_avg_acc_per_play(train, probabilities_train)

In [0]:
# Play-level accuracy metric on the validation split (going by the helper's name).
# The full `val` frame is passed rather than x_val, presumably because the helper
# needs the play/game ID columns that were dropped from the features — verify
# against Model_Evaluation_Functions.
highest_avg_acc_per_play(val, probabilities_val)

In [0]:
# Play-level accuracy metric on the test split (going by the helper's name).
# The full `test` frame is passed rather than x_test, presumably because the
# helper needs the play/game ID columns that were dropped from the features —
# verify against Model_Evaluation_Functions.
highest_avg_acc_per_play(test, probabilities_test)

In [0]:
# Frame-level tackle accuracy on the training split (going by the helper's name;
# exact definition lives outside this notebook — verify). Takes the full `train`
# frame together with the predicted probabilities for the same rows.
acc_frame_tackle(train,probabilities_train)

In [0]:
# Frame-level tackle accuracy on the validation split (going by the helper's name;
# exact definition lives outside this notebook — verify). Takes the full `val`
# frame together with the predicted probabilities for the same rows.
acc_frame_tackle(val,probabilities_val)

In [0]:
# Frame-level tackle accuracy on the test split (going by the helper's name;
# exact definition lives outside this notebook — verify). Takes the full `test`
# frame together with the predicted probabilities for the same rows.
acc_frame_tackle(test, probabilities_test)

## Model Visualizations

In [0]:
# Tracking data for weeks 1-9 stacked into a single frame with a fresh 0..n-1 index.
# The animation cell at the end of this notebook takes `tracking` as an argument, so
# it must exist after a Restart & Run All. The load is skipped when `tracking` is
# already present in the session to avoid re-reading nine CSV files.
if "tracking" not in globals():
    tracking = pd.concat(
        # The weekly frames live only inside this list, so they are released as
        # soon as the concatenation finishes (no explicit `del` needed).
        [pd.read_csv(f"{directory}tracking_week_{week}.csv") for week in range(1, 10)],
        axis=0,
    ).reset_index(drop=True)

In [0]:
%run /Workspace/Repos/anthony.m.quagliata@vanderbilt.edu/NFL-Capstone/03-Models/Model_Evaluation_Functions

In [0]:
# Animate a single play from the validation split together with the model's
# predicted probabilities.
# `probabilities_val` is used because it is the prediction vector computed from
# `val`, so it lines up row-for-row with the frame passed here; a bare
# `probabilities` name is never defined in this notebook.
# The two trailing integers are presumably the gameId and playId of the play to
# render (positional order kept as-is — confirm against animate_play_probs).
game_id, play_id = 2022090800, 167
animate_play_probs(games, tracking, plays, players, val, probabilities_val, game_id, play_id).show()