Importing the required packages.

In [172]:
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.ensemble import StackingClassifier

Importing the data from the .csv storage.

In [173]:
data = pd.DataFrame()

# Import the local data for the plays
for year in range(2020, 2024):
    filename = f"play_by_play_{year}_new.csv"
    if os.path.exists(filename):
        year_data = pd.read_csv(filename)
        data = pd.concat([data, year_data], ignore_index=True)
    else:
        print(f"{filename} not found.")

data.fillna(0, inplace=True)
random_state_choice = 43

Creating the features for the models.

In [174]:
                                                                                                                            # Basic Features
data['red_zone'] = (data['yardline_100'] <= 20).astype(int)                                                                 # Is this within the 20 yard line (red zone)?
data['is_home_team'] = (data['home_team'] == data['posteam']).astype(int)                                                   # Is this the home team?
data['is_late_season'] = (data['week'] >= 10).astype(int)                                                                   # Is this late in the season?
data['is_shotgun'] = data['shotgun'].astype(int)                                                                            # Is the team lined up in shotgun formation?
data['is_no_huddle'] = data['no_huddle'].astype(int)                                                                        # Huddle prior to the play?
data['near_end_of_half'] = (data['half_seconds_remaining'] < 240).astype(int)                                               # Is it within 4 minutes of the end of the half?
data['near_end_of_game'] = (data['game_seconds_remaining'] < 240).astype(int)                                               # Is it within 4 minutes of the end of the g?

                                                                                                                            # Advanced Features
data['score_diff'] = data.apply(                                                                                            # - This one can't even be used cause it doesn't help but it's useful for the others.
    lambda row: row['home_score'] - row['away_score'] if row['is_home_team'] == 1
    else row['away_score'] - row['home_score'], axis=1
)
data['late_cozy_offense'] = ((data['half_seconds_remaining'] <= 240) & (data['score_diff'] > 0)).astype(int)                # - Late in the game, team is in the lead (run usually more likely).
data['late_desperate_offense'] = ((data['half_seconds_remaining'] <= 240) & (data['score_diff'] < 0)).astype(int)           # - Late in the game, team is behind (pass usually more likely).
data['late_super_desperate_offense'] = ((data['half_seconds_remaining'] <= 240) & (data['score_diff'] < -8)).astype(int)    # - Late in the game, team is very behind (pass usually more likely).
data['rolling_avg_yards'] = data['yards_gained'].rolling(window=3, min_periods=1).mean()                                    # - Attempt at a momentum feature.
data['previous_play_run'] = (data['play_type'].shift(1) == 'run').astype(int)                                               # - Was the previous play a run?
data['previous_play_pass'] = (data['play_type'].shift(1) == 'pass').astype(int)                                             # - Was the previous play a pass?
data['past_conversion_success'] = (data['third_down_converted'].shift(1).astype(bool)                                       # - Recent third/fourth down conversion success?
                                 | data['fourth_down_converted'].shift(1).astype(bool)).astype(int)

Selecting the features to be included in the training.

In [175]:
# The features to be included in the models.
selected_features = [
    'down',
    'ydstogo',
    'red_zone',
    'is_home_team',
    'is_late_season',
    'is_shotgun',
    'is_no_huddle',
    'near_end_of_half',
    'near_end_of_game',
    'late_cozy_offense',
    'late_desperate_offense',
    'late_super_desperate_offense',
    'rolling_avg_yards',
    'previous_play_run',
    'previous_play_pass',
    'past_conversion_success',
    ]

Preparing the dataset for training by filtering out irrelevant plays such as kickoffs and punts, selecting the desired features, encoding the target variable, and splitting the data into training/testing sets at a 70/30 split.

In [176]:
# Play type is our target variable; filter out unwanted plays like punts and kickoffs
filtered_data = data[data['play_type'].isin(['pass', 'run'])]  # Limit to pass/run plays
filtered_data = filtered_data[filtered_data['down'] != 0] # For some reason some "0th down" plays were still left over

X = filtered_data[selected_features]
y = filtered_data['play_type'].apply(lambda x: 1 if x == 'pass' else 0)  # Encode 'pass' as 1, 'run' as 0

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=random_state_choice)

Creating the Decision Tree, Logistic Regression, and Stacked models.

In [177]:
# Creating Decision Tree model. Depth of 9 turned out to be optimal.
dt_model = DecisionTreeClassifier(max_depth=9, random_state=random_state_choice)  # Limit depth to avoid overfitting
dt_model.fit(X_train, y_train)

# Creating Logistic Regression model. Depth of 9 turned out to be optimal.
lr_model = LogisticRegression(max_iter=1000, random_state=random_state_choice)  # Increased iterations for convergence
lr_model.fit(X_train, y_train)

stacked_model = StackingClassifier(
    estimators=[
        ('decision_tree', dt_model),
        ('logistic_regression', lr_model)
    ],
    final_estimator=LogisticRegression(random_state=random_state_choice)
)

stacked_model.fit(X_train, y_train)

Making and evaluating predictions for the decision tree model.

In [178]:
# Making predictions based on decision tree model.
dt_predictions = dt_model.predict(X_test)
dt_accuracy = accuracy_score(y_test, dt_predictions)
dt_precision = precision_score(y_test, dt_predictions)
dt_recall = recall_score(y_test, dt_predictions)
dt_f1 = f1_score(y_test, dt_predictions)

Making and evaluating predictions for the linear regression model.

In [179]:
# Making predictions based on regression model.
lr_predictions = lr_model.predict(X_test)
lr_accuracy = accuracy_score(y_test, lr_predictions)
lr_precision = precision_score(y_test, lr_predictions)
lr_recall = recall_score(y_test, lr_predictions)
lr_f1 = f1_score(y_test, lr_predictions)

Making and evaluating predictions for the stacked model.

In [180]:
# Making prediction based on stacked model.
stacked_predictions = stacked_model.predict(X_test)
accuracy = accuracy_score(y_test, stacked_predictions)
precision = precision_score(y_test, stacked_predictions)
recall = recall_score(y_test, stacked_predictions)
f1 = f1_score(y_test, stacked_predictions)

The evaluation metrics of the three models, incuding Accuracy, Precision, Recall, and F1 Score.

In [181]:
print("Model Performances:")
print("")

print("Decision Tree Performance:")
print(f"Accuracy: {dt_accuracy}, Precision: {dt_precision}, Recall: {dt_recall}, F1 Score: {dt_f1}")

print("\nLogistic Regression Performance:")
print(f"Accuracy: {lr_accuracy}, Precision: {lr_precision}, Recall: {lr_recall}, F1 Score: {lr_f1}")

print("\nStacking Performance:")
print(f"Accuracy: {accuracy}, Precision: {precision}, Recall: {recall}, F1 Score: {f1}")

Model Performances:

Decision Tree Performance:
Accuracy: 0.7300811464907702, Precision: 0.7400837734474595, Recall: 0.8273545339793965, F1 Score: 0.7812896527857884

Logistic Regression Performance:
Accuracy: 0.7130925829260191, Precision: 0.7173693203612651, Recall: 0.8376562563622297, F1 Score: 0.7728604703584041

Stacking Performance:
Accuracy: 0.7321691263702368, Precision: 0.7420920135721844, Recall: 0.828209617655442, F1 Score: 0.7827894088669951


This cell organizes the prediction results of the stacked model into a more readable format, and prepares them for printing. The head of the dataset is printed at the bottom.


In [182]:
# Preparing results for printing.
stacked_results = X_test.copy()
stacked_results['actual_play'] = y_test.map({1: 'pass', 0: 'run'})
stacked_results['predicted_play'] = stacked_predictions
stacked_results['predicted_play'] = stacked_results['predicted_play'].map({1: 'pass', 0: 'run'})
stacked_results['yardline_100'] = filtered_data.loc[X_test.index, 'yardline_100'].astype(int)
stacked_results['down'] = stacked_results['down'].astype(int)

# Labeling if the actual play matches the predicted play, i.e. correct prediction.
stacked_results['match'] = stacked_results.apply(
    lambda row: '✔' if row['actual_play'] == row['predicted_play'] else '✘', axis=1
)

# Columns to be displayed in printout.
display_cols = [
    'down',
    'ydstogo',
    'yardline_100',
    #'near_end_of_half', # Broke in notebook form for some reason.
    'predicted_play',
    'actual_play',
    'match',
]

# Printing out prediction results
print("Predictions:")
print("")
print(stacked_results[display_cols].head())

Predictions:

        down  ydstogo  yardline_100 predicted_play actual_play match
177519     1       10            35           pass        pass     ✔
165036     1       10            73            run         run     ✔
170745     2        5            56           pass        pass     ✔
77298      1       20            90           pass         run     ✘
163466     2        7             8           pass         run     ✘


Single game play type prediction. Alter the feature values and investigate how it changes the predicted play type.

In [183]:
single_play = {
    'down': 3,
    'ydstogo': 8,
    'red_zone': 1,
    'is_home_team': 1,
    'is_late_season': 1,
    'is_shotgun': 1,
    'is_no_huddle': 0,
    'near_end_of_half': 1,
    'near_end_of_game': 0,
    'late_cozy_offense': 0,
    'late_desperate_offense': 0,
    'late_super_desperate_offense': 0,
    'rolling_avg_yards': 5,
    'previous_play_run': 0,
    'previous_play_pass': 1,
    'past_conversion_success': 1
}

single_play_df = pd.DataFrame([single_play])
pred = stacked_model.predict(single_play_df)[0]
pred_type = 'Pass' if pred == 1 else 'Run'

print("Single Play Prediction:")
# print(f"Features: {single_play}")
print(pred_type)

Single Play Prediction:
Pass
