# Predicting the Next Play


## Imports

## Prepare Data

In [137]:
from metaflow import Flow
import pandas
from sklearn.preprocessing import LabelEncoder
from sklearn import model_selection
import xgboost
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import roc_auc_score
run = Flow('NFLStatsFlow').latest_successful_run

In [138]:
new_df = run.data.baltimore_df.groupby(['full_play_type'])['play_id'].count().to_frame()
percentage_df = new_df.apply(lambda x: x/x.sum())
totals_df = pandas.concat([percentage_df, new_df], axis = 1)
totals_df

Unnamed: 0_level_0,play_id,play_id
full_play_type,Unnamed: 1_level_1,Unnamed: 2_level_1
pass_left_deep,0.030784,33
pass_left_short,0.097948,105
pass_middle_deep,0.020522,22
pass_middle_short,0.165112,177
pass_right_deep,0.041045,44
pass_right_short,0.159515,171
run_left,0.152985,164
run_middle,0.109142,117
run_right,0.222948,239


In [139]:
list(run.data.baltimore_df.columns)

['play_id',
 'game_id',
 'home_team',
 'away_team',
 'posteam',
 'posteam_type',
 'defteam',
 'side_of_field',
 'yardline_100',
 'game_date',
 'quarter_seconds_remaining',
 'half_seconds_remaining',
 'game_seconds_remaining',
 'game_half',
 'quarter_end',
 'drive',
 'sp',
 'qtr',
 'down',
 'goal_to_go',
 'time',
 'yrdln',
 'ydstogo',
 'ydsnet',
 'desc',
 'play_type',
 'yards_gained',
 'shotgun',
 'no_huddle',
 'qb_dropback',
 'qb_kneel',
 'qb_spike',
 'qb_scramble',
 'pass_length',
 'pass_location',
 'air_yards',
 'yards_after_catch',
 'run_location',
 'run_gap',
 'field_goal_result',
 'kick_distance',
 'extra_point_result',
 'two_point_conv_result',
 'home_timeouts_remaining',
 'away_timeouts_remaining',
 'timeout',
 'timeout_team',
 'td_team',
 'posteam_timeouts_remaining',
 'defteam_timeouts_remaining',
 'total_home_score',
 'total_away_score',
 'posteam_score',
 'defteam_score',
 'score_differential',
 'posteam_score_post',
 'defteam_score_post',
 'score_differential_post',
 'no_sc

In [140]:
# PASS IN FULL FEATURE SET
final_df = run.data.baltimore_df[[
## STANDARD METRICS
#     'game_seconds_remaining',
    'yardline_100',
    'down',
    'ydstogo',
    'shotgun',
    'score_differential',
    'total_home_score',
    'total_away_score',
    'quarter_seconds_remaining',
#     'half_seconds_remaining',
    'qtr',
    'goal_to_go',
    'no_huddle',
    'posteam_timeouts_remaining',
    'defteam_timeouts_remaining',
## CUSTOM METRICS
     'previous_play_in_drive',
     'drive_yards_gained',
     'game_yards_gained',
     'drive_rushing_yards_gained',
     'game_rushing_yards_gained',
     'drive_passing_yards_gained',
     'game_passing_yards_gained',
     'drive_sack',
     'game_sack',
     'drive_incomplete_pass',
     'game_incomplete_pass',
     'drive_no_huddle',
     'game_no_huddle',
     'drive_interception',
     'game_interception',
     'drive_first_down_rush',
     'game_first_down_rush',
     'drive_first_down_pass',
     'game_first_down_pass',
     'drive_first_down_penalty',
     'game_first_down_penalty',
     'game_third_down_converted',
     'game_third_down_failed',
     'game_fumble',
#      'drive_qb_hit',
     'game_qb_hit',
     'drive_rush_attempt',
     'game_rush_attempt',
     'drive_pass_attempt',
     'game_pass_attempt',
     'game_pass_touchdown',
     'game_rush_touchdown',
### GOAL METRIC
    'full_play_type'
]]


final_df= pandas.get_dummies(data=final_df, columns=['previous_play_in_drive'])
X = final_df.loc[:, final_df.columns != 'full_play_type']
Y = final_df.full_play_type

# Encode string class Values
label_encoder = LabelEncoder()
label_encoder = label_encoder.fit(Y)
label_encoded_y = label_encoder.transform(Y)
seed = 13
test_size = 0.2

# Randomizing
X_train, X_test, y_train, y_test = \
    model_selection.train_test_split(X, label_encoded_y, test_size=test_size, random_state=seed)

# Fit GBC model
GBC_model = xgboost.XGBClassifier(learning_rate=0.01,n_estimators=400)
GBC_model.fit(X_train, y_train)

# Make predictions
GBC_y_pred = GBC_model.predict(X_test)
GBC_predictions = [round(value) for value in GBC_y_pred]
# Evaluate Predictions
GBC_accuracy = accuracy_score(y_test, GBC_predictions)

CLF_model = MLPClassifier(hidden_layer_sizes=(100,100,100), max_iter=500, alpha=0.0001,
                     solver='sgd', verbose=10,  random_state=21,tol=0.000000001)
CLF_model.fit(X_train, y_train)
CLF_model_y_pred = CLF_model.predict(X_test)
CLF_model_accuracy_score = accuracy_score(y_test, CLF_model_y_pred)
CLF_model_cm = confusion_matrix(y_test, CLF_model_y_pred)

KeyError: "['drive_incomplete_pass', 'game_third_down_failed', 'drive_sack', 'game_rush_touchdown', 'game_pass_touchdown', 'game_incomplete_pass', 'drive_rush_attempt', 'drive_first_down_penalty', 'drive_first_down_pass', 'drive_passing_yards_gained', 'game_first_down_penalty', 'game_interception', 'game_first_down_rush', 'game_third_down_converted', 'game_rush_attempt', 'drive_yards_gained', 'game_no_huddle', 'game_first_down_pass', 'drive_no_huddle', 'drive_pass_attempt', 'game_fumble', 'game_passing_yards_gained', 'game_rushing_yards_gained', 'drive_first_down_rush', 'game_qb_hit', 'game_sack', 'game_yards_gained', 'drive_interception', 'game_pass_attempt', 'drive_rushing_yards_gained'] not in index"

In [134]:
print("GBC Accuracy: %.2f%%" % (GBC_accuracy * 100.0))
print("CLF Accuracy: %.2f%%" % (CLF_model_accuracy_score * 100.0))
print(classification_report(CLF_model_y_pred,predictions))

GBC Accuracy: 23.26%
CLF Accuracy: 15.81%
              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00         0
         1.0       0.50      0.08      0.13        13
         2.0       0.00      0.00      0.00         0
         3.0       0.38      0.74      0.51        27
         5.0       0.10      0.18      0.13        11
         6.0       0.26      0.19      0.22        27
         7.0       0.36      0.21      0.26        24
         8.0       0.61      0.56      0.58       113

   micro avg       0.45      0.45      0.45       215
   macro avg       0.28      0.24      0.23       215
weighted avg       0.47      0.45      0.44       215

