# Predicting the Next Play


## Imports

In [142]:
from metaflow import Flow
import pandas
from sklearn.preprocessing import LabelEncoder
from sklearn import model_selection
import xgboost
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import roc_auc_score
run = Flow('NFLStatsFlow').latest_successful_run

In [143]:
new_df = run.data.baltimore_df.groupby(['full_play_type'])['play_id'].count().to_frame()
percentage_df = new_df.apply(lambda x: x/x.sum())
totals_df = pandas.concat([percentage_df, new_df], axis = 1)
totals_df

Unnamed: 0_level_0,play_id,play_id
full_play_type,Unnamed: 1_level_1,Unnamed: 2_level_1
pass_left_deep,0.030784,33
pass_left_short,0.097948,105
pass_middle_deep,0.020522,22
pass_middle_short,0.165112,177
pass_right_deep,0.041045,44
pass_right_short,0.159515,171
run_left,0.152985,164
run_middle,0.109142,117
run_right,0.222948,239


In [156]:
# PASS IN FULL FEATURE SET
final_df = run.data.baltimore_df[[
## STANDARD METRICS
    'game_seconds_remaining',
    'yardline_100',
    'down',
    'ydstogo',
    'shotgun',
    'score_differential',
    'total_home_score',
    'total_away_score',
    'quarter_seconds_remaining',
    'half_seconds_remaining',
    'qtr',
    'goal_to_go',
    'no_huddle',
    'posteam_timeouts_remaining',
    'defteam_timeouts_remaining',
## CUSTOM METRICS
     'previous_play_in_drive',
     'drive_yards_gained',
     'game_yards_gained',
     'drive_rushing_yards_gained',
     'game_rushing_yards_gained',
     'drive_passing_yards_gained',
     'game_passing_yards_gained',
     'drive_sack',
     'game_sack',
     'drive_incomplete_pass',
     'game_incomplete_pass',
     'drive_no_huddle',
     'game_no_huddle',
     'drive_interception',
     'game_interception',
     'drive_first_down_rush',
     'game_first_down_rush',
     'drive_first_down_pass',
     'game_first_down_pass',
     'drive_first_down_penalty',
     'game_first_down_penalty',
     'game_third_down_converted',
     'game_third_down_failed',
     'game_fumble',
#      'drive_qb_hit',
     'game_qb_hit',
     'drive_rush_attempt',
     'game_rush_attempt',
     'drive_pass_attempt',
     'game_pass_attempt',
     'game_pass_touchdown',
     'game_rush_touchdown',
### GOAL METRIC
    'full_play_type'
]]


final_df= pandas.get_dummies(data=final_df, columns=['previous_play_in_drive'])
X = final_df.loc[:, final_df.columns != 'full_play_type']
Y = final_df.full_play_type

# Encode string class Values
label_encoder = LabelEncoder()
label_encoder = label_encoder.fit(Y)
label_encoded_y = label_encoder.transform(Y)
seed = 13
test_size = 0.2

# Randomizing
X_train, X_test, y_train, y_test = \
    model_selection.train_test_split(X, label_encoded_y, test_size=test_size, random_state=seed)

# Fit GBC model
GBC_model = xgboost.XGBClassifier(learning_rate=0.01,n_estimators=400)
GBC_model.fit(X_train, y_train)

# Make predictions
GBC_y_pred = GBC_model.predict(X_test)
GBC_predictions = [round(value) for value in GBC_y_pred]
# Evaluate Predictions
GBC_accuracy = accuracy_score(y_test, GBC_predictions)

CLF_model = MLPClassifier(hidden_layer_sizes=(100,100,100), max_iter=500, alpha=0.0001,
                     solver='sgd', verbose=10,  random_state=21,tol=0.000000001)
CLF_model.fit(X_train, y_train)
CLF_model_y_pred = CLF_model.predict(X_test)
CLF_model_accuracy_score = accuracy_score(y_test, CLF_model_y_pred)
CLF_model_cm = confusion_matrix(y_test, CLF_model_y_pred)

Iteration 1, loss = 18.36763326
Iteration 2, loss = 5.55853946
Iteration 3, loss = 2.96176022
Iteration 4, loss = 2.43221646
Iteration 5, loss = 2.30920204
Iteration 6, loss = 2.27964079
Iteration 7, loss = 2.25821231
Iteration 8, loss = 2.23475042
Iteration 9, loss = 2.21378751
Iteration 10, loss = 2.19845930
Iteration 11, loss = 2.17176543
Iteration 12, loss = 2.15470711
Iteration 13, loss = 2.14040346
Iteration 14, loss = 2.13357674
Iteration 15, loss = 2.13156599
Iteration 16, loss = 2.12775187
Iteration 17, loss = 2.12431986
Iteration 18, loss = 2.12108523
Iteration 19, loss = 2.11793417
Iteration 20, loss = 2.11502608
Iteration 21, loss = 2.11227457
Iteration 22, loss = 2.10970901
Iteration 23, loss = 2.10722499
Iteration 24, loss = 2.10500236
Iteration 25, loss = 2.10278961
Iteration 26, loss = 2.10079518
Iteration 27, loss = 2.09867562
Iteration 28, loss = 2.09680029
Iteration 29, loss = 2.09491401
Iteration 30, loss = 2.09314939
Iteration 31, loss = 2.09145208
Iteration 32, lo

Iteration 260, loss = 2.00825018
Iteration 261, loss = 2.00821871
Iteration 262, loss = 2.00814440
Iteration 263, loss = 2.00808584
Iteration 264, loss = 2.00803233
Iteration 265, loss = 2.00800402
Iteration 266, loss = 2.00796557
Iteration 267, loss = 2.00793055
Iteration 268, loss = 2.00788076
Iteration 269, loss = 2.00782258
Iteration 270, loss = 2.00775623
Iteration 271, loss = 2.00772933
Iteration 272, loss = 2.00769554
Iteration 273, loss = 2.00767126
Iteration 274, loss = 2.00765314
Iteration 275, loss = 2.00761334
Iteration 276, loss = 2.00760013
Iteration 277, loss = 2.00757817
Iteration 278, loss = 2.00757996
Iteration 279, loss = 2.00752138
Iteration 280, loss = 2.00751841
Iteration 281, loss = 2.00743677
Iteration 282, loss = 2.00744333
Iteration 283, loss = 2.00740780
Iteration 284, loss = 2.00735719
Iteration 285, loss = 2.00729603
Iteration 286, loss = 2.00726844
Iteration 287, loss = 2.00725195
Iteration 288, loss = 2.00718119
Iteration 289, loss = 2.00714862
Iteration 

In [157]:
print("GBC Accuracy: %.2f%%" % (GBC_accuracy * 100.0))
print("CLF Accuracy: %.2f%%" % (CLF_model_accuracy_score * 100.0))
print(classification_report(CLF_model_y_pred,predictions))

GBC Accuracy: 26.51%
CLF Accuracy: 21.86%
              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00         0
         1.0       0.00      0.00      0.00         0
         2.0       0.00      0.00      0.00         0
         3.0       0.00      0.00      0.00         0
         5.0       0.00      0.00      0.00         0
         6.0       0.00      0.00      0.00         0
         7.0       0.00      0.00      0.00         0
         8.0       1.00      0.48      0.65       215

   micro avg       0.48      0.48      0.48       215
   macro avg       0.12      0.06      0.08       215
weighted avg       1.00      0.48      0.65       215



In [158]:
list(run.data.baltimore_df.columns)

['play_id',
 'game_id',
 'home_team',
 'away_team',
 'posteam',
 'posteam_type',
 'defteam',
 'side_of_field',
 'yardline_100',
 'game_date',
 'quarter_seconds_remaining',
 'half_seconds_remaining',
 'game_seconds_remaining',
 'game_half',
 'quarter_end',
 'drive',
 'sp',
 'qtr',
 'down',
 'goal_to_go',
 'time',
 'yrdln',
 'ydstogo',
 'ydsnet',
 'desc',
 'play_type',
 'yards_gained',
 'shotgun',
 'no_huddle',
 'qb_dropback',
 'qb_kneel',
 'qb_spike',
 'qb_scramble',
 'pass_length',
 'pass_location',
 'air_yards',
 'yards_after_catch',
 'run_location',
 'run_gap',
 'field_goal_result',
 'kick_distance',
 'extra_point_result',
 'two_point_conv_result',
 'home_timeouts_remaining',
 'away_timeouts_remaining',
 'timeout',
 'timeout_team',
 'td_team',
 'posteam_timeouts_remaining',
 'defteam_timeouts_remaining',
 'total_home_score',
 'total_away_score',
 'posteam_score',
 'defteam_score',
 'score_differential',
 'posteam_score_post',
 'defteam_score_post',
 'score_differential_post',
 'no_sc