In [1]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.preprocessing import LabelEncoder, OrdinalEncoder
from sklearn import metrics 
import shap

In [86]:
# Baseline XGBoost settings: three-class soft-probability objective with a fixed seed
default_params = dict(
    objective='multi:softprob',
    num_class=3,
    seed=11062025,
)

In [3]:
# Model-ready rows, one per (game_id, play_id, player_name) as far as the later merges suggest — TODO confirm
data = pd.read_csv('Data/processed/data_for_model.csv')

# Only three play-by-play columns are used, so read just those instead of the whole file
pbp_data = pd.read_parquet('Data/play_by_play_2023.parquet',
                           columns=['play_id', 'old_game_id', 'cp'])
# .assign returns a new frame, which avoids writing into a slice of the loaded frame
# (the original column assignments could trigger SettingWithCopyWarning).
# `game_id` is derived from `old_game_id` so the keys line up with `data`.
pbp_data = pbp_data.assign(
    play_id=pbp_data['play_id'].astype(int),
    game_id=pbp_data['old_game_id'].astype(int),
)

# NOTE(review): not used by any cell visible in this notebook section; kept in case later cells need it
supplementary_data = pd.read_csv('Data/supplementary_data.csv')

# Inner join: plays without a play-by-play match are dropped
data = data.merge(pbp_data, on=['game_id', 'play_id'], how='inner')

  supplementary_data = pd.read_csv('Data/supplementary_data.csv')


In [4]:
# Re-cast numeric columns: every int64 column first, then every float64 column
for dtype_name, python_type in (('int64', int), ('float64', float)):
    for col in data.select_dtypes(include=[dtype_name]).columns:
        data[col] = data[col].astype(python_type)


def _two_part_total(text, separator, scale):
    """Return ``int(a) * scale + int(b)`` for a string shaped like ``a<separator>b``."""
    parts = text.split(separator)
    return int(parts[0]) * scale + int(parts[1])


# 'F-I' strings become F*12 + I (presumably feet-inches -> inches; verify against the raw data)
data['player_height'] = data['player_height'].apply(lambda height: _two_part_total(height, '-', 12))

# 'M:S' clock strings become M*60 + S, plus an offset of 15*60 seconds for every quarter still to come
extra_seconds = {1: 60*15*3, 2: 60*15*2, 3: 60*15, 4: 0}
clock_seconds = data['game_clock'].apply(lambda clock: _two_part_total(clock, ':', 60))
data['seconds_left'] = clock_seconds + data['quarter'].map(extra_seconds)

# Columns stored with pandas' category dtype (the models below are built with enable_categorical=True)
for categorical_column in (
    'player_position',
    'play_action',
    'dropback_type',
    'team_coverage_type',
    'route_of_targeted_receiver',
    'qb_hit',
    'down',
    'outcome',
):
    data[categorical_column] = data[categorical_column].astype('category')

In [5]:
# Identifiers, raw clock fields, and target-related columns that must not be used as features
non_feature_columns = [
    'game_id', 'play_id', 'player_name', 'disruption', 'pd', 'int', 'quarter', 'game_clock',
    'pass_result', 'outcome', 'old_game_id', 'cp',
]
X = data.drop(columns=non_feature_columns)

# Target frame keeps `week` so it can be split with the same week filter as X
enc = LabelEncoder()
y = data[['outcome', 'week']].assign(
    outcome=lambda frame: enc.fit_transform(frame['outcome'])  # 0 = complete, 1 = disruption, 2 = incomplete
)

In [43]:
def get_model_performance(file_path, data, training_data, validation_data, y_train, y_validate, week):
    """Load a saved booster and score it on one held-out week.

    Class codes follow the label-encoding comment used in this notebook:
    0 = complete, 1 = disruption, 2 = incomplete.

    Parameters
    ----------
    file_path : path handed to ``xgb.Booster.load_model``.
    data : DataFrame with ``week`` and ``cp`` columns. Its rows for ``week`` must be
        in the same order as the rows of ``validation_data``.
    training_data, y_train : accepted for interface compatibility; not used.
    validation_data : object passed to ``Booster.predict``; the result is indexed as
        (rows, classes).
    y_validate : DataFrame with an ``outcome`` column holding the true class codes.
    week : value matched against ``data['week']`` to select the reference ``cp`` rows.

    Returns
    -------
    tuple
        ``(accuracy, weighted one-vs-rest ROC AUC, accuracy with classes 1 and 2 merged,
        RMSE between 1 - cp and 1 - P(class 0), true counts, predicted counts)``.
        Both count lists are ordered ``[class 0, class 2, class 1]``.
    """
    loaded_model = xgb.Booster()
    loaded_model.load_model(file_path)

    validation_probabilities = loaded_model.predict(validation_data)
    validation_predictions = np.argmax(validation_probabilities, axis=1)
    # minlength guarantees one slot per class even when a class is never predicted;
    # without it, indexing the counts below raises IndexError.
    validation_counts = np.bincount(validation_predictions,
                                    minlength=validation_probabilities.shape[1])

    y_true = y_validate['outcome'].to_numpy()
    validation_accuracy = metrics.accuracy_score(y_true, validation_predictions)

    # Merge class 2 into class 1 so the problem becomes "class 0 vs. everything else"
    incompletion_predictions = np.where(validation_predictions == 2, 1, validation_predictions)
    validation_incompletions = np.where(y_true == 2, 1, y_true)
    validation_incompletion_accuracy = metrics.accuracy_score(validation_incompletions,
                                                              incompletion_predictions)

    # Select the week's rows with a boolean mask rather than a first-to-last index slice,
    # so the result is correct even if a week's rows are not contiguous or the index is
    # not a default RangeIndex.
    week_mask = (data['week'] == week).to_numpy()
    incompletion_probability = 1 - data.loc[week_mask, 'cp'].to_numpy()
    predicted_incompletion_probability = 1 - validation_probabilities[:, 0]
    squared_error = (incompletion_probability - predicted_incompletion_probability) ** 2
    # nanmean keeps the previous behaviour of skipping rows whose cp is missing
    incompletion_rmse = np.sqrt(np.nanmean(squared_error))

    validation_roc = metrics.roc_auc_score(y_true, validation_probabilities,
                                           average='weighted', multi_class='ovr')

    validation_results = [np.sum(y_true == 0), np.sum(y_true == 2), np.sum(y_true == 1)]
    predicted_results = [validation_counts[0], validation_counts[2], validation_counts[1]]
    return (validation_accuracy, validation_roc, validation_incompletion_accuracy,
            incompletion_rmse, validation_results, predicted_results)

In [109]:
def train_with_cv(X, y, params, print_update, num_boost_round=10):
    """Leave-one-week-out cross-validation for an XGBoost model.

    For each week 1..17 a booster is trained on every other week, saved to
    ``temp_cv_model.json`` in the working directory (overwritten each fold), and
    scored on the held-out week with ``get_model_performance``.

    Parameters
    ----------
    X : feature DataFrame containing a ``week`` column (dropped before training).
    y : target DataFrame containing a ``week`` column plus the label column; its rows
        must correspond one-to-one, in order, with the rows of ``X``.
    params : parameter dict forwarded to ``xgb.train``.
    print_update : when truthy, print a progress line per fold.
    num_boost_round : boosting rounds per fold. Defaults to 10 (``xgb.train``'s own
        default) so that calls which omit the argument keep working.

    Returns
    -------
    pandas.DataFrame
        One row per week with columns ``Week, accuracy, ROC, incompletion_accuracy,
        incompletion_RMSE, val_results, pred_results``.

    Notes
    -----
    Scoring reads the notebook-level ``data`` frame (for its ``week``/``cp`` columns),
    so ``data`` must be defined and row-aligned with ``X``.
    """
    result_columns = ['Week', 'accuracy', 'ROC', 'incompletion_accuracy', 'incompletion_RMSE',
                      'val_results', 'pred_results']
    fold_records = []
    for week in range(1, 18):
        if print_update:
            print(f'Training with Week {week} excluded')

        X_train = X[X['week'] != week].drop(columns=['week'])
        X_validate = X[X['week'] == week].drop(columns=['week'])
        y_train = y[y['week'] != week].drop(columns=['week'])
        y_validate = y[y['week'] == week].drop(columns=['week'])
        Xy_train = xgb.DMatrix(X_train, y_train, enable_categorical=True)
        Xy_validate = xgb.DMatrix(X_validate, y_validate, enable_categorical=True)

        booster = xgb.train(params, Xy_train, num_boost_round=num_boost_round)
        booster.save_model("temp_cv_model.json")

        fold_metrics = get_model_performance('temp_cv_model.json', data, Xy_train, Xy_validate,
                                             y_train, y_validate, week)
        fold_records.append(dict(zip(result_columns, (week, *fold_metrics))))

    # Build the frame once: avoids row-by-row .loc growth and gives numeric dtypes
    # for the metric columns instead of object dtype.
    return pd.DataFrame(fold_records, columns=result_columns)

In [58]:
def compare_models(m1, m2, m1_name, m2_name):
    """Print a side-by-side summary of two cross-validation result frames.

    For each metric the heading is printed, then the mean for ``m1``, then the mean
    for ``m2`` (both rounded to 5 decimals). After that come three count lines: the
    actual outcome totals (taken from ``m1['val_results']``), the predicted totals
    of ``m1``, and the predicted totals of ``m2``.

    ``m1`` and ``m2`` need the columns ``accuracy``, ``ROC``, ``incompletion_accuracy``,
    ``incompletion_RMSE`` and ``pred_results``; ``m1`` additionally needs ``val_results``.
    Each entry of the ``*_results`` columns is a 3-item sequence ordered
    (completions, incompletions, deflections). Returns ``None``.
    """
    metric_sections = (
        ('Accuracy', 'accuracy'),
        ('ROC', 'ROC'),
        ('Incompletion Accuracy', 'incompletion_accuracy'),
        ('Incompletion RMSE', 'incompletion_RMSE'),
    )
    for heading, column in metric_sections:
        print(heading)
        for frame in (m1, m2):
            print(np.round(np.mean(frame[column]), 5))

    # (label, frame, column) — the column is looked up lazily inside the loop
    count_sections = (
        ('Actual Results', m1, 'val_results'),
        (m1_name, m1, 'pred_results'),
        (f'{m2_name} Model', m2, 'pred_results'),
    )
    for label, frame, column in count_sections:
        completions, incompletions, deflections = (
            np.sum([row[position] for row in frame[column]]) for position in range(3)
        )
        print(f'{label}: {completions} completions, {incompletions} incompletions, {deflections} deflections')

# Model training + Eval

In [113]:
# Baseline: default hyper-parameters, all features, 30 boosting rounds per fold
regular_model_results = train_with_cv(
    X,
    y,
    params=default_params,
    print_update=True,
    num_boost_round=30,
)
regular_model_results

Training with Week 1 excluded
Training with Week 2 excluded
Training with Week 3 excluded
Training with Week 4 excluded
Training with Week 5 excluded
Training with Week 6 excluded
Training with Week 7 excluded
Training with Week 8 excluded
Training with Week 9 excluded
Training with Week 10 excluded
Training with Week 11 excluded
Training with Week 12 excluded
Training with Week 13 excluded
Training with Week 14 excluded
Training with Week 15 excluded
Training with Week 16 excluded
Training with Week 17 excluded


Unnamed: 0,Week,accuracy,ROC,incompletion_accuracy,incompletion_RMSE,val_results,pred_results
0,1,0.634855,0.603402,0.647303,0.208656,"[154, 73, 14]","[191, 45, 5]"
1,2,0.72,0.665394,0.72,0.19842,"[166, 74, 10]","[210, 38, 2]"
2,3,0.683258,0.688705,0.687783,0.214195,"[136, 73, 12]","[185, 30, 6]"
3,4,0.641791,0.581692,0.646766,0.197986,"[138, 52, 11]","[169, 29, 3]"
4,5,0.589189,0.60017,0.616216,0.218413,"[107, 68, 10]","[150, 33, 2]"
5,6,0.691542,0.7405,0.696517,0.201728,"[122, 66, 13]","[165, 35, 1]"
6,7,0.687179,0.672728,0.702564,0.222971,"[128, 62, 5]","[164, 27, 4]"
7,8,0.639423,0.62524,0.644231,0.219875,"[133, 63, 12]","[171, 31, 6]"
8,9,0.630682,0.650912,0.653409,0.21574,"[107, 59, 10]","[144, 30, 2]"
9,10,0.617021,0.615713,0.62234,0.210477,"[104, 76, 8]","[159, 28, 1]"


In [83]:
# Ablation: remove the game-context features (down, distance, clock, score differential)
X_no_game = X.drop(['down', 'yards_to_go', 'seconds_left', 'defensive_point_diff'], axis = 1)
# Pass the number of boosting rounds explicitly: train_with_cv as defined in this notebook
# requires it, and using the same 30 rounds as the baseline run keeps the comparison like-for-like.
model_no_game_results = train_with_cv(X_no_game, y, default_params, True, 30)
model_no_game_results

Training with Week 1 excluded
Training with Week 2 excluded
Training with Week 3 excluded
Training with Week 4 excluded
Training with Week 5 excluded
Training with Week 6 excluded
Training with Week 7 excluded
Training with Week 8 excluded
Training with Week 9 excluded
Training with Week 10 excluded
Training with Week 11 excluded
Training with Week 12 excluded
Training with Week 13 excluded
Training with Week 14 excluded
Training with Week 15 excluded
Training with Week 16 excluded
Training with Week 17 excluded


Unnamed: 0,Week,accuracy,ROC,incompletion_accuracy,incompletion_RMSE,val_results,pred_results
0,1,0.643154,0.631285,0.651452,0.155237,"[154, 73, 14]","[208, 27, 6]"
1,2,0.732,0.745366,0.748,0.151964,"[166, 74, 10]","[221, 25, 4]"
2,3,0.674208,0.691597,0.683258,0.184619,"[136, 73, 12]","[178, 34, 9]"
3,4,0.661692,0.574129,0.681592,0.160137,"[138, 52, 11]","[170, 25, 6]"
4,5,0.572973,0.560377,0.589189,0.154844,"[107, 68, 10]","[163, 20, 2]"
5,6,0.686567,0.722895,0.686567,0.153149,"[122, 66, 13]","[177, 22, 2]"
6,7,0.666667,0.627595,0.671795,0.178701,"[128, 62, 5]","[168, 25, 2]"
7,8,0.620192,0.6234,0.629808,0.176913,"[133, 63, 12]","[176, 26, 6]"
8,9,0.647727,0.672947,0.659091,0.156768,"[107, 59, 10]","[157, 17, 2]"
9,10,0.632979,0.605746,0.632979,0.152166,"[104, 76, 8]","[167, 20, 1]"


In [114]:
# Baseline vs. the model trained without game-context features
compare_models(
    m1=regular_model_results,
    m2=model_no_game_results,
    m1_name='Regular Model',
    m2_name='No Game Context',
)

Accuracy
0.65725
0.6605
ROC
0.64485
0.64215
Incompletion Accuracy
0.66665
0.66865
Incompletion RMSE
0.20907
0.15853
Actual Results: 2201 completions, 1073 incompletions, 162 deflections
Regular Model: 2814 completions, 571 incompletions, 51 deflections
No Game Context Model: 2952 completions, 424 incompletions, 60 deflections


# Regular Model better, trying last second vs regular model

In [57]:
last_ten_frames = pd.read_csv('Data/processed/last_ten_frames_of_input.csv')
X_last_second = data.merge(last_ten_frames, on = ['game_id', 'play_id', 'player_name'], how = 'inner')
# `y` was built from `data`, and the CV split filters X and y by week independently,
# so the merge must neither drop nor duplicate rows.
assert len(X_last_second) == len(y), 'merge with last_ten_frames changed the row count; X and y are misaligned'
X_last_second = X_last_second.drop(['game_id', 'play_id', 'player_name', 'disruption', 'pd', 'int', 'quarter', 'game_clock',
               'pass_result', 'outcome', 'old_game_id', 'cp', 'player_role'], axis = 1)

# Pass the number of boosting rounds explicitly: train_with_cv as defined in this notebook
# requires it, and using the same 30 rounds as the baseline run keeps the comparison like-for-like.
model_last_second_results = train_with_cv(X_last_second, y, default_params, True, 30)
model_last_second_results

Training with Week 1 excluded
Training with Week 2 excluded
Training with Week 3 excluded
Training with Week 4 excluded
Training with Week 5 excluded
Training with Week 6 excluded
Training with Week 7 excluded
Training with Week 8 excluded
Training with Week 9 excluded
Training with Week 10 excluded
Training with Week 11 excluded
Training with Week 12 excluded
Training with Week 13 excluded
Training with Week 14 excluded
Training with Week 15 excluded
Training with Week 16 excluded
Training with Week 17 excluded


Unnamed: 0,Week,accuracy,ROC,incompletion_accuracy,incompletion_RMSE,val_results,pred_results
0,1,0.655602,0.624064,0.659751,0.155247,"[154, 73, 14]","[214, 22, 5]"
1,2,0.712,0.693252,0.72,0.149063,"[166, 74, 10]","[218, 31, 1]"
2,3,0.674208,0.647663,0.683258,0.172332,"[136, 73, 12]","[186, 31, 4]"
3,4,0.691542,0.592992,0.696517,0.160659,"[138, 52, 11]","[181, 18, 2]"
4,5,0.6,0.576902,0.621622,0.164505,"[107, 68, 10]","[165, 18, 2]"
5,6,0.676617,0.745711,0.676617,0.140053,"[122, 66, 13]","[179, 21, 1]"
6,7,0.671795,0.652235,0.682051,0.171562,"[128, 62, 5]","[172, 19, 4]"
7,8,0.644231,0.62242,0.658654,0.177505,"[133, 63, 12]","[174, 27, 7]"
8,9,0.670455,0.669102,0.681818,0.154888,"[107, 59, 10]","[155, 20, 1]"
9,10,0.595745,0.63256,0.595745,0.150554,"[104, 76, 8]","[168, 19, 1]"


In [64]:
# Baseline vs. the model that adds the last-ten-frames features
compare_models(
    m1=regular_model_results,
    m2=model_last_second_results,
    m1_name='Regular Model',
    m2_name='Last Second',
)

Accuracy
0.66363
0.66059
ROC
0.6468
0.63978
Incompletion Accuracy
0.67362
0.66922
Incompletion RMSE
0.15893
0.15729
Actual Results: 2201 completions, 1073 incompletions, 162 deflections
Regular Model: 2941 completions, 438 incompletions, 57 deflections
Last Second Model: 2980 completions, 416 incompletions, 40 deflections


# Regular model best, grid search

In [116]:
# https://www.slideshare.net/slideshow/owen-zhangopen-sourcetoolsanddscompetitions1/49003320#2
# https://www.analyticsvidhya.com/blog/2016/03/complete-guide-parameter-tuning-xgboost-with-codes-python/#h-what-are-xgboost-nbsp-parameters

models_trained = 0
num_round = 100  # NOTE(review): not referenced in this cell; `num_boost_round` below is what training uses
subsample  = 1
xg_gamma = 0
xg_alpha = 0
xg_lambda = 1
seed = 11052025  # NOTE(review): default_params uses 11062025 — confirm which seed is intended

# Settings held fixed across the whole grid; built from the constants above so the
# two cannot drift apart.
base_params = {'objective': 'multi:softprob',
               'num_class' : 3,
               'seed' : seed,
               'subsample' : subsample,
               'gamma' : xg_gamma,
               'alpha' : xg_alpha,
               'lambda' : xg_lambda,
              }
num_boost_round = 30
grid_search_model_results = {}

eta_grid = [0.01, 0.05, 0.1, 0.2, 0.3]
max_depth_grid = [4, 5, 6, 7]
# NOTE(review): 2301 / 1133 / 170 look like per-class row counts, but the comparison output
# elsewhere in this notebook reports 2201 / 1073 / 162 — confirm these are still current.
min_child_weight_grid = [170/(2301+1133+170), (1133 + 170)/(2301+1133+170), 1]
colsample_bytree_grid = [0.3, 0.4, 0.5]

# Derived from the grids so the progress message stays correct when a grid is edited
total_models = (len(eta_grid) * len(max_depth_grid)
                * len(min_child_weight_grid) * len(colsample_bytree_grid))

for eta in eta_grid:
    for max_depth in max_depth_grid:
        for min_child_weight in min_child_weight_grid:
            for colsample_by_tree in colsample_bytree_grid:
                # Fresh dict per combination; nothing is shared between runs
                params = {**base_params,
                          'eta': eta,
                          'max_depth': max_depth,
                          'min_child_weight': min_child_weight,
                          'colsample_bytree': colsample_by_tree}
                # Key format: "<eta> <max_depth> <min_child_weight> <colsample_bytree>"
                grid_search_id = f'{eta} {max_depth} {min_child_weight} {colsample_by_tree}'
                model_results = train_with_cv(X, y, params, False, num_boost_round)
                grid_search_model_results[grid_search_id] = {
                    'results': model_results,
                    'accuracy': np.round(np.mean(model_results['accuracy']), 5),
                }

                models_trained += 1
                if models_trained % 10 == 0:
                    print(f'{models_trained}/{total_models} trained')

10/120 trained
20/120 trained
30/120 trained
40/120 trained
50/120 trained
60/120 trained
70/120 trained
80/120 trained
90/120 trained
100/120 trained
110/120 trained
120/120 trained
130/120 trained
140/120 trained
150/120 trained
160/120 trained
170/120 trained
180/120 trained


In [128]:
# One row per grid-search configuration: mean CV accuracy and mean CV ROC, best accuracy first
model_ids = list(grid_search_model_results.keys())
model_accuracies = [
    np.round(np.mean(grid_search_model_results[model_id]['results']['accuracy']), 5)
    for model_id in model_ids
]
model_rocs = [
    np.round(np.mean(grid_search_model_results[model_id]['results']['ROC']), 5)
    for model_id in model_ids
]
grid_search_model_accuracies = (
    pd.DataFrame({'model_id': model_ids, 'accuracy': model_accuracies, 'roc': model_rocs})
    .sort_values(by='accuracy', ascending=False)
)

In [129]:
grid_search_model_accuracies # top row (highest mean CV accuracy): eta = 0.2, max_depth = 4, min_child_weight = 1, colsample_bytree = 0.5

Unnamed: 0,model_id,accuracy,roc
116,0.2 4 1 0.5,0.66931,0.65084
113,0.2 4 0.36154273029966705 0.5,0.66820,0.65176
26,0.01 6 1 0.5,0.66739,0.64862
83,0.1 5 0.04716981132075472 0.5,0.66716,0.64549
122,0.2 5 0.36154273029966705 0.5,0.66678,0.65095
...,...,...,...
159,0.3 5 1 0.3,0.64776,0.63356
169,0.3 6 1 0.4,0.64713,0.63754
172,0.3 7 0.04716981132075472 0.4,0.64594,0.63592
168,0.3 6 1 0.3,0.64478,0.63092
