Train model using 18-fold validation and perform hyperparameter tuning

In [19]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.preprocessing import LabelEncoder, OrdinalEncoder
from sklearn import metrics 
import shap
import json
import pickle

In [2]:
default_params = {
    'objective': 'multi:softprob',
    'num_class' : 3,
    'seed' : 11062025,
}

# Import and Format Data

In [3]:
data = pd.read_csv('Data/processed/data_for_model.csv')
pbp_data = pd.read_parquet('Data/play_by_play_2023.parquet')
pbp_data = pbp_data[['play_id', 'old_game_id', 'cp']]
pbp_data['play_id'] = pbp_data['play_id'].astype(int)
pbp_data['game_id'] = pbp_data['old_game_id'].astype(int)
supplementary_data = pd.read_csv('Data/supplementary_data.csv')
data = data.merge(pbp_data, on = ['game_id', 'play_id'], how = 'inner')

  supplementary_data = pd.read_csv('Data/supplementary_data.csv')


In [4]:
for col in data.select_dtypes(include=['int64']).columns:
    data[col] = data[col].astype(int)
for col in data.select_dtypes(include=['float64']).columns:
    data[col] = data[col].astype(float)
data['player_height'] = data['player_height'].apply(lambda x: int(x.split('-')[0])*12 + int(x.split('-')[1]))
data['seconds_left'] = data['game_clock'].apply(lambda x: int(x.split(':')[0])*60 + int(x.split(':')[1]))
extra_seconds = {1: 60*15*3, 2: 60*15*2, 3: 60*15, 4: 0}
data['seconds_left'] = data['seconds_left'] + data['quarter'].map(extra_seconds)
data['player_position'] = data['player_position'].astype('category')
data['play_action'] = data['play_action'].astype('category')
data['dropback_type'] = data['dropback_type'].astype('category')
data['team_coverage_type'] = data['team_coverage_type'].astype('category')
data['route_of_targeted_receiver'] = data['route_of_targeted_receiver'].astype('category')
data['qb_hit'] = data['qb_hit'].astype('category')
data['down'] = data['down'].astype('category')
data['outcome'] = data['outcome'].astype('category')

In [5]:
X = data.drop(['game_id', 'play_id', 'player_name', 'disruption', 'pd', 'int', 'quarter', 'game_clock',
               'pass_result', 'outcome', 'old_game_id', 'cp'], axis = 1)

enc = LabelEncoder()
y = data[['outcome', 'week']].copy()
y['outcome'] = enc.fit_transform(y['outcome']) #0 = complete, 1 = disruption, 2 = incomplete

In [6]:
# Given a model, validation and training data, will return validation accuracy, roc, incompletion accuracy + rmse
# as well as validation and predicted outcomes
def get_model_performance(file_path, data, training_data, validation_data, y_train, y_validate, week):
    loaded_model = xgb.Booster()
    loaded_model.load_model(file_path)

    validation_probabilities = loaded_model.predict(validation_data)
    validation_predictions = np.argmax(validation_probabilities, axis=1)
    validation_counts = np.bincount(validation_predictions)
    validation_accuracy = metrics.accuracy_score(validation_predictions, y_validate)
    incompletion_predictions = [1 if x == 2 else x for x in validation_predictions]
    validation_incompletions = [1 if x == 2 else x for x in y_validate.outcome]
    validation_incompletion_accuracy = metrics.accuracy_score(incompletion_predictions, validation_incompletions)
    incompletion_probability = 1 - data['cp']
    if week > 0:
        start_idx, end_idx = data.index[data['week'] == week][[0, -1]]
    else: 
        start_idx = 0
        end_idx = len(data)
    incompletion_probability = incompletion_probability[start_idx:end_idx+1]
    predicted_incompletion_probability = 1 - validation_probabilities[:, 0]
    incompletion_rmse = np.sqrt(np.mean((incompletion_probability - predicted_incompletion_probability)**2))

    validation_roc = metrics.roc_auc_score(y_validate, validation_probabilities, average='weighted', multi_class='ovr')
    validation_results = [(np.sum(y_validate == 0, axis=0).values[0]), (np.sum(y_validate == 2, axis=0).values[0]), (np.sum(y_validate == 1, axis=0).values[0])]
    predicted_results = [validation_counts[0], validation_counts[2], validation_counts[1]]
    return validation_accuracy, validation_roc, validation_incompletion_accuracy, incompletion_rmse, validation_results, predicted_results

In [7]:
# Given training data and parameters, will perform 18-fold cross validation and store model performance on each fold 
def train_with_cv(X, y, params, print_update, num_boost_round):
    results_df = pd.DataFrame(columns=['Week', 'accuracy', 'ROC','incompletion_accuracy', 'incompletion_RMSE',
                                       'val_results', 'pred_results'])
    for week in range(1, 18):
        if print_update: print(f'Training with Week {week} excluded')
        X_train = X[X['week'] != week].drop(['week'], axis = 1)
        X_validate = X[X['week'] == week].drop(['week'], axis = 1)
        y_train = y[y['week'] != week].drop(['week'], axis = 1)
        y_validate = y[y['week'] == week].drop(['week'], axis = 1)
        Xy_train = xgb.DMatrix(X_train, y_train, enable_categorical = True)
        Xy_validate = xgb.DMatrix(X_validate, y_validate, enable_categorical = True)
        
        booster = xgb.train(params, Xy_train, num_boost_round = num_boost_round)
        booster.save_model("models/temp_cv_model.json")

        acc, roc, incompletion_acc, rmse, val_results, pred_results = get_model_performance('temp_cv_model.json', data, Xy_train, Xy_validate, y_train, y_validate, week)
        results_df.loc[len(results_df), :] = [week, acc, roc, incompletion_acc, rmse, val_results, pred_results]
    return results_df

In [8]:
# Given 2 models, will print model performance for comparisons
def compare_models(m1, m2, m1_name, m2_name):
    print('Accuracy')
    print(np.round(np.mean(m1['accuracy']), 5))
    print(np.round(np.mean(m2['accuracy']), 5))
    
    print('ROC')
    print(np.round(np.mean(m1['ROC']), 5))
    print(np.round(np.mean(m2['ROC']), 5))
    
    print('Incompletion Accuracy')
    print(np.round(np.mean(m1['incompletion_accuracy']), 5))
    print(np.round(np.mean(m2['incompletion_accuracy']), 5))
    
    print('Incompletion RMSE')
    print(np.round(np.mean(m1['incompletion_RMSE']), 5))
    print(np.round(np.mean(m2['incompletion_RMSE']), 5))
    
    completions = np.sum([x[0] for x in m1['val_results']])
    incompletions = np.sum([x[1] for x in m1['val_results']])
    deflections = np.sum([x[2] for x in m1['val_results']])
    print(f'Actual Results: {completions} completions, {incompletions} incompletions, {deflections} deflections')
    
    completions = np.sum([x[0] for x in m1['pred_results']])
    incompletions = np.sum([x[1] for x in m1['pred_results']])
    deflections = np.sum([x[2] for x in m1['pred_results']])
    print(f'{m1_name}: {completions} completions, {incompletions} incompletions, {deflections} deflections')
    
    completions = np.sum([x[0] for x in m2['pred_results']])
    incompletions = np.sum([x[1] for x in m2['pred_results']])
    deflections = np.sum([x[2] for x in m2['pred_results']])
    print(f'{m2_name} Model: {completions} completions, {incompletions} incompletions, {deflections} deflections')

# Model training + Eval

In [9]:
# Default model
regular_model_results = train_with_cv(X, y, default_params, True, 30)
regular_model_results

Training with Week 1 excluded
Training with Week 2 excluded
Training with Week 3 excluded
Training with Week 4 excluded
Training with Week 5 excluded
Training with Week 6 excluded
Training with Week 7 excluded
Training with Week 8 excluded
Training with Week 9 excluded
Training with Week 10 excluded
Training with Week 11 excluded
Training with Week 12 excluded
Training with Week 13 excluded
Training with Week 14 excluded
Training with Week 15 excluded
Training with Week 16 excluded
Training with Week 17 excluded


Unnamed: 0,Week,accuracy,ROC,incompletion_accuracy,incompletion_RMSE,val_results,pred_results
0,1,0.634855,0.603402,0.647303,0.208656,"[154, 73, 14]","[191, 45, 5]"
1,2,0.72,0.665394,0.72,0.19842,"[166, 74, 10]","[210, 38, 2]"
2,3,0.683258,0.688705,0.687783,0.214195,"[136, 73, 12]","[185, 30, 6]"
3,4,0.641791,0.581692,0.646766,0.197986,"[138, 52, 11]","[169, 29, 3]"
4,5,0.589189,0.60017,0.616216,0.218413,"[107, 68, 10]","[150, 33, 2]"
5,6,0.691542,0.7405,0.696517,0.201728,"[122, 66, 13]","[165, 35, 1]"
6,7,0.687179,0.672728,0.702564,0.222971,"[128, 62, 5]","[164, 27, 4]"
7,8,0.639423,0.62524,0.644231,0.219875,"[133, 63, 12]","[171, 31, 6]"
8,9,0.630682,0.650912,0.653409,0.21574,"[107, 59, 10]","[144, 30, 2]"
9,10,0.617021,0.615713,0.62234,0.210477,"[104, 76, 8]","[159, 28, 1]"


In [10]:
# Drop game context
X_no_game = X.drop(['down', 'yards_to_go', 'seconds_left', 'defensive_point_diff'], axis = 1)
model_no_game_results = train_with_cv(X_no_game, y, default_params, True, 100)
model_no_game_results

Training with Week 1 excluded
Training with Week 2 excluded
Training with Week 3 excluded
Training with Week 4 excluded
Training with Week 5 excluded
Training with Week 6 excluded
Training with Week 7 excluded
Training with Week 8 excluded
Training with Week 9 excluded
Training with Week 10 excluded
Training with Week 11 excluded
Training with Week 12 excluded
Training with Week 13 excluded
Training with Week 14 excluded
Training with Week 15 excluded
Training with Week 16 excluded
Training with Week 17 excluded


Unnamed: 0,Week,accuracy,ROC,incompletion_accuracy,incompletion_RMSE,val_results,pred_results
0,1,0.614108,0.598517,0.618257,0.26935,"[154, 73, 14]","[200, 36, 5]"
1,2,0.724,0.69077,0.732,0.257388,"[166, 74, 10]","[209, 39, 2]"
2,3,0.683258,0.682316,0.696833,0.283728,"[136, 73, 12]","[167, 48, 6]"
3,4,0.656716,0.588433,0.676617,0.265539,"[138, 52, 11]","[169, 29, 3]"
4,5,0.583784,0.58584,0.605405,0.271496,"[107, 68, 10]","[154, 28, 3]"
5,6,0.676617,0.697308,0.681592,0.27178,"[122, 66, 13]","[164, 35, 2]"
6,7,0.692308,0.655733,0.707692,0.288617,"[128, 62, 5]","[149, 42, 4]"
7,8,0.625,0.651811,0.634615,0.288757,"[133, 63, 12]","[169, 32, 7]"
8,9,0.642045,0.653907,0.653409,0.293484,"[107, 59, 10]","[140, 33, 3]"
9,10,0.585106,0.580405,0.585106,0.269214,"[104, 76, 8]","[160, 26, 2]"


In [11]:
compare_models(regular_model_results, model_no_game_results, 'Regular Model', 'No Game Context')
# regular model accuracy slightly better

Accuracy
0.65725
0.65449
ROC
0.64485
0.63446
Incompletion Accuracy
0.66665
0.6641
Incompletion RMSE
0.20907
0.27271
Actual Results: 2201 completions, 1073 incompletions, 162 deflections
Regular Model: 2814 completions, 571 incompletions, 51 deflections
No Game Context Model: 2789 completions, 590 incompletions, 57 deflections


# Regular Model better, trying last second vs regular model

In [12]:
last_ten_frames = pd.read_csv('Data/processed/last_ten_frames_of_input.csv')
X_last_second = data.merge(last_ten_frames, on = ['game_id', 'play_id', 'player_name'], how = 'inner')
X_last_second = X_last_second.drop(['game_id', 'play_id', 'player_name', 'disruption', 'pd', 'int', 'quarter', 'game_clock',
               'pass_result', 'outcome', 'old_game_id', 'cp', 'player_role'], axis = 1)

model_last_second_results = train_with_cv(X_last_second, y, default_params, True, 100)
model_last_second_results

Training with Week 1 excluded
Training with Week 2 excluded
Training with Week 3 excluded
Training with Week 4 excluded
Training with Week 5 excluded
Training with Week 6 excluded
Training with Week 7 excluded
Training with Week 8 excluded
Training with Week 9 excluded
Training with Week 10 excluded
Training with Week 11 excluded
Training with Week 12 excluded
Training with Week 13 excluded
Training with Week 14 excluded
Training with Week 15 excluded
Training with Week 16 excluded
Training with Week 17 excluded


Unnamed: 0,Week,accuracy,ROC,incompletion_accuracy,incompletion_RMSE,val_results,pred_results
0,1,0.647303,0.605263,0.651452,0.274848,"[154, 73, 14]","[206, 31, 4]"
1,2,0.72,0.695816,0.728,0.255111,"[166, 74, 10]","[212, 37, 1]"
2,3,0.647059,0.680064,0.656109,0.291327,"[136, 73, 12]","[176, 39, 6]"
3,4,0.651741,0.627026,0.651741,0.271682,"[138, 52, 11]","[166, 31, 4]"
4,5,0.589189,0.582536,0.610811,0.275652,"[107, 68, 10]","[159, 24, 2]"
5,6,0.671642,0.756394,0.686567,0.253215,"[122, 66, 13]","[167, 32, 2]"
6,7,0.687179,0.653529,0.692308,0.261969,"[128, 62, 5]","[160, 34, 1]"
7,8,0.634615,0.614811,0.644231,0.284831,"[133, 63, 12]","[165, 37, 6]"
8,9,0.642045,0.688408,0.647727,0.273849,"[107, 59, 10]","[143, 32, 1]"
9,10,0.617021,0.657481,0.62766,0.270376,"[104, 76, 8]","[152, 35, 1]"


In [13]:
compare_models(regular_model_results, model_last_second_results, 'Regular Model', 'Last Second')
# regular model accuracy slightly better

Accuracy
0.65725
0.65346
ROC
0.64485
0.64708
Incompletion Accuracy
0.66665
0.66182
Incompletion RMSE
0.20907
0.27489
Actual Results: 2201 completions, 1073 incompletions, 162 deflections
Regular Model: 2814 completions, 571 incompletions, 51 deflections
Last Second Model: 2814 completions, 577 incompletions, 45 deflections


# Regular model best, grid search

In [14]:
models_trained = 0
num_round = 100
subsample  = 1
xg_gamma = 0
xg_alpha = 0
xg_lambda = 1
seed = 11052025

params = {'objective': 'multi:softprob',
          'num_class' : 3,
          'seed' : 11052025,
          'subsample' : 1,
          'gamma' : 0,
          'alpha' : 0,
          'lambda' : 1,
         }
num_boost_round = 30
grid_search_model_results = {}

for eta in [0.01, 0.05, 0.1, 0.2, 0.3]:
    for max_depth in [4, 5, 6, 7]:
        for min_child_weight in [170/(2301+1133+170), (1133 + 170)/(2301+1133+170), 1]:
            for colsample_by_tree in [0.3, 0.4, 0.5]: 
                params['eta'] = eta
                params['max_depth'] = max_depth
                params['min_child_weight'] = min_child_weight
                params['colsample_bytree'] = colsample_by_tree
                grid_search_id = f'{eta} {max_depth} {min_child_weight} {colsample_by_tree}'
                model_results = train_with_cv(X, y, params, False, num_boost_round)
                grid_search_model_results[grid_search_id] = {}
                grid_search_model_results[grid_search_id]['results'] = model_results
                grid_search_model_results[grid_search_id]['accuracy'] = np.round(np.mean(model_results['accuracy']), 5)

                models_trained+=1
                if models_trained % 10 == 0: print(f'{models_trained}/{5*4*3*3} trained')

10/180 trained
20/180 trained
30/180 trained
40/180 trained
50/180 trained
60/180 trained
70/180 trained
80/180 trained
90/180 trained
100/180 trained
110/180 trained
120/180 trained
130/180 trained
140/180 trained
150/180 trained
160/180 trained
170/180 trained
180/180 trained


In [22]:
with open('models/gridsearch_results.pickle', 'wb') as handle:
    pickle.dump(grid_search_model_results, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [23]:
with open('models/gridsearch_results.pickle', 'rb') as handle:
    grid_search_model_results = pickle.load(handle)

In [15]:
model_ids = []
model_accuracies = []
model_rocs = []
model_prediction_outcomes = []
model_incomplete_accuracy = []
model_incomplete_rmse = []
for model_id in grid_search_model_results.keys():
    model_ids.append(model_id)
    model_results = grid_search_model_results[model_id]['results']
    model_accuracies.append(np.round(np.mean(model_results['accuracy']), 5))
    model_rocs.append(np.round(np.mean(model_results['ROC']), 5))
    model_incomplete_accuracy.append(np.round(np.mean(model_results['incompletion_accuracy']), 5))
    model_incomplete_rmse.append(np.round(np.mean(model_results['incompletion_RMSE']), 5))
    completions = np.sum([x[0] for x in model_results['pred_results']])
    incompletions = np.sum([x[1] for x in model_results['pred_results']])
    deflections = np.sum([x[2] for x in model_results['pred_results']])
    model_prediction_outcomes.append([completions, incompletions, deflections])
    
    
grid_search_model_accuracies = pd.DataFrame({'model_id' : model_ids, 'accuracy' : model_accuracies, 'roc' : model_rocs, 
                                             'incomplete_accuracy' : model_incomplete_accuracy,
                                             'incomplete_rmse' : model_incomplete_rmse,
                                             'pred_results' : model_prediction_outcomes})
grid_search_model_accuracies = grid_search_model_accuracies.sort_values(by='accuracy', ascending=False).reset_index(drop = True)
grid_search_model_accuracies['completions'] = [x[0] for x in grid_search_model_accuracies['pred_results']]
grid_search_model_accuracies['incompletions'] = [x[1] for x in grid_search_model_accuracies['pred_results']]
grid_search_model_accuracies['deflections'] = [x[2] for x in grid_search_model_accuracies['pred_results']]

In [40]:
grid_search_model_accuracies.sort_values(by='accuracy', ascending=False) # best model by accuracy is first

Unnamed: 0,model_id,accuracy,roc,incomplete_accuracy,incomplete_rmse,pred_results,completions,incompletions,deflections
0,0.2 4 1 0.5,0.66931,0.65084,0.67389,0.14582,"[3041, 362, 33]",3041,362,33
1,0.2 4 0.36154273029966705 0.5,0.66820,0.65176,0.67337,0.14759,"[3035, 368, 33]",3035,368,33
2,0.01 6 1 0.5,0.66739,0.64862,0.67075,0.23753,"[3125, 301, 10]",3125,301,10
3,0.1 5 0.04716981132075472 0.5,0.66716,0.64549,0.67043,0.12423,"[3094, 317, 25]",3094,317,25
4,0.2 5 0.36154273029966705 0.5,0.66678,0.65095,0.67568,0.15868,"[2979, 419, 38]",2979,419,38
...,...,...,...,...,...,...,...,...,...
175,0.3 5 1 0.3,0.64776,0.63356,0.65586,0.17222,"[2895, 503, 38]",2895,503,38
176,0.3 6 1 0.4,0.64713,0.63754,0.65517,0.19100,"[2851, 553, 32]",2851,553,32
177,0.3 7 0.04716981132075472 0.4,0.64594,0.63592,0.65150,0.20880,"[2906, 502, 28]",2906,502,28
178,0.3 6 1 0.3,0.64478,0.63092,0.65205,0.18592,"[2889, 518, 29]",2889,518,29


# Train model on best performer of grid search

In [43]:
eta = 0.2
max_depth = 4
min_child_weight = 1
colsample_bytree = 0.5

params ={'objective': 'multi:softprob',
          'num_class' : 3,
          'seed' : 11052025,
          'subsample' : 1,
          'gamma' : 0,
          'alpha' : 0,
          'lambda' : 1,
          'eta' : eta,
          'max_depth' : max_depth,
          'min_child_weight' : min_child_weight,
          'colsample_bytree' : colsample_bytree
         }
X_train = X.drop(['week'], axis = 1)
y_train = y.drop(['week'], axis = 1)
Xy_train = xgb.DMatrix(X_train, y_train, enable_categorical = True)
booster = xgb.train(params, Xy_train, num_boost_round = 30)
booster.save_model("models/final_model_accuracy.json")

get_model_performance('models/final_model_accuracy.json', data, Xy_train, Xy_train, y_train, y_train, 0)

(0.7735849056603774,
 0.9071488477479336,
 0.7780244173140954,
 np.float64(0.1690531180819348),
 [np.int64(2301), np.int64(1133), np.int64(170)],
 [np.int64(3077), np.int64(442), np.int64(85)])

In [None]:
 validation_accuracy, validation_roc, validation_incompletion_accuracy, incompletion_rmse, validation_results, predicted_results