In [113]:
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.pipeline import Pipeline
from xgboost import XGBClassifier

In [114]:
hustle = pd.read_csv('model_data.csv')
hustle['WINNER_cat'] = hustle['WINNER'].astype('category')

In [115]:
away = hustle.iloc[::2]

In [116]:
home = hustle[1::2]

In [117]:
X_hr = home[['DEFLECTIONS_RATIO', 'CONTEST_RATE', 'CONTEST_RATE_2', 'CONTEST_RATE_3', 'CHARGES_RATIO', 'SCREEN_AST_RATIO', 'SCREEN_AST_PTS_RATIO', 'LOOSE_RATIO', 'OFF_BOXOUT_RATIO', 'DEF_BOXOUT_RATIO', 'BOXOUT_TM_RBS_RATIO']].values
y_hr = home['WINNER'].values

X_hr_train, X_hr_test, y_hr_train, y_hr_test = train_test_split(X_hr, y_hr, stratify = y_hr, random_state = 858, test_size = .3)

In [118]:
X_ar = away[['DEFLECTIONS_RATIO', 'CONTEST_RATE', 'CONTEST_RATE_2', 'CONTEST_RATE_3', 'CHARGES_RATIO', 'SCREEN_AST_RATIO', 'SCREEN_AST_PTS_RATIO', 'LOOSE_RATIO', 'OFF_BOXOUT_RATIO', 'DEF_BOXOUT_RATIO', 'BOXOUT_TM_RBS_RATIO']].values
y_ar = away['WINNER'].values

X_ar_train, X_ar_test, y_ar_train, y_ar_test = train_test_split(X_ar, y_ar, stratify = y_ar, random_state = 858, test_size = .3)

In [119]:
X_hd = home[['DEFLECTIONS_DIFF', 'CONTEST_RATE', 'CONTEST_RATE_2', 'CONTEST_RATE_3', 'CHARGES_DIFF', 'SCREEN_AST_DIFF', 'SCREEN_AST_PTS_DIFF', 'LOOSE_DIFFERENCE', 'OFF_BOXOUT_DIFF', 'DEF_BOXOUT_DIFF', 'BOXOUT_TM_RBS_DIFF']].values
y_hd = home['WINNER'].values

X_hd_train, X_hd_test, y_hd_train, y_hd_test = train_test_split(X_hd, y_hd, stratify = y_hd, random_state = 858, test_size = .3)

In [120]:
X_ad = away[['DEFLECTIONS_DIFF', 'CONTEST_RATE', 'CONTEST_RATE_2', 'CONTEST_RATE_3', 'CHARGES_DIFF', 'SCREEN_AST_DIFF', 'SCREEN_AST_PTS_DIFF', 'LOOSE_DIFFERENCE', 'OFF_BOXOUT_DIFF', 'DEF_BOXOUT_DIFF', 'BOXOUT_TM_RBS_DIFF']].values
y_ad = away['WINNER'].values

X_ad_train, X_ad_test, y_ad_train, y_ad_test = train_test_split(X_ad, y_ad, stratify = y_ad, random_state = 858, test_size = .3)

In [121]:
param_grid = {'xgb__learning_rate': [.05, .1, .15],
              'xgb__max_depth': [2, 3, 4],
              'xgb__n_estimators': [19, 20, 21, 22, 23],
              'xgb__subsample': [.4, .5, .6],
              'xgb__use_label_encoder': [False],
              'xgb__eval_metric': ['logloss']}

modXGB = Pipeline([('xgb', XGBClassifier())])

gs = GridSearchCV(estimator = modXGB, param_grid = param_grid, cv = 5)
gs.fit(X_hr_train, y_hr_train)

In [87]:
# Fit XGBClassifier with tuned parameters
xgb = Pipeline([('xgb', XGBClassifier(
    learning_rate = gs.best_params_['xgb__learning_rate'],
    max_depth = gs.best_params_['xgb__max_depth'], 
    n_estimators = gs.best_params_['xgb__n_estimators'],
    subsample = gs.best_params_['xgb__subsample'],
    use_label_encoder = gs.best_params_['xgb__use_label_encoder'],
    eval_metric = gs.best_params_['xgb__eval_metric']))])

xgb.fit(X_hr_train, y_hr_train)

# Predictions and predicted probabilities for training set
xgb_hr_train = xgb.predict(X_hr_train)
xgb_hr_train_prob = xgb.predict_proba(X_hr_train)[:,1]

# Predictions and predicted probabilities for test set
xgb_hr_test = xgb.predict(X_hr_test)
xgb_hr_test_prob = xgb.predict_proba(X_hr_test)[:,1]

In [85]:
print(metrics.classification_report(y_hr_train, xgb_hr_train))

              precision    recall  f1-score   support

           0       0.78      0.59      0.67       696
           1       0.74      0.88      0.80       921

    accuracy                           0.75      1617
   macro avg       0.76      0.73      0.74      1617
weighted avg       0.76      0.75      0.74      1617



In [88]:
print(metrics.classification_report(y_hr_test, xgb_hr_test))

              precision    recall  f1-score   support

           0       0.53      0.41      0.46       298
           1       0.62      0.73      0.67       395

    accuracy                           0.59       693
   macro avg       0.57      0.57      0.56       693
weighted avg       0.58      0.59      0.58       693



In [None]:
param_grid = {'xgb__learning_rate': [.05, .1, .15],
              'xgb__max_depth': [2, 3, 4],
              'xgb__n_estimators': [19, 20, 21, 22, 23],
              'xgb__subsample': [.4, .5, .6],
              'xgb__use_label_encoder': [False],
              'xgb__eval_metric': ['logloss']}

modXGB = Pipeline([('xgb', XGBClassifier())])

gs = GridSearchCV(estimator = modXGB, param_grid = param_grid, cv = 5)
gs.fit(X_ar_train, y_ar_train)

In [89]:
# Fit XGBClassifier with tuned parameters
xgb = Pipeline([('xgb', XGBClassifier(
    learning_rate = gs.best_params_['xgb__learning_rate'],
    max_depth = gs.best_params_['xgb__max_depth'], 
    n_estimators = gs.best_params_['xgb__n_estimators'],
    subsample = gs.best_params_['xgb__subsample'],
    use_label_encoder = gs.best_params_['xgb__use_label_encoder'],
    eval_metric = gs.best_params_['xgb__eval_metric']))])

xgb.fit(X_ar_train, y_ar_train)

# Predictions and predicted probabilities for training set
xgb_ar_train = xgb.predict(X_ar_train)
xgb_ar_train_prob = xgb.predict_proba(X_ar_train)[:,1]

# Predictions and predicted probabilities for test set
xgb_ar_test = xgb.predict(X_ar_test)
xgb_ar_test_prob = xgb.predict_proba(X_ar_test)[:,1]

In [90]:
print(metrics.classification_report(y_ar_train, xgb_ar_train))

              precision    recall  f1-score   support

           0       0.74      0.86      0.80       921
           1       0.77      0.61      0.68       696

    accuracy                           0.75      1617
   macro avg       0.76      0.73      0.74      1617
weighted avg       0.75      0.75      0.75      1617



In [91]:
print(metrics.classification_report(y_ar_test, xgb_ar_test))

              precision    recall  f1-score   support

           0       0.64      0.76      0.69       395
           1       0.58      0.44      0.50       298

    accuracy                           0.62       693
   macro avg       0.61      0.60      0.60       693
weighted avg       0.61      0.62      0.61       693



In [None]:
param_grid = {'xgb__learning_rate': [.05, .1, .15],
              'xgb__max_depth': [2, 3, 4],
              'xgb__n_estimators': [19, 20, 21, 22, 23],
              'xgb__subsample': [.4, .5, .6],
              'xgb__use_label_encoder': [False],
              'xgb__eval_metric': ['logloss']}

modXGB = Pipeline([('xgb', XGBClassifier())])

gs = GridSearchCV(estimator = modXGB, param_grid = param_grid, cv = 5)
gs.fit(X_hd_train, y_hd_train)

In [92]:
# Fit XGBClassifier with tuned parameters
xgb = Pipeline([('xgb', XGBClassifier(
    learning_rate = gs.best_params_['xgb__learning_rate'],
    max_depth = gs.best_params_['xgb__max_depth'], 
    n_estimators = gs.best_params_['xgb__n_estimators'],
    subsample = gs.best_params_['xgb__subsample'],
    use_label_encoder = gs.best_params_['xgb__use_label_encoder'],
    eval_metric = gs.best_params_['xgb__eval_metric']))])

xgb.fit(X_hd_train, y_hd_train)

# Predictions and predicted probabilities for training set
xgb_hd_train = xgb.predict(X_hd_train)
xgb_hd_train_prob = xgb.predict_proba(X_hd_train)[:,1]

# Predictions and predicted probabilities for test set
xgb_hd_test = xgb.predict(X_hd_test)
xgb_hd_test_prob = xgb.predict_proba(X_hd_test)[:,1]

In [95]:
print(metrics.classification_report(y_hd_train, xgb_hd_train))

              precision    recall  f1-score   support

           0       0.75      0.57      0.65       696
           1       0.72      0.86      0.79       921

    accuracy                           0.73      1617
   macro avg       0.74      0.71      0.72      1617
weighted avg       0.74      0.73      0.73      1617



In [94]:
print(metrics.classification_report(y_hd_test, xgb_hd_test))

              precision    recall  f1-score   support

           0       0.51      0.39      0.44       298
           1       0.61      0.72      0.66       395

    accuracy                           0.58       693
   macro avg       0.56      0.55      0.55       693
weighted avg       0.57      0.58      0.56       693



In [None]:
param_grid = {'xgb__learning_rate': [.05, .1, .15],
              'xgb__max_depth': [2, 3, 4],
              'xgb__n_estimators': [19, 20, 21, 22, 23],
              'xgb__subsample': [.4, .5, .6],
              'xgb__use_label_encoder': [False],
              'xgb__eval_metric': ['logloss']}

modXGB = Pipeline([('xgb', XGBClassifier())])

gs = GridSearchCV(estimator = modXGB, param_grid = param_grid, cv = 5)
gs.fit(X_ad_train, y_ad_train)

In [96]:
# Fit XGBClassifier with tuned parameters
xgb = Pipeline([('xgb', XGBClassifier(
    learning_rate = gs.best_params_['xgb__learning_rate'],
    max_depth = gs.best_params_['xgb__max_depth'], 
    n_estimators = gs.best_params_['xgb__n_estimators'],
    subsample = gs.best_params_['xgb__subsample'],
    use_label_encoder = gs.best_params_['xgb__use_label_encoder'],
    eval_metric = gs.best_params_['xgb__eval_metric']))])

xgb.fit(X_hd_train, y_hd_train)

# Predictions and predicted probabilities for training set
xgb_ad_train = xgb.predict(X_ad_train)
xgb_ad_train_prob = xgb.predict_proba(X_ad_train)[:,1]

# Predictions and predicted probabilities for test set
xgb_ad_test = xgb.predict(X_ad_test)
xgb_ad_test_prob = xgb.predict_proba(X_ad_test)[:,1]

In [98]:
print(metrics.classification_report(y_ad_train, xgb_ad_train))

              precision    recall  f1-score   support

           0       0.71      0.41      0.52       921
           1       0.50      0.78      0.61       696

    accuracy                           0.57      1617
   macro avg       0.60      0.59      0.56      1617
weighted avg       0.62      0.57      0.55      1617



In [99]:
print(metrics.classification_report(y_ad_test, xgb_ad_test))

              precision    recall  f1-score   support

           0       0.69      0.38      0.49       395
           1       0.49      0.78      0.60       298

    accuracy                           0.55       693
   macro avg       0.59      0.58      0.54       693
weighted avg       0.60      0.55      0.54       693



In [100]:
gb = GradientBoostingClassifier().fit(X_hr_train, y_hr_train)

# Predictions and predicted probabilities for training set
gb_hr_train = gb.predict(X_hr_train)
gb_hr_train_prob = gb.predict_proba(X_hr_train)[:,1]

# Predictions and predicted probabilities for test set
gb_hr_test = gb.predict(X_hr_test)
gb_hr_test_prob = gb.predict_proba(X_hr_test)[:,1]

In [101]:
print(metrics.classification_report(y_hr_train, gb_hr_train))

              precision    recall  f1-score   support

           0       0.82      0.65      0.72       696
           1       0.77      0.90      0.83       921

    accuracy                           0.79      1617
   macro avg       0.80      0.77      0.78      1617
weighted avg       0.79      0.79      0.78      1617



In [102]:
print(metrics.classification_report(y_hr_test, gb_hr_test))

              precision    recall  f1-score   support

           0       0.53      0.40      0.46       298
           1       0.62      0.73      0.67       395

    accuracy                           0.59       693
   macro avg       0.58      0.57      0.57       693
weighted avg       0.58      0.59      0.58       693



In [103]:
gb = GradientBoostingClassifier().fit(X_ar_train, y_ar_train)

# Predictions and predicted probabilities for training set
gb_ar_train = gb.predict(X_ar_train)
gb_ar_train_prob = gb.predict_proba(X_ar_train)[:,1]

# Predictions and predicted probabilities for test set
gb_ar_test = gb.predict(X_ar_test)
gb_ar_test_prob = gb.predict_proba(X_ar_test)[:,1]

In [104]:
print(metrics.classification_report(y_ar_train, gb_ar_train))

              precision    recall  f1-score   support

           0       0.78      0.92      0.84       921
           1       0.86      0.66      0.74       696

    accuracy                           0.80      1617
   macro avg       0.82      0.79      0.79      1617
weighted avg       0.81      0.80      0.80      1617



In [105]:
print(metrics.classification_report(y_ar_test, gb_ar_test))

              precision    recall  f1-score   support

           0       0.63      0.77      0.69       395
           1       0.57      0.41      0.48       298

    accuracy                           0.61       693
   macro avg       0.60      0.59      0.58       693
weighted avg       0.61      0.61      0.60       693



In [106]:
gb = GradientBoostingClassifier().fit(X_hd_train, y_hd_train)

# Predictions and predicted probabilities for training set
gb_hd_train = gb.predict(X_hd_train)
gb_hd_train_prob = gb.predict_proba(X_hd_train)[:,1]

# Predictions and predicted probabilities for test set
gb_hd_test = gb.predict(X_hd_test)
gb_hd_test_prob = gb.predict_proba(X_hd_test)[:,1]

In [107]:
print(metrics.classification_report(y_hd_train, gb_hd_train))

              precision    recall  f1-score   support

           0       0.80      0.61      0.69       696
           1       0.75      0.89      0.81       921

    accuracy                           0.77      1617
   macro avg       0.78      0.75      0.75      1617
weighted avg       0.77      0.77      0.76      1617



In [108]:
print(metrics.classification_report(y_hd_test, gb_hd_test))

              precision    recall  f1-score   support

           0       0.54      0.38      0.44       298
           1       0.62      0.76      0.68       395

    accuracy                           0.59       693
   macro avg       0.58      0.57      0.56       693
weighted avg       0.58      0.59      0.58       693



In [109]:
gb = GradientBoostingClassifier().fit(X_ad_train, y_ad_train)

# Predictions and predicted probabilities for training set
gb_ad_train = gb.predict(X_ad_train)
gb_ad_train_prob = gb.predict_proba(X_ad_train)[:,1]

# Predictions and predicted probabilities for test set
gb_ad_test = gb.predict(X_ad_test)
gb_ad_test_prob = gb.predict_proba(X_ad_test)[:,1]

In [110]:
print(metrics.classification_report(y_ad_train, gb_ad_train))

              precision    recall  f1-score   support

           0       0.75      0.90      0.82       921
           1       0.82      0.59      0.69       696

    accuracy                           0.77      1617
   macro avg       0.78      0.75      0.75      1617
weighted avg       0.78      0.77      0.76      1617



In [112]:
print(metrics.classification_report(y_ad_test, gb_ad_test))

              precision    recall  f1-score   support

           0       0.61      0.76      0.68       395
           1       0.53      0.36      0.43       298

    accuracy                           0.59       693
   macro avg       0.57      0.56      0.55       693
weighted avg       0.58      0.59      0.57       693

