In [1]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report
from sklearn.datasets import make_classification
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
from matplotlib import pyplot
import matplotlib.pyplot as plt
from sklearn.calibration import CalibrationDisplay
from comet_ml import Experiment
import os
import configparser
np.random.seed(42)
from sklearn.metrics import confusion_matrix
from matplotlib.gridspec import GridSpec

import warnings
# NOTE(review): this silences every warning for the whole kernel session,
# deprecation notices from libraries included; consider narrowing the filter
# to the specific categories/modules that are known to be noisy.
warnings.filterwarnings('ignore')

In [2]:
import xgboost as xgb
from xgboost import XGBClassifier

In [3]:
# Comet credentials are read from an INI file so that no secret lives in the notebook.
CONFIG_PATH = '../configfile.ini'
config = configparser.ConfigParser()
# ConfigParser.read() skips missing/unreadable files silently and returns the list
# of files it actually parsed, so an empty result means the settings were not loaded.
# Fail here with a clear message instead of a later KeyError on the section name.
if not config.read(CONFIG_PATH):
    raise FileNotFoundError(f"Comet config file not found or unreadable: {CONFIG_PATH}")

type_env = "comet_ml_prod"  # INI section that holds the Comet settings
COMET_API_KEY = config[type_env]['api_key']
COMET_PROJECT_NAME = config[type_env]['project_name_advanced']
COMET_WORKSPACE = config[type_env]['workspace']

# Start a Comet experiment for this notebook and label it.
comet_exp_obj = Experiment(api_key=COMET_API_KEY,
                           project_name=COMET_PROJECT_NAME,
                           workspace=COMET_WORKSPACE,
                           log_code=True
                          )
comet_exp_obj.set_name(name="xgboost")
# Attach the notebook file itself to the experiment; the call's return value is the cell output.
comet_exp_obj.log_notebook("11_advanced_models.ipynb")

COMET INFO: ---------------------------
COMET INFO: Comet.ml Experiment Summary
COMET INFO: ---------------------------
COMET INFO:   Data:
COMET INFO:     display_summary_level : 1
COMET INFO:     url                   : https://www.comet.com/data-science-workspace/advanced-models/8aa767f2703d4fcc8ca1376b6a22698b
COMET INFO:   Others:
COMET INFO:     Name : Baseline Models
COMET INFO:   Uploads:
COMET INFO:     conda-environment-definition : 1
COMET INFO:     conda-info                   : 1
COMET INFO:     conda-specification          : 1
COMET INFO:     environment details          : 1
COMET INFO:     filename                     : 1
COMET INFO:     git metadata                 : 1
COMET INFO:     git-patch (uncompressed)     : 1 (4.41 KB)
COMET INFO:     installed packages           : 1
COMET INFO:     notebook                     : 2 (2.16 KB)
COMET INFO:     source_code                  : 1
COMET INFO: ---------------------------
COMET INFO: Experiment is live on comet.ml https:/

{'web': 'https://www.comet.com/api/asset/download?assetId=b65aedc7b2314986a42ab2aa2b3c0afb&experimentKey=2d037d7425ea4275ac44b3da8a7ec91a',
 'api': 'https://www.comet.com/api/rest/v2/experiment/asset/get-asset?assetId=b65aedc7b2314986a42ab2aa2b3c0afb&experimentKey=2d037d7425ea4275ac44b3da8a7ec91a',
 'assetId': 'b65aedc7b2314986a42ab2aa2b3c0afb'}

In [4]:
# Close the Comet experiment.
# NOTE(review): this cell sits directly after the experiment is created, before any
# model in the cells visible below is trained or evaluated, and none of those cells
# log to the experiment — confirm this is intended, or move the call to the end.
comet_exp_obj.end()

COMET INFO: ---------------------------
COMET INFO: Comet.ml Experiment Summary
COMET INFO: ---------------------------
COMET INFO:   Data:
COMET INFO:     display_summary_level : 1
COMET INFO:     url                   : https://www.comet.com/data-science-workspace/advanced-models/2d037d7425ea4275ac44b3da8a7ec91a
COMET INFO:   Others:
COMET INFO:     Name : xgboost
COMET INFO:   Uploads:
COMET INFO:     conda-environment-definition : 1
COMET INFO:     conda-info                   : 1
COMET INFO:     conda-specification          : 1
COMET INFO:     environment details          : 1
COMET INFO:     filename                     : 1
COMET INFO:     git metadata                 : 1
COMET INFO:     git-patch (uncompressed)     : 1 (4.41 KB)
COMET INFO:     installed packages           : 1
COMET INFO:     notebook                     : 2 (3.64 KB)
COMET INFO:     source_code                  : 1
COMET INFO: ---------------------------
COMET INFO: Uploading metrics, params, and assets to Comet

In [9]:
# Directory holding the pickled train/validation split.
DATASET_DIR = "../data/dataset"

# NOTE: unpickling can execute arbitrary code; only load files produced by this project.
# The feature frames still contain the `is_goal` column (presumably the target —
# confirm), so it is dropped to keep it out of the model inputs.
x_train = pd.read_pickle(f"{DATASET_DIR}/x_train.pkl").drop(columns='is_goal')
x_val = pd.read_pickle(f"{DATASET_DIR}/x_val.pkl").drop(columns='is_goal')
y_train = pd.read_pickle(f"{DATASET_DIR}/y_train.pkl")
y_val = pd.read_pickle(f"{DATASET_DIR}/y_val.pkl")

# Train and validation stacked together. DataFrame.append / Series.append were
# removed in pandas 2.0; pd.concat is the supported equivalent and, like append,
# keeps the original index labels.
x = pd.concat([x_train, x_val])
y = pd.concat([y_train, y_val])

In [10]:
# Quick sanity check: the feature frame and the label vector should have the same number of rows.
x_train.shape,y_train.shape

((252580, 30), (252580,))

In [17]:
#dtrain = xgb.DMatrix(data=x,label=y)

### XGB with distance and angle as features

In [None]:
# Draft hyper-parameter search space, disabled by wrapping it in a string literal.
# `hp` is not imported in any visible cell (presumably hyperopt's `hp` — confirm),
# and this cell has no execution count, so nothing below depends on it.
'''space={'max_depth': hp.quniform("max_depth", 3, 18, 1),
        'gamma': hp.uniform ('gamma', 1,9),
        'reg_alpha' : hp.quniform('reg_alpha', 40,180,1),
        'reg_lambda' : hp.uniform('reg_lambda', 0,1),
        'colsample_bytree' : hp.uniform('colsample_bytree', 0.5,1),
        'min_child_weight' : hp.quniform('min_child_weight', 0, 10, 1),
        'n_estimators': 180,
        'seed': 0
    }
'''

In [5]:
# Hyper-parameters for the two-feature XGBoost model.
params = dict(
    objective='binary:logistic',
    max_depth=4,
    alpha=10,
    learning_rate=1.0,
    n_estimators=5,
)

# Fit on the `angle` and `distance` columns only, passed as plain NumPy arrays.
model = XGBClassifier(**params)
model.fit(
    x_train[['angle', 'distance']].to_numpy(),
    y_train.to_numpy(),
)


#xgb_cv = xgb.cv(dtrain=dtrain, params=params, nfold=5,stratified=True,
#                    num_boost_round=50, early_stopping_rounds=10, metrics="auc", as_pandas=True, seed=42)

In [6]:
# Predict on the validation split using the same two columns the model was fitted on,
# then print the per-class precision/recall/F1 summary.
y_pred = model.predict(
    x_val[['angle', 'distance']].to_numpy()
)
print(
    classification_report(y_true=y_val.to_numpy(), y_pred=y_pred)
)

              precision    recall  f1-score   support

           0       0.91      1.00      0.95     57445
           1       0.20      0.00      0.00      5700

    accuracy                           0.91     63145
   macro avg       0.55      0.50      0.48     63145
weighted avg       0.85      0.91      0.87     63145



In [7]:
#xgb.plot_importance(xgb_cv)
#plt.figure(figsize = (16, 12))
#plt.show()

### XGB with all features

In [12]:
# List every column of the combined train+validation feature frame.
# NOTE(review): identifier-like columns such as `game_id` appear in this list —
# verify they are excluded before fitting a model on "all features".
x.columns

Index(['game_id', 'distance', 'angle', 'empty_net', 'season', 'game_period',
       'distance_from_last_event', 'rebound', 'change_in_shot_angle', 'speed',
       'x_coordinate', 'y_coordinate', 'game_seconds', 'shot_type_Backhand',
       'shot_type_Deflected', 'shot_type_NA', 'shot_type_Slap Shot',
       'shot_type_Snap Shot', 'shot_type_Tip-In', 'shot_type_Wrap-around',
       'shot_type_Wrist Shot', 'last_event_type_BLOCKED_SHOT',
       'last_event_type_FACEOFF', 'last_event_type_GIVEAWAY',
       'last_event_type_GOAL', 'last_event_type_HIT',
       'last_event_type_MISSED_SHOT', 'last_event_type_PENALTY',
       'last_event_type_SHOT', 'last_event_type_TAKEAWAY'],
      dtype='object')

In [13]:
# Display the combined feature frame; the notebook repr elides middle rows and columns.
# NOTE(review): `x.head()` would give a smaller, more stable output for a shared notebook.
x

Unnamed: 0,game_id,distance,angle,empty_net,season,game_period,distance_from_last_event,rebound,change_in_shot_angle,speed,...,shot_type_Wrist Shot,last_event_type_BLOCKED_SHOT,last_event_type_FACEOFF,last_event_type_GIVEAWAY,last_event_type_GOAL,last_event_type_HIT,last_event_type_MISSED_SHOT,last_event_type_PENALTY,last_event_type_SHOT,last_event_type_TAKEAWAY
90378,2016020184,9.899495,-45.000000,0,2016,2,51.623638,False,0.0,1.985525,...,1,0,0,0,0,1,0,0,0,0
264101,2018020273,38.275318,-33.274888,0,2018,1,126.321020,False,0.0,2.870932,...,1,0,0,0,0,0,1,0,0,0
34219,2015020574,18.601075,53.746162,0,2015,1,144.668587,False,0.0,7.614136,...,0,0,0,0,0,0,1,0,0,0
114286,2016020577,49.040799,-2.337306,0,2016,1,40.000000,False,0.0,4.444444,...,0,0,0,0,0,0,1,0,0,0
139804,2016020993,33.241540,-43.781125,0,2016,3,45.177428,False,0.0,1.613480,...,1,0,1,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
219150,2017020924,54.708317,30.784147,0,2017,2,61.000000,False,0.0,5.545455,...,0,0,0,1,0,0,0,0,0,0
302279,2018020879,68.352030,-20.556045,0,2018,3,94.021274,False,0.0,8.547389,...,1,0,1,0,0,0,0,0,0,0
70404,2015021173,45.541190,19.230672,0,2015,2,115.277925,False,0.0,9.606494,...,0,0,0,0,0,0,1,0,0,0
4041,2015020068,29.068884,-40.815084,0,2015,3,7.211103,False,0.0,3.605551,...,1,1,0,0,0,0,0,0,0,0
