## **SHAP explanation for XGBoost**

### **Regular matches**

In [0]:
pip install shap

In [0]:
import os
import numpy as np
import pandas as pd
from xgboost import XGBClassifier

from sklearn.model_selection import KFold, RandomizedSearchCV, GridSearchCV, train_test_split, StratifiedKFold
from sklearn.metrics import confusion_matrix, roc_curve, roc_auc_score, auc

import statistics as st

import warnings
warnings.filterwarnings('ignore')
from datetime import datetime

import matplotlib.pylab as pl
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import shap

# Load SHAP's JavaScript visualization code into the notebook.
shap.initjs()

# Seed NumPy's global random number generator.
np.random.seed(3)

In [0]:
# load JS visualization code to notebook
# NOTE(review): the imports cell above already calls shap.initjs(); this call repeats it.
shap.initjs()

In [0]:
def timer(start_time=None):
    """Start a stopwatch, or report the time elapsed since ``start_time``.

    Called without a (truthy) ``start_time`` it returns ``datetime.now()``.
    Called with a previously returned timestamp it prints the elapsed time
    split into hours, minutes and seconds, and returns ``None``.
    """
    if start_time:
        elapsed = (datetime.now() - start_time).total_seconds()
        hours, remainder = divmod(elapsed, 3600)
        minutes, seconds = divmod(remainder, 60)
        print('\n Time taken: %i hours %i minutes and %s seconds.' % (hours, minutes, round(seconds, 2)))
        return None
    return datetime.now()

In [0]:
# Read the regular-match feature table.
# NOTE(review): absolute path under /content — presumably a Colab upload location; adjust when running elsewhere.
feature_regular_df = pd.read_csv("/content/dota2_regular-new_features.csv")

In [0]:
# Drop the match identifier column so that it is not used as a feature.
feature_regular_df = feature_regular_df.drop(columns=['match_id'])

In [0]:
# Preview the first rows of the feature table.
feature_regular_df.head()

In [0]:
# Number of rows per win_label value (class balance).
feature_regular_df['win_label'].value_counts()

In [0]:
# Number of columns left in the table (the win_label column included).
len(feature_regular_df.columns)

**Model building, training, and evaluation**

In [0]:
# Split the table into predictors and label *by column name*.
# `features` is later passed to xgboost as the feature names of `X`, so `X` must
# contain exactly those columns in that order; slicing by position would only
# agree with `features` when win_label happens to be the last column.
target = 'win_label'
features = [c for c in feature_regular_df.columns if c != target]
X, y = feature_regular_df[features], feature_regular_df[target]

### Randomized search to tune hyperparameters

In [0]:
# Candidate values per XGBoost hyper-parameter; the search below samples
# combinations from these lists.
params_search = dict(
    learning_rate=[0.01, 0.05, 0.1, 0.2],
    gamma=[0.5, 1, 1.5, 2, 5],
    subsample=[0.6, 0.8, 1.0],
    colsample_bytree=[0.3, 0.5, 0.7],
    max_depth=[4, 6, 8, 10],
    n_estimators=[10, 50, 100],
)

In [0]:
# Base estimator handed to the hyper-parameter search.
# NOTE(review): `silent` and `nthread` look like legacy XGBoost argument names
# (newer releases use `verbosity` / `n_jobs`) — confirm against the installed version.
xgb = XGBClassifier(objective='binary:logistic', silent=False, nthread=4)

In [0]:
# Hold out 20% of the matches, keeping the win/loss ratio equal in both parts.
# random_state is pinned so that re-running this cell reproduces the same split
# instead of depending on how much of NumPy's global random stream was consumed.
train, test, train_labels, test_labels = train_test_split(
    X, y,
    stratify=y,
    test_size=0.2,
    random_state=3,
)

In [0]:
# Randomized search over `params_search`: `param_comb` sampled candidates, each
# scored by ROC AUC on `folds` stratified, shuffled folds of the training split.
folds = 5
param_comb = 100

skf = StratifiedKFold(n_splits=folds, shuffle=True, random_state=1001)

random_search = RandomizedSearchCV(
    xgb,
    param_distributions=params_search,
    n_iter=param_comb,
    scoring='roc_auc',
    n_jobs=4,
    cv=skf.split(train, train_labels),
    verbose=True,
)

# timer() returns the start timestamp; timer(start_time) prints the elapsed time.
start_time = timer()
random_search.fit(train, train_labels)
timer(start_time)

Fitting 5 folds for each of 100 candidates, totalling 500 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed: 15.2min
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed: 70.5min


In [0]:
# Show the best parameter combination found by the search.
print('\n Best hyperparameters:', random_search.best_params_, sep='\n')


 Best hyperparameters:
{'subsample': 1.0, 'n_estimators': 100, 'max_depth': 4, 'learning_rate': 0.05, 'gamma': 1, 'colsample_bytree': 0.3}


**Best hyperparameters regular:** {'subsample': 0.8, 'n_estimators': 100, 'max_depth': 4, 'learning_rate': 0.1, 'gamma': 0.5, 'colsample_bytree': 0.5}

### Use best found hyperparameters

In [0]:
# 10 shuffled folds for the final evaluation. The shuffle is seeded (same value
# as the StratifiedKFold used for tuning) so that re-running the notebook yields
# the same folds and therefore the same last-fold model for the SHAP analysis.
kfolds = KFold(n_splits=10, shuffle=True, random_state=1001)

In [0]:
# Booster parameters for the final model; the values match the
# "Best hyperparameters regular" note above.
param = dict(
    objective='binary:logistic',
    eval_metric='auc',
    subsample=0.8,
    colsample_bytree=0.5,
    learning_rate=0.1,
    max_depth=4,
    gamma=0.5,
)

# Number of boosting rounds (the n_estimators of the search).
num_round = 100

In [17]:
# Cross-validate the tuned booster over `kfolds`.
# The model and held-out split of the LAST fold (`bst`, `X_test`, `y_test`) stay
# in scope after the loop and are what the SHAP cells below explain.

# Imported without the usual `xgb` alias: in this notebook `xgb` already names
# the XGBClassifier used by the hyper-parameter search, and aliasing the module
# to it would break that cell on re-run.
import xgboost

# Named `fold_aucs` rather than `auc` so sklearn.metrics.auc is not shadowed.
fold_aucs = []
for train_idx, test_idx in kfolds.split(X):
    X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
    X_test, y_test = X.iloc[test_idx], y.iloc[test_idx]

    # Ratio of non-positive to positive training labels in this fold
    # (assumes win_label is 0/1 — TODO confirm).
    param['scale_pos_weight'] = (y_train.size - y_train.sum()) / y_train.sum()

    xg_train = xgboost.DMatrix(
        X_train.values, feature_names=features, label=y_train.values
    )
    xg_test = xgboost.DMatrix(
        X_test.values, feature_names=features, label=y_test.values
    )

    watchlist = [(xg_train, 'train'), (xg_test, 'test')]
    bst = xgboost.train(param, xg_train, num_round, watchlist, verbose_eval=False)
    preds = bst.predict(xg_test)

    fold_aucs.append(roc_auc_score(y_test, preds))

'Median AUC: {:.04f}'.format(st.median(fold_aucs))


'Median AUC: 0.6500'

**SHAP values**

In [0]:
  # Explain the booster trained in the last CV fold on that fold's held-out rows
  # (`bst` and `X_test` are whatever the final loop iteration left in scope).
  # Use only the last x_test as there is little variance between the folds
  explainer = shap.TreeExplainer(bst)
  shap_values = explainer.shap_values(X_test)

  # shap.summary_plot(shap_values, X_test)  
  
  # Variable importance-like plot.
  shap.summary_plot(shap_values, X_test, plot_type="bar")

In [0]:
# np.abs(shap_values).mean(0) summarizes the global importance of each feature:
# the mean, over all explained rows, of the absolute change in the prediction
# attributed to that feature.

np.abs(shap_values).mean(0)

In [0]:
# Column labels of the held-out feature frame.
X_test.columns

In [0]:
# Dimensions of the SHAP value array.
shap_values.shape

In [0]:
# Keep an independent copy: a plain assignment would only alias the same array,
# so any later in-place change to `shap_values` would also change the "backup".
shap_values_backup = shap_values.copy()

In [33]:
# Names of the 61 aggregated columns shown in the SHAP plots.
# Suffix `_r` = Radiant, `_d` = Dire.
_roles = ['carry', 'support', 'nuker', 'disabler', 'jungler',
          'durable', 'escape', 'pusher', 'initiator']
_hero_stats = ['strength', 'agility', 'intellig',
               'strength_gain', 'agility_gain', 'intellig_gain',
               'health', 'health_regeneration', 'move_speed']
_hp_stats = ['winrate', 'xp_min', 'gold_min', 'death_min', 'taken_damage_min',
             'kill_min', 'assist_min', 'caused_damage_min', 'heal_min']

new_columns = (
    ['heroes_r', 'heroes_d']
    + ['role_{}_{}'.format(role, side) for side in 'rd' for role in _roles]
    + ['{}_{}'.format(stat, side) for side in 'rd' for stat in _hero_stats]
    + ['hero_winrate_r', 'hero_winrate_d', 'player_winrate_r', 'player_winrate_d']
    + ['hp_{}_{}'.format(stat, side) for side in 'rd' for stat in _hp_stats]
    + ['first_pick']
)

len(new_columns)

61

In [0]:
# Collapse the per-column SHAP matrix into one column per entry of `new_columns`.
#
# Every output column is the row-wise sum of one group of source columns, so
# that e.g. the strength of the five Radiant heroes gets a single attribution.
# Source layout, as implied by the column indices used for this aggregation
# (NOTE(review): confirm against the header of the feature CSV):
#     0-237  hero one-hot indicators, 119 Radiant then 119 Dire
#   238-255  role columns, 9 Radiant then 9 Dire (passed through one-to-one)
#   256-345  hero attributes: per team, 5 heroes x 9 attributes, stored hero by hero
#   346-455  22 consecutive groups of 5 per-player columns
#            (hero/player win rates, then 9 hero-player stats per team)
#       456  first_pick
#            NOTE(review): assumed to be the last feature column. Index 198,
#            used previously, lies inside the Dire hero one-hot range.
N_HEROES = 119             # one-hot hero columns per team
TEAM_SIZE = 5              # players (and picked heroes) per team
N_ROLES = 9                # role columns per team
N_HERO_STATS = 9           # attribute columns per picked hero
N_PER_PLAYER_GROUPS = 22   # 4 win-rate groups + 2 teams x 9 hero-player stats

roles_start = 2 * N_HEROES                                            # 238
stats_start = roles_start + 2 * N_ROLES                               # 256
per_player_start = stats_start + 2 * TEAM_SIZE * N_HERO_STATS         # 346
first_pick_col = per_player_start + N_PER_PLAYER_GROUPS * TEAM_SIZE   # 456

# Source-column indices feeding each output column, in `new_columns` order.
column_groups = [list(range(0, N_HEROES)), list(range(N_HEROES, roles_start))]
column_groups += [[col] for col in range(roles_start, stats_start)]
for team in range(2):  # 0 = Radiant, 1 = Dire
    team_start = stats_start + team * TEAM_SIZE * N_HERO_STATS
    # Attribute `stat` of hero `hero` sits at team_start + hero * N_HERO_STATS + stat.
    column_groups += [
        [team_start + hero * N_HERO_STATS + stat for hero in range(TEAM_SIZE)]
        for stat in range(N_HERO_STATS)
    ]
column_groups += [
    list(range(start, start + TEAM_SIZE))
    for start in range(per_player_start, first_pick_col, TEAM_SIZE)
]
column_groups.append([first_pick_col])

shap_matrix = np.asarray(shap_values, dtype=float)
assert shap_matrix.shape[1] == first_pick_col + 1, \
    "unexpected number of feature columns: %d" % shap_matrix.shape[1]
assert len(column_groups) == len(new_columns), \
    "column groups and new_columns are out of sync"

# Vectorised group sums: one pass per output column instead of one per row.
shap_values_new = pd.DataFrame(
    {name: shap_matrix[:, cols].sum(axis=1) for name, cols in zip(new_columns, column_groups)},
    columns=new_columns,
)
print(shap_values_new.shape)


In [35]:
# Dimensions of the aggregated SHAP table (one column per entry of new_columns).
shap_values_new.shape

(3634, 61)

In [0]:
# Convert the aggregated SHAP table to a plain float array for plotting.
# np.asarray accepts both the DataFrame and an already converted array, so the
# cell can be re-run safely (calling .to_numpy() a second time would fail on the
# ndarray), and dtype=float avoids carrying an object-dtype array forward.
shap_values_new = np.asarray(shap_values_new, dtype=float)

In [37]:
# Dimensions of the held-out feature frame before aggregation.
X_test.shape

(3634, 457)

In [0]:
# Aggregate the held-out feature values the same way as the SHAP values, so each
# column of `new_columns` has one feature value per match to colour the SHAP plot.
#
# Every output column is the row-wise sum of one group of source columns
# (for the hero one-hot blocks that is the number of indicators set).
# Source layout, as implied by the column indices used for this aggregation
# (NOTE(review): confirm against the header of the feature CSV):
#     0-237  hero one-hot indicators, 119 Radiant then 119 Dire
#   238-255  role columns, 9 Radiant then 9 Dire (passed through one-to-one)
#   256-345  hero attributes: per team, 5 heroes x 9 attributes, stored hero by hero
#   346-455  22 consecutive groups of 5 per-player columns
#            (hero/player win rates, then 9 hero-player stats per team)
#       456  first_pick
#            NOTE(review): assumed to be the last feature column. Index 198,
#            used previously, lies inside the Dire hero one-hot range.
#
# All sums are computed from X_test in this cell. In particular the Dire
# deaths/min and damage-taken/min columns no longer rely on variables left
# behind by the SHAP aggregation cell, and the Radiant damage-taken column is
# no longer overwritten with the hero-damage columns.
N_HEROES = 119             # one-hot hero columns per team
TEAM_SIZE = 5              # players (and picked heroes) per team
N_ROLES = 9                # role columns per team
N_HERO_STATS = 9           # attribute columns per picked hero
N_PER_PLAYER_GROUPS = 22   # 4 win-rate groups + 2 teams x 9 hero-player stats

roles_start = 2 * N_HEROES                                            # 238
stats_start = roles_start + 2 * N_ROLES                               # 256
per_player_start = stats_start + 2 * TEAM_SIZE * N_HERO_STATS         # 346
first_pick_col = per_player_start + N_PER_PLAYER_GROUPS * TEAM_SIZE   # 456

# Source-column indices feeding each output column, in `new_columns` order.
column_groups = [list(range(0, N_HEROES)), list(range(N_HEROES, roles_start))]
column_groups += [[col] for col in range(roles_start, stats_start)]
for team in range(2):  # 0 = Radiant, 1 = Dire
    team_start = stats_start + team * TEAM_SIZE * N_HERO_STATS
    # Attribute `stat` of hero `hero` sits at team_start + hero * N_HERO_STATS + stat.
    column_groups += [
        [team_start + hero * N_HERO_STATS + stat for hero in range(TEAM_SIZE)]
        for stat in range(N_HERO_STATS)
    ]
column_groups += [
    list(range(start, start + TEAM_SIZE))
    for start in range(per_player_start, first_pick_col, TEAM_SIZE)
]
column_groups.append([first_pick_col])

feature_matrix = np.asarray(X_test, dtype=float)
assert feature_matrix.shape[1] == first_pick_col + 1, \
    "unexpected number of feature columns: %d" % feature_matrix.shape[1]
assert len(column_groups) == len(new_columns), \
    "column groups and new_columns are out of sync"

# Plain sums (not nansum): a missing source value keeps the aggregate missing.
# The result gets a fresh 0..n-1 index, matching the row order of the SHAP matrix.
X_test_new = pd.DataFrame(
    {name: feature_matrix[:, cols].sum(axis=1) for name, cols in zip(new_columns, column_groups)},
    columns=new_columns,
)
print(X_test_new.shape)


In [39]:
# Replace any missing aggregated feature value with the median of its column,
# then show the resulting dimensions.
column_medians = X_test_new.median()
X_test_new = X_test_new.fillna(column_medians)
X_test_new.shape

(3634, 61)

In [0]:
# SHAP summary plot of the aggregated features, saved as a PDF.
# Reset matplotlib to its default settings, then render all text in bold.
matplotlib.rcParams.update(matplotlib.rcParamsDefault)
# plt.rcParams["axes.labelweight"] = "bold"
plt.rcParams["font.weight"] = "bold"

# show=False is passed so the plot is not displayed immediately; the pyplot
# calls below then restyle the current figure before it is written to disk.
# NOTE(review): `fig` is assigned but not used afterwards.
fig = shap.summary_plot(shap_values_new, X_test_new, show=False)  
# Blank x-axis label, larger tick labels.
plt.xlabel('')
plt.xticks(fontsize=18)
plt.yticks(fontsize=22)
plt.savefig('shapvalues_regular_newdata_bold.pdf', bbox_inches = "tight")

In [0]:
# Bar-type SHAP summary plot of the aggregated features, saved as a PDF.
# Reset matplotlib to its default settings, then render all text in bold.
matplotlib.rcParams.update(matplotlib.rcParamsDefault)
# plt.rcParams["axes.labelweight"] = "bold"
plt.rcParams["font.weight"] = "bold"
# show=False is passed so the plot is not displayed immediately; the pyplot
# calls below then resize and restyle the current figure before saving.
# NOTE(review): `fig` is assigned but not used afterwards.
fig = shap.summary_plot(shap_values_new, X_test_new, plot_type="bar", show=False) 
# Print the figure size chosen by SHAP, then override it with 9 x 13 inches.
size = plt.gcf().get_size_inches()
print(size) 
plt.gcf().set_size_inches(9, 13)

# plt.xlabel('Impact on Radiant\'s victory', fontsize=22)
# Blank x-axis label, larger tick labels.
plt.xlabel('')
# plt.tick_params(
#     axis='x',          # changes apply to the x-axis
#     which='both',      # both major and minor ticks are affected
#     bottom=False)
plt.xticks(fontsize=21)
plt.yticks(fontsize=24)
plt.savefig('shapvalues_regular_newdata_barplot_bold.pdf', bbox_inches = "tight")

In [0]:
# Largest mean absolute SHAP value among the aggregated columns.
mean_abs_shap = np.abs(shap_values_new).mean(0)
max(mean_abs_shap)

In [0]:
# import seaborn as sns
# import matplotlib.pyplot as plt
# sns.set(style="whitegrid")
# f, ax = plt.subplots(figsize=(6, 15))
# sns.barplot(x="mean_SHAP", y="Feature", data=shap_values_new[:5],
#             label="Total", color="b")

### **Time blowout matches**

In [0]:
import os
import numpy as np
import pandas as pd
from xgboost import XGBClassifier

from sklearn.model_selection import KFold, RandomizedSearchCV, GridSearchCV, train_test_split, StratifiedKFold
from sklearn.metrics import confusion_matrix, roc_curve, roc_auc_score, auc

import statistics as st

import warnings
warnings.filterwarnings('ignore')
from datetime import datetime

import matplotlib.pylab as pl
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import shap

# Load SHAP's JavaScript visualisation code into the notebook.
shap.initjs()

# Seed NumPy's global random generator for this section.
np.random.seed(3)

In [0]:
def timer(start_time=None):
    """Start or stop a simple wall-clock timer.

    Called without a (truthy) ``start_time`` it returns the current
    ``datetime`` to be used as the starting point. Called with a start time
    it prints the elapsed hours, minutes and seconds and returns ``None``.
    """
    if not start_time:
        return datetime.now()

    elapsed_seconds = (datetime.now() - start_time).total_seconds()
    thour, remainder = divmod(elapsed_seconds, 3600)
    tmin, tsec = divmod(remainder, 60)
    print('\n Time taken: %i hours %i minutes and %s seconds.' % (thour, tmin, round(tsec, 2)))

In [0]:
# Load the time-blowout feature table.
# NOTE(review): the absolute /content path presumably assumes a Colab runtime — adjust when running elsewhere.
feature_time_blowout_df = pd.read_csv("/content/dota2_time_blowout_features.csv")

In [0]:
# Drop the match id column; it is an identifier, not a predictor.
feature_time_blowout_df = feature_time_blowout_df.drop(columns='match_id')

In [0]:
# Preview the first rows of the feature table.
feature_time_blowout_df.head()

In [0]:
# Count how many matches fall into each value of the win_label column.
feature_time_blowout_df['win_label'].value_counts()

In [0]:
# Number of columns left after dropping match_id (this count includes win_label).
len(feature_time_blowout_df.columns)

**Model building, training, and evaluation**

In [0]:
# Select predictors and label by name. The previous positional split
# (iloc[:, :-1] / iloc[:, -1]) only matched `features` when win_label happened
# to be the last column; `features` is later passed as feature_names for
# X.values, so the two must always agree.
target = 'win_label'
features = [c for c in feature_time_blowout_df.columns if c != target]
X, y = feature_time_blowout_df[features], feature_time_blowout_df[target]

### Randomized search to tune hyperparameters

In [0]:
# Candidate values sampled by the hyperparameter search for XGBoost.
params_search = dict(
    learning_rate=[0.01, 0.05, 0.1, 0.2],
    gamma=[0.5, 1, 1.5, 2, 5],
    subsample=[0.6, 0.8, 1.0],
    colsample_bytree=[0.3, 0.5, 0.7],
    max_depth=[4, 6, 8, 10],
    n_estimators=[10, 50, 100],
)

In [0]:
# Base estimator handed to the hyperparameter search.
# NOTE(review): the name `xgb` is rebound to the xgboost module by a later
# `import xgboost as xgb` cell, so this cell must be re-run before repeating the search.
xgb = XGBClassifier(objective='binary:logistic', silent=False, nthread=4)

In [0]:
# Stratified 80/20 split used for the hyperparameter search.
split_parts = train_test_split(X, y, test_size=0.2, stratify=y)
train, test, train_labels, test_labels = split_parts

In [0]:
folds = 5
param_comb = 100

skf = StratifiedKFold(n_splits=folds, shuffle=True, random_state=1001)

# Pass the splitter itself instead of `skf.split(train, train_labels)`.
# The latter is a one-shot generator: it is consumed by the first fit, so the
# search object could not be fitted again. With a fixed random_state the folds
# produced are the same.
random_search = RandomizedSearchCV(
    xgb,
    param_distributions=params_search,
    n_iter=param_comb,
    scoring='roc_auc',
    n_jobs=4,
    cv=skf,
    verbose=True,
)

# Time the whole search.
start_time = timer(None)
random_search.fit(train, train_labels)
timer(start_time)

Fitting 5 folds for each of 100 candidates, totalling 500 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:  2.3min
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed: 10.4min
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed: 24.1min
[Parallel(n_jobs=4)]: Done 500 out of 500 | elapsed: 28.2min finished



 Time taken: 0 hours 28 minutes and 14.85 seconds.


In [0]:
# Report the best parameter combination found by the search.
print('\n Best hyperparameters:', random_search.best_params_, sep='\n')


 Best hyperparameters:
{'subsample': 1.0, 'n_estimators': 100, 'max_depth': 4, 'learning_rate': 0.05, 'gamma': 1, 'colsample_bytree': 0.3}


**Best hyperparameters time blowout:**
{'subsample': 1.0, 'n_estimators': 100, 'max_depth': 4, 'learning_rate': 0.05, 'gamma': 1, 'colsample_bytree': 0.3}

### Use best found hyperparameters

In [0]:
# 10-fold shuffled splitter for the final evaluation loop; no random_state is
# passed here.
kfolds = KFold(n_splits=10, shuffle=True)

In [0]:
# Booster parameters: the best combination reported by the search above,
# plus the objective and the evaluation metric.
param = dict(
    objective='binary:logistic',
    eval_metric='auc',
    subsample=1.0,
    colsample_bytree=0.3,
    learning_rate=0.05,
    max_depth=4,
    gamma=1,
)

# Number of boosting rounds (the `n_estimators` value found by the search).
num_round = 100

In [77]:
import xgboost as xgb

# Per-fold ROC AUC scores. `bst`, `X_test` and `y_test` from the final fold
# remain available after the loop.
auc = []
for train_idx, test_idx in kfolds.split(X):
    X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
    y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

    # Weight the positive class by the negative/positive ratio of this fold.
    n_positive = y_train.sum()
    param['scale_pos_weight'] = (y_train.size - n_positive) / n_positive

    xg_train = xgb.DMatrix(X_train.values, label=y_train.values, feature_names=features)
    xg_test = xgb.DMatrix(X_test.values, label=y_test.values, feature_names=features)

    watchlist = [(xg_train, 'train'), (xg_test, 'test')]
    bst = xgb.train(
        param,
        xg_train,
        num_boost_round=num_round,
        evals=watchlist,
        verbose_eval=False,
    )
    preds = bst.predict(xg_test)

    auc.append(roc_auc_score(y_test, preds))

f'Median AUC: {st.median(auc):.04f}'


'Median AUC: 0.8587'

In [0]:
  # Explain only the last fold: `bst` and `X_test` are whatever the final
  # iteration of the cross-validation loop left behind (the author notes there
  # is little variance between the folds).
  explainer = shap.TreeExplainer(bst)
  shap_values = explainer.shap_values(X_test)

  # shap.summary_plot(shap_values, X_test)  
  
  # Variable importance-like plot over the raw (un-aggregated) features.
  shap.summary_plot(shap_values, X_test, plot_type="bar")

In [0]:
# Keep an independent copy: a plain assignment would only alias the same
# array, so any in-place change to shap_values would also change the "backup".
shap_values_backup = np.array(shap_values, copy=True)

In [80]:
# Names of the 61 aggregated feature columns, built from their parts.
# Suffix `_r` = Radiant, `_d` = Dire.
_ROLE_NAMES = ['carry', 'support', 'nuker', 'disabler', 'jungler',
               'durable', 'escape', 'pusher', 'initiator']
_STAT_NAMES = ['strength', 'agility', 'intellig',
               'strength_gain', 'agility_gain', 'intellig_gain',
               'health', 'health_regeneration', 'move_speed']
_HERO_PLAYER_METRICS = ['winrate', 'xp_min', 'gold_min', 'death_min', 'taken_damage_min',
                        'kill_min', 'assist_min', 'caused_damage_min', 'heal_min']

new_columns = (
    ['heroes_r', 'heroes_d']
    + [f'role_{role}_{team}' for team in 'rd' for role in _ROLE_NAMES]
    + [f'{stat}_{team}' for team in 'rd' for stat in _STAT_NAMES]
    + ['hero_winrate_r', 'hero_winrate_d', 'player_winrate_r', 'player_winrate_d']
    + [f'hp_{metric}_{team}' for team in 'rd' for metric in _HERO_PLAYER_METRICS]
    + ['first_pick']
)

len(new_columns)

61

In [0]:
# Collapse the raw SHAP matrix (one column per raw feature) into one column per
# feature family in `new_columns`, summing the SHAP values of each family.
#
# Raw column layout, as implied by the index arithmetic this cell has always
# used (TODO confirm against the CSV header):
#   0-118    Radiant hero one-hot           -> heroes_r (sum)
#   119-237  Dire hero one-hot              -> heroes_d (sum)
#   238-255  18 role columns                -> copied one-to-one
#   256-300  Radiant, 5 players x 9 stats   -> summed per stat over the players
#   301-345  Dire,    5 players x 9 stats   -> summed per stat over the players
#   346-455  22 consecutive 5-column groups -> summed per group
#            (hero win rate r/d, player win rate r/d, then the 9 hero-player
#            metrics for Radiant followed by the same 9 for Dire)
#   456      first_pick                     -> copied
#
# Fixes over the row-by-row version:
#   * the Dire deaths and damage-taken groups are summed over their own
#     columns (426-430 and 431-435) instead of the Radiant ones;
#   * first_pick is read from the last raw column (456), not from column 198,
#     which lies inside the Dire hero one-hot block;
#   * the frame is built in one step instead of growing it one row at a time.
N_RAW_FEATURES = 457
N_HEROES = 119
N_PLAYERS = 5
N_STATS = 9
ROLES_START = 2 * N_HEROES                              # 238
STATS_START = ROLES_START + 18                          # 256
GROUPS_START = STATS_START + 2 * N_PLAYERS * N_STATS    # 346
FIRST_PICK_COL = N_RAW_FEATURES - 1                     # 456

shap_matrix = np.asarray(shap_values, dtype=float)
assert shap_matrix.shape[1] == N_RAW_FEATURES, (
    "unexpected number of raw features: %d" % shap_matrix.shape[1]
)

# One list of raw column indices per output column, in `new_columns` order.
column_groups = [list(range(0, N_HEROES)), list(range(N_HEROES, ROLES_START))]
column_groups += [[col] for col in range(ROLES_START, STATS_START)]
for team_start in (STATS_START, STATS_START + N_PLAYERS * N_STATS):
    column_groups += [
        [team_start + N_STATS * player + stat for player in range(N_PLAYERS)]
        for stat in range(N_STATS)
    ]
column_groups += [
    list(range(start, start + N_PLAYERS))
    for start in range(GROUPS_START, FIRST_PICK_COL, N_PLAYERS)
]
column_groups.append([FIRST_PICK_COL])
assert len(column_groups) == len(new_columns)

shap_values_new = pd.DataFrame(
    np.column_stack([shap_matrix[:, cols].sum(axis=1) for cols in column_groups]),
    columns=new_columns,
)


In [82]:
# Sanity check: expect one row per test sample and len(new_columns) columns.
shap_values_new.shape

(552, 61)

In [0]:
# Convert to a plain float array for shap.summary_plot. np.asarray also
# accepts an array that was already converted, so re-running this cell is
# harmless (DataFrame.to_numpy() does not exist on an ndarray), and
# dtype=float rules out an object-dtype result.
shap_values_new = np.asarray(shap_values_new, dtype=float)

In [84]:
# Shape of the raw test fold (rows, raw feature columns) before aggregation.
X_test.shape

(552, 457)

In [0]:
# Aggregate the raw test features into the same 61 feature families as the
# aggregated SHAP matrix, so the two line up column by column in the plots.
#
# Raw column layout, as implied by the index arithmetic this cell has always
# used (TODO confirm against the CSV header):
#   0-118    Radiant hero one-hot           -> heroes_r (sum)
#   119-237  Dire hero one-hot              -> heroes_d (sum)
#   238-255  18 role columns                -> copied one-to-one
#   256-300  Radiant, 5 players x 9 stats   -> summed per stat over the players
#   301-345  Dire,    5 players x 9 stats   -> summed per stat over the players
#   346-455  22 consecutive 5-column groups -> summed per group
#            (hero win rate r/d, player win rate r/d, then the 9 hero-player
#            metrics for Radiant followed by the same 9 for Dire)
#   456      first_pick                     -> copied
#
# Fixes over the row-by-row version:
#   * the Dire deaths and damage-taken sums are now computed from their own
#     columns (426-430 and 431-435); previously those two values were never
#     assigned in this cell, so stale values from an earlier cell ended up in
#     the feature matrix;
#   * the Radiant damage-taken sum is no longer overwritten by another group;
#   * first_pick is read from the last raw column (456), not from column 198,
#     which lies inside the Dire hero one-hot block;
#   * the frame is built in one step instead of growing it one row at a time.
N_RAW_FEATURES = 457
N_HEROES = 119
N_PLAYERS = 5
N_STATS = 9
ROLES_START = 2 * N_HEROES                              # 238
STATS_START = ROLES_START + 18                          # 256
GROUPS_START = STATS_START + 2 * N_PLAYERS * N_STATS    # 346
FIRST_PICK_COL = N_RAW_FEATURES - 1                     # 456

feature_matrix = X_test.to_numpy(dtype=float)
assert feature_matrix.shape[1] == N_RAW_FEATURES, (
    "unexpected number of raw features: %d" % feature_matrix.shape[1]
)

# One list of raw column indices per output column, in `new_columns` order.
column_groups = [list(range(0, N_HEROES)), list(range(N_HEROES, ROLES_START))]
column_groups += [[col] for col in range(ROLES_START, STATS_START)]
for team_start in (STATS_START, STATS_START + N_PLAYERS * N_STATS):
    column_groups += [
        [team_start + N_STATS * player + stat for player in range(N_PLAYERS)]
        for stat in range(N_STATS)
    ]
column_groups += [
    list(range(start, start + N_PLAYERS))
    for start in range(GROUPS_START, FIRST_PICK_COL, N_PLAYERS)
]
column_groups.append([FIRST_PICK_COL])
assert len(column_groups) == len(new_columns)

# NaNs in the raw features propagate into the sums, as they did before.
X_test_new = pd.DataFrame(
    np.column_stack([feature_matrix[:, cols].sum(axis=1) for cols in column_groups]),
    columns=new_columns,
)


In [86]:
# Replace missing aggregated feature values with the column medians.
column_medians = X_test_new.median()
X_test_new = X_test_new.fillna(column_medians)
X_test_new.shape

(552, 61)

In [0]:
# Start from matplotlib's default style, then make all text bold.
matplotlib.rcParams.update(matplotlib.rcParamsDefault)
plt.rcParams.update({"font.weight": "bold"})

# show=False leaves the figure open so labels and tick sizes can be adjusted
# before it is written to disk.
fig = shap.summary_plot(shap_values_new, X_test_new, show=False)
plt.gca().set_xlabel('')
plt.xticks(fontsize=18)
plt.yticks(fontsize=22)
plt.savefig('shapvalues_time_bold.pdf', bbox_inches="tight")

In [0]:
# Start from matplotlib's default style, then make all text bold.
matplotlib.rcParams.update(matplotlib.rcParamsDefault)
plt.rcParams.update({"font.weight": "bold"})

# Bar variant of the summary plot; show=False keeps the figure open for tweaks.
fig = shap.summary_plot(shap_values_new, X_test_new, plot_type="bar", show=False)

# Report the size SHAP picked, then enlarge the figure for the PDF export.
current_figure = plt.gcf()
size = current_figure.get_size_inches()
print(size)
current_figure.set_size_inches(9, 13)

# The x-axis label is intentionally blank.
plt.gca().set_xlabel('')
plt.xticks(fontsize=21)
plt.yticks(fontsize=24)
plt.savefig('shapvalues_time_barplot_bold.pdf', bbox_inches="tight")



---



### **Score blowout matches**

In [0]:
pip install shap

In [0]:
import os
import numpy as np
import pandas as pd
from xgboost import XGBClassifier

from sklearn.model_selection import KFold, RandomizedSearchCV, GridSearchCV, train_test_split, StratifiedKFold
from sklearn.metrics import confusion_matrix, roc_curve, roc_auc_score, auc

import statistics as st

import warnings
warnings.filterwarnings('ignore')
from datetime import datetime

import matplotlib.pylab as pl
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import shap

# Load SHAP's JavaScript visualisation code into the notebook.
shap.initjs()

# Seed NumPy's global random generator for this section.
np.random.seed(3)

In [0]:
# Load the JS visualization code into the notebook (repeats the call at the
# end of the import cell of this section).
shap.initjs()

In [0]:
def timer(start_time=None):
    """Tiny stopwatch helper.

    Called without a start time (or with a falsy one), returns the current
    ``datetime`` so it can be passed back in later. Called with a start time,
    prints the wall-clock time elapsed since then and returns ``None``.
    """
    if start_time:
        elapsed_seconds = (datetime.now() - start_time).total_seconds()
        hours, remainder = divmod(elapsed_seconds, 3600)
        minutes, seconds = divmod(remainder, 60)
        print('\n Time taken: %i hours %i minutes and %s seconds.' % (hours, minutes, round(seconds, 2)))
        return None
    return datetime.now()

In [0]:
# Load the score-blowout feature table.
# NOTE(review): this is an absolute path (looks like a Colab working directory);
# the CSV must exist there for the notebook to run.
feature_score_blowout_df = pd.read_csv("/content/dota2_score_blowout_features.csv")

In [0]:
# Drop the match id column so it is not picked up as a model feature below.
feature_score_blowout_df = feature_score_blowout_df.drop(['match_id'], axis=1)

In [0]:
# Preview the first rows of the feature table.
feature_score_blowout_df.head()

In [0]:
# Number of rows per win_label value (class balance of the target).
feature_score_blowout_df['win_label'].value_counts()

In [0]:
# Number of columns left after dropping match_id (features plus win_label).
len(feature_score_blowout_df.columns)

**Model building, training, and evaluation**

In [0]:
# Select the model inputs and the target by column NAME. `features` is later
# passed to xgboost as the feature names of X, so X must contain exactly these
# columns in this order; slicing by position would silently break that link if
# win_label were ever not the last column.
target = 'win_label'
features = [c for c in feature_score_blowout_df.columns if c != target]
X, y = feature_score_blowout_df[features], feature_score_blowout_df[target]

### Randomized search to tune hyperparameters

In [0]:
# Candidate values for each XGBoost hyperparameter explored by the search.
params_search = dict(
    learning_rate=[0.01, 0.05, 0.1, 0.2],
    gamma=[0.5, 1, 1.5, 2, 5],
    subsample=[0.6, 0.8, 1.0],
    colsample_bytree=[0.3, 0.5, 0.7],
    max_depth=[4, 6, 8, 10],
    n_estimators=[10, 50, 100],
)

In [0]:
# Base estimator handed to the hyperparameter search below.
# NOTE(review): `silent` and `nthread` appear to be deprecated in newer xgboost
# releases in favour of `verbosity` / `n_jobs` — confirm against the installed version.
xgb = XGBClassifier(objective='binary:logistic', silent=False, nthread=4)

In [0]:
# Stratified 80/20 split on the label; the training part is what the
# hyperparameter search below is fitted on. No random_state is passed here.
train, test, train_labels, test_labels = train_test_split(X, y, 
                                                          stratify = y,
                                                          test_size = 0.2)

In [0]:
# Randomized hyperparameter search: draw `param_comb` settings from
# `params_search` and score each one by ROC AUC over stratified folds of the
# training split.
folds = 5
param_comb = 100

skf = StratifiedKFold(n_splits=folds, shuffle=True, random_state=1001)

random_search = RandomizedSearchCV(
    xgb,
    param_distributions=params_search,
    n_iter=param_comb,
    scoring='roc_auc',
    n_jobs=4,
    cv=skf.split(train, train_labels),
    verbose=True,
)

# Wall-clock timing of the whole search: timer(None) returns the start
# timestamp, timer(start_time) prints the elapsed time.
start_time = timer(None)
random_search.fit(train, train_labels)
timer(start_time)

Fitting 5 folds for each of 500 candidates, totalling 2500 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:  1.5min
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:  8.5min
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed: 22.3min
[Parallel(n_jobs=4)]: Done 792 tasks      | elapsed: 38.5min
[Parallel(n_jobs=4)]: Done 1242 tasks      | elapsed: 61.6min
[Parallel(n_jobs=4)]: Done 1792 tasks      | elapsed: 90.6min
[Parallel(n_jobs=4)]: Done 2442 tasks      | elapsed: 123.8min
[Parallel(n_jobs=4)]: Done 2500 out of 2500 | elapsed: 127.2min finished



 Time taken: 2 hours 7 minutes and 14.05 seconds.


In [0]:
# Report the best parameter combination found by the randomized search.
print('\n Best hyperparameters:')
print(random_search.best_params_)


 Best hyperparameters:
{'subsample': 1.0, 'n_estimators': 100, 'max_depth': 6, 'learning_rate': 0.1, 'gamma': 0.5, 'colsample_bytree': 0.3}


**Best hyperparameters score blowout:**
{'subsample': 1.0, 'n_estimators': 100, 'max_depth': 6, 'learning_rate': 0.1, 'gamma': 0.5, 'colsample_bytree': 0.3}

### Use best found hyperparameters

In [0]:
# 10-fold shuffled splitter used to evaluate the tuned model below.
# No random_state is passed here.
kfolds = KFold(n_splits=10, shuffle=True)

In [0]:
# Booster settings for xgb.train: fixed objective and evaluation metric plus
# the best values reported by the randomized search above.
param = dict(
    objective='binary:logistic',
    eval_metric='auc',
    subsample=1.0,
    colsample_bytree=0.3,
    learning_rate=0.1,
    max_depth=6,
    gamma=0.5,
)

# Number of boosting rounds (the search's best n_estimators).
num_round = 100

In [97]:
# Imported under its full name so the `xgb` classifier instance created for the
# hyperparameter search is not overwritten by the module (which would break a
# re-run of the search cell).
import xgboost

# Cross-validate the tuned booster and collect one ROC AUC per fold. The scores
# live in `auc_scores` so that sklearn's imported `auc` function is not shadowed.
auc_scores = []
for train_idx, test_idx in kfolds.split(X):
    X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
    X_test, y_test = X.iloc[test_idx], y.iloc[test_idx]

    # Re-balance the classes per fold: ratio of negative to positive labels.
    param['scale_pos_weight'] = (y_train.size - y_train.sum()) / y_train.sum()

    xg_train = xgboost.DMatrix(
        X_train.values, feature_names=features, label=y_train.values
    )
    xg_test = xgboost.DMatrix(
        X_test.values, feature_names=features, label=y_test.values
    )

    watchlist = [(xg_train, 'train'), (xg_test, 'test')]
    bst = xgboost.train(param, xg_train, num_round, watchlist, verbose_eval=False)
    preds = bst.predict(xg_test)

    auc_scores.append(roc_auc_score(y_test, preds))

# `bst`, `X_test` and `y_test` from the final fold stay in scope and are reused
# by the SHAP cells that follow.
'Median AUC: {:.04f}'.format(st.median(auc_scores))


'Median AUC: 0.7796'

In [0]:
  # Use only the last x_test as there is little variance between the folds.
  # `bst` and `X_test` are the model and held-out rows left behind by the final
  # iteration of the cross-validation loop above.
  explainer = shap.TreeExplainer(bst)
  shap_values = explainer.shap_values(X_test)

  # shap.summary_plot(shap_values, X_test)  
  
  # Variable importance-like plot.
  shap.summary_plot(shap_values, X_test, plot_type="bar")

In [0]:
# np.abs(shap_values).mean(0) summarizes the global importance of each feature as the mean
# absolute SHAP value of that feature over the explained rows (here, X_test).

np.abs(shap_values).mean(0)

In [0]:
# Keep an independent copy of the raw SHAP matrix. A plain assignment would
# only alias the same array, so any later in-place change would also alter the
# "backup".
shap_values_backup = shap_values.copy()

In [101]:
# Names of the aggregated features, in the order the aggregation cells emit them.
# Every per-team group is listed for suffix `_r` first and then for suffix `_d`.
new_columns = (
    ['heroes_r', 'heroes_d']
    + ['role_%s_%s' % (role, side)
       for side in ('r', 'd')
       for role in ('carry', 'support', 'nuker', 'disabler', 'jungler',
                    'durable', 'escape', 'pusher', 'initiator')]
    + ['%s_%s' % (stat, side)
       for side in ('r', 'd')
       for stat in ('strength', 'agility', 'intellig',
                    'strength_gain', 'agility_gain', 'intellig_gain',
                    'health', 'health_regeneration', 'move_speed')]
    + ['hero_winrate_r', 'hero_winrate_d', 'player_winrate_r', 'player_winrate_d']
    + ['hp_%s_%s' % (metric, side)
       for side in ('r', 'd')
       for metric in ('winrate', 'xp_min', 'gold_min', 'death_min', 'taken_damage_min',
                      'kill_min', 'assist_min', 'caused_damage_min', 'heal_min')]
    + ['first_pick']
)

len(new_columns)

61

In [0]:
# Create new shap matrix
shap_values_new = pd.DataFrame(columns=new_columns)
print(shap_values_new.shape)

for row in range(len(shap_values)):
  print(row)

  # hero binary array
  sum_hero_rad = 0
  for col in range(119):
    sum_hero_rad = sum_hero_rad + shap_values[row, col]
  sum_hero_dire = 0
  for col in range(119,238):
    sum_hero_dire = sum_hero_dire + shap_values[row, col]

  # first_role_col = 238
  # role_carry_r = shap_values[row, first_role_col]            
  # role_support_r = shap_values[row, first_role_col + 1] 
  # role_nuker_r = shap_values[row, first_role_col + 2] 
  # role_disabler_r = shap_values[row, first_role_col+3] 
  # role_jungler_r = shap_values[row, first_role_col+4] 
  # role_durable_r = shap_values[row, first_role_col+5] 
  # role_escape_r = shap_values[row, first_role_col+6] 
  # role_pusher_r = shap_values[row, first_role_col+7] 
  # role_initiator_r = shap_values[row, first_role_col+8] 
  
  # first_role_col = 247
  # role_carry_d = shap_values[row, first_role_col]            
  # role_support_d = shap_values[row, first_role_col + 1] 
  # role_nuker_d = shap_values[row, first_role_col + 2] 
  # role_disabler_d = shap_values[row, first_role_col+3] 
  # role_jungler_d = shap_values[row, first_role_col+4] 
  # role_durable_d = shap_values[row, first_role_col+5] 
  # role_escape_d = shap_values[row, first_role_col+6] 
  # role_pusher_d = shap_values[row, first_role_col+7] 
  # role_initiator_d = shap_values[row, first_role_col+8] # col = 255 
  
  
  # # hero role rad and dire
  # sum_role_rad = 0
  # for col in range(238,247):
  #   sum_role_rad = sum_role_rad + shap_values[row, col]
  # sum_role_dire = 0
  # for col in range(247,256):
  #   sum_role_dire = sum_role_dire + shap_values[row, col]

  # base and gain stats rad and dire
  index_base_str_rad = [18+238, 27+238, 36+238, 45+238, 54+238]
  index_base_agi_rad = [x + 1 for x in index_base_str_rad] 
  index_base_int_rad = [x + 1 for x in index_base_agi_rad] 
  index_str_gain_rad = [x + 1 for x in index_base_int_rad] 
  index_agi_gain_rad = [x + 1 for x in index_str_gain_rad] 
  index_int_gain_rad = [x + 1 for x in index_agi_gain_rad]
  index_base_health_rad = [x + 1 for x in index_int_gain_rad]
  index_base_health_reg_rad = [x + 1 for x in index_base_health_rad]
  index_move_speed_rad = [x + 1 for x in index_base_health_reg_rad]

  sum_base_str_rad = 0
  for col in index_base_str_rad:
    sum_base_str_rad = sum_base_str_rad + shap_values[row, col] 
  
  sum_base_agi_rad = 0
  for col in index_base_agi_rad:
    sum_base_agi_rad = sum_base_agi_rad + shap_values[row, col]
  
  sum_base_int_rad = 0
  for col in index_base_int_rad:
    sum_base_int_rad = sum_base_int_rad + shap_values[row, col]
  
  sum_str_gain_rad = 0
  for col in index_str_gain_rad:
    sum_str_gain_rad = sum_str_gain_rad + shap_values[row, col]
  
  sum_agi_gain_rad = 0
  for col in index_agi_gain_rad:
    sum_agi_gain_rad = sum_agi_gain_rad + shap_values[row, col]
  
  sum_int_gain_rad = 0
  for col in index_int_gain_rad:
    sum_int_gain_rad = sum_int_gain_rad + shap_values[row, col]
  
  sum_base_health_rad = 0
  for col in index_base_health_rad:
    sum_base_health_rad = sum_base_health_rad + shap_values[row, col]
  
  sum_base_health_reg_rad = 0
  for col in index_base_health_reg_rad:
    sum_base_health_reg_rad = sum_base_health_reg_rad + shap_values[row, col]
  
  sum_move_speed_rad = 0
  for col in index_move_speed_rad:
    sum_move_speed_rad = sum_move_speed_rad + shap_values[row, col]  
  

  index_base_str_dire = [63+238, 72+238, 81+238, 90+238, 99+238]
  index_base_agi_dire = [x + 1 for x in index_base_str_dire] 
  index_base_int_dire = [x + 1 for x in index_base_agi_dire] 
  index_str_gain_dire = [x + 1 for x in index_base_int_dire] 
  index_agi_gain_dire = [x + 1 for x in index_str_gain_dire] 
  index_int_gain_dire = [x + 1 for x in index_agi_gain_dire]
  index_base_health_dire = [x + 1 for x in index_int_gain_dire]
  index_base_health_reg_dire = [x + 1 for x in index_base_health_dire]
  index_move_speed_dire = [x + 1 for x in index_base_health_reg_dire]

  sum_base_str_dire = 0
  for col in index_base_str_dire:
    sum_base_str_dire = sum_base_str_dire + shap_values[row, col] 
  
  sum_base_agi_dire = 0
  for col in index_base_agi_dire:
    sum_base_agi_dire = sum_base_agi_dire + shap_values[row, col]
  
  sum_base_int_dire = 0
  for col in index_base_int_dire:
    sum_base_int_dire = sum_base_int_dire + shap_values[row, col]
  
  sum_str_gain_dire = 0
  for col in index_str_gain_dire:
    sum_str_gain_dire = sum_str_gain_dire + shap_values[row, col]
  
  sum_agi_gain_dire = 0
  for col in index_agi_gain_dire:
    sum_agi_gain_dire = sum_agi_gain_dire + shap_values[row, col]
  
  sum_int_gain_dire = 0
  for col in index_int_gain_dire:
    sum_int_gain_dire = sum_int_gain_dire + shap_values[row, col]
  
  sum_base_health_dire = 0
  for col in index_base_health_dire:
    sum_base_health_dire = sum_base_health_dire + shap_values[row, col]
  
  sum_base_health_reg_dire = 0
  for col in index_base_health_reg_dire:
    sum_base_health_reg_dire = sum_base_health_reg_dire + shap_values[row, col]
  
  sum_move_speed_dire = 0
  for col in index_move_speed_dire:
    sum_move_speed_dire = sum_move_speed_dire + shap_values[row, col]



  index_winrate_rad = list(range(108+238,113+238))
  sum_winrate_rad = 0
  for col in index_winrate_rad:
    sum_winrate_rad = sum_winrate_rad + shap_values[row, col]

  index_winrate_dire = list(range(113+238,118+238))
  sum_winrate_dire = 0
  for col in index_winrate_dire:
    sum_winrate_dire = sum_winrate_dire + shap_values[row, col]

  index_winrate_player_rad = list(range(118+238,123+238))
  winrate_player_rad = 0
  for col in index_winrate_player_rad:
    winrate_player_rad = winrate_player_rad + shap_values[row, col]

  index_winrate_player_dire = list(range(123+238,128+238))
  winrate_player_dire = 0
  for col in index_winrate_player_dire:
    winrate_player_dire = winrate_player_dire + shap_values[row, col]

  # hero player stats
  index_winrate_hp_rad = list(range(128+238,133+238))
  winrate_hp_rad = 0
  for col in index_winrate_hp_rad:
    winrate_hp_rad = winrate_hp_rad + shap_values[row, col]
  
  index_xpm_hp_rad = list(range(133+238,138+238))
  xpm_hp_rad = 0
  for col in index_xpm_hp_rad:
    xpm_hp_rad = xpm_hp_rad + shap_values[row, col]

  index_goldm_hp_rad = list(range(138+238,143+238))
  goldm_hp_rad = 0
  for col in index_goldm_hp_rad:
    goldm_hp_rad = goldm_hp_rad + shap_values[row, col]

  index_deathsm_hp_rad = list(range(143+238,148+238))
  deathsm_hp_rad = 0
  for col in index_deathsm_hp_rad:
    deathsm_hp_rad = deathsm_hp_rad + shap_values[row, col]

  index_damagem_hp_rad = list(range(148+238,153+238))
  damagem_hp_rad = 0
  for col in index_damagem_hp_rad:
    damagem_hp_rad = damagem_hp_rad + shap_values[row, col]

  index_killm_hp_rad = list(range(153+238,158+238))
  killm_hp_rad = 0
  for col in index_killm_hp_rad:
    killm_hp_rad = killm_hp_rad + shap_values[row, col]

  index_assistm_hp_rad = list(range(158+238,163+238))
  assistm_hp_rad = 0
  for col in index_assistm_hp_rad:
    assistm_hp_rad = assistm_hp_rad + shap_values[row, col]

  index_hero_damagem_hp_rad = list(range(163+238,168+238))
  herodamagem_hp_rad = 0
  for col in index_hero_damagem_hp_rad:
    herodamagem_hp_rad = herodamagem_hp_rad + shap_values[row, col]

  index_healm_hp_rad = list(range(168+238,173+238))
  healm_hp_rad = 0
  for col in index_healm_hp_rad:
    healm_hp_rad = healm_hp_rad + shap_values[row, col]



  index_winrate_hp_dire = list(range(173+238,178+238))
  winrate_hp_dire = 0
  for col in index_winrate_hp_dire:
    winrate_hp_dire = winrate_hp_dire + shap_values[row, col]
  
  index_xpm_hp_dire = list(range(178+238,183+238))
  xpm_hp_dire = 0
  for col in index_xpm_hp_dire:
    xpm_hp_dire = xpm_hp_dire + shap_values[row, col]

  index_goldm_hp_dire = list(range(183+238,188+238))
  goldm_hp_dire = 0
  for col in index_goldm_hp_dire:
    goldm_hp_dire = goldm_hp_dire + shap_values[row, col]

  index_deathsm_hp_dire = list(range(188+238,193+238))
  deathsm_hp_dire = 0
  for col in index_deathsm_hp_rad:
    deathsm_hp_dire = deathsm_hp_dire + shap_values[row, col]

  index_damagem_hp_dire = list(range(193+238,198+238))
  damagem_hp_dire = 0
  for col in index_damagem_hp_rad:
    damagem_hp_dire = damagem_hp_dire + shap_values[row, col]

  index_killm_hp_dire = list(range(198+238,203+238))
  killm_hp_dire = 0
  for col in index_killm_hp_dire:
    killm_hp_dire = killm_hp_dire + shap_values[row, col]

  index_assistm_hp_dire = list(range(203+238,208+238))
  assistm_hp_dire = 0
  for col in index_assistm_hp_dire:
    assistm_hp_dire = assistm_hp_dire + shap_values[row, col]

  index_damagem_hp_dire = list(range(208+238,213+238))
  herodamagem_hp_dire = 0
  for col in index_damagem_hp_dire:
    herodamagem_hp_dire = herodamagem_hp_dire + shap_values[row, col]

  index_healm_hp_dire = list(range(213+238,218+238))
  healm_hp_dire = 0
  for col in index_healm_hp_dire:
    healm_hp_dire = healm_hp_dire + shap_values[row, col]


  # new_row = [sum_hero_rad] + [sum_hero_dire] + [sum_role_rad] + [sum_role_dire] + [sum_base_str_rad] + [sum_base_agi_rad] + [sum_base_int_rad] + [sum_str_gain_rad] + [sum_agi_gain_rad] + [sum_int_gain_rad] + [sum_base_health_reg_rad] + [sum_move_speed_rad] + [sum_base_str_dire] + [sum_base_agi_dire] + [sum_base_int_dire] + [sum_str_gain_dire] + [sum_agi_gain_dire] + [sum_int_gain_dire] + [sum_base_health_reg_dire] + [sum_move_speed_dire] + [sum_winrate_rad] + [sum_winrate_dire] + [winrate_player_rad] + [winrate_player_dire] + [winrate_hp_rad] + [xpm_hp_rad] + [goldm_hp_rad] + [killm_hp_rad] + [assistm_hp_rad] + [damagem_hp_rad] + [healm_hp_rad] + [winrate_hp_dire] + [xpm_hp_dire] + [goldm_hp_dire] + [killm_hp_dire] + [assistm_hp_dire] + [damagem_hp_dire] + [healm_hp_dire] + [shap_values[row,198]]
  # new_row = list(shap_values[row, 0:18]) + [sum_base_str_rad] + [sum_base_agi_rad] + [sum_base_int_rad] + [sum_str_gain_rad] + [sum_agi_gain_rad] + [sum_int_gain_rad] + [sum_base_health_rad] + [sum_base_health_reg_rad] + [sum_move_speed_rad] + [sum_base_str_dire] + [sum_base_agi_dire] + [sum_base_int_dire] + [sum_str_gain_dire] + [sum_agi_gain_dire] + [sum_int_gain_dire] + [sum_base_health_dire] + [sum_base_health_reg_dire] + [sum_move_speed_dire] + [sum_winrate_rad] + [sum_winrate_dire] + [winrate_player_rad] + [winrate_player_dire] + [winrate_hp_rad] + [xpm_hp_rad] + [goldm_hp_rad] + [killm_hp_rad] + [assistm_hp_rad] + [damagem_hp_rad] + [healm_hp_rad] + [winrate_hp_dire] + [xpm_hp_dire] + [goldm_hp_dire] + [killm_hp_dire] + [assistm_hp_dire] + [damagem_hp_dire] + [healm_hp_dire] + [shap_values[row,198]]
  
  new_row = [sum_hero_rad] + [sum_hero_dire] + list(shap_values[row, 238:256]) + [sum_base_str_rad] + [sum_base_agi_rad] + [sum_base_int_rad] + [sum_str_gain_rad] + [sum_agi_gain_rad] + [sum_int_gain_rad] + [sum_base_health_rad] + [sum_base_health_reg_rad] + [sum_move_speed_rad] + [sum_base_str_dire] + [sum_base_agi_dire] + [sum_base_int_dire] + [sum_str_gain_dire] + [sum_agi_gain_dire] + [sum_int_gain_dire] + [sum_base_health_dire] + [sum_base_health_reg_dire] + [sum_move_speed_dire] + [sum_winrate_rad] + [sum_winrate_dire] + [winrate_player_rad] + [winrate_player_dire] + [winrate_hp_rad] + [xpm_hp_rad] + [goldm_hp_rad] + [deathsm_hp_rad] + [damagem_hp_rad] + [killm_hp_rad] + [assistm_hp_rad] + [herodamagem_hp_rad] + [healm_hp_rad] + [winrate_hp_dire] + [xpm_hp_dire] + [goldm_hp_dire] + [deathsm_hp_dire] + [damagem_hp_dire] + [killm_hp_dire] + [assistm_hp_dire] + [herodamagem_hp_dire] + [healm_hp_dire] + [shap_values[row,198]]


  # print(new_row)
  print(len(new_row))
  # shap_values_new.append(pd.Series(new_row, index=shap_values_new.columns[:len(new_row)]), ignore_index=True)
  shap_values_new.loc[row] = new_row



In [103]:
# Sanity check: expect one row per X_test row and one column per grouped feature.
shap_values_new.shape

(552, 61)

In [0]:
# Convert the grouped SHAP table to a plain array for shap.summary_plot.
# np.asarray is idempotent, so re-running this cell does not fail the way
# calling .to_numpy() on an already-converted ndarray would.
shap_values_new = np.asarray(shap_values_new)

In [105]:
# Shape of the raw held-out feature matrix before aggregation.
X_test.shape

(552, 457)

In [0]:
# Modify X_test accordingly: aggregate the raw feature columns into the same 61
# groups used for the SHAP matrix, so that features and SHAP values share the
# `new_columns` layout expected by shap.summary_plot.
#
# Raw column layout, as implied by the index arithmetic (the team labels are
# taken from the `_r` / `_d` suffixes in `new_columns`):
#   0-118    hero one-hot block for `_r`      -> summed into 'heroes_r'
#   119-237  hero one-hot block for `_d`      -> summed into 'heroes_d'
#   238-255  18 role columns                  -> kept individually
#   256-300  `_r` base stats, 5 slots x 9 stats, stat index varies fastest
#   301-345  `_d` base stats, same arrangement
#   346-455  22 consecutive blocks of 5 columns (win rates and hero-player stats)
#   456      the single remaining column      -> 'first_pick'
feature_matrix = X_test.to_numpy(dtype=float)
assert feature_matrix.shape[1] == 218 + 238 + 1, (
    "unexpected number of raw columns: %d" % feature_matrix.shape[1]
)

feature_column_groups = [list(range(0, 119)), list(range(119, 238))]
feature_column_groups += [[col] for col in range(238, 256)]

# Base/gain stats: for each team the same stat sits 9 columns apart across the 5 slots.
for team_first_col in (18 + 238, 63 + 238):
    for stat_offset in range(9):
        feature_column_groups.append(
            [team_first_col + 9 * slot + stat_offset for slot in range(5)]
        )

# Win rates and hero-player stats: contiguous blocks of 5 columns, already in
# `new_columns` order. Every group is computed from its own columns in this
# cell; the previous version never assigned the `_d` death/damage sums (it
# reused stale values left over from the SHAP cell) and overwrote the `_r` ones.
for block_start in range(108 + 238, 218 + 238, 5):
    feature_column_groups.append(list(range(block_start, block_start + 5)))

# first_pick is the last raw column. The former hard-coded index 198 pointed
# into the second hero one-hot block, which is already summed above.
feature_column_groups.append([218 + 238])

assert len(feature_column_groups) == len(new_columns)

# Build the frame in one vectorized step instead of appending row by row.
X_test_new = pd.DataFrame(
    {
        name: feature_matrix[:, cols].sum(axis=1)
        for name, cols in zip(new_columns, feature_column_groups)
    },
    columns=new_columns,
)
print(X_test_new.shape)



In [107]:
# Replace any missing values in the aggregated feature table with the
# corresponding column median, then display the resulting shape.
X_test_new = X_test_new.fillna(X_test_new.median())
X_test_new.shape

(552, 61)

In [0]:
# Start from matplotlib's default style, then switch all text to bold.
matplotlib.rcParams.update(matplotlib.rcParamsDefault)
plt.rcParams.update({"font.weight": "bold"})

# show=False leaves the figure open so the axes can be restyled and saved below.
fig = shap.summary_plot(shap_values_new, X_test_new, show=False)
plt.xlabel('')
plt.yticks(fontsize=22)
plt.xticks(fontsize=18)
plt.savefig('shapvalues_score_bold.pdf', bbox_inches="tight")

In [0]:
# Start from matplotlib's default style, then switch all text to bold.
matplotlib.rcParams.update(matplotlib.rcParamsDefault)
plt.rcParams.update({"font.weight": "bold"})

# Bar variant of the summary plot; show=False keeps the figure open for resizing and saving.
fig = shap.summary_plot(shap_values_new, X_test_new, plot_type="bar", show=False)

# Report the size shap picked, then enlarge the figure so the large tick labels fit.
size = plt.gcf().get_size_inches()
print(size)
plt.gcf().set_size_inches(9, 13)

# The x-axis label is intentionally left blank.
plt.xlabel('')
plt.yticks(fontsize=24)
plt.xticks(fontsize=21)
plt.savefig('shapvalues_score_barplot_bold.pdf', bbox_inches="tight")