## Setup

In [None]:
### SETUP ###
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import joblib
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV, StratifiedKFold
import random
from plyer import notification
from scipy.stats import percentileofscore
import matplotlib.pyplot as plt

pd.set_option('future.no_silent_downcasting', True)

data = joblib.load("../data/pkls/df_dict.pkl")

# Load necessary data

players = data['players']
teams = data['teams']

team_medic_stats = data['team_medic_stats']
info = data['info']
class_kda = data['class_kda']b

In [None]:
class_names = ['medic','demoman','soldier','scout']

real_class_kda = class_kda[['steamid'] + [col for col in class_kda.columns if 
           any(class_name in col for class_name in class_names)]].copy()

players = players.merge(real_class_kda,on = ['steamid'])


Unnamed: 0,id,steamid,primary_class,primary_class_time,name,assists,cpc,dapd,dapm,deaths,...,scout_kills,soldier_kills,demoman_deaths,scout_deaths,soldier_deaths,demoman_assists,medic_assists,scout_assists,soldier_assists,medic_deaths
0,3886768,[U:1:113283780],medic,1865,loafe,5,1,69,8,4,...,,,1.0,1.0,2.0,0.0,0.0,3.0,1.0,
1,3886768,[U:1:131704728],medic,1865,sushi,8,3,47,6,4,...,,,2.0,0.0,2.0,0.0,1.0,2.0,4.0,
2,3886768,[U:1:136473263],soldier,1865,Math,8,3,290,168,18,...,3.0,1.0,2.0,8.0,6.0,4.0,1.0,5.0,6.0,
3,3886768,[U:1:159413592],scout,1862,TIGER BALM,10,6,700,202,9,...,2.0,8.0,2.0,6.0,1.0,2.0,2.0,4.0,12.0,
4,3886768,[U:1:185989865],soldier,1840,gabby,4,1,700,247,11,...,2.0,6.0,3.0,5.0,3.0,3.0,2.0,2.0,8.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53637,1764254,[U:1:44119927],pyro,263,Kevin | Gas Station Soda - 99¢,,,253,173,3,...,1.0,1.0,0.0,0.0,0.0,,0.0,1.0,1.0,
53638,1764254,[U:1:58753664],spy,263,Ambluj,,,439,602,6,...,0.0,1.0,0.0,0.0,2.0,,2.0,0.0,1.0,
53639,1764254,[U:1:78449395],sniper,263,Kevin | ProfessorParsnips,,,414,283,3,...,2.0,0.0,0.0,0.0,0.0,,1.0,2.0,0.0,
53640,1764254,[U:1:82268202],scout,263,Serru,2,2,436,397,4,...,0.0,0.0,0.0,3.0,0.0,,1.0,2.0,0.0,


In [22]:
real_class_kda

Unnamed: 0,steamid,demoman_kills,medic_kills,scout_kills,soldier_kills,demoman_deaths,scout_deaths,soldier_deaths,demoman_assists,medic_assists,scout_assists,soldier_assists,medic_deaths
0,[U:1:113283780],,,,,1.0,1.0,2.0,0.0,0.0,3.0,1.0,
1,[U:1:131704728],,,,,2.0,0.0,2.0,0.0,1.0,2.0,4.0,
2,[U:1:136473263],4.0,0.0,3.0,1.0,2.0,8.0,6.0,4.0,1.0,5.0,6.0,
3,[U:1:159413592],1.0,0.0,2.0,8.0,2.0,6.0,1.0,2.0,2.0,4.0,12.0,
4,[U:1:185989865],3.0,1.0,2.0,6.0,3.0,5.0,3.0,3.0,2.0,2.0,8.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
8,[U:1:44119927],,0.0,1.0,1.0,0.0,0.0,0.0,,0.0,1.0,1.0,
9,[U:1:58753664],,2.0,0.0,1.0,0.0,0.0,2.0,,2.0,0.0,1.0,
10,[U:1:78449395],,1.0,2.0,0.0,0.0,0.0,0.0,,1.0,2.0,0.0,
11,[U:1:82268202],,1.0,0.0,0.0,0.0,3.0,0.0,,1.0,2.0,0.0,


## Data Manips
- Detect if the teams follow the medic,2 scout, 2 soldier, 1 demo

In [118]:
team_comp = players.groupby(['id', 'team'])['primary_class'].agg(lambda x: ".".join(x)).reset_index(name='class_concat')

team_comps = (team_comp['class_concat'].str.split("."))

# Test if team_comp is correct
correct = []
for team in team_comps:
    if len(team) != 6:
        correct.append(0)
        continue
    demoman = 0
    soldier = 0 
    scout = 0
    medic = 0
    for class_name in team:
        if class_name == 'demoman':
            demoman += 1
        if class_name == 'soldier':
            soldier += 1
        if class_name == 'scout':
            scout += 1
        if class_name == 'medic':
            medic += 1
    if demoman == 1 and soldier == 2 and scout == 2 and medic == 1:
        correct.append(1)
    else:
        correct.append(0)

team_comp['correct'] = correct

team_comp = team_comp.groupby('id').agg(correct_team_comp = ('correct','sum'))

team_comp = team_comp[team_comp['correct_team_comp'] == 2]

players = players[players['id'].isin(team_comp.reset_index()['id'])]

- Remove short matches

In [119]:
short_matches = info[info['length'] < 450]

players = players[~players['id'].isin(short_matches['id'])]

- Remove non-valid maps

In [120]:
## Check if map is valid
# If the map doesnt have at least 50 plays, not valid.
# Sometimes people just upload as "sunshine" need to ensure there are only single maps

# Find valid map names
play_count = 50
info_correct = info[info['id'].isin(players['id'])].copy()
maps = info_correct['map'].str.lower().str.split("_")
map_counts = pd.Series(maps.str[1].value_counts())

valid_maps = map_counts[map_counts > play_count]
valid_map_names = valid_maps.index

# Grab the first and second word of the mapname
first_map_word_length = maps.str[0].str.split(" ").apply(len)
first_map_word = maps.str[0].str.split(" ").str[0]
second_map_word = maps.str[1]

first_map_length_check = first_map_word_length == 1
first_map_check = first_map_word.isin(valid_map_names)
second_map_check = second_map_word.isin(valid_map_names)

info_correct['map_check'] = (first_map_length_check & first_map_check) | (second_map_check)

correct_map = info_correct[info_correct['map_check'] == True].copy()

players = players[players['id'].isin(correct_map['id'])]

- Map name

In [121]:
map_list = []
for map in correct_map['map'].str.lower().values:
    for map_name in valid_map_names:
        if map_name in map:
            map_list.append(map_name)

correct_map['map_name'] = map_list

In [122]:
map_list = []
for map in correct_map['map'].str.lower().values:
    for map_name in valid_map_names:
        if map_name in map:
            map_list.append(map_name)

correct_map['map_name'] = map_list

- Fix class_names to have 1's and 2's

In [123]:
import random
# Copy the DataFrame so we don't overwrite the original
players_fixed = players.copy()

# Group by match id and team
grouped = players_fixed.groupby(['id', 'team'])

# Function to randomly rename duplicate classes within each group
def rename_classes_randomly(df):
    random.seed(123)
    np.random.seed(123)
    df = df.copy()  # avoid SettingWithCopyWarning
    for cls in ['scout', 'soldier']:
        indices = df.index[df['primary_class'] == cls].tolist()
        if len(indices) == 2:
            # Randomly shuffle the suffixes
            suffixes = [f"{cls}_1", f"{cls}_2"]
            random.shuffle(suffixes)
            for i, idx in enumerate(indices):
                df.at[idx, 'primary_class'] = suffixes[i]
    return df

# Apply function to each group
players_fixed = grouped.apply(rename_classes_randomly, include_groups=False).reset_index()

# Drop the redundant index column
players_fixed.drop('level_2', axis=1, inplace=True)


- Drop Appropriate Columns

In [124]:
drop_cols = [ 'primary_class_time', 'name',
       'assists', 'cpc','heal', 'hr','deaths', 'dmg', 'dmg_real', 'drops',
       'dt', 'dt_real','kills','medkits','medkits_hp','sentries', 
       'suicides','ka','offclass_time','total_time','kapd','ka','ka_pct','hroi',"dmg_real_pct",
       "dmg_pct",'suicide_rate']

players_fixed.drop(drop_cols,axis =1,inplace = True)

- Make the combat_players and medic_players datasets

In [None]:
combat_classes = ['scout', 'soldier', 'demoman']
pattern = '|'.join(combat_classes)  # Creates 'scout|soldier|demoman'
combat_players = players_fixed[players_fixed['primary_class'].str.contains(pattern, case=False, na=False)].copy()

# Make medic stats
medic_players = players_fixed[players_fixed['primary_class'] == 'medic'].copy()

# Bind in team_medic
medic_players = medic_players.merge(team_medic_stats,on= ['id','team'])


# Drop Bad columns
drop_medic = ['offclass_pct','hroi_real','hr_pct',
              'medicstats.advantages_lost','medicstats.deaths_with_95_99_uber',
       'medicstats.deaths_within_20s_after_uber', 'ubers', 'drops',
       'medic_deaths', 'exchanges_initiated', 'drops_forced',
       'successful_ubers', 'medic_deaths_forced', 'exchanges_not_initiated',
       'successful_uber_rate', 'forced_medic_death_rate', 'forced_drop_rate',
       'medic_deaths_capitalized', 'round_losing_medic_deaths',
       'round_losing_medic_death_rate', 'medic_death_capitalization_rate',
       'advantages_lost_per_round']

drop_combat = ['healpm']

combat_players.drop(drop_combat,axis = 1,inplace = True)

medic_players.drop(drop_medic,axis = 1,inplace = True)

# Make columns numeric and fillna with 0
non_numeric_columns = ['id', 'team', 'primary_class','steamid']
for df in [medic_players,combat_players]:
       for col in df.columns:
              if col in non_numeric_columns:
                     continue
              df[col] = pd.to_numeric(df[col])
       df.fillna(0,inplace=True)

# Remove medicstats. from colnames

medic_players.columns = [col.replace("medicstats.","") for col in medic_players.columns]
       

KeyError: "['healps'] not found in axis"

- Widen the datasets

In [None]:
index_columns = ['id', 'team', 'primary_class']
combat_wide = (
    combat_players
    .set_index(index_columns)  # MultiIndex
    .unstack('primary_class')                    # Pivot on class
)

medic_wide = (
    medic_players
    .set_index(index_columns)  # MultiIndex
    .unstack('primary_class')                        # Pivot on class
)

# Step 3: Flatten the MultiIndex column names
combat_wide.columns = [f"{cls}_{stat}" for stat, cls in combat_wide.columns]

# Step 4: Reset index
combat_wide = combat_wide.reset_index()

# Drop non-scout offclass 
cols = [col for col in combat_wide.columns if 'offclass' in col and 'scout' not in col]
combat_wide = combat_wide.drop(cols,axis = 1)

# Step 3: Flatten the MultiIndex column names
medic_wide.columns = [f"{cls}_{stat}" for stat, cls in medic_wide.columns]

# Step 4: Reset index
medic_wide = medic_wide.reset_index()



- Merge Combat, Medic, Mapname and Winner

In [None]:
# Remove duplicate columns
medic_merger = medic_wide.drop(['id','team'],axis =1)

# Merge medic and combat
players_wide = pd.concat([combat_wide,medic_merger],axis = 1)

players_wide = players_wide.merge(teams[['id','team','winner']],on =['id','team'])

- Make Model Datasets

In [None]:
drop_cols = ['id','team','winner'] + [col for col in players_wide if 'steamid' in col]

X = players_wide.drop(drop_cols,axis = 1).copy()

y = players_wide['winner']

- Rank Normalize Data

In [None]:
# Rank the scout and soldier data based on the entire dataset, not the pivot version
scout_soldier = X[[col for col in X.columns if 'scout' in col or 'soldier' in col]].copy()

# Turn the data into long
scout_soldier_long = pd.DataFrame()
for index in ['1','2']:
    df = scout_soldier[[col for col in scout_soldier.columns if index in col]].copy()
    df.columns = [col.replace("_"+index,"") for col in df.columns]
    df['num']= index
    scout_soldier_long = pd.concat([scout_soldier_long,df])

# Drop index and rank
num = scout_soldier_long['num']
scout_soldier_long.drop("num",axis =1,inplace = True)
ranked_scout_soldier = scout_soldier_long.rank(pct=True)

# Re attach index
ranked_scout_soldier['num'] = num

# Widen the datset again
scout_soldier = pd.DataFrame()
for index in ['1','2']:
    df = ranked_scout_soldier[ranked_scout_soldier['num'] == index].copy()
    df.drop('num',axis = 1,inplace = True)
    df.columns = [col + "_" + index for col in df.columns]
    scout_soldier = pd.concat([scout_soldier,df],axis = 1)


# Rank the medic and demo stats
medic_demo = X[[col for col in X.columns if 'scout' not in col and 'soldier' not in col]].copy()

medic_demo = medic_demo.rank(pct=True)

# Merge data back together

X = pd.concat([scout_soldier,medic_demo],axis = 1)

In [None]:
# # Map name
# team_maps = players_wide.merge(correct_map[['id','map_name']],on = 'id')['map_name']
# map_dummies = pd.get_dummies(team_maps)
# X = pd.concat([X,map_dummies],axis =1 )
# X = X.astype({col: bool for col in X.select_dtypes(include='object').columns})


In [None]:
corr_mat = X.corr().abs()

for i in corr_mat.index:
    for j in corr_mat.columns:
        if i == j:
            continue
        corr = corr_mat.loc[i,j]
        if corr > .75:
            s = f'{i} x {j}'
            print(s)

scout_dapd_1 x scout_kpd_1
soldier_dapd_1 x soldier_kpd_1
scout_kpd_1 x scout_dapd_1
soldier_kpd_1 x soldier_dapd_1
scout_dapd_2 x scout_kpd_2
soldier_dapd_2 x soldier_kpd_2
scout_kpd_2 x scout_dapd_2
soldier_kpd_2 x soldier_dapd_2
demoman_dapd x demoman_kpd
demoman_kpd x demoman_dapd
medic_dapd x medic_dapm
medic_dapm x medic_dapd
medic_kpd x medic_kill_pct
medic_kill_pct x medic_kpd
medic_deaths_pct x medic_dt_real_pct
medic_dt_real_pct x medic_deaths_pct


## Make a Da Model

In [None]:
seed = 123

In [None]:
# Set seeds
random.seed(seed)
np.random.seed(seed)

# Split into test and eval
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

X_test, X_eval, y_test, y_eval = train_test_split(X_test, y_test, test_size=0.3)



# Define the base model
model = XGBClassifier(eval_metric='logloss', random_state=seed)

# Define parameter grid to search over
param_grid = {
    'max_depth': np.arange(3, 6, 1),
    'learning_rate': [.15],
    'n_estimators': np.arange(100, 250, 10),
    'subsample': [.8],
    'colsample_bytree': [.75]
}

# Define cross-validation strategy
cv = StratifiedKFold(n_splits=6, shuffle=True, random_state=seed)

# Set up GridSearchCV   
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='accuracy',   
    cv=cv,
    verbose=0,
    n_jobs=22
)

#grid_search.fit(X_train, y_train)

#best_model = grid_search.best_estimator_

#grid_search.best_params_



In [None]:
`import joblib
model = joblib.load("../data/pkls/xgb.pkl")

importance = pd.Series(model.feature_importances_,name = "importance")
feature_names = pd.Series(model.feature_names_in_,name = "name")
summary = pd.concat([feature_names,importance],axis = 1).sort_values(by = 'importance',ascending=False)

# Grab necessary vars
score = model.score(X_eval,y_eval)
probs = model.predict_proba(X_test)
probs = (probs[:,1])

# Assign values to dict 
summary['score'] = score

summary['importance_relative'] = summary['importance'] / summary['importance'].max()

summary`

In [105]:
minutes = (1 / (players['dapm']) )* players['dmg']

In [113]:
1 / np.nan

nan

In [101]:
summary.round(3)

Unnamed: 0,name,importance,score,importance_relative
52,demoman_kpd,0.075,0.886,1.000
4,scout_kpd_1,0.050,0.886,0.663
29,scout_kpd_2,0.049,0.886,0.645
50,demoman_dapd,0.046,0.886,0.614
5,soldier_kpd_1,0.033,0.886,0.445
...,...,...,...,...
41,soldier_dt_pct_2,0.006,0.886,0.078
13,scout_assists_pct_1,0.006,0.886,0.077
72,medic_medkits_hpps,0.006,0.886,0.076
39,soldier_assists_pct_2,0.006,0.886,0.076


## Make Per


### Bad Old Method

- Define columns to be set to 0 or 1

In [None]:
class_name_list = ['scout','scout','soldier','soldier','medic','demoman']
num_list = ['1','2','1','2','','']
cols_to_zero = ['dapd', 'dapm', 'kpd',
       'offclass_pct', 'kill_pct', 'cpc_pct', 'assists_pct',
       'hr_pct', 'hroi_real', 'medkits_hpps','avg_uber_length','uberspm','healps']

cols_to_one = ['dt_pct','dt_real_pct','deaths_pct',
               'avg_time_before_healing','avg_time_before_using',
                 'avg_time_to_build','biggest_advantage_lost',
                 'deaths_with_95_99_uber_rate',
                 'deaths_within_20s_after_uber_rate', 'drops_rate']


def remove_player(df,num,class_name):
    df = df.copy()
    for col in df.columns:
        if class_name not in col or num not in col:
            continue

        if any(name in col for name in cols_to_zero):
            df[col] = .02
        elif any(name in col for name in cols_to_one):
            df[col] = .98
    return df

remove_player_probs = pd.DataFrame()

probs = model.predict_proba(X_test)
probs = probs[:,1]
probs = pd.Series(probs,name = "prediction")

remove_player_probs = pd.concat([remove_player_probs,probs],axis = 1)


for class_name,num in zip(class_name_list,num_list):

    name = "prediction_no_" + class_name + "_" + num
    name2 = "pct_no_" + class_name + "_" + num
    X_test_player_remove = remove_player(df = X_test,class_name = class_name,num = num)

    player_removed_probs = model.predict_proba(X_test_player_remove)

    player_removed_probs = player_removed_probs[:,1]

    player_removed_probs = pd.Series(player_removed_probs,name = name)

    player_removed_probs_pct = pd.Series(player_removed_probs / remove_player_probs['prediction'],name = name2)

    remove_player_probs = pd.concat([remove_player_probs,
                                     player_removed_probs,
                                     player_removed_probs_pct],
                                     axis = 1)



### New Per Method

In [59]:
import shap

explainer = shap.TreeExplainer(model)

In [63]:
class_name_list = ['scout','scout','soldier','soldier','medic','demoman']
num_list = ['1','2','1','2','','']


sum_by_class_list = []

for df in [X_test,X_eval]:
    shap_values = explainer.shap_values(df)

    shap_values = pd.DataFrame(shap_values)
    shap_values.columns = df.columns

    shap_values.drop([col for col in shap_values.columns if col in valid_map_names],axis = 1,inplace = True)

    class_names = []

    shap_values_flip = shap_values.T.copy()

    for index in shap_values_flip.index:
        for class_name,num in zip(class_name_list,num_list):
            if class_name in index and num in index:
                class_names.append(class_name + num)
    shap_values.drop([col for col in shap_values.columns if col in valid_map_names],axis = 1,inplace = True)

    shap_values_flip['test'] = class_names

    sum_by_class = shap_values_flip.groupby('test').sum().T
    sum_by_class_list.append(sum_by_class)


def get_quantile_series(reference_series, target_series):
    return target_series.apply(lambda x: percentileofscore(reference_series, x, kind='mean') / 100)

quantiled_df = pd.DataFrame({
    col: get_quantile_series(sum_by_class_list[0][col], sum_by_class_list[1][col])
    for col in sum_by_class_list[1].columns
})



In [64]:
per_test = sum_by_class_list[0].rank(pct = True)
per_test['winner'] = y_test.values

win_per = per_test[per_test['winner']==1]

In [70]:
per_test

test,demoman,medic,scout1,scout2,soldier1,soldier2,winner
0,0.162584,0.893096,0.971047,0.524499,0.945434,0.287305,1
1,0.066815,0.805122,0.167038,0.111359,0.358575,0.195991,0
2,0.992205,0.537862,0.592428,0.864143,0.874165,0.848552,1
3,0.063474,0.567929,0.542316,0.319599,0.164811,0.086860,0
4,0.148107,0.345212,0.184855,0.493318,0.422049,0.783964,0
...,...,...,...,...,...,...,...
893,0.427617,0.799555,0.996659,0.515590,0.997773,0.281737,1
894,0.756125,0.369710,0.583519,0.369710,0.135857,0.014477,0
895,0.255011,0.436526,0.400891,0.044543,0.158129,0.488864,0
896,0.428731,0.135857,0.563474,0.604677,0.371938,0.339644,1


In [78]:
per_df

Unnamed: 0,demoman,medic,scout1,scout2,soldier1,soldier2,winner
0,3.71,6.89,5.02,4.97,9.96,6.38,1
1,3.02,3.01,3.85,5.17,7.57,3.27,0
2,6.53,1.69,8.46,0.16,1.20,0.13,0
3,8.24,6.65,4.27,5.37,8.05,8.52,1
4,6.49,6.35,5.01,3.94,6.80,4.87,1
...,...,...,...,...,...,...,...
380,7.69,8.69,8.01,5.07,4.06,6.98,1
381,7.86,8.94,2.63,9.61,4.89,5.18,1
382,7.69,7.62,0.23,5.31,6.29,7.35,0
383,0.91,2.23,3.07,3.52,4.02,2.14,0


In [92]:
testing = per_df.drop('winner',axis = 1).T

PIM_mean = testing.mean()

PIM_mean = pd.concat([PIM_mean,per_df['winner']],axis =1)

PIM_mean.groupby('winner').mean()

Unnamed: 0_level_0,0
winner,Unnamed: 1_level_1
0,3.572484
1,6.46093


In [79]:
len(X_eval) / len(X)

0.060043668122270744

In [65]:
per_df = (quantiled_df * 10).round(2)
per_df['winner'] = y_eval.values
per_df.groupby("winner").mean()

Unnamed: 0_level_0,demoman,medic,scout1,scout2,soldier1,soldier2
winner,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,3.365441,3.591422,3.45598,3.605931,3.796176,3.619951
1,6.722044,6.024254,6.259006,6.406519,6.930331,6.423425


In [None]:
s = "qseqSA"
s.lower()

'qseqsa'

: 

In [2]:
import shap
import joblib
from sklearn.model_selection import train_test_split
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import percentileofscore
import os
#os.chdir(os.path.dirname(os.path.abspath(__file__)))

# region Setup
# Read in Data 
model_ready_data_dict = joblib.load('../data/pkls/model_ready_data_dict.pkl')
X = model_ready_data_dict['X']
y = model_ready_data_dict['y']

model = joblib.load('../data/pkls/xgb.pkl')



# Set seeds
seed = 123
random.seed(seed)
np.random.seed(seed)


# Split into test and eval
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)


X_test, X_eval, y_test, y_eval = train_test_split(X_test, y_test, test_size=0.3)
# endregiopn

# region Shap Explainer Sum by Class

explainer = shap.TreeExplainer(model)


In [None]:
shap_values = explainer.shap_values(X_test)


: 

In [2]:
class_name_list = ['scout','scout','soldier','soldier','medic','demoman']
num_list = ['1','2','1','2','','']


sum_by_class_list = []

for df in [X_test,X_eval]:
    shap_values = explainer.shap_values(df)

    shap_values = pd.DataFrame(shap_values)
    shap_values.columns = df.columns

   # shap_values.drop([col for col in shap_values.columns if col in valid_map_names],axis = 1,inplace = True)

    class_names = []

    shap_values_flip = shap_values.T.copy()

    for index in shap_values_flip.index:
        for class_name,num in zip(class_name_list,num_list):
            if class_name in index and num in index:
                class_names.append(class_name + num)
 #   shap_values.drop([col for col in shap_values.columns if col in valid_map_names],axis = 1,inplace = True)

    shap_values_flip['test'] = class_names

    sum_by_class = shap_values_flip.groupby('test').sum().T
    sum_by_class_list.append(sum_by_class)

# endregion

# Generate the shap value sums by class on test set
# Use those to generate the PIM's on the eval set by looking at the 
# Quantiles of the test set

# region Generate PIM

# function to use another series to generate quantiles
def get_quantile_series(reference_series, target_series):
    return target_series.apply(lambda x: percentileofscore(reference_series, x, kind='mean') / 100)

quantiled_df = pd.DataFrame({
    col: get_quantile_series(sum_by_class_list[0][col], sum_by_class_list[1][col])
    for col in sum_by_class_list[1].columns
})


PIM_df = (quantiled_df * 10).round(2)
PIM_df['winner'] = y_eval.values


: 