# Calculating SHAP values

## Loading libraries, data and model

In [None]:
# One-time setup: uncomment the two lines below to install treeshap from GitHub.
#library(devtools)
#devtools::install_github("ModelOriented/treeshap")

In [None]:
library(ranger)
library(treeshap)
library(dplyr)

In [None]:
# Serialized model object (later passed to ranger.unify()).
model <- readRDS(file = "./model/model.rds")

# Raw shot data; the first column of the CSV is dropped.
df <- read.csv(file = "./data/raw_data.csv")[, -1]

# Preprocessed data set (later used as the data argument for the model).
df_preprocessed <- read.csv(file = "./data/preprocessed_data.csv")

In [None]:
# Build the unified model with ranger.unify(), passing the preprocessed data
# alongside the loaded model.
model_uni <- ranger.unify(model, df_preprocessed)

# From here on `model` refers to the unified model; rebinding the name
# replaces the previously loaded object.
model <- model_uni

In [None]:
# Repeat the first preprocessing step: drop own goals. Row numbers therefore
# differ from those of `df`.
# NOTE(review): presumably this keeps row names aligned with
# `df_preprocessed` -- confirm against the preprocessing script.
subset_base <- dplyr::filter(df, result != "OwnGoal")

In [None]:
# Persist the unified model so it can be reloaded later with readRDS().
# NOTE(review): the file name contains a typo ("unfied"); it is left unchanged
# because other code may load the file under this exact name.
saveRDS(model, file = "./model/unfied_model_treeshap.RDS")

In [None]:
# Top-level output directory for the results written by this notebook.
dir_name <- "./results"

# dir.exists() (unlike file.exists()) is only TRUE for an actual directory,
# so a regular file with the same name is not mistaken for the folder.
# recursive = TRUE also creates any missing parent directories.
if (dir.exists(dir_name)) {
  cat("The folder already exists")
} else {
  dir.create(dir_name, recursive = TRUE)
}

In [None]:
# Uncomment to reload the previously saved unified model instead of rebuilding it.
#model <- readRDS('./model/unfied_model_treeshap.RDS')

## Robert Lewandowski

### Lewandowski's shots in 2021

In [None]:
# Output directory for Lewandowski's 2021 shots.
dir_name <- "./results/lewandowski2021"

# dir.exists() (unlike file.exists()) is only TRUE for an actual directory.
# recursive = TRUE lets this cell succeed even when the parent "./results"
# folder has not been created yet.
if (dir.exists(dir_name)) {
  cat("The folder already exists")
} else {
  dir.create(dir_name, recursive = TRUE)
}

#### Extracting data

In [None]:
# Start from the shots with own goals removed; the player/season filter is
# applied to this copy in the next cell.
subset <- subset_base

In [None]:
# Keep only Robert Lewandowski's shots from the 2021 season.
# which() discards rows where the comparison evaluates to NA; plain logical
# indexing would turn each of them into an all-NA row with a bogus row name,
# which would then break the row-name lookup in `df_preprocessed`.
subset <- subset[
  which(subset$player == "Robert Lewandowski" & subset$season == 2021),
]
head(subset)

Unnamed: 0_level_0,league,id,minute,result,X,Y,player,h_a,player_id,situation,season,shotType,match_id,home_team,away_team,home_goals,away_goals,date,player_assisted,lastAction
Unnamed: 0_level_1,<chr>,<int>,<int>,<chr>,<dbl>,<dbl>,<chr>,<chr>,<int>,<chr>,<int>,<chr>,<int>,<chr>,<chr>,<int>,<int>,<chr>,<chr>,<chr>
217489,Bundesliga,425533,11,BlockedShot,0.874,0.543,Robert Lewandowski,a,227,OpenPlay,2021,Head,17516,Borussia M.Gladbach,Bayern Munich,1,1,2021-08-13 18:30:00,Josip Stanisic,Aerial
217490,Bundesliga,425534,13,SavedShot,0.85,0.443,Robert Lewandowski,a,227,OpenPlay,2021,RightFoot,17516,Borussia M.Gladbach,Bayern Munich,1,1,2021-08-13 18:30:00,Leon Goretzka,HeadPass
217491,Bundesliga,425536,25,SavedShot,0.947,0.472,Robert Lewandowski,a,227,OpenPlay,2021,RightFoot,17516,Borussia M.Gladbach,Bayern Munich,1,1,2021-08-13 18:30:00,Alphonso Davies,Throughball
217497,Bundesliga,425543,41,Goal,0.917,0.429,Robert Lewandowski,a,227,FromCorner,2021,RightFoot,17516,Borussia M.Gladbach,Bayern Munich,1,1,2021-08-13 18:30:00,Joshua Kimmich,Cross
217499,Bundesliga,425545,51,SavedShot,0.916,0.542,Robert Lewandowski,a,227,OpenPlay,2021,Head,17516,Borussia M.Gladbach,Bayern Munich,1,1,2021-08-13 18:30:00,Leroy Sané,Cross
217502,Bundesliga,425550,59,SavedShot,0.906,0.493,Robert Lewandowski,a,227,OpenPlay,2021,RightFoot,17516,Borussia M.Gladbach,Bayern Munich,1,1,2021-08-13 18:30:00,,


In [None]:
# Row names of the selected shots; used to pick the same rows from
# `df_preprocessed`.
rows <- row.names(subset)

In [None]:
# Replace the raw rows with the preprocessed rows that carry the same row
# names.
subset <- df_preprocessed[rows, ]

In [None]:
# Preview the first six rows of the preprocessed subset.
head(subset, n = 6L)

Unnamed: 0_level_0,status,minute,h_a_a,h_a_h,situation_DirectFreekick,situation_FromCorner,situation_OpenPlay,situation_Penalty,situation_SetPiece,shotType_Head,⋯,lastAction_Smother,lastAction_Standard,lastAction_Start,lastAction_SubstitutionOff,lastAction_SubstitutionOn,lastAction_Tackle,lastAction_TakeOn,lastAction_Throughball,distanceToGoal,angleToGoal
Unnamed: 0_level_1,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,⋯,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<dbl>,<dbl>
217489,0,11,1,0,0,0,1,0,0,1,⋯,0,0,0,0,0,0,0,0,13.950076,28.1214
217490,0,13,1,0,0,0,1,0,0,0,⋯,0,0,0,0,0,0,0,0,15.92821,25.6294
217491,0,25,1,0,0,0,1,0,0,0,⋯,0,0,0,0,0,0,0,1,5.579648,66.4711
217497,1,41,1,0,0,1,0,0,0,0,⋯,0,0,0,0,0,0,0,0,9.328819,40.90552
217499,0,51,1,0,0,0,1,0,0,1,⋯,0,0,0,0,0,0,0,0,9.83703,37.75397
217502,0,59,1,0,0,0,1,0,0,0,⋯,0,0,0,0,0,0,0,0,9.922979,40.34067


In [None]:
# Number of shots in the subset.
nrow(subset)

In [None]:
# Save the preprocessed subset (row names included) in the output directory.
write.csv(subset, file = file.path(dir_name, "subset.csv"))

#### Calculating SHAP values

In [None]:
# Compute SHAP values for the selected shots with the unified model.
# The target column `status` is dropped by name rather than by position, so
# the call stays correct if the column order of the preprocessed data changes.
# drop = FALSE guarantees a data frame is passed on.
lewandowski2021_shap <- treeshap(
  model,
  subset[, names(subset) != "status", drop = FALSE],
  verbose = 0
)

In [None]:
# Preview the first six rows of the computed SHAP values.
head(lewandowski2021_shap$shaps, n = 6L)

Unnamed: 0_level_0,minute,h_a_a,h_a_h,situation_DirectFreekick,situation_FromCorner,situation_OpenPlay,situation_Penalty,situation_SetPiece,shotType_Head,shotType_LeftFoot,⋯,lastAction_Smother,lastAction_Standard,lastAction_Start,lastAction_SubstitutionOff,lastAction_SubstitutionOn,lastAction_Tackle,lastAction_TakeOn,lastAction_Throughball,distanceToGoal,angleToGoal
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
217489,0.001035728,0.0004386425,0.00058202,0.001168599,0.01088093,0.01217071,-0.00403832,0.0019870725,-0.07454676,-0.019814626,⋯,7.528327e-09,-0.0045171318,-1.039691e-06,-1.864357e-06,6.984829e-06,-4.426245e-05,-0.007987182,-0.010268416,0.022979145,0.04255906
217490,-0.005038426,0.0047786315,0.0067074175,0.002602411,0.0196569,0.02641175,-0.003311923,0.0058518312,0.03204962,0.006790312,⋯,1.012671e-08,-0.0009141661,-2.739376e-06,-4.51107e-07,1.425498e-05,0.0001006455,-0.014268776,-0.014358049,0.007860345,0.1616914
217491,0.003243939,0.0011892264,0.0013496792,0.003977921,0.01138241,0.01124802,-0.001829396,0.0014118751,0.01245018,-0.003648802,⋯,9.249413e-09,0.0030394864,-1.157782e-06,-9.009708e-07,7.423778e-05,6.290986e-05,-0.002066578,0.205402072,0.178358763,0.18236773
217497,0.003036836,0.0096602815,0.0094989944,0.002413834,-0.12750911,-0.0815398,-0.004922878,-0.0061104994,0.03006627,-0.008358721,⋯,6.925549e-09,-0.0042974596,-5.190989e-07,-2.524528e-07,2.36102e-05,-2.266016e-05,-0.003086384,-0.006421726,0.156157479,0.19677638
217499,0.001241315,0.0021854134,-0.0019083355,0.002350423,0.04554099,0.02930637,-0.003119534,-0.0005733064,-0.06040176,-0.019014947,⋯,7.446855e-09,-0.0013642777,-6.78995e-07,-2.092006e-06,3.650588e-05,4.389292e-05,-0.00367794,-0.006943638,0.190228053,0.18511915
217502,0.006207708,-0.0003837765,-0.0006929247,0.002661293,0.0216963,0.02424647,-0.002696922,0.0022867348,0.04885776,-0.006207005,⋯,1.052285e-08,-0.0008258102,-6.221731e-07,-6.537083e-07,0.0001042109,2.658383e-05,-0.004834232,-0.007125568,0.222738165,0.2554071


In [None]:
# Store the complete treeshap result as RDS and the SHAP values alone as CSV.
saveRDS(
  lewandowski2021_shap,
  file = file.path(dir_name, "treeshap_output.RDS")
)
write.csv(
  lewandowski2021_shap$shaps,
  file = file.path(dir_name, "treeshap_shaps.csv")
)

### Lewandowski's shots in 2020

In [None]:
# Output directory for Lewandowski's 2020 shots.
dir_name <- "./results/lewandowski2020"

# dir.exists() (unlike file.exists()) is only TRUE for an actual directory.
# recursive = TRUE lets this cell succeed even when the parent "./results"
# folder has not been created yet.
if (dir.exists(dir_name)) {
  cat("The folder already exists")
} else {
  dir.create(dir_name, recursive = TRUE)
}

#### Extracting data

In [None]:
# Start again from the shots with own goals removed; the player/season filter
# is applied to this copy in the next cell.
subset <- subset_base

In [None]:
# Keep only Robert Lewandowski's shots from the 2020 season.
# which() discards rows where the comparison evaluates to NA; plain logical
# indexing would turn each of them into an all-NA row with a bogus row name,
# which would then break the row-name lookup in `df_preprocessed`.
subset <- subset[
  which(subset$player == "Robert Lewandowski" & subset$season == 2020),
]
head(subset)

Unnamed: 0_level_0,league,id,minute,result,X,Y,player,h_a,player_id,situation,season,shotType,match_id,home_team,away_team,home_goals,away_goals,date,player_assisted,lastAction
Unnamed: 0_level_1,<chr>,<int>,<int>,<chr>,<dbl>,<dbl>,<chr>,<chr>,<int>,<chr>,<int>,<chr>,<int>,<chr>,<chr>,<int>,<int>,<chr>,<chr>,<chr>
225397,Bundesliga,378394,4,BlockedShot,0.914,0.566,Robert Lewandowski,h,227,OpenPlay,2020,RightFoot,14173,Bayern Munich,Schalke 04,8,0,2020-09-18 18:30:00,Leroy Sané,Pass
225399,Bundesliga,378396,12,BlockedShot,0.852,0.396,Robert Lewandowski,h,227,OpenPlay,2020,RightFoot,14173,Bayern Munich,Schalke 04,8,0,2020-09-18 18:30:00,Leroy Sané,Pass
225401,Bundesliga,378399,20,MissedShots,0.861,0.418,Robert Lewandowski,h,227,OpenPlay,2020,LeftFoot,14173,Bayern Munich,Schalke 04,8,0,2020-09-18 18:30:00,Leroy Sané,Pass
225403,Bundesliga,378401,30,Goal,0.885,0.5,Robert Lewandowski,h,227,Penalty,2020,RightFoot,14173,Bayern Munich,Schalke 04,8,0,2020-09-18 18:30:00,,Standard
225832,Bundesliga,379517,69,MissedShots,0.888,0.684,Robert Lewandowski,a,227,OpenPlay,2020,RightFoot,14189,Hoffenheim,Bayern Munich,4,1,2020-09-27 13:30:00,Leroy Sané,BallTouch
226050,Bundesliga,381402,8,BlockedShot,0.85,0.642,Robert Lewandowski,h,227,OpenPlay,2020,RightFoot,15166,Bayern Munich,Hertha Berlin,4,3,2020-10-04 16:00:00,Leon Goretzka,BallTouch


In [None]:
# Row names of the selected shots identify the matching preprocessed rows.
rows <- row.names(subset)

# Swap the raw rows for their preprocessed counterparts and save them.
subset <- df_preprocessed[rows, ]
write.csv(subset, file = file.path(dir_name, "subset.csv"))

#### Calculating SHAP values

In [None]:
# Compute SHAP values for the selected shots with the unified model.
# The target column `status` is dropped by name rather than by position, so
# the call stays correct if the column order of the preprocessed data changes.
# drop = FALSE guarantees a data frame is passed on.
lewandowski2020_shap <- treeshap(
  model,
  subset[, names(subset) != "status", drop = FALSE],
  verbose = 0
)

In [None]:
# Store the complete treeshap result as RDS and the SHAP values alone as CSV.
saveRDS(
  lewandowski2020_shap,
  file = file.path(dir_name, "treeshap_output.RDS")
)
write.csv(
  lewandowski2020_shap$shaps,
  file = file.path(dir_name, "treeshap_shaps.csv")
)

### Creating a function that performs all of the steps above at once

It is available in `./scripts/calculate_treeshap.R`.

In [None]:
# Load the helper script; calculate_treeshap() is called further below.
source(file = "./scripts/calculate_treeshap.R")

### Lewandowski's shots in 2019

In [None]:
# Keep only Robert Lewandowski's shots from the 2019 season.
# which() discards rows where the comparison evaluates to NA; plain logical
# indexing would turn each of them into an all-NA row with a bogus row name,
# which would then break the row-name lookup below.
subset <- subset_base
subset <- subset[
  which(subset$player == "Robert Lewandowski" & subset$season == 2019),
]

# Fetch the preprocessed rows that carry the same row names.
rows <- rownames(subset)
subset <- df_preprocessed[rows, ]

In [None]:
# Run the helper from ./scripts/calculate_treeshap.R on the 2019 subset.
# NOTE(review): presumably the last two arguments are the output directory
# and the name of the target column -- confirm against the helper script.
calculate_treeshap(
  model,
  subset,
  "./results/lewandowski2019",
  "status"
)

## New task

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=638a36e2-efff-486f-858d-cbca546da2c6' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>