# XAI de los modelos de expected goals (xG)

Imports

In [None]:
import pickle
from sklearn.model_selection import train_test_split
import pandas as pd
import sklearn
import math
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
import warnings

# Ignore every warning for the remainder of the session (no category is
# given, so the filter applies to all of them).
warnings.simplefilter(action="ignore")

Cargar modelo

In [None]:
# Player whose saved xG model and shot data are analysed below; the name is
# turned into file names by replacing spaces with underscores.
playername = "Lionel Andrés Messi Cuccittini"
# Alternative player (uncomment to switch):
#playername = "Luis Alberto Suárez Díaz"

In [None]:
# Load the pickled xG model saved for the selected player.
# NOTE: unpickling can execute arbitrary code; only load model files that
# come from a trusted source.
filename_model = "xG_model_" + playername.replace(" ", "_")
with open(filename_model, "rb") as model_file:
    # The context manager closes the file handle even if unpickling fails.
    model = pickle.load(model_file)

In [None]:
# Read the per-player dataset CSV and preview its first rows.
player_slug = playername.replace(" ", "_")
datasetname = f"data/dataset_{player_slug}.csv"
dataset = pd.read_csv(datasetname)
dataset.head()

In [None]:
# Preview this player's rows from the combined shots file.
# The result is kept in its own variables on purpose: rebinding `dataset`
# here would replace the per-player table loaded earlier, and the X/Y split
# further down would then be taken from a table that contains the "player"
# column, which is not among the features the example shot is built with.
all_shots = pd.read_csv("data/all_shots.csv")
player_shots = all_shots[all_shots["player"] == playername]
player_shots.head(20)

In [None]:
# Every column except the last one is a feature; the last column is the target.
X = dataset.iloc[:, :-1]
Y = dataset.iloc[:, -1]

## Explicación local

In [None]:
def calculate_distance(location):
    """Return the distance from ``location`` to the nearer goal centre.

    The two goal centres are fixed at (0, 40) and (120, 40).

    Args:
        location: Indexable pair whose first two items are the x and y
            coordinates of the shot.

    Returns:
        The smaller of the two Euclidean distances, as a float.
    """
    goal_centres = ((0, 40), (120, 40))
    return min(
        math.sqrt((location[0] - goal_x) ** 2 + (location[1] - goal_y) ** 2)
        for goal_x, goal_y in goal_centres
    )

def calculate_angle(location):
    """Return the angle (radians) under which the goal is seen from ``location``.

    The x coordinate is first folded onto the nearer goal line
    (``min(120 - x, x)``) and y is measured from the goal centre at y = 40.
    The angle is obtained from

        tan(angle) = width * x / (x**2 + y**2 - (width / 2)**2)

    with a goal width of 7.32.

    ``np.arctan2`` is used instead of dividing and calling ``np.arctan``:

    * when the denominator is exactly zero (the shot lies on the circle whose
      diameter is the goal mouth, e.g. ``[3.66, 40]``) the result is pi/2
      instead of a ``ZeroDivisionError``;
    * on the goal line between the posts the result is pi rather than -0.0.

    For every other input the value matches the previous implementation up
    to floating-point rounding.

    NOTE(review): the pitch constants (120, 40) and the goal width (7.32)
    should be in the same unit -- confirm against the data source.

    Args:
        location: Indexable pair whose first two items are the x and y
            coordinates of the shot.

    Returns:
        The angle in radians, in the range [0, pi], as a NumPy float.
    """
    goal_width = 7.32  # goal size
    half_width = goal_width / 2

    x = min(120 - location[0], location[0])
    y = 40 - location[1]

    numerator = goal_width * x
    denominator = x ** 2 + y ** 2 - half_width ** 2

    angle = np.arctan2(numerator, denominator)
    if angle < 0:
        # Only reachable for locations outside the 0..120 range; keeps the
        # result in the same half-turn the arctan-based formula produced.
        angle += np.pi
    return angle

In [None]:
# Hand-set inputs describing one hypothetical shot; they are assembled into a
# one-row DataFrame in the next cell and scored by the model.

# Context flags (presumably 0/1; defenders_between may be a count -- confirm).
under_pressure = 0
follows_dribble = 0
one_on_one = 0
first_time = 0
defenders_between = 1
# Shot-situation flags (presumably a one-hot group -- verify against the
# training data).
Corner = 0
Free_Kick = 0
Open_Play = 0
Penalty = 1
# Shot-technique flags (presumably a one-hot group -- verify).
Backheel = 0
Diving_Header = 0
Half_Volley = 0
Lob = 0
Normal = 1
Overhead_Kick = 0
Volley = 0
# Body-part flags (presumably a one-hot group -- verify).
Head = 0
Other = 0
preferred_foot = 1
not_preferred_foot = 0
# Shot position; distance and angle are derived from it with the helper
# functions defined above.
x = 11
y = 38
distance = calculate_distance([x,y])
angle = calculate_angle([x,y])

In [None]:
# Pair each feature name with its value so names and values stay aligned.
feature_values = {
    "Distance": distance,
    "Angle": angle,
    "Under_pressure": under_pressure,
    "Follows_dribble": follows_dribble,
    "One_on_one": one_on_one,
    "First_time": first_time,
    "Defenders_between": defenders_between,
    "Corner": Corner,
    "Free_Kick": Free_Kick,
    "Open_Play": Open_Play,
    "Penalty": Penalty,
    "Backheel": Backheel,
    "Diving_Header": Diving_Header,
    "Half_Volley": Half_Volley,
    "Lob": Lob,
    "Normal": Normal,
    "Overhead_Kick": Overhead_Kick,
    "Volley": Volley,
    "Head": Head,
    "Other": Other,
    "Preferred_foot": preferred_foot,
    "Not_preferred_foot": not_preferred_foot,
}

# One-row frame; going through a NumPy array gives all columns one common dtype.
shot = pd.DataFrame(
    np.array([list(feature_values.values())]),
    columns=list(feature_values),
)

# Print the predicted probability of class index 1 for this shot.
result = model.predict_proba(shot)
print(result[0][1])

### SHAP

In [None]:
import shap
from shap import Explainer
# AIX360's KernelExplainer wrapper; used for the force plot of the single shot.
from aix360.algorithms.shap import KernelExplainer

# Presumably required so the interactive force plot can render inside the
# notebook -- confirm against the SHAP documentation.
shap.initjs()

Representación de una sola predicción

In [None]:
# Explain the single shot with SHAP, using X as background data.
# The explained function is the probability of class index 1 (the "Goal"
# class, the same column printed for the prediction) instead of
# model.predict: hard 0/1 labels hide how much each feature moves the
# expected-goals value, and the other explanations in this notebook
# already work on predict_proba.
# NOTE: `shap_values` is reassigned by the KernelExplainer cell that follows.
explainer = shap.Explainer(lambda data: model.predict_proba(data)[:, 1], X)
shap_values = explainer(shot)

In [None]:
# Kernel SHAP over the class probabilities, with X as background data.
shapexplainer = KernelExplainer(model.predict_proba, X)
single_shot = shot.iloc[0]
shap_values = shapexplainer.explain_instance(single_shot)

# Index 1 selects the second class for both the base value and the
# contributions shown in the force plot.
shap.force_plot(
    shapexplainer.explainer.expected_value[1],
    shap_values[1],
    single_shot,
)

### LIME

In [None]:
import lime
from lime import lime_tabular

In [None]:
# LIME explainer built from the feature table; class index 0 is labelled
# "No goal" and class index 1 "Goal".
explainer = lime_tabular.LimeTabularExplainer(
    X.to_numpy(),
    mode="classification",
    feature_names=X.columns.tolist(),
    class_names=["No goal", "Goal"],
)

In [None]:
# Explain the single shot with LIME.
# The row is passed as a 1-D NumPy array, which is the input type LIME
# documents for `data_row`; a pandas Series with string labels makes LIME's
# integer indexing depend on pandas' deprecated positional fallback.
# The number of reported features follows the shot's column count instead of
# a hard-coded 22.
exp = explainer.explain_instance(
    data_row=shot.iloc[0].to_numpy(),
    predict_fn=model.predict_proba,
    num_features=shot.shape[1],
)

exp.show_in_notebook(show_table=True)

## Explicación global

### Propio Modelo

In [None]:
# Importances reported by the model itself, printed and then plotted as a
# horizontal bar chart.
importances = model.feature_importances_
print(importances)

features = X.columns
indices = np.argsort(importances)  # ascending order of importance
positions = list(range(len(indices)))

fig, ax = plt.subplots(figsize=(12, 8))
ax.set_title('Feature Importances')
ax.barh(positions, importances[indices], color='b', align='center')
ax.set_yticks(positions)
ax.set_yticklabels([features[i] for i in indices])
ax.set_xlabel('Relative Importance')
plt.show()

### SHAP

In [None]:
# Global SHAP values over the whole feature table, with X as background data.
# The explained function is the probability of class index 1 (the "Goal"
# class) instead of model.predict: hard 0/1 labels hide how much each
# feature moves the expected-goals value, and the local explanations in
# this notebook already work on predict_proba.
explainer = shap.Explainer(lambda data: model.predict_proba(data)[:, 1], X)
shap_values = explainer(X)

In [None]:
# SHAP bar plot over the whole dataset; show one bar per feature column of X
# rather than a hard-coded count.
shap.plots.bar(shap_values, max_display=X.shape[1])

In [None]:
# SHAP beeswarm plot over the whole dataset; show one row per feature column
# of X rather than a hard-coded count.
shap.plots.beeswarm(shap_values, max_display=X.shape[1])