In [2]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from interpret.glassbox import ExplainableBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
from time import time

pitch = pd.read_excel("reduced_delanteros.xlsx")

numeric_vars = ['Transfer_value', 'Exp_contr', 'Age', 'Minutes_pl', 'Minutes_pl_BC', 'NP_goals', 'Pen_goals', 'Pen_goals_BC', 'Assists', 'Interceptions', 'GCA_BC', 'Yellow_cards', 'Red_cards']
pitch[numeric_vars] = StandardScaler().fit_transform(pitch[numeric_vars])

In [3]:
ind_vars = ['Exp_contr', 'Age', 'Minutes_pl', 'Minutes_pl_BC',
       'NP_goals', 'Pen_goals', 'Pen_goals_BC', 'Assists', 'Interceptions',
       'GCA_BC', 'Yellow_cards', 'Red_cards', 'Reduced_team_from_Inter',
       'Reduced_team_from_AC Milan', 'Reduced_team_from_Juventus',
       'Reduced_team_from_Napoli', 'Reduced_team_from_Paris SG',
       'Reduced_team_from_Real Madrid', 'Reduced_team_from_Barcelona',
       'Reduced_team_from_Atlético Madrid', 'Reduced_team_from_Bayern Munich',
       'Reduced_team_from_Bor. Dortmund', 'Reduced_team_from_Chelsea',
       'Reduced_team_from_Arsenal', 'Reduced_team_from_Man City',
       'Reduced_team_from_Man Utd', 'Reduced_team_from_Liverpool', 'Reduced_country_from_Inglaterra',
       'Reduced_country_from_Francia', 'Reduced_country_from_España',
       'Reduced_country_from_Italia', 'Reduced_country_from_Alemania',
       'Reduced_country_from_Portugal', 'Reduced_country_from_Países Bajos',
       'Reduced_country_from_Argentina', 'Reduced_country_from_Brasil']

In [4]:
time0 = time()

X = pitch[ind_vars]  # Asegúrate de usar las variables significativas
y = pitch['Transfer_value']  # Variable dependiente

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

ebm = ExplainableBoostingRegressor(random_state=0)
ebm.fit(X_train, y_train)

y_pred = ebm.predict(X_test)

r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

print(f"R² del modelo: {r2:.4f}")
print(f"MSE del modelo: {mse:.4f}")

print(time() - time0)

R² del modelo: 0.5027
MSE del modelo: 0.4052
167.21953320503235


In [5]:
global_explanation = ebm.explain_global()

importance_df = pd.DataFrame({
    'Variable': global_explanation.data()['names'],
    'Importancia': global_explanation.data()['scores']
}).sort_values(by='Importancia', ascending=False)

print(importance_df.head(20))

                                           Variable  Importancia
9                                            GCA_BC     0.180796
1                                               Age     0.154767
0                                         Exp_contr     0.144197
4                                          NP_goals     0.127532
3                                     Minutes_pl_BC     0.098606
7                                           Assists     0.082677
27                  Reduced_country_from_Inglaterra     0.080018
6                                      Pen_goals_BC     0.048602
2                                        Minutes_pl     0.040199
32                    Reduced_country_from_Portugal     0.030076
5                                         Pen_goals     0.024933
8                                     Interceptions     0.024195
28                     Reduced_country_from_Francia     0.020389
33                Reduced_country_from_Países Bajos     0.019326
11                       