In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import ElasticNet
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV

# Load the dataset from the Excel workbook that sits next to the notebook.
pitch = pd.read_excel("reduced_centrocampistas.xlsx")

# Disabled experiment: square-root transform of selected columns before scaling.
# sqrt_vars = ['Interceptions', 'Yellow_cards', 'Transfer_value', 'Exp_contr', 'Minutes_pl_BC', 'NP_goals', 'Pen_goals', 'Pen_goals_BC', 'Assists', 'GCA_BC', 'Red_cards']
# for var in sqrt_vars:
#     pitch[var] = pitch[var].apply(np.sqrt)

# Columns to standardize. 'Transfer_value' is part of this list, so it is
# rescaled together with the other numeric columns.
numeric_vars = [
    'Transfer_value',
    'Exp_contr',
    'Age',
    'Minutes_pl',
    'Minutes_pl_BC',
    'NP_goals',
    'Pen_goals',
    'Pen_goals_BC',
    'Assists',
    'Interceptions',
    'GCA_BC',
    'Yellow_cards',
    'Red_cards',
]

# NOTE(review): the scaler is fit on every row of `pitch`; if these columns are
# later evaluated with a train/validation split, the scaling statistics will
# have seen the held-out rows -- confirm this is acceptable.
scaler = StandardScaler()
pitch[numeric_vars] = scaler.fit_transform(pitch[numeric_vars])

# X and Y are defined further down, once the predictor list exists.


In [2]:
# Predictor columns, assembled from three groups. The order matters: the
# resulting list is used to select columns, so position i here is column i of
# whatever frame is sliced with it.

# Numeric player-level columns.
_base_vars = [
    'Exp_contr', 'Age', 'Minutes_pl', 'Minutes_pl_BC',
    'NP_goals', 'Pen_goals', 'Pen_goals_BC', 'Assists',
    'Interceptions', 'GCA_BC', 'Yellow_cards', 'Red_cards',
]

# Suffixes of the 'Reduced_team_from_*' columns
# (presumably indicator/dummy columns -- TODO confirm against the workbook).
_team_suffixes = [
    'Inter', 'AC Milan', 'Juventus', 'Napoli', 'Paris SG',
    'Real Madrid', 'Barcelona', 'Atlético Madrid', 'Bayern Munich',
    'Bor. Dortmund', 'Chelsea', 'Arsenal', 'Man City', 'Man Utd',
    'Liverpool',
]

# Suffixes of the 'Reduced_country_from_*' columns (same assumption as above).
_country_suffixes = [
    'Inglaterra', 'Francia', 'España', 'Italia', 'Alemania',
    'Portugal', 'Países Bajos', 'Argentina', 'Brasil',
]

ind_vars = (
    _base_vars
    + ['Reduced_team_from_' + suffix for suffix in _team_suffixes]
    + ['Reduced_country_from_' + suffix for suffix in _country_suffixes]
)

In [3]:
# Target and design matrix, both taken from `pitch`.
Y = pitch["Transfer_value"]
X = pitch[ind_vars]

# Base estimator; the raised iteration cap gives the solver room to converge
# for the small-alpha end of the grid.
elastic = ElasticNet(max_iter=10_000)

# Hyper-parameter grid:
#   alpha    -> 10 log-spaced regularization strengths from 1e-4 to 10
#   l1_ratio -> L1/L2 mix, 0.1 to 0.9 in steps of 0.1
# NOTE(review): if the selected value lands on an edge of either range,
# consider widening that range.
param_grid = dict(
    alpha=np.logspace(-4, 1, num=10),
    l1_ratio=np.linspace(0.1, 0.9, num=9),
)

# Exhaustive search over the grid, scored by R^2 with 5-fold cross-validation,
# using every available core.
grid_search = GridSearchCV(
    elastic,
    param_grid,
    scoring='r2',
    n_jobs=-1,
    cv=5,
)

# Run the search on the full data set.
grid_search.fit(X, Y)

# Metrics below are computed on the same rows the model was fitted on, so they
# are in-sample figures, not an out-of-sample estimate.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X)
mse = mean_squared_error(Y, y_pred)
r2 = best_model.score(X, Y)

# Report the selected hyper-parameters, the in-sample fit and the coefficients.
best_params = grid_search.best_params_
print("Mejor alpha:", best_params['alpha'])
print("Mejor l1_ratio:", best_params['l1_ratio'])
print(f"R^2 en los datos de entrenamiento: {r2}")
print(f"MSE en los datos de entrenamiento: {mse}")
print("Coeficientes:", best_model.coef_)

Mejor alpha: 0.004641588833612782
Mejor l1_ratio: 0.1
R^2 en los datos de entrenamiento: 0.5123142203269124
MSE en los datos de entrenamiento: 0.4876857796730875
Coeficientes: [ 0.24162111 -0.35916677 -0.05042127  0.17037662  0.1573929  -0.02056208
 -0.03463174  0.05188368  0.17587319  0.28898222  0.09828254 -0.05407678
 -0.24042365  0.27409091  0.36837789  0.13917093  0.42645492  0.70847639
  0.35435487  0.35401675  0.33705302  0.51590363  0.19377231 -0.07618278
 -0.03784543 -0.15192061  0.          0.47816642 -0.19593125 -0.16905198
 -0.17473944 -0.22351789  0.31196288  0.31833124  0.11421247  0.0540149 ]


In [4]:
# Pair each predictor name with the coefficient the fitted model assigned to it.
coeficientes = best_model.coef_
columnas = X.columns
coef_df = pd.DataFrame({'Variable': columnas, 'Coeficiente': coeficientes})

# Keep only the predictors whose coefficient is not exactly zero.
coef_df = coef_df.loc[coef_df['Coeficiente'].ne(0)]

# Order rows by coefficient magnitude, largest first. The original row labels
# are kept, so each row still shows the predictor's position in X.
orden = coef_df['Coeficiente'].abs().sort_values(ascending=False).index
coef_df = coef_df.reindex(orden)

# Show the table.
print("\nVariables con coeficiente distinto de cero:")
print(coef_df)


Variables con coeficiente distinto de cero:
                             Variable  Coeficiente
17      Reduced_team_from_Real Madrid     0.708476
21    Reduced_team_from_Bor. Dortmund     0.515904
27    Reduced_country_from_Inglaterra     0.478166
16         Reduced_team_from_Paris SG     0.426455
14         Reduced_team_from_Juventus     0.368378
1                                 Age    -0.359167
18        Reduced_team_from_Barcelona     0.354355
19  Reduced_team_from_Atlético Madrid     0.354017
20    Reduced_team_from_Bayern Munich     0.337053
33  Reduced_country_from_Países Bajos     0.318331
32      Reduced_country_from_Portugal     0.311963
9                              GCA_BC     0.288982
13         Reduced_team_from_AC Milan     0.274091
0                           Exp_contr     0.241621
12            Reduced_team_from_Inter    -0.240424
31      Reduced_country_from_Alemania    -0.223518
28       Reduced_country_from_Francia    -0.195931
22          Reduced_team_from_Chelsea