In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import ElasticNet
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV

# Load the dataset from the Excel workbook (path is relative to the working directory).
pitch = pd.read_excel("reduced_delanteros.xlsx")

# Disabled experiment, kept for reference: square-root transform of selected columns.
# sqrt_vars = ['Interceptions', 'Yellow_cards', 'Transfer_value', 'Exp_contr', 'Minutes_pl_BC', 'NP_goals', 'Pen_goals', 'Pen_goals_BC', 'Assists', 'GCA_BC', 'Red_cards']
# for var in sqrt_vars:
#     pitch[var] = pitch[var].apply(np.sqrt)

# Columns to standardise (zero mean, unit variance). 'Transfer_value' is in this
# list, so the regression target used further down is standardised as well.
numeric_vars = [
    'Transfer_value', 'Exp_contr', 'Age', 'Minutes_pl', 'Minutes_pl_BC',
    'NP_goals', 'Pen_goals', 'Pen_goals_BC', 'Assists', 'Interceptions',
    'GCA_BC', 'Yellow_cards', 'Red_cards',
]

# NOTE(review): the scaler is fitted on every row before any cross-validation
# split, so fold statistics are shared between train and validation parts.
scaler = StandardScaler()
pitch[numeric_vars] = scaler.fit_transform(pitch[numeric_vars])

# X and Y are built in a later cell, once the predictor list has been defined.


In [2]:
# Predictor columns, grouped by kind. Order matters: the fitted coefficient
# vector is later matched to these names by position.

# Numeric player / contract columns (the ones standardised in the first cell,
# minus 'Transfer_value', which is used as the target).
player_vars = [
    'Exp_contr', 'Age', 'Minutes_pl', 'Minutes_pl_BC',
    'NP_goals', 'Pen_goals', 'Pen_goals_BC', 'Assists', 'Interceptions',
    'GCA_BC', 'Yellow_cards', 'Red_cards',
]

# Origin-team columns (presumably 0/1 indicator dummies -- TODO confirm).
team_dummy_vars = [
    'Reduced_team_from_Inter',
    'Reduced_team_from_AC Milan',
    'Reduced_team_from_Juventus',
    'Reduced_team_from_Napoli',
    'Reduced_team_from_Paris SG',
    'Reduced_team_from_Real Madrid',
    'Reduced_team_from_Barcelona',
    'Reduced_team_from_Atlético Madrid',
    'Reduced_team_from_Bayern Munich',
    'Reduced_team_from_Bor. Dortmund',
    'Reduced_team_from_Chelsea',
    'Reduced_team_from_Arsenal',
    'Reduced_team_from_Man City',
    'Reduced_team_from_Man Utd',
    'Reduced_team_from_Liverpool',
]

# Origin-country columns (presumably 0/1 indicator dummies -- TODO confirm).
country_dummy_vars = [
    'Reduced_country_from_Inglaterra',
    'Reduced_country_from_Francia',
    'Reduced_country_from_España',
    'Reduced_country_from_Italia',
    'Reduced_country_from_Alemania',
    'Reduced_country_from_Portugal',
    'Reduced_country_from_Países Bajos',
    'Reduced_country_from_Argentina',
    'Reduced_country_from_Brasil',
]

ind_vars = player_vars + team_dummy_vars + country_dummy_vars

In [3]:
# Design matrix and target, both taken from the already-standardised frame.
X = pitch[ind_vars]
Y = pitch["Transfer_value"]

# Base estimator; the iteration cap is raised so the solver can converge for
# the weakly regularised corners of the grid.
elastic = ElasticNet(max_iter=10000)

# Hyper-parameter grid: 10 alphas x 9 l1_ratios = 90 candidates.
param_grid = {
    'alpha': np.logspace(-4, 1, 10),       # penalty strength, 1e-4 .. 10 on a log scale
    'l1_ratio': np.linspace(0.1, 0.9, 9)   # L1/L2 mix, 0.1 .. 0.9 in steps of 0.1
}

# Exhaustive search scored by R^2 under 5-fold cross-validation, using all cores.
grid_search = GridSearchCV(estimator=elastic,
                           param_grid=param_grid,
                           scoring='r2',
                           cv=5,
                           n_jobs=-1)

# Fit every candidate; with the default refit=True the best one is then
# refitted on the full data and exposed as best_estimator_.
grid_search.fit(X, Y)

# In-sample metrics of the refitted best model. These are computed on the
# same rows used for fitting and model selection, so they are optimistic.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X)
r2 = best_model.score(X, Y)
mse = mean_squared_error(Y, y_pred)

print("Mejor alpha:", grid_search.best_params_['alpha'])
print("Mejor l1_ratio:", grid_search.best_params_['l1_ratio'])
print(f"R^2 en los datos de entrenamiento: {r2}")
print(f"MSE en los datos de entrenamiento: {mse}")
print("Coeficientes:", best_model.coef_)

# Also report the mean cross-validated R^2 of the selected candidate: this is
# the score the search actually optimised and the less biased estimate of
# out-of-sample fit. Printed last so earlier output lines keep their order.
print(f"R^2 medio en validación cruzada: {grid_search.best_score_}")

Mejor alpha: 0.05994842503189409
Mejor l1_ratio: 0.1
R^2 en los datos de entrenamiento: 0.5187618715565414
MSE en los datos de entrenamiento: 0.4812381284434586
Coeficientes: [ 0.22112967 -0.27715165 -0.05501716  0.0807116   0.20133166  0.04115793
  0.03188911  0.18017377 -0.00487045  0.32260407  0.          0.00201178
  0.         -0.          0.          0.          0.         -0.
 -0.          0.         -0.          0.          0.          0.
  0.          0.          0.          0.18271767 -0.07479178 -0.
 -0.         -0.06684653  0.09983659  0.          0.          0.        ]


In [4]:
# Predictor names and their fitted coefficients, aligned by position.
columnas = X.columns
coeficientes = best_model.coef_

# One row per predictor.
coef_df = pd.DataFrame({'Variable': columnas, 'Coeficiente': coeficientes})

# Keep only the predictors the penalty did not shrink exactly to zero.
coef_df = coef_df.loc[coef_df['Coeficiente'].ne(0)]

# Order rows by coefficient magnitude, largest first; the original row labels
# (positions in the predictor list) are preserved.
by_magnitude = coef_df['Coeficiente'].abs().sort_values(ascending=False).index
coef_df = coef_df.loc[by_magnitude]

# Show the table.
print("\nVariables con coeficiente distinto de cero:")
print(coef_df)


Variables con coeficiente distinto de cero:
                           Variable  Coeficiente
9                            GCA_BC     0.322604
1                               Age    -0.277152
0                         Exp_contr     0.221130
4                          NP_goals     0.201332
27  Reduced_country_from_Inglaterra     0.182718
7                           Assists     0.180174
32    Reduced_country_from_Portugal     0.099837
3                     Minutes_pl_BC     0.080712
28     Reduced_country_from_Francia    -0.074792
31    Reduced_country_from_Alemania    -0.066847
2                        Minutes_pl    -0.055017
5                         Pen_goals     0.041158
6                      Pen_goals_BC     0.031889
8                     Interceptions    -0.004870
11                        Red_cards     0.002012
