In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import ElasticNet
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV

# Load the data set from the Excel workbook.
pitch = pd.read_excel("reduced_defensas.xlsx")

# Square-root transform of selected columns -- currently disabled.
# sqrt_vars = ['Interceptions', 'Yellow_cards', 'Transfer_value', 'Exp_contr', 'Minutes_pl_BC', 'NP_goals', 'Pen_goals', 'Pen_goals_BC', 'Assists', 'GCA_BC', 'Red_cards']
# for var in sqrt_vars:
#     pitch[var] = pitch[var].apply(np.sqrt)


# Standardize the numeric columns in place with a StandardScaler fitted on
# the whole frame. Note that the list includes 'Transfer_value', the column
# used as the response Y in a later cell, so the response is scaled as well.
numeric_vars = [
    'Transfer_value', 'Exp_contr', 'Age', 'Minutes_pl', 'Minutes_pl_BC',
    'NP_goals', 'Pen_goals', 'Pen_goals_BC', 'Assists', 'Interceptions',
    'GCA_BC', 'Yellow_cards', 'Red_cards',
]
scaler = StandardScaler()
pitch[numeric_vars] = scaler.fit_transform(pitch[numeric_vars])

# X and Y are built in a later cell, once the predictor list is defined.


In [4]:
# Predictor columns used to build X, assembled from three groups so that each
# group can be read (and edited) on its own. Order matters: it determines the
# order of the fitted coefficients.

# Player-level numeric predictors.
_player_vars = (
    'Exp_contr', 'Age', 'Minutes_pl', 'Minutes_pl_BC',
    'NP_goals', 'Pen_goals', 'Pen_goals_BC', 'Assists',
    'Interceptions', 'GCA_BC', 'Yellow_cards', 'Red_cards',
)

# Suffixes of the 'Reduced_team_from_*' columns
# (presumably one-hot indicators of the selling club -- confirm against the workbook).
_team_names = (
    'Inter', 'AC Milan', 'Juventus', 'Napoli', 'Paris SG',
    'Real Madrid', 'Barcelona', 'Atlético Madrid', 'Bayern Munich',
    'Bor. Dortmund', 'Chelsea', 'Arsenal', 'Man City', 'Man Utd',
    'Liverpool',
)

# Suffixes of the 'Reduced_country_from_*' columns
# (presumably one-hot indicators of the origin country -- confirm against the workbook).
_country_names = (
    'Inglaterra', 'Francia', 'España', 'Italia', 'Alemania',
    'Portugal', 'Países Bajos', 'Argentina', 'Brasil',
)

ind_vars = (
    list(_player_vars)
    + ['Reduced_team_from_' + team for team in _team_names]
    + ['Reduced_country_from_' + country for country in _country_names]
)

In [5]:
# Predictor matrix and response vector.
X = pitch[ind_vars]
Y = pitch["Transfer_value"]

# NOTE(review): the numeric columns of `pitch` were standardized on the full
# data set in an earlier cell, so the validation folds below have already
# influenced the scaling. Consider moving the scaler into a Pipeline if
# strictly out-of-fold scores are needed.

# Base estimator; max_iter is raised to 10000 iterations.
elastic = ElasticNet(max_iter=10000)

# Hyper-parameter grid: 10 log-spaced penalty strengths between 1e-4 and 1e1,
# crossed with 9 evenly spaced L1/L2 mixing ratios between 0.1 and 0.9.
alphas = np.logspace(-4, 1, 10)
l1_ratios = np.linspace(0.1, 0.9, 9)
param_grid = {'alpha': alphas, 'l1_ratio': l1_ratios}

# Exhaustive search over the grid, scored by R^2 with 5-fold cross-validation,
# using all available cores.
grid_search = GridSearchCV(
    elastic,
    param_grid,
    scoring='r2',
    cv=5,
    n_jobs=-1,
)
grid_search.fit(X, Y)

# Evaluate the best estimator on the same data it was fitted on
# (in-sample metrics, as the printed labels state).
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X)
r2 = best_model.score(X, Y)
mse = mean_squared_error(Y, y_pred)

best_params = grid_search.best_params_
print("Mejor alpha:", best_params['alpha'])
print("Mejor l1_ratio:", best_params['l1_ratio'])
print(f"R^2 en los datos de entrenamiento: {r2}")
print(f"MSE en los datos de entrenamiento: {mse}")
print("Coeficientes:", best_model.coef_)

Mejor alpha: 0.05994842503189409
Mejor l1_ratio: 0.1
R^2 en los datos de entrenamiento: 0.4328431164653581
MSE en los datos de entrenamiento: 0.5671568835346419
Coeficientes: [ 0.30012255 -0.28781778  0.16068704  0.21989989  0.13695573 -0.0017033
 -0.04090747 -0.08909978  0.05668085  0.14320897 -0.0315942  -0.01244841
  0.         -0.          0.03530661  0.          0.          0.
  0.          0.          0.00505189  0.          0.         -0.
  0.          0.          0.          0.19081051 -0.08128845 -0.
 -0.         -0.          0.00914886  0.04560766 -0.          0.        ]


In [6]:
# Pair each predictor name with its fitted coefficient.
columnas = X.columns
coeficientes = best_model.coef_
coef_df = pd.DataFrame({'Variable': columnas, 'Coeficiente': coeficientes})

# Keep only the predictors whose coefficient is not exactly zero.
no_nulos = coef_df['Coeficiente'] != 0
coef_df = coef_df[no_nulos]

# Order the rows by absolute coefficient, largest first; the original row
# labels are kept so each row still shows its position in X.
orden = coef_df['Coeficiente'].abs().sort_values(ascending=False).index
coef_df = coef_df.loc[orden]

# Show the resulting table.
print("\nVariables con coeficiente distinto de cero:")
print(coef_df)


Variables con coeficiente distinto de cero:
                             Variable  Coeficiente
0                           Exp_contr     0.300123
1                                 Age    -0.287818
3                       Minutes_pl_BC     0.219900
27    Reduced_country_from_Inglaterra     0.190811
2                          Minutes_pl     0.160687
9                              GCA_BC     0.143209
4                            NP_goals     0.136956
7                             Assists    -0.089100
28       Reduced_country_from_Francia    -0.081288
8                       Interceptions     0.056681
33  Reduced_country_from_Países Bajos     0.045608
6                        Pen_goals_BC    -0.040907
14         Reduced_team_from_Juventus     0.035307
10                       Yellow_cards    -0.031594
11                          Red_cards    -0.012448
32      Reduced_country_from_Portugal     0.009149
20    Reduced_team_from_Bayern Munich     0.005052
5                           Pen_goals