In [74]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

In [75]:
# Load the raw player table from CSV into `df`.
# NOTE(review): the file name looks misspelled ("juadores") and has a doubled
# ".csv" extension — confirm it matches the file on disk before renaming.
ruta_datos = 'juadores_fantasy.csv.csv'
df = pd.read_csv(ruta_datos)

In [76]:
# Keep an independent copy of the name and position columns. 'posicion' is
# replaced by dummy columns when `df` is one-hot encoded, and both columns are
# needed later to label the predictions.
columnas_identidad = ['Nombre', 'posicion']
original_data = df.loc[:, columnas_identidad].copy()

In [77]:
# One-hot encode the categorical columns so the model only sees numeric input.
# NOTE: this rebinds `df`, so the cell is not idempotent — after it runs, the
# 'posicion' and 'equipo' columns no longer exist and re-running it without
# reloading the CSV is expected to fail.
columnas_categoricas = ['posicion', 'equipo']
df = pd.get_dummies(df, columns=columnas_categoricas)


In [78]:
# Two regression targets are predicted from the same feature matrix.
y_puntos = df['Total_puntos']
y_precio = df['Precio']

# Features: everything except the player name and the two targets.
columnas_excluidas = ['Nombre', 'Total_puntos', 'Precio']
X = df.drop(columns=columnas_excluidas)

In [79]:
# Split the features and BOTH targets in a single call so that every array is
# guaranteed to share the same train/test rows. The previous version called
# train_test_split twice and silently overwrote X_train / X_test; it only
# stayed aligned because both calls happened to use the same random_state
# and test_size.
# 70 % of the rows go to training, 30 % to the held-out test set.
(
    X_train, X_test,
    y_puntos_train, y_puntos_test,
    y_precio_train, y_precio_test,
) = train_test_split(X, y_puntos, y_precio, test_size=0.3, random_state=42)


In [80]:
# Standardise the features. The scaler statistics are learned from the
# training rows only and then re-used for the test rows, so no information
# from the test set leaks into the fit.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [81]:
# Hyper-parameter search space shared by both random-forest grid searches:
# 3 * 4 * 3 * 3 = 108 candidate combinations.
param_grid = dict(
    n_estimators=[100, 200, 300],
    max_depth=[None, 10, 20, 30],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

In [82]:
# Exhaustive grid search for the points model: every combination in
# `param_grid` is evaluated with 3-fold cross-validation on the training set,
# using all available CPU cores (n_jobs=-1). verbose=2 logs one line per fit.
rf_puntos = RandomForestRegressor(random_state=42)
grid_search_puntos = GridSearchCV(
    rf_puntos,
    param_grid,
    cv=3,
    n_jobs=-1,
    verbose=2,
)
grid_search_puntos.fit(X_train_scaled, y_puntos_train)

Fitting 3 folds for each of 108 candidates, totalling 324 fits


[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.3s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.3s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.3s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=200; total time=   0.7s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=200; total time=   0.7s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=200; total time=   0.7s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=5, n_estimators=100; total time=   0.3s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=300; total time=   1.0s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=5, n_estimators=100; total time=   0.3s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=5, n_estim

In [83]:
# Points model with the best hyper-parameters found by the grid search.
# With GridSearchCV's default refit=True this estimator has already been
# refit on the full training set.
best_rf_puntos = grid_search_puntos.best_estimator_

In [84]:
# 5-fold cross-validated score (R^2, the regressor's default scorer) of the
# tuned points model on the training data.
# NOTE(review): the hyper-parameters were chosen on these same rows, so this
# estimate is likely somewhat optimistic; the held-out test score is the
# unbiased one.
cv_scores_puntos = cross_val_score(
    best_rf_puntos,
    X_train_scaled,
    y_puntos_train,
    cv=5,
)
media_cv_puntos = cv_scores_puntos.mean()
print("Puntos CV Scores:", cv_scores_puntos)
print("Puntos CV Mean Score:", media_cv_puntos)

Puntos CV Scores: [0.89386477 0.60277326 0.63240071 0.7539106  0.75731247]
Puntos CV Mean Score: 0.7280523639104256


In [85]:
# Exhaustive grid search for the price model: same search space and settings
# as the points model (3-fold CV, all CPU cores, one log line per fit), but
# fitted against the price target.
rf_precio = RandomForestRegressor(random_state=42)
grid_search_precio = GridSearchCV(
    rf_precio,
    param_grid,
    cv=3,
    n_jobs=-1,
    verbose=2,
)
grid_search_precio.fit(X_train_scaled, y_precio_train)

Fitting 3 folds for each of 108 candidates, totalling 324 fits
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.3s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.3s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.3s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=200; total time=   0.7s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=200; total time=   0.7s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=200; total time=   0.7s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=5, n_estimators=100; total time=   0.4s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=300; total time=   1.0s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=5, n_estimators=100; total time=   0.3s
[CV] END m

In [86]:
# Price model with the best hyper-parameters found by the grid search.
# With GridSearchCV's default refit=True this estimator has already been
# refit on the full training set.
best_rf_precio = grid_search_precio.best_estimator_

In [87]:
# 5-fold cross-validated score (R^2, the regressor's default scorer) of the
# tuned price model on the training data.
# NOTE(review): the hyper-parameters were chosen on these same rows, so this
# estimate is likely somewhat optimistic; the held-out test score is the
# unbiased one.
cv_scores_precio = cross_val_score(
    best_rf_precio,
    X_train_scaled,
    y_precio_train,
    cv=5,
)
media_cv_precio = cv_scores_precio.mean()
print("Precio CV Scores:", cv_scores_precio)
print("Precio CV Mean Score:", media_cv_precio)

Precio CV Scores: [0.79174354 0.53091342 0.63145735 0.67987158 0.67936666]
Precio CV Mean Score: 0.6626705085234756


In [88]:
# Predict both targets for the held-out test rows with the tuned models.
y_puntos_pred, y_precio_pred = (
    modelo.predict(X_test_scaled)
    for modelo in (best_rf_puntos, best_rf_precio)
)

In [89]:
# Held-out test metrics for both models: R^2 (estimator.score) and MSE.
# The MSE is in squared target units, which is why the price MSE is so large.
r2_puntos = best_rf_puntos.score(X_test_scaled, y_puntos_test)
mse_puntos = mean_squared_error(y_puntos_test, y_puntos_pred)
r2_precio = best_rf_precio.score(X_test_scaled, y_precio_test)
mse_precio = mean_squared_error(y_precio_test, y_precio_pred)

print("Puntos R2 Score:", r2_puntos)
print("Puntos MSE:", mse_puntos)
print("Precio R2 Score:", r2_precio)
print("Precio MSE:", mse_precio)

Puntos R2 Score: 0.7248991718401678
Puntos MSE: 338.8459668461018
Precio R2 Score: 0.6895964674408441
Precio MSE: 11259979810107.05


In [90]:
# Results table for the test rows: player name and original (pre-encoding)
# position, looked up by the test-set index, next to the two predictions.
# The prediction arrays are in the same row order as X_test.
resultados = (
    original_data
    .loc[X_test.index, ['Nombre', 'posicion']]
    .rename(columns={'posicion': 'Posicion'})
    .assign(
        Puntos_Predichos=y_puntos_pred,
        Precio_Predicho=y_precio_pred,
    )
)

In [91]:
# Ten test-set players with the highest predicted points, best first.
top_10_puntos = resultados.sort_values(by='Puntos_Predichos', ascending=False).head(10)

print("\n10 jugadores con mayor puntaje predicho:")
for jugador in top_10_puntos.itertuples(index=False):
    print(f"{jugador.Nombre} - Puntos: {jugador.Puntos_Predichos:.2f}, Posición: {jugador.Posicion}, Precio: {jugador.Precio_Predicho:.2f} €")


10 jugadores con mayor puntaje predicho:
A. Griezmann - Puntos: 128.51, Posición: delantero, Precio: 17682839.37 €
J. Bellingham - Puntos: 127.64, Posición: mediocentro, Precio: 19461801.67 €
V. Muriqi - Puntos: 119.21, Posición: delantero, Precio: 14102619.00 €
A. Budimir - Puntos: 119.21, Posición: delantero, Precio: 15740235.42 €
Isco - Puntos: 118.73, Posición: mediocentro, Precio: 17585362.59 €
M. Guiu - Puntos: 115.47, Posición: delantero, Precio: 14843660.00 €
Sávio - Puntos: 111.86, Posición: delantero, Precio: 16639620.42 €
G. Moreno - Puntos: 111.70, Posición: delantero, Precio: 16448102.92 €
B. Zaragoza - Puntos: 105.94, Posición: mediocentro, Precio: 15544168.42 €
F. De Jong - Puntos: 97.56, Posición: mediocentro, Precio: 11290490.00 €


In [92]:
# Ten test-set players with the highest predicted price, most expensive first.
top_10_precio = resultados.sort_values(by='Precio_Predicho', ascending=False).head(10)

print("\n10 jugadores con mayor precio predicho:")
for jugador in top_10_precio.itertuples(index=False):
    print(f"{jugador.Nombre} - Puntos: {jugador.Puntos_Predichos:.2f}, Posición: {jugador.Posicion}, Precio: {jugador.Precio_Predicho:.2f} €")


10 jugadores con mayor precio predicho:
J. Bellingham - Puntos: 127.64, Posición: mediocentro, Precio: 19461801.67 €
A. Griezmann - Puntos: 128.51, Posición: delantero, Precio: 17682839.37 €
Isco - Puntos: 118.73, Posición: mediocentro, Precio: 17585362.59 €
Sávio - Puntos: 111.86, Posición: delantero, Precio: 16639620.42 €
G. Moreno - Puntos: 111.70, Posición: delantero, Precio: 16448102.92 €
A. Budimir - Puntos: 119.21, Posición: delantero, Precio: 15740235.42 €
B. Zaragoza - Puntos: 105.94, Posición: mediocentro, Precio: 15544168.42 €
M. Guiu - Puntos: 115.47, Posición: delantero, Precio: 14843660.00 €
L. Suárez - Puntos: 93.69, Posición: delantero, Precio: 14810574.63 €
J. Strand Larsen - Puntos: 92.20, Posición: delantero, Precio: 14709486.45 €


In [93]:
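# NOTE(review): dead, commented-out cell. It references `posicion` and
# `indices_precio`, which are not defined in the visible cells, and the header
# says "2 jugadores" while the loop slices the first 10. Delete it or finish it.
#print(f"\n2 jugadores {posicion} con mayor precio predicho:")
#for i in indices_precio[:10]:
#  print(df.loc[i,'Nombre'], y_precio_pred[i])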