In [1]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import TimeSeriesSplit, RandomizedSearchCV
from sklearn.metrics import make_scorer
from scipy.stats import spearmanr

# 1. Load the processed feature table and order its rows by season, race round
#    and driver, then renumber the index so row position follows that order.
df = (
    pd.read_csv('../data/processed/f1_features_complete.csv')
    .sort_values(['year', 'round', 'driver'])
    .reset_index(drop=True)
)

# Regression target and the predictor columns passed to the model.
target_col = 'pct_puntos_final'
feature_cols = [
    'pct_puntos_actual',
    'pct_linear_points',
    'posicion_media',
    'tendencia_ultimas_3',
    'diff_con_lider_normalizada',
    'progreso_temporada',
    'driver_quality_3y',
    'team_avg_pos_3y',
    'team_trend',
]

X = df[feature_cols]
y = df[target_col]

# 2. Time-series cross-validation with 5 splits over the row order set above.
# NOTE(review): TimeSeriesSplit cuts purely by row position, so a fold boundary
# can fall inside a single (year, round) group -- confirm that is acceptable
# for this target.
tscv = TimeSeriesSplit(n_splits=5)

# 3. Spearman metric
def spearman_scorer(y_true, y_pred):
    """Return the Spearman rank correlation between targets and predictions.

    Args:
        y_true: Array-like of observed target values.
        y_pred: Array-like of predicted values, same length as ``y_true``.

    Returns:
        float: The rank correlation coefficient, or ``0.0`` when the
        coefficient is undefined (``spearmanr`` returned NaN).
    """
    rho = spearmanr(y_true, y_pred)[0]
    # spearmanr yields NaN when either input is constant (for example a model
    # that predicts one value for a whole fold). A NaN fold score would make
    # the candidate's mean CV score NaN as well, so report "no rank agreement".
    if np.isnan(rho):
        return 0.0
    return float(rho)

# Wrap the metric as a scikit-learn scorer; larger values are treated as better.
custom_scorer = make_scorer(score_func=spearman_scorer, greater_is_better=True)

# 4. New "shifted" grid (fine tuning)
# Candidate values are moved toward the region the earlier results pointed to.
param_grid_fino = dict(
    # Explore below 100 trees as well as above.
    n_estimators=[50, 70, 90, 100, 110, 130, 150],
    # Also try depth 2, just in case.
    max_depth=[2, 3, 4],
    # Explore above 0.1.
    learning_rate=[0.08, 0.1, 0.12, 0.15, 0.2],
    # Refine the upper range.
    subsample=[0.85, 0.9, 0.95, 1.0],
    # Explore below 0.6.
    colsample_bytree=[0.4, 0.5, 0.6, 0.7],
    # Refine close to 0.
    reg_alpha=[0, 0.01, 0.05, 0.1],
)

# 5. Configure the base model
# The hyper-parameter search in this cell already runs candidate fits in
# parallel (n_jobs=-1). Keeping the booster on a single thread avoids nested
# parallelism, where every search worker would also spawn one thread per core
# and the workers would contend for the same CPUs.
xgb_model = xgb.XGBRegressor(
    objective='reg:squarederror',
    n_jobs=1,
    random_state=42
)

# 6. Fine search
# Randomly sample 50 parameter combinations from the grid and score each one
# with the custom Spearman scorer on every time-series split.
search = RandomizedSearchCV(
    estimator=xgb_model,
    param_distributions=param_grid_fino,
    n_iter=50,
    cv=tscv,
    scoring=custom_scorer,
    verbose=1,
    n_jobs=-1,
    random_state=42
)

search.fit(X, y)

# Report the winning configuration and its mean cross-validated score.
# The banner text was mis-encoded (UTF-8 bytes decoded as Mac Roman); the
# intended characters are restored here.
print("\n" + "="*50)
print("🏆 MEJORES PARÁMETROS FINALES")
print("="*50)
print(search.best_params_)
print(f"\nMejor Spearman Promedio: {search.best_score_:.4f}")

Fitting 5 folds for each of 50 candidates, totalling 250 fits

🏆 MEJORES PARÁMETROS FINALES
{'subsample': 0.95, 'reg_alpha': 0.1, 'n_estimators': 50, 'max_depth': 3, 'learning_rate': 0.08, 'colsample_bytree': 0.5}

Mejor Spearman Promedio: 0.9380
