In [1]:
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import pandas as pd
from sklearn.model_selection import cross_val_score, KFold
from sklearn.metrics import make_scorer, mean_squared_error
import numpy as np
import mlflow

In [2]:
train = pd.read_csv('../data/train.csv')

In [3]:
X = train.drop(columns=[
    'ID', 'target1', 'target2', 'target3', 'target4', 'target5',
    'target6', 'target7', 'target8', 'target9', 'target10', 'target11'
])

y = train.drop(columns=[
    'ID', 'feature2', 'feature3', 'feature4', 'feature5', 'feature6',
    'feature7', 'feature8', 'feature9', 'feature10', 'feature11',
    'feature12', 'feature13', 'feature14', 'feature15'
])


In [4]:
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

Hyper-param optmization

In [5]:
import optuna
from sklearn.svm import SVR
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import KFold
import mlflow
import numpy as np
import warnings
from sklearn.metrics import root_mean_squared_error

warnings.filterwarnings('ignore')

def objective(trial):
    with mlflow.start_run(run_name=f'Trial-{trial.number}', nested=True):
        params = {
            'C': trial.suggest_float('C', 1e-2, 100.0, log=True),
            'epsilon': trial.suggest_float('epsilon', 1e-3, 1.0, log=True),
            'kernel': trial.suggest_categorical('kernel', ['linear', 'rbf', 'poly']),
            'degree': trial.suggest_int('degree', 2, 5) if trial.suggest_categorical('kernel', ['linear', 'rbf', 'poly']) == 'poly' else 3,
            'gamma': trial.suggest_categorical('gamma', ['scale', 'auto']),
        }

        mlflow.log_params(params)

        base = SVR(**params)
        model = MultiOutputRegressor(base, n_jobs=1)

        kf = KFold(n_splits=10, shuffle=True, random_state=42)
        rmses = []

        for train_idx, val_idx in kf.split(X):
            X_tr, X_va = X.iloc[train_idx], X.iloc[val_idx]
            y_tr, y_va = y.iloc[train_idx], y.iloc[val_idx]

            model.fit(X_tr, y_tr)
            y_pred = model.predict(X_va)
            rmses.append(root_mean_squared_error(y_va, y_pred))

        rmse_mean = np.mean(rmses)
        mlflow.log_metric('rmse', rmse_mean)
        return rmse_mean


  from .autonotebook import tqdm as notebook_tqdm


In [6]:
study = optuna.create_study(
    direction='minimize',
    sampler=optuna.samplers.TPESampler(seed=42),
    pruner=optuna.pruners.MedianPruner(
        n_startup_trials=20,
        n_warmup_steps=10,
        interval_steps=5
    )
)

mlflow.set_tracking_uri('http://127.0.0.1:5000')
mlflow.set_experiment('DSWA - Competicao3')
with mlflow.start_run(run_name='SVR optimization'):
    study.optimize(objective, n_trials=50)
    
    print("Melhores hiperparâmetros:")
    print(study.best_params)
    print(f"Melhor RMSE: {study.best_value:.4f}")

    # Loga os melhores parâmetros e métrica no MLflow
    mlflow.log_params(study.best_params)
    mlflow.log_metric("best_rmse", study.best_value)


[I 2025-07-28 14:20:27,298] A new study created in memory with name: no-name-d6fa2164-cbaf-4899-bf69-e7ce1b264844
[I 2025-07-28 14:20:28,790] Trial 0 finished with value: 0.6641015829560927 and parameters: {'C': 0.31489116479568624, 'epsilon': 0.711447600934342, 'kernel': 'linear', 'gamma': 'scale'}. Best is trial 0 with value: 0.6641015829560927.


🏃 View run Trial-0 at: http://127.0.0.1:5000/#/experiments/962208812792484732/runs/162bd8c318a14ad4be22374d6ff6a57c
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/962208812792484732


[I 2025-07-28 14:20:35,038] Trial 1 finished with value: 21.097009291049552 and parameters: {'C': 29.154431891537552, 'epsilon': 0.06358358856676251, 'kernel': 'poly', 'degree': 5, 'gamma': 'scale'}. Best is trial 0 with value: 0.6641015829560927.


🏃 View run Trial-1 at: http://127.0.0.1:5000/#/experiments/962208812792484732/runs/2114599a2e354c1e9b883219d1e53c45
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/962208812792484732


[I 2025-07-28 14:20:37,446] Trial 2 finished with value: 0.6102246860638755 and parameters: {'C': 0.05415244119402541, 'epsilon': 0.008179499475211672, 'kernel': 'linear', 'gamma': 'scale'}. Best is trial 2 with value: 0.6102246860638755.


🏃 View run Trial-2 at: http://127.0.0.1:5000/#/experiments/962208812792484732/runs/e5eda024fac14dff892357f3733b699a
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/962208812792484732


[I 2025-07-28 14:20:38,948] Trial 3 finished with value: 0.7174278005644223 and parameters: {'C': 0.14742753159914673, 'epsilon': 0.01256277350380703, 'kernel': 'rbf', 'gamma': 'auto'}. Best is trial 2 with value: 0.6102246860638755.


🏃 View run Trial-3 at: http://127.0.0.1:5000/#/experiments/962208812792484732/runs/ab006b54852f4cd3889d774852e27136
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/962208812792484732


[I 2025-07-28 14:20:40,561] Trial 4 finished with value: 2.262740947516627 and parameters: {'C': 0.015339162591163621, 'epsilon': 0.06647135865318027, 'kernel': 'poly', 'degree': 5, 'gamma': 'scale'}. Best is trial 2 with value: 0.6102246860638755.


🏃 View run Trial-4 at: http://127.0.0.1:5000/#/experiments/962208812792484732/runs/8e3ba87ad7ed4a37be6da78fdd79f19b
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/962208812792484732


[I 2025-07-28 14:20:41,532] Trial 5 finished with value: 0.6841546633578948 and parameters: {'C': 0.024586032763280065, 'epsilon': 0.11290133559092672, 'kernel': 'poly', 'degree': 2, 'gamma': 'scale'}. Best is trial 2 with value: 0.6102246860638755.


🏃 View run Trial-5 at: http://127.0.0.1:5000/#/experiments/962208812792484732/runs/416154f12171411c8f63719057bd771b
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/962208812792484732


[W 2025-07-28 14:20:44,653] Trial 6 failed with parameters: {'C': 4.467752817973908, 'epsilon': 0.00861257919259488, 'kernel': 'rbf', 'gamma': 'scale'} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "c:\Users\Guilherme\AppData\Local\Programs\Python\Python311\Lib\site-packages\optuna\study\_optimize.py", line 201, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "C:\Users\Guilherme\AppData\Local\Temp\ipykernel_19552\2848900227.py", line 34, in objective
    model.fit(X_tr, y_tr)
  File "c:\Users\Guilherme\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\base.py", line 1365, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Guilherme\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\multioutput.py", line 278, in fit
    self.estimators_ = Parallel(n_jobs=self.n_jobs)(
                       ^

🏃 View run Trial-6 at: http://127.0.0.1:5000/#/experiments/962208812792484732/runs/98edc481e5674d8f9bd4aebb8e549312
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/962208812792484732
🏃 View run SVR optimization at: http://127.0.0.1:5000/#/experiments/962208812792484732/runs/007cf12c3e5c4a829b602c0f7ac9671c
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/962208812792484732


KeyboardInterrupt: 