# 06 - Hyperparameter Tuning

Tuning de hiperparâmetros com GridSearchCV/RandomizedSearchCV para os modelos selecionados.



In [None]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.ensemble import RandomForestRegressor

from src.data_ingestion import load_wine_dataframe
from src.data_processing import DataPreprocessor

# Dataset location. Both values can be overridden through environment
# variables; the literals below are the fallbacks used when they are unset.
# NOTE(review): the HF_ prefix suggests a Hugging Face dataset repo - confirm
# against load_wine_dataframe.
HF_REPO = os.getenv("HF_DATASET_REPO", "henriquebap/wine-ml-dataset")
FILENAME = os.getenv("HF_DATASET_FILENAME", "WineQT.csv")

# Input columns fed to the model, in the order they are selected from the
# preprocessed frame. The target column ("quality") is intentionally absent.
FEATURES = [
    "fixed acidity",
    "volatile acidity",
    "citric acid",
    "residual sugar",
    "chlorides",
    "free sulfur dioxide",
    "total sulfur dioxide",
    "density",
    "pH",
    "sulphates",
    "alcohol",
]

# Fetch the raw table from the configured repository/file.
df = load_wine_dataframe(
    repo_id=HF_REPO,
    filename=FILENAME,
)

# Fit the project preprocessor on the raw frame and apply it in one step.
# NOTE(review): the preprocessor is fitted on the full dataset before the
# cross-validation split below; if it learns any statistics from the data
# (scaling, imputation, ...), CV scores may be optimistic - confirm what
# DataPreprocessor.fit_transform actually does.
pre = DataPreprocessor(
    feature_columns=FEATURES,
    target_column="quality",
)
df_p = pre.fit_transform(df)

# Split the processed frame into the design matrix and the target.
X, y = df_p[FEATURES], df_p["quality"]

# Base estimator; the seed is fixed so repeated runs build the same forests.
rf = RandomForestRegressor(random_state=42)

# Shuffled 5-fold split with a fixed seed, shared by every candidate so that
# all configurations are scored on identical folds.
cv = KFold(n_splits=5, shuffle=True, random_state=42)

# Exhaustive search space: 3 * 4 * 3 = 36 candidate configurations.
param_grid = {
    "n_estimators": [200, 400, 600],
    "max_depth": [None, 8, 12, 16],
    "min_samples_split": [2, 5, 10],
}

search = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    scoring="neg_root_mean_squared_error",
    cv=cv,
    n_jobs=-1,  # run candidate fits in parallel on all available cores
)
search.fit(X, y)

print("Best params:", search.best_params_)
# The scorer reports negated RMSE (higher is better), so flip the sign to
# show the error itself.
print("Best RMSE:", -search.best_score_)
