<a href="https://colab.research.google.com/github/easywater4528/my-ML-project/blob/main/mini_proj_04_20221322.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd

from sklearn.model_selection import KFold, RandomizedSearchCV, GridSearchCV, cross_val_score
from sklearn.ensemble import RandomForestRegressor

# Helper that prints a set of scores in a fixed three-line format.
def display_scores(scores):
    """Print the scores, then their mean and standard deviation.

    Args:
        scores: Object exposing ``mean()`` and ``std()`` methods (this
            notebook passes the array returned by ``cross_val_score``).

    Returns:
        None. Three lines are written to standard output.
    """
    print("Scores:", scores)
    # Each statistic is computed right before it is printed, one per line.
    for label, method_name in (("Mean:", "mean"), ("Standard deviation:", "std")):
        print(label, getattr(scores, method_name)())


In [9]:
# Read the wine-quality CSV from the current working directory.
df = pd.read_csv("winequality-red.csv")

print("데이터 크기:", df.shape)
# A notebook cell only auto-renders its final expression, so a bare
# `df.head()` in the middle of the cell is computed and then discarded.
# Render the preview explicitly; `display` is provided by the IPython/Jupyter
# kernel without an import.
display(df.head())
# Summary statistics are the cell's final expression and render automatically.
df.describe()

데이터 크기: (1599, 12)


Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
count,1599.0,1599.0,1599.0,1599.0,1599.0,1599.0,1599.0,1599.0,1599.0,1599.0,1599.0,1599.0
mean,8.319637,0.527821,0.270976,2.538806,0.087467,15.874922,46.467792,0.996747,3.311113,0.658149,10.422983,5.636023
std,1.741096,0.17906,0.194801,1.409928,0.047065,10.460157,32.895324,0.001887,0.154386,0.169507,1.065668,0.807569
min,4.6,0.12,0.0,0.9,0.012,1.0,6.0,0.99007,2.74,0.33,8.4,3.0
25%,7.1,0.39,0.09,1.9,0.07,7.0,22.0,0.9956,3.21,0.55,9.5,5.0
50%,7.9,0.52,0.26,2.2,0.079,14.0,38.0,0.99675,3.31,0.62,10.2,6.0
75%,9.2,0.64,0.42,2.6,0.09,21.0,62.0,0.997835,3.4,0.73,11.1,6.0
max,15.9,1.58,1.0,15.5,0.611,72.0,289.0,1.00369,4.01,2.0,14.9,8.0


In [10]:
# Split the frame into the target column and the remaining predictor columns.
y = df["quality"]
X = df.drop(columns=["quality"])

print("X shape:", X.shape)
print("y shape:", y.shape)

# Cross-validation: 10 shuffled folds with a fixed seed; this splitter is
# reused by the searches and the final evaluation in later cells.
cv = KFold(10, random_state=42, shuffle=True)

X shape: (1599, 11)
y shape: (1599,)


In [12]:
# Use a random forest regressor as the base estimator for the coarse search.
rf = RandomForestRegressor(n_jobs=-1, random_state=42)

# Hyperparameters: candidate values the randomized search samples from.
param_dist_rf = dict(
    n_estimators=[200, 300, 500, 800],
    max_depth=[None, 10, 20, 30],
    min_samples_split=[2, 4, 6, 10],
    min_samples_leaf=[1, 2, 4],
    max_features=["sqrt", "log2", 0.5, 0.7],
    bootstrap=[True, False],
)

# Try 40 sampled combinations, each scored by negated RMSE on the `cv` folds.
# NOTE(review): n_jobs=-1 is set on both the forest and the search; this may
# oversubscribe CPU cores -- confirm on the target machine.
search_rf = RandomizedSearchCV(
    rf,
    param_dist_rf,
    n_iter=40,
    scoring="neg_root_mean_squared_error",
    cv=cv,
    random_state=42,
    n_jobs=-1,
)

search_rf.fit(X, y)

In [None]:
# Second-stage tuning: pull the best value of every hyperparameter found by
# the randomized search so the fine grid can be centred on them.
(
    base_n_estimators,
    base_max_depth,
    base_min_split,
    base_min_leaf,
    base_max_features,
    base_bootstrap,
) = (
    search_rf.best_params_[key]
    for key in (
        "n_estimators",
        "max_depth",
        "min_samples_split",
        "min_samples_leaf",
        "max_features",
        "bootstrap",
    )
)


In [None]:
# GridSearchCV: fine grid centred on the best values from the randomized search.
#
# Every numeric candidate list is built as sorted({...}) rather than a plain
# list. The lower clamp (max(floor, base - step)) can coincide with the base
# value -- e.g. base_min_leaf == 1 yields 1, 1, 2 -- and a grid search fits
# every combination it is given, so repeated values would only re-run
# identical fits. De-duplicating keeps the same distinct candidates in the
# same ascending order.
param_grid_rf_fine = {
    "n_estimators": sorted({
        max(200, base_n_estimators - 200),
        base_n_estimators,
        base_n_estimators + 200,
    }),
    # An unlimited depth (None) has no numeric neighbours, so it is kept as-is.
    "max_depth": (
        [None] if base_max_depth is None else
        sorted({max(1, base_max_depth - 5), base_max_depth, base_max_depth + 5})
    ),
    "min_samples_split": sorted({
        max(2, base_min_split - 2),
        base_min_split,
        base_min_split + 2,
    }),
    "min_samples_leaf": sorted({
        max(1, base_min_leaf - 1),
        base_min_leaf,
        base_min_leaf + 1,
    }),
    # These two are held fixed at the values chosen by the randomized search.
    "max_features": [base_max_features],
    "bootstrap": [base_bootstrap]
}

# Exhaustive search over the fine grid, scored by negated RMSE on the `cv` folds.
rf_fine = RandomForestRegressor(n_jobs=-1, random_state=42)

grid_rf_fine = GridSearchCV(
    rf_fine,
    param_grid_rf_fine,
    scoring="neg_root_mean_squared_error",
    cv=cv,
    n_jobs=-1,
)
grid_rf_fine.fit(X, y)

# Best score is a negated RMSE, so flip the sign to get a positive RMSE.
best_rmse_final = -grid_rf_fine.best_score_
best_rf_final = grid_rf_fine.best_estimator_

# Final cross-validation of the selected model.
# NOTE(review): this reuses the same `cv` splitter and the same X, y that the
# search was tuned on, so it is presumably not an independent hold-out
# estimate -- confirm whether a separate test split is wanted.
neg_rmse_scores_final = cross_val_score(
    best_rf_final, X, y,
    cv=cv,
    scoring="neg_root_mean_squared_error",
)

# Convert the negated scores back to positive RMSE values.
rmse_scores_final = -neg_rmse_scores_final

print("\n[RandomForest - 최종 튜닝 모델 결과 값]")
display_scores(rmse_scores_final)



[RandomForest - 최종 튜닝 모델 결과 값]
Scores: [0.54238186 0.51964678 0.58905601 0.52894379 0.56334497 0.58888149
 0.55785222 0.59563075 0.52320269 0.52747654]
Mean: 0.553641709905199
Standard deviation: 0.02806163352232548
