In [1]:
import pandas as pd
import optuna
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# 데이터 로드
df = pd.read_csv('dataset.csv')
X = df[['cement', 'blast furnace slag', 'water ', 'superplasticizer', 'fine aggregate', 'age']]
y = df['CCS']

# 학습/테스트 분리
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

def objective(trial):
    # 하이퍼파라미터 설정
    n_estimators = trial.suggest_int("n_estimators", 100, 1000)
    max_depth = trial.suggest_int("max_depth", 3, 20)
    min_samples_split = trial.suggest_int("min_samples_split", 2, 10)
    min_samples_leaf = trial.suggest_int("min_samples_leaf", 1, 5)

    model = RandomForestRegressor(
        n_estimators=n_estimators,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        random_state=42
    )

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    return r2_score(y_test, y_pred)

# Optuna 실행
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)

# 결과 출력
best_trial = study.best_trial
print(f"📌 고정 피처: {list(X.columns)}")
print(f"📈 테스트 세트 기준 R²: {best_trial.value:.4f}")
print(f"⚙️ 최적 하이퍼파라미터: {best_trial.params}")

[I 2025-07-24 14:16:46,729] A new study created in memory with name: no-name-8e2537b3-dd0c-4934-8c19-055382ac39d9
[I 2025-07-24 14:16:48,843] Trial 0 finished with value: 0.803619274975718 and parameters: {'n_estimators': 952, 'max_depth': 5, 'min_samples_split': 4, 'min_samples_leaf': 1}. Best is trial 0 with value: 0.803619274975718.
[I 2025-07-24 14:16:49,469] Trial 1 finished with value: 0.858245052705145 and parameters: {'n_estimators': 235, 'max_depth': 9, 'min_samples_split': 9, 'min_samples_leaf': 5}. Best is trial 1 with value: 0.858245052705145.
[I 2025-07-24 14:16:51,755] Trial 2 finished with value: 0.863992235263487 and parameters: {'n_estimators': 875, 'max_depth': 16, 'min_samples_split': 10, 'min_samples_leaf': 4}. Best is trial 2 with value: 0.863992235263487.
[I 2025-07-24 14:16:54,507] Trial 3 finished with value: 0.8721534578865238 and parameters: {'n_estimators': 1000, 'max_depth': 20, 'min_samples_split': 4, 'min_samples_leaf': 3}. Best is trial 3 with value: 0.87

📌 고정 피처: ['cement', 'blast furnace slag', 'water ', 'superplasticizer', 'fine aggregate', 'age']
📈 테스트 세트 기준 R²: 0.8850
⚙️ 최적 하이퍼파라미터: {'n_estimators': 607, 'max_depth': 19, 'min_samples_split': 3, 'min_samples_leaf': 1}
