In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from xgboost import XGBRegressor
import optuna

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# 데이터 로드
df = pd.read_csv('dataset.csv')
selected_features = ['cement', 'blast furnace slag', 'water ', 'superplasticizer', 'age']
X = df[selected_features]
y = df['CCS']

# 학습/테스트 데이터 분리
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Optuna 목적 함수 정의
def objective(trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 0.0, 5.0),
        'reg_lambda': trial.suggest_float('reg_lambda', 0.0, 5.0),
        'random_state': 42,
        'n_jobs': -1,
        'verbosity': 0
    }

    model = XGBRegressor(**params)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    return r2_score(y_test, y_pred)

# Optuna 스터디 실행
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)

# 최적 결과 출력
print("✅ Best R² on test set:", study.best_value)
print("📌 Best Parameters:")
for key, value in study.best_params.items():
    print(f"  {key}: {value}")

[I 2025-07-24 14:29:10,620] A new study created in memory with name: no-name-1362711b-fcc3-4eec-a1b6-bd6e2f0e7daa
[I 2025-07-24 14:29:11,409] Trial 0 finished with value: 0.8893885277358751 and parameters: {'n_estimators': 740, 'learning_rate': 0.245116961807902, 'max_depth': 7, 'subsample': 0.7230158510829424, 'colsample_bytree': 0.9030256684111093, 'reg_alpha': 0.9919925847985406, 'reg_lambda': 0.31472677181402675}. Best is trial 0 with value: 0.8893885277358751.
[I 2025-07-24 14:29:11,538] Trial 1 finished with value: 0.9049975957490659 and parameters: {'n_estimators': 185, 'learning_rate': 0.2857045297083393, 'max_depth': 5, 'subsample': 0.7490028132345713, 'colsample_bytree': 0.5336489193506859, 'reg_alpha': 3.22549890968612, 'reg_lambda': 1.6185864224393547}. Best is trial 1 with value: 0.9049975957490659.
[I 2025-07-24 14:29:11,988] Trial 2 finished with value: 0.9033555053692605 and parameters: {'n_estimators': 297, 'learning_rate': 0.04919064880321288, 'max_depth': 10, 'subsam

✅ Best R² on test set: 0.9301043046404089
📌 Best Parameters:
  n_estimators: 752
  learning_rate: 0.24535560803430556
  max_depth: 4
  subsample: 0.9662003369246814
  colsample_bytree: 0.8700084091964568
  reg_alpha: 4.424425558808598
  reg_lambda: 2.8246697934201825
