In [2]:
import pandas as pd
import optuna
import itertools
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from catboost import CatBoostRegressor

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
df = pd.read_csv('dataset.csv')
X = df[['cement', 'blast furnace slag', 'fly ash', 'superplasticizer', 'coarse aggregate', 'age']]
y = df['CCS']

# 학습/테스트 분리
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

def objective(trial, X_train, y_train, X_test, y_test):
    params = {
        'iterations': trial.suggest_int('iterations', 100, 1000),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'depth': trial.suggest_int('depth', 3, 10),
        'random_seed': 42,
        'verbose': 0
    }
    model = CatBoostRegressor(**params)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    return r2_score(y_test, y_pred)

def optuna_tune_catboost(X_train, y_train, X_test, y_test, features, n_trials=50):
    def tuned_objective(trial):
        return objective(trial, X_train[features], y_train, X_test[features], y_test)

    study = optuna.create_study(direction='maximize')
    study.optimize(tuned_objective, n_trials=n_trials)

    best_params = study.best_params
    best_model = CatBoostRegressor(**best_params, random_seed=42, verbose=0)
    best_model.fit(X_train[features], y_train)
    y_pred = best_model.predict(X_test[features])
    r2 = r2_score(y_test, y_pred)

    return r2, best_params

# 지정 피처
selected_features = ['cement', 'blast furnace slag', 'fly ash', 'superplasticizer', 'coarse aggregate', 'age']

# 실행
r2, best_params = optuna_tune_catboost(X_train, y_train, X_test, y_test, selected_features, n_trials=50)

print(f"📌 지정 피처: {selected_features}")
print(f"📈 테스트 세트 기준 R²: {r2:.4f}")
print(f"⚙️ 최적 하이퍼파라미터: {best_params}")

[I 2025-07-24 14:02:36,578] A new study created in memory with name: no-name-0815c4f0-0da8-41a9-a927-41d310811db0
[I 2025-07-24 14:02:37,287] Trial 0 finished with value: 0.9445453504011159 and parameters: {'iterations': 944, 'learning_rate': 0.17803854690663984, 'depth': 3}. Best is trial 0 with value: 0.9445453504011159.
[I 2025-07-24 14:02:42,806] Trial 1 finished with value: 0.9450637260447726 and parameters: {'iterations': 908, 'learning_rate': 0.038144871657649945, 'depth': 9}. Best is trial 1 with value: 0.9450637260447726.
[I 2025-07-24 14:02:43,489] Trial 2 finished with value: 0.935592329531463 and parameters: {'iterations': 555, 'learning_rate': 0.06703411286392894, 'depth': 5}. Best is trial 1 with value: 0.9450637260447726.
[I 2025-07-24 14:02:44,114] Trial 3 finished with value: 0.9142239790356808 and parameters: {'iterations': 694, 'learning_rate': 0.042181921623489486, 'depth': 4}. Best is trial 1 with value: 0.9450637260447726.
[I 2025-07-24 14:02:45,528] Trial 4 finis

📌 지정 피처: ['cement', 'blast furnace slag', 'fly ash', 'superplasticizer', 'coarse aggregate', 'age']
📈 테스트 세트 기준 R²: 0.9564
⚙️ 최적 하이퍼파라미터: {'iterations': 621, 'learning_rate': 0.21997732696635888, 'depth': 5}
