In [2]:
import pandas as pd
import itertools
import optuna
from catboost import CatBoostRegressor, Pool
from sklearn.model_selection import cross_val_score, KFold
from sklearn.metrics import make_scorer, r2_score

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def optuna_tune_catboost(X, y, combo, n_trials=50):
    def objective(trial):
        params = {
            'iterations': trial.suggest_int('iterations', 200, 1000),
            'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
            'depth': trial.suggest_int('depth', 4, 10),
            'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1, 10),
            'bagging_temperature': trial.suggest_float('bagging_temperature', 0, 1),
            'random_state': 42,
            'verbose': 0
        }

        model = CatBoostRegressor(**params)
        kf = KFold(n_splits=5, shuffle=True, random_state=42)
        scores = cross_val_score(model, X[list(combo)], y, cv=kf, scoring='r2')
        return scores.mean()

    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=n_trials)
    return study.best_trial.value, study.best_trial.params

def evaluate_and_tune_top_combos(X, y, top_n=3, n_trials=50):
    base_results = []
    kf = KFold(n_splits=5, shuffle=True, random_state=42)

    print("🔍 1단계: 전체 조합 성능 평가 중...\n")
    for r in range(1, len(X.columns) + 1):
        for combo in itertools.combinations(X.columns, r):
            X_subset = X[list(combo)]
            model = CatBoostRegressor(iterations=300, learning_rate=0.1, depth=6, random_state=42, verbose=0)
            scores = cross_val_score(model, X_subset, y, cv=kf, scoring='r2')
            base_results.append({
                'features': combo,
                'mean_r2': scores.mean()
            })

    top_combos = sorted(base_results, key=lambda x: x['mean_r2'], reverse=True)[:top_n]

    print("⚙️ 2단계: 상위 조합에 대해 Optuna 하이퍼파라미터 튜닝 중...\n")

    for i, combo_info in enumerate(top_combos):
        combo = combo_info['features']
        print(f"⏳ {i+1}/{top_n} 조합 튜닝 중: {combo}")
        best_r2, best_params = optuna_tune_catboost(X, y, combo, n_trials=n_trials)

        print(f"\n✅ 결과 {i+1}:")
        print(f"📌 Feature 조합: {combo}")
        print(f"📈 최적 R²: {best_r2:.4f}")
        print(f"⚙️ 최적 하이퍼파라미터: {best_params}")
        print("-" * 60)

# 데이터 불러오기
df = pd.read_csv('dataset.csv')
X = df[['cement', 'blast furnace slag', 'fly ash', 'water ', 'superplasticizer', 'coarse aggregate', 'fine aggregate', 'age']]
y = df['CCS']

# 실행: 상위 3개 조합, 조합당 50번 탐색
evaluate_and_tune_top_combos(X, y, top_n=3, n_trials=50)

🔍 1단계: 전체 조합 성능 평가 중...



[I 2025-07-10 16:02:53,274] A new study created in memory with name: no-name-56fd8e1d-427d-4559-9cc3-e847cad77702


⚙️ 2단계: 상위 조합에 대해 Optuna 하이퍼파라미터 튜닝 중...

⏳ 1/3 조합 튜닝 중: ('cement', 'blast furnace slag', 'fly ash', 'water ', 'superplasticizer', 'coarse aggregate', 'fine aggregate', 'age')


[I 2025-07-10 16:02:57,197] Trial 0 finished with value: 0.9218836437788781 and parameters: {'iterations': 689, 'learning_rate': 0.0301090298006941, 'depth': 5, 'l2_leaf_reg': 7.2167874959939535, 'bagging_temperature': 0.8603938176312604}. Best is trial 0 with value: 0.9218836437788781.
[I 2025-07-10 16:03:18,618] Trial 1 finished with value: 0.9323331617736208 and parameters: {'iterations': 670, 'learning_rate': 0.2758557102362276, 'depth': 9, 'l2_leaf_reg': 2.1493414255537346, 'bagging_temperature': 0.08820458116919061}. Best is trial 1 with value: 0.9323331617736208.
[I 2025-07-10 16:03:20,100] Trial 2 finished with value: 0.9297431007402123 and parameters: {'iterations': 312, 'learning_rate': 0.11924363620063999, 'depth': 4, 'l2_leaf_reg': 8.018388073499457, 'bagging_temperature': 0.4799076449318955}. Best is trial 1 with value: 0.9323331617736208.
[I 2025-07-10 16:03:23,353] Trial 3 finished with value: 0.9237652984277489 and parameters: {'iterations': 568, 'learning_rate': 0.0394


✅ 결과 1:
📌 Feature 조합: ('cement', 'blast furnace slag', 'fly ash', 'water ', 'superplasticizer', 'coarse aggregate', 'fine aggregate', 'age')
📈 최적 R²: 0.9468
⚙️ 최적 하이퍼파라미터: {'iterations': 946, 'learning_rate': 0.15123628823235863, 'depth': 4, 'l2_leaf_reg': 2.1855148738345545, 'bagging_temperature': 0.8006190530305709}
------------------------------------------------------------
⏳ 2/3 조합 튜닝 중: ('cement', 'blast furnace slag', 'fly ash', 'water ', 'superplasticizer', 'fine aggregate', 'age')


[I 2025-07-10 16:09:16,622] Trial 0 finished with value: 0.9154330394944197 and parameters: {'iterations': 587, 'learning_rate': 0.017022398504563288, 'depth': 6, 'l2_leaf_reg': 2.2007291721082716, 'bagging_temperature': 0.9531646512831267}. Best is trial 0 with value: 0.9154330394944197.
[I 2025-07-10 16:09:21,603] Trial 1 finished with value: 0.9388071488050487 and parameters: {'iterations': 648, 'learning_rate': 0.2991988444845886, 'depth': 6, 'l2_leaf_reg': 1.3971728534911034, 'bagging_temperature': 0.6696955832304969}. Best is trial 1 with value: 0.9388071488050487.
[I 2025-07-10 16:09:26,887] Trial 2 finished with value: 0.9415326536996977 and parameters: {'iterations': 941, 'learning_rate': 0.2840659961722106, 'depth': 5, 'l2_leaf_reg': 9.406693612743485, 'bagging_temperature': 0.1855248463446445}. Best is trial 2 with value: 0.9415326536996977.
[I 2025-07-10 16:09:29,611] Trial 3 finished with value: 0.864076031227782 and parameters: {'iterations': 458, 'learning_rate': 0.01042


✅ 결과 2:
📌 Feature 조합: ('cement', 'blast furnace slag', 'fly ash', 'water ', 'superplasticizer', 'fine aggregate', 'age')
📈 최적 R²: 0.9441
⚙️ 최적 하이퍼파라미터: {'iterations': 873, 'learning_rate': 0.16541932050032707, 'depth': 4, 'l2_leaf_reg': 4.09516816327432, 'bagging_temperature': 0.7326345408293699}
------------------------------------------------------------
⏳ 3/3 조합 튜닝 중: ('cement', 'blast furnace slag', 'fly ash', 'water ', 'fine aggregate', 'age')


[I 2025-07-10 16:15:20,823] Trial 0 finished with value: 0.9312078007494277 and parameters: {'iterations': 675, 'learning_rate': 0.050631202262450194, 'depth': 9, 'l2_leaf_reg': 8.621861138002282, 'bagging_temperature': 0.7754853477436267}. Best is trial 0 with value: 0.9312078007494277.
[I 2025-07-10 16:15:24,218] Trial 1 finished with value: 0.9374395017282193 and parameters: {'iterations': 758, 'learning_rate': 0.0986542332056794, 'depth': 4, 'l2_leaf_reg': 4.3821088373038934, 'bagging_temperature': 0.6329689169101439}. Best is trial 1 with value: 0.9374395017282193.
[I 2025-07-10 16:15:27,349] Trial 2 finished with value: 0.940619492880628 and parameters: {'iterations': 696, 'learning_rate': 0.23874040590887624, 'depth': 4, 'l2_leaf_reg': 3.1624428461290264, 'bagging_temperature': 0.7052613871350838}. Best is trial 2 with value: 0.940619492880628.
[I 2025-07-10 16:15:29,433] Trial 3 finished with value: 0.8801651170595178 and parameters: {'iterations': 348, 'learning_rate': 0.01740


✅ 결과 3:
📌 Feature 조합: ('cement', 'blast furnace slag', 'fly ash', 'water ', 'fine aggregate', 'age')
📈 최적 R²: 0.9421
⚙️ 최적 하이퍼파라미터: {'iterations': 995, 'learning_rate': 0.10813105801990017, 'depth': 6, 'l2_leaf_reg': 3.0565190044219053, 'bagging_temperature': 0.12464914285467359}
------------------------------------------------------------
