


You should consider upgrading via the 'c:\Users\SAL\AppData\Local\Programs\Python\Python38\python.exe -m pip install --upgrade pip' command.


In [3]:
import pandas as pd
import itertools
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from deepforest import CascadeForestRegressor
import numpy as np
np.int = int
np.bool = bool

In [4]:
# 데이터 로드
df = pd.read_csv('dataset.csv')
X = df[['cement', 'blast furnace slag', 'fly ash', 'water ', 'superplasticizer',
        'coarse aggregate', 'fine aggregate', 'age']]
y = df['CCS']

# 학습/테스트 데이터 분리
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 최적 조합 탐색 함수 (DeepForest 버전)
def evaluate_all_combinations_deepforest(X_train, y_train, X_test, y_test):
    results = []
    for r in range(1, len(X_train.columns) + 1):
        for combo in itertools.combinations(X_train.columns, r):
            model = CascadeForestRegressor(random_state=42, n_jobs=-1)
            model.fit(X_train[list(combo)].values, y_train.values)
            y_pred = model.predict(X_test[list(combo)].values)
            r2 = r2_score(y_test, y_pred)

            results.append({
                'num_features': r,
                'features': combo,
                'test_r2': r2
            })

    results_df = pd.DataFrame(results).sort_values(by='test_r2', ascending=False).reset_index(drop=True)
    for i, row in results_df.iterrows():
        print(f"{i+1}. Features: {row['features']}, R² on test set: {row['test_r2']:.4f}")

    best = results_df.iloc[0]
    return best['features'], best['test_r2'], results_df

# 실행
best_combo, best_r2, all_results = evaluate_all_combinations_deepforest(X_train, y_train, X_test, y_test)
print("\n📌 최고 조합:", best_combo)
print("📈 테스트 세트 기준 R²:", best_r2)

[2025-07-24 05:33:14.130] Start to fit the model:
[2025-07-24 05:33:14.131] Fitting cascade layer = 0 
[2025-07-24 05:33:14.785] layer = 0  | Val MSE = 198.83658 | Elapsed = 0.654 s
[2025-07-24 05:33:14.789] Fitting cascade layer = 1 
[2025-07-24 05:33:15.582] layer = 1  | Val MSE = 153.80248 | Elapsed = 0.792 s
[2025-07-24 05:33:15.587] Fitting cascade layer = 2 
[2025-07-24 05:33:16.489] layer = 2  | Val MSE = 162.56658 | Elapsed = 0.901 s
[2025-07-24 05:33:16.489] Early stopping counter: 1 out of 2
[2025-07-24 05:33:16.493] Fitting cascade layer = 3 
[2025-07-24 05:33:17.402] layer = 3  | Val MSE = 169.37222 | Elapsed = 0.909 s
[2025-07-24 05:33:17.403] Early stopping counter: 2 out of 2
[2025-07-24 05:33:17.403] Handling early stopping
[2025-07-24 05:33:17.404] The optimal number of layers: 2
[2025-07-24 05:33:17.407] Start to evalute the model:
[2025-07-24 05:33:17.407] Evaluating cascade layer = 0 
[2025-07-24 05:33:17.542] Evaluating cascade layer = 1 
[2025-07-24 05:33:17.674] 