In [1]:
import pandas as pd
import itertools
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from xgboost import XGBRegressor

In [2]:
# Input feature columns as they appear in the CSV header
# (note that 'water ' carries a trailing space in the column name).
feature_columns = [
    'cement',
    'blast furnace slag',
    'fly ash',
    'water ',
    'superplasticizer',
    'coarse aggregate',
    'fine aggregate',
    'age',
]

# Load the dataset, then separate the feature matrix from the 'CCS' target column.
df = pd.read_csv('dataset.csv')
X = df[feature_columns]
y = df['CCS']

# Train/test split: hold out 20% of the rows, with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Exhaustive search for the best feature combination (XGBoost version)
def evaluate_all_combinations_xgboost(X_train, y_train, X_test, y_test, *,
                                      model_params=None, verbose=True):
    """Fit an XGBoost regressor on every non-empty feature subset and rank by test R².

    One model is trained per combination of the columns of ``X_train``
    (sizes 1 through all columns), so ``2**n - 1`` models are fitted for
    ``n`` columns.

    NOTE: subsets are ranked by their R² on ``X_test``/``y_test``. Because the
    same data is used both to pick the winner and to report its score, the
    returned best R² is an optimistic estimate; pass a validation split here
    if an unbiased final score is needed.

    Args:
        X_train: DataFrame of training features; its columns define the
            candidate features.
        y_train: Training target values.
        X_test: DataFrame of evaluation features; must contain the same
            columns as ``X_train``.
        y_test: Evaluation target values.
        model_params: Optional dict of ``XGBRegressor`` keyword arguments that
            override the defaults used by this function.
        verbose: When true (the default), print every combination with its
            R², best first.

    Returns:
        A tuple ``(best_features, best_r2, results_df)`` where
        ``best_features`` is the tuple of column names with the highest test
        R², ``best_r2`` is that score, and ``results_df`` holds one row per
        combination (``num_features``, ``features``, ``test_r2``) sorted by
        ``test_r2`` in descending order.

    Raises:
        ValueError: If ``X_train`` has no columns, so there is nothing to
            evaluate.
    """
    feature_names = list(X_train.columns)
    if not feature_names:
        # Without this guard the empty result frame fails later with an
        # opaque KeyError on the 'test_r2' sort key.
        raise ValueError("X_train must contain at least one feature column")

    # Hyper-parameters are identical for every subset, so build them once.
    xgb_params = {
        'n_estimators': 500,
        'learning_rate': 0.1,
        'max_depth': 6,
        'random_state': 42,
        'verbosity': 0,
        'n_jobs': -1,
    }
    if model_params:
        xgb_params.update(model_params)

    results = []
    for r in range(1, len(feature_names) + 1):
        for combo in itertools.combinations(feature_names, r):
            cols = list(combo)
            # A fresh estimator per subset keeps the fits independent.
            model = XGBRegressor(**xgb_params)
            model.fit(X_train[cols], y_train)
            y_pred = model.predict(X_test[cols])

            results.append({
                'num_features': r,
                'features': combo,
                'test_r2': r2_score(y_test, y_pred),
            })

    results_df = (
        pd.DataFrame(results)
        .sort_values(by='test_r2', ascending=False)
        .reset_index(drop=True)
    )

    if verbose:
        ranked = zip(results_df['features'], results_df['test_r2'])
        for rank, (features, r2) in enumerate(ranked, start=1):
            print(f"{rank}. Features: {features}, R² on test set: {r2:.4f}")

    best = results_df.iloc[0]
    return best['features'], best['test_r2'], results_df

# Run the exhaustive search, then report the winning feature subset and its test R².
best_combo, best_r2, all_results = evaluate_all_combinations_xgboost(
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
)
print("\n📌 최고 조합:", best_combo)
print("📈 테스트 세트 기준 R²:", best_r2)

1. Features: ('cement', 'blast furnace slag', 'water ', 'superplasticizer', 'age'), R² on test set: 0.9228
2. Features: ('cement', 'blast furnace slag', 'water ', 'superplasticizer', 'coarse aggregate', 'age'), R² on test set: 0.9222
3. Features: ('cement', 'blast furnace slag', 'fly ash', 'superplasticizer', 'fine aggregate', 'age'), R² on test set: 0.9210
4. Features: ('cement', 'blast furnace slag', 'fly ash', 'water ', 'coarse aggregate', 'age'), R² on test set: 0.9209
5. Features: ('cement', 'blast furnace slag', 'fly ash', 'superplasticizer', 'coarse aggregate', 'fine aggregate', 'age'), R² on test set: 0.9195
6. Features: ('cement', 'blast furnace slag', 'superplasticizer', 'coarse aggregate', 'age'), R² on test set: 0.9186
7. Features: ('cement', 'blast furnace slag', 'superplasticizer', 'coarse aggregate', 'fine aggregate', 'age'), R² on test set: 0.9185
8. Features: ('cement', 'blast furnace slag', 'fly ash', 'coarse aggregate', 'fine aggregate', 'age'), R² on test set: 0.917