In [5]:
import pandas as pd
import itertools
from sklearn.model_selection import KFold, cross_val_score
from sklearn.ensemble import RandomForestRegressor
import optuna

In [6]:
def optuna_tune_randomforest(X, y, n_trials=50, sampler_seed=42):
    """Tune a RandomForestRegressor with Optuna, scored by 5-fold CV R².

    Each trial samples a set of forest hyperparameters, evaluates the model
    with shuffled 5-fold cross-validation and reports the mean R² back to
    the study, which is maximized.

    Parameters
    ----------
    X : array-like or DataFrame
        Feature matrix, passed unchanged to ``cross_val_score``.
    y : array-like or Series
        Target values aligned with ``X``.
    n_trials : int, default 50
        Number of Optuna trials to run.
    sampler_seed : int or None, default 42
        Seed for the TPE sampler. The model and the CV splitter are already
        pinned to ``random_state=42``; seeding the sampler as well makes the
        sequence of sampled hyperparameters repeatable from run to run.
        Pass ``None`` to get an unseeded (non-reproducible) search.

    Returns
    -------
    tuple
        ``(best_value, best_params)``: the best mean CV R² and the sampled
        hyperparameters of that trial. The fixed ``random_state`` and
        ``n_jobs`` settings are not part of ``best_params`` because they are
        not suggested through the trial.
    """
    # The splitter is the same for every trial, so build it once.
    kf = KFold(n_splits=5, shuffle=True, random_state=42)

    def objective(trial):
        params = {
            'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
            'max_depth': trial.suggest_int('max_depth', 3, 20),
            'min_samples_split': trial.suggest_int('min_samples_split', 2, 10),
            'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 5),
            'max_features': trial.suggest_categorical('max_features', ['sqrt', 'log2', None]),
            'random_state': 42,
            'n_jobs': -1
        }

        model = RandomForestRegressor(**params)
        scores = cross_val_score(model, X, y, cv=kf, scoring='r2')
        return scores.mean()

    # An explicit, seeded TPE sampler keeps the search algorithm the same
    # while making the trial sequence deterministic.
    sampler = optuna.samplers.TPESampler(seed=sampler_seed)
    study = optuna.create_study(direction='maximize', sampler=sampler)
    study.optimize(objective, n_trials=n_trials)
    return study.best_trial.value, study.best_trial.params

# Load the dataset and separate the eight input features from the target.
df = pd.read_csv('dataset.csv')
X = df[[
    'cement',
    'blast furnace slag',
    'fly ash',
    'water ',  # the trailing space is part of the column label selected here
    'superplasticizer',
    'coarse aggregate',
    'fine aggregate',
    'age',
]]
y = df['CCS']

# Run the hyperparameter search on the full set of eight features.
best_r2, best_params = optuna_tune_randomforest(X, y, n_trials=50)

# Report the result; the pieces are newline-separated, so the printed text
# is the same as emitting them with one print call per line.
print(
    "\n‚úÖ 8Í∞ú feature Ï†ÑÏ≤¥ Ï°∞Ìï© Í≤∞Í≥º:",
    f"üìå Feature Ï°∞Ìï©: {tuple(X.columns)}",
    f"üìà ÏµúÏ†Å R¬≤: {best_r2:.4f}",
    f"‚öôÔ∏è ÏµúÏ†Å ÌïòÏù¥ÌçºÌååÎùºÎØ∏ÌÑ∞: {best_params}",
    sep="\n",
)

[I 2025-07-18 11:27:31,142] A new study created in memory with name: no-name-79ba9b7c-b97b-4c4b-b813-25c8b450edde
[I 2025-07-18 11:27:38,118] Trial 0 finished with value: 0.8503671648288644 and parameters: {'n_estimators': 949, 'max_depth': 7, 'min_samples_split': 3, 'min_samples_leaf': 5, 'max_features': 'log2'}. Best is trial 0 with value: 0.8503671648288644.
[I 2025-07-18 11:27:41,406] Trial 1 finished with value: 0.8885880551873019 and parameters: {'n_estimators': 435, 'max_depth': 12, 'min_samples_split': 8, 'min_samples_leaf': 3, 'max_features': None}. Best is trial 1 with value: 0.8885880551873019.
[I 2025-07-18 11:27:46,418] Trial 2 finished with value: 0.8948488584857026 and parameters: {'n_estimators': 684, 'max_depth': 19, 'min_samples_split': 7, 'min_samples_leaf': 2, 'max_features': None}. Best is trial 2 with value: 0.8948488584857026.
[I 2025-07-18 11:27:50,810] Trial 3 finished with value: 0.8889665231137063 and parameters: {'n_estimators': 583, 'max_depth': 16, 'min_sa


‚úÖ 8Í∞ú feature Ï†ÑÏ≤¥ Ï°∞Ìï© Í≤∞Í≥º:
üìå Feature Ï°∞Ìï©: ('cement', 'blast furnace slag', 'fly ash', 'water ', 'superplasticizer', 'coarse aggregate', 'fine aggregate', 'age')
üìà ÏµúÏ†Å R¬≤: 0.9089
‚öôÔ∏è ÏµúÏ†Å ÌïòÏù¥ÌçºÌååÎùºÎØ∏ÌÑ∞: {'n_estimators': 198, 'max_depth': 17, 'min_samples_split': 2, 'min_samples_leaf': 1, 'max_features': 'log2'}
