### **1. 랜덤 포레스트**

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [None]:
# Baseline: a random forest that overrides only the seed. It is fitted on X, y
# (defined earlier in the notebook) and then used to predict on `test`.
rf_clf = RandomForestClassifier(random_state=156).fit(X, y)
y_pred_rf = rf_clf.predict(test)

리더보드 점수: 0.686

### **1-1. 랜덤 포레스트 하이퍼 파라미터 튜닝**

GridSearch

In [None]:
from sklearn.model_selection import GridSearchCV

# Exhaustive grid: 3 * 4 * 3 * 3 = 108 candidate configurations,
# each evaluated with 5-fold cross-validation.
params = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 8, 16],
    'min_samples_leaf': [1, 5, 8],
}

# Parallelism is requested once, on the search itself. The estimator keeps its
# default n_jobs so that each search worker does not also spawn one thread per
# core (nesting n_jobs=-1 oversubscribes the CPU). With a fixed random_state
# the fitted forests do not depend on n_jobs.
rf_clf = RandomForestClassifier(random_state=156)
grid_cv = GridSearchCV(
    rf_clf,
    param_grid=params,
    cv=5,
    n_jobs=-1,
)
grid_cv.fit(X, y)

In [None]:
# Report the configuration selected by the grid search and its best CV score.
grid_best_params = grid_cv.best_params_
grid_best_score = grid_cv.best_score_
print('최적 하이퍼 파라미터:\n', grid_best_params)
print('최고 예측 정확도: {0:.4f}'.format(grid_best_score))

최적 하이퍼 파라미터:
 {'max_depth': 20, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 300}
최고 예측 정확도: 0.6776


In [None]:
# Refit with the hyperparameters the grid search reported as best
# (values copied from its printed output), then predict on `test`.
rf_clf = RandomForestClassifier(
    n_estimators=300,
    max_depth=20,
    min_samples_split=2,
    min_samples_leaf=1,
    random_state=156,
).fit(X, y)
y_pred_rf = rf_clf.predict(test)

리더보드 점수: 0.686

RandomizedSearch

In [None]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint, uniform

# Sampling space. List entries are picked at random; scipy's randint(low, high)
# excludes `high`, so min_samples_split ranges over 2..10 and
# min_samples_leaf over 1..4.
param_dist = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20],
    'min_samples_split': randint(2, 11),
    'min_samples_leaf': randint(1, 5),
}

# Parallelism is requested once, on the search itself. The estimator keeps its
# default n_jobs so that each search worker does not also spawn one thread per
# core (nesting n_jobs=-1 oversubscribes the CPU). With a fixed random_state
# the fitted forests do not depend on n_jobs.
rf_clf = RandomForestClassifier(random_state=156)

random_search = RandomizedSearchCV(
    rf_clf,
    param_distributions=param_dist,
    n_iter=50,  # evaluate 50 sampled configurations rather than the whole space
    cv=5,
    n_jobs=-1,
    random_state=42,
)
random_search.fit(X, y)

In [None]:
# Report the configuration selected by the randomized search and its best CV score.
random_best_params = random_search.best_params_
random_best_score = random_search.best_score_
print('최적 하이퍼 파라미터:\n', random_best_params)
print('최고 예측 정확도: {0:.4f}'.format(random_best_score))

최적 하이퍼 파라미터:
 {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 4, 'n_estimators': 300}
최고 예측 정확도: 0.6755


In [None]:
# Refit with the hyperparameters the randomized search reported as best
# (values copied from its printed output), then predict on `test`.
rf_clf = RandomForestClassifier(
    n_estimators=300,
    max_depth=None,
    min_samples_split=4,
    min_samples_leaf=1,
    random_state=156,
).fit(X, y)
y_pred_rf = rf_clf.predict(test)

리더보드 점수: 0.675

Hyperopt

In [None]:
from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier
from hyperopt import fmin, tpe, hp, Trials, STATUS_OK
import numpy as np

# Hyperparameter search space. hp.quniform yields floats rounded to multiples
# of the step size, so values are cast to int before reaching the estimator.
space = {
    'n_estimators': hp.quniform('n_estimators', 50, 300, 10),
    'max_depth': hp.quniform('max_depth', 3, 20, 1),
    'min_samples_split': hp.quniform('min_samples_split', 2, 10, 1),
    'min_samples_leaf': hp.quniform('min_samples_leaf', 1, 5, 1),
}


def objective(params):
    """Score one sampled configuration for hyperopt.

    Args:
        params: dict of sampled values keyed like ``space``; the values
            arrive as floats.

    Returns:
        dict with ``loss`` set to the negated mean 5-fold accuracy (fmin
        minimises, so a higher accuracy gives a lower loss) and ``status``
        set to ``STATUS_OK``.
    """
    # Cast into a fresh dict instead of mutating the one hyperopt handed in.
    int_params = {name: int(value) for name, value in params.items()}

    # n_jobs=-1 builds the trees in parallel; with a fixed random_state this
    # does not change the score.
    # NOTE(review): the search seeds the forest with 42 while the refit cells
    # use 156 — confirm this difference is intended.
    rf_clf = RandomForestClassifier(**int_params, random_state=42, n_jobs=-1)
    score = cross_val_score(rf_clf, X, y, cv=5, scoring='accuracy').mean()

    return {'loss': -score, 'status': STATUS_OK}


# Run the search: 50 evaluations of the objective, seeded for repeatability.
trials = Trials()
best = fmin(
    fn=objective,
    space=space,
    algo=tpe.suggest,
    max_evals=50,
    trials=trials,
    rstate=np.random.default_rng(42),
)

# fmin returns the raw float values; cast them to int so the reported
# configuration can be passed to RandomForestClassifier as-is.
best_rf_params = {name: int(value) for name, value in best.items()}

# Print the result.
print("\nBest Hyperparameters:")
print(best_rf_params)

100%|██████████| 50/50 [07:47<00:00,  9.35s/trial, best loss: -0.6785466126230457]

Best Hyperparameters:
{'max_depth': np.float64(20.0), 'min_samples_leaf': np.float64(1.0), 'min_samples_split': np.float64(4.0), 'n_estimators': np.float64(200.0)}


In [None]:
# Refit with the hyperparameters Hyperopt reported as best
# (values copied from its printed output), then predict on `test`.
rf_clf = RandomForestClassifier(
    n_estimators=200,
    max_depth=20,
    min_samples_split=4,
    min_samples_leaf=1,
    random_state=156,
).fit(X, y)
y_pred_rf = rf_clf.predict(test)

리더보드 점수: 0.674