그리드 서치
- 사용자가 지정한 하이퍼파라미터의 가능한 조합을 체계적으로 시도
- 지정한 값 내의 모든 조합을 시도
- 설정할 하이퍼파라미터가 적을 때 유리

랜덤 그리드 서치
- 사용자가 지정한 '범위' 내에서 무작위로 뽑은 일부 조합만 시도 (시도 횟수는 `n_iter`로 지정)
- 무작위성이 들어가기 때문에 최적 조합을 놓칠 가능성 존재

In [1]:
# 라이브러리 import
from sklearn.model_selection import GridSearchCV #GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

# Load the breast-cancer dataset and hold out 20% of it as a test split.
data = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(
    data.data,
    data.target,
    test_size=0.2,
    random_state=42,
)


In [2]:
# Create the random-forest classifier that is passed to the grid search as its estimator.
# random_state is pinned to 42 — presumably so repeated runs give the same result.
model1 = RandomForestClassifier(random_state=42)

In [4]:
# Candidate values for the grid search. Every combination is evaluated,
# i.e. 3 * 4 * 3 = 36 candidates.
param_grid = {
    'n_estimators': [100, 200, 300],  # number of trees in the forest
    'max_depth': [None, 10, 20, 30],  # maximum tree depth; None leaves the depth unlimited
    'min_samples_split': [2, 5, 10],  # minimum samples required to split an internal node
}

In [5]:
# Exhaustive search over param_grid, scoring every candidate with 5-fold CV.
grid_search = GridSearchCV(
    estimator=model1,
    param_grid=param_grid,
    cv=5,
    verbose=2,  # verbosity of the log messages printed while fitting
)
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 36 candidates, totalling 180 fits
[CV] END max_depth=None, min_samples_split=2, n_estimators=100; total time=   0.2s
[CV] END max_depth=None, min_samples_split=2, n_estimators=100; total time=   0.1s
[CV] END max_depth=None, min_samples_split=2, n_estimators=100; total time=   0.1s
[CV] END max_depth=None, min_samples_split=2, n_estimators=100; total time=   0.1s
[CV] END max_depth=None, min_samples_split=2, n_estimators=100; total time=   0.1s
[CV] END max_depth=None, min_samples_split=2, n_estimators=200; total time=   0.3s
[CV] END max_depth=None, min_samples_split=2, n_estimators=200; total time=   0.4s
[CV] END max_depth=None, min_samples_split=2, n_estimators=200; total time=   0.3s
[CV] END max_depth=None, min_samples_split=2, n_estimators=200; total time=   0.2s
[CV] END max_depth=None, min_samples_split=2, n_estimators=200; total time=   0.2s
[CV] END max_depth=None, min_samples_split=2, n_estimators=300; total time=   0.4s
[CV] END max_depth=None, 

In [6]:
# Report the best hyperparameter combination and its mean cross-validated score,
# one value per line.
print(grid_search.best_params_, grid_search.best_score_, sep="\n")

{'max_depth': None, 'min_samples_split': 2, 'n_estimators': 200}
0.9626373626373625


In [7]:
# Take the best estimator found by the grid search and measure its
# accuracy on the held-out test split.
best_model = grid_search.best_estimator_
accuracy = best_model.score(X_test, y_test)
print(f"Best Model Accuracy: {accuracy}")

Best Model Accuracy: 0.9649122807017544


# 랜덤 그리드 서치


In [8]:
# 라이브러리 임포트
from sklearn.model_selection import RandomizedSearchCV #RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from scipy.stats import randint

In [9]:
# Reload the breast-cancer dataset and recreate the same 80/20 split
# (same random_state as the grid-search section).
data = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(
    data.data,
    data.target,
    test_size=0.2,
    random_state=42,
)


In [10]:
# Fresh random-forest classifier used as the estimator for the randomized search.
model2 = RandomForestClassifier(random_state=42)

In [11]:
# Search space for the randomized search: distributions are sampled from,
# lists have one of their elements picked.
param_dist = {
    'n_estimators': randint(50, 300), # random integer in 50..299 (randint's upper bound is exclusive)
    'max_depth': [None, 10, 20, 30, 40, 50], # one of the listed values is picked; this is a discrete list, not a range
    'min_samples_split': randint(2, 11), # random integer in 2..10
}

In [12]:
# Configure the randomized search: 20 sampled candidates, each scored with 5-fold CV.
random_search = RandomizedSearchCV(
    estimator=model2,
    param_distributions=param_dist,
    n_iter=20,
    cv=5,
    verbose=2,
    random_state=42,
)

`n_iter` : 무작위로 샘플링하여 시도할 하이퍼파라미터 조합의 개수  
`param_distributions` : 하이퍼파라미터별 탐색 범위(분포 또는 값 목록). 이 범위 내에서 임의로 선택된 값으로 모델을 학습


In [13]:
# Run the randomized search on the training split
# (n_iter=20 candidates x cv=5 folds = 100 fits).
random_search.fit(X_train, y_train)

Fitting 5 folds for each of 20 candidates, totalling 100 fits
[CV] END max_depth=30, min_samples_split=9, n_estimators=238; total time=   0.5s
[CV] END max_depth=30, min_samples_split=9, n_estimators=238; total time=   0.3s
[CV] END max_depth=30, min_samples_split=9, n_estimators=238; total time=   0.4s
[CV] END max_depth=30, min_samples_split=9, n_estimators=238; total time=   0.4s
[CV] END max_depth=30, min_samples_split=9, n_estimators=238; total time=   0.3s
[CV] END max_depth=40, min_samples_split=8, n_estimators=171; total time=   0.2s
[CV] END max_depth=40, min_samples_split=8, n_estimators=171; total time=   0.2s
[CV] END max_depth=40, min_samples_split=8, n_estimators=171; total time=   0.2s
[CV] END max_depth=40, min_samples_split=8, n_estimators=171; total time=   0.2s
[CV] END max_depth=40, min_samples_split=8, n_estimators=171; total time=   0.2s
[CV] END max_depth=20, min_samples_split=8, n_estimators=124; total time=   0.1s
[CV] END max_depth=20, min_samples_split=8, n_e

In [14]:
# Report the best sampled hyperparameter combination and its mean
# cross-validated score, one value per line.
print(random_search.best_params_, random_search.best_score_, sep="\n")

{'max_depth': None, 'min_samples_split': 2, 'n_estimators': 268}
0.9604395604395604


In [15]:
# Take the best estimator found by the randomized search and measure its
# accuracy on the held-out test split.
# NOTE(review): this rebinds best_model / accuracy from the grid-search section.
best_model = random_search.best_estimator_
accuracy = best_model.score(X_test, y_test)
print(f"Best Model Accuracy: {accuracy}")

Best Model Accuracy: 0.9649122807017544
