<a href="https://colab.research.google.com/github/YonggunJung/colab/blob/main/GridSearchCV.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np

In [2]:
# GridSearchCV and performance-evaluation metrics
# GridSearchCV
# Purpose: find the optimal hyperparameters for a model
# 1. Specify the parameters as a dictionary
## parameters = {'max_depth':[1, 2, 3], 'min_samples_split':[2, 3]}
# 2. Apply GridSearchCV
## Import location: sklearn.model_selection.GridSearchCV
## grid-search object = GridSearchCV(model, param_grid = parameters, cv = number of CV folds, refit = True, return_train_score = True)
# 3. grid-search object.fit() => training
# 4. Evaluation

In [4]:
# Load the iris dataset through scikit-learn's datasets module; the bare name
# on the last line makes the notebook display the loaded object.
from sklearn.datasets import load_iris
iris = load_iris()
iris

{'data': array([[5.1, 3.5, 1.4, 0.2],
        [4.9, 3. , 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2],
        [4.6, 3.1, 1.5, 0.2],
        [5. , 3.6, 1.4, 0.2],
        [5.4, 3.9, 1.7, 0.4],
        [4.6, 3.4, 1.4, 0.3],
        [5. , 3.4, 1.5, 0.2],
        [4.4, 2.9, 1.4, 0.2],
        [4.9, 3.1, 1.5, 0.1],
        [5.4, 3.7, 1.5, 0.2],
        [4.8, 3.4, 1.6, 0.2],
        [4.8, 3. , 1.4, 0.1],
        [4.3, 3. , 1.1, 0.1],
        [5.8, 4. , 1.2, 0.2],
        [5.7, 4.4, 1.5, 0.4],
        [5.4, 3.9, 1.3, 0.4],
        [5.1, 3.5, 1.4, 0.3],
        [5.7, 3.8, 1.7, 0.3],
        [5.1, 3.8, 1.5, 0.3],
        [5.4, 3.4, 1.7, 0.2],
        [5.1, 3.7, 1.5, 0.4],
        [4.6, 3.6, 1. , 0.2],
        [5.1, 3.3, 1.7, 0.5],
        [4.8, 3.4, 1.9, 0.2],
        [5. , 3. , 1.6, 0.2],
        [5. , 3.4, 1.6, 0.4],
        [5.2, 3.5, 1.5, 0.2],
        [5.2, 3.4, 1.4, 0.2],
        [4.7, 3.2, 1.6, 0.2],
        [4.8, 3.1, 1.6, 0.2],
        [5.4, 3.4, 1.5, 0.4],
        [5.2, 4.1, 1.5, 0.1],
  

In [5]:
# Feature matrix and target vector taken from the loaded iris dataset
X, y = iris.data, iris.target

# Display both shapes together as a tuple
(X.shape, y.shape)

((150, 4), (150,))

In [8]:
# Create the model and check its parameters
from sklearn.neighbors import KNeighborsClassifier

# No arguments are passed, so the classifier keeps its default settings
modelknn = KNeighborsClassifier()
# Display the model's current hyperparameters as a dict
modelknn.get_params()

{'algorithm': 'auto',
 'leaf_size': 30,
 'metric': 'minkowski',
 'metric_params': None,
 'n_jobs': None,
 'n_neighbors': 5,
 'p': 2,
 'weights': 'uniform'}

In [10]:
# Search space for GridSearchCV, given as {hyperparameter name: [values...]}.
# n_neighbors is tried at the odd values 3, 5, 7, 9 and 11.
parameters = {'n_neighbors': [*range(3, 12, 2)]}



In [12]:
# Create the grid-search object
from sklearn.model_selection import GridSearchCV

grid = GridSearchCV(
    estimator=modelknn,
    param_grid=parameters,
    cv=5,                     # number of cross-validation folds
    refit=True,               # retrain the best candidate once the search finishes
    return_train_score=True,  # also record train scores in cv_results_
    verbose=1,
)
grid

In [13]:
# Training with the grid-search object
# grid-search object.fit() => training

# First split the data into a training set and a test set
from sklearn.model_selection import train_test_split

Xtr, Xval, ytr, yval = train_test_split(
    X,
    y,
    test_size=0.2,        # hold out 20% of the samples
    random_state=202309,  # fixed seed so the split is reproducible
)

In [14]:
# Training: run the grid search on the training split only (Xtr, ytr);
# the held-out Xval / yval are not passed in here.
grid.fit(Xtr, ytr)

Fitting 5 folds for each of 5 candidates, totalling 25 fits


In [16]:
# Evaluation: report the best parameters, the best cross-validation score,
# and the full parameter set of the best estimator.
best_summary = (
    ('최적의 파라미터 =>', grid.best_params_),
    ('최적의 평가 점수 =>', grid.best_score_),
    ('최적의 모델 =>', grid.best_estimator_.get_params()),
)
for label, value in best_summary:
    print(label, value)

최적의 파라미터 => {'n_neighbors': 3}
최적의 평가 점수 => 0.9666666666666666
최적의 모델 => {'algorithm': 'auto', 'leaf_size': 30, 'metric': 'minkowski', 'metric_params': None, 'n_jobs': None, 'n_neighbors': 3, 'p': 2, 'weights': 'uniform'}


In [17]:
# Per-candidate cross-validation results: cv_results_ (a dict) => DataFrame.
# Note the attribute is spelled `cv_results_`, with a plural "results".
df_score = pd.DataFrame(data=grid.cv_results_)
df_score

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_n_neighbors,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,...,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,split3_train_score,split4_train_score,mean_train_score,std_train_score
0,0.001123,0.000209,0.008153,0.003868,3,{'n_neighbors': 3},1.0,0.916667,1.0,0.958333,...,0.966667,0.03118,1,0.947917,1.0,0.958333,0.958333,0.958333,0.964583,0.018162
1,0.001137,7.7e-05,0.006924,0.004162,5,{'n_neighbors': 5},0.958333,0.833333,1.0,0.958333,...,0.95,0.061237,2,0.979167,1.0,0.96875,0.96875,0.96875,0.977083,0.012148
2,0.004064,0.003616,0.005895,0.003124,7,{'n_neighbors': 7},0.916667,0.833333,1.0,1.0,...,0.941667,0.062361,5,0.979167,1.0,0.979167,0.96875,0.979167,0.98125,0.010206
3,0.001089,0.000119,0.009914,0.004753,9,{'n_neighbors': 9},0.958333,0.833333,0.958333,1.0,...,0.95,0.061237,2,0.979167,0.989583,0.979167,0.96875,0.989583,0.98125,0.007795
4,0.002507,0.002944,0.007693,0.00498,11,{'n_neighbors': 11},0.958333,0.875,0.958333,1.0,...,0.95,0.040825,2,0.979167,1.0,0.989583,0.958333,0.958333,0.977083,0.016667


In [18]:
# List the attributes and methods available on the fitted grid-search object,
# including the ones used in this notebook:
# 'best_params_'
# 'best_score_'
# 'best_estimator_'
# 'cv_results_'
print(dir(grid))

['__abstractmethods__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setstate__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_abc_impl', '_check_feature_names', '_check_n_features', '_check_refit_for_multimetric', '_estimator_type', '_format_results', '_get_param_names', '_get_tags', '_more_tags', '_repr_html_', '_repr_html_inner', '_repr_mimebundle_', '_required_parameters', '_run_search', '_select_best_index', '_validate_data', '_validate_params', 'best_estimator_', 'best_index_', 'best_params_', 'best_score_', 'classes_', 'cv', 'cv_results_', 'decision_function', 'error_score', 'estimator', 'fit', 'get_params', 'inverse_transform', 'multimetric_', 'n_features_in_', 'n_jobs', 'n_splits_', 'param_grid', 'pre_dispa

In [21]:
# Predict on the held-out set with the best model found by the search.
# The grid-search object was created with refit=True, so grid.fit(Xtr, ytr)
# has already retrained the best candidate on the whole training split;
# fitting best_estimator_ a second time on the same data is redundant.
# grid.predict() delegates to that refitted best estimator.
pred = grid.predict(Xval)


In [22]:
# Performance evaluation: accuracy of the predictions on the held-out labels
from sklearn.metrics import accuracy_score

accuracy_score(y_true=yval, y_pred=pred)

0.9666666666666667