# iris DataSet으로 분류 
- 다중 클래스 분류
- svm
    - GridSearch를 이용해서 최적의 C, gamma 
    - 평가지표: 정확도

In [1]:
from sklearn.datasets import load_iris
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix


##### 데이터셋 로드 및 train/test set 나누기

In [2]:
X, y = load_iris(return_X_y=True)

X_train, X_test, y_train, y_test = train_test_split(X,y, stratify=y,
                                                    random_state=930)

##### 파이프라인 생성

In [3]:
order = [
    ('scaler', StandardScaler()),
    ('svc', SVC(random_state=0))
]
pipeline = Pipeline(order, verbose=True)

##### GridSearchCV 생성 및 학습

In [6]:
param = {
    'svc__kernel': ['rbf', 'linear'],
    'svc__C':[0.001, 0.01, 0.1, 1, 10, 100],
    'svc__gamma':[0.001, 0.01, 0.1, 1, 10]
}

gs = GridSearchCV(pipeline,
                  param,
                  scoring='accuracy',
                  cv=5,
                  n_jobs=-1)

In [7]:
gs.fit(X_train, y_train)

[Pipeline] ............ (step 1 of 2) Processing scaler, total=   0.0s
[Pipeline] ............... (step 2 of 2) Processing svc, total=   0.0s


GridSearchCV(cv=5,
             estimator=Pipeline(steps=[('scaler', StandardScaler()),
                                       ('svc', SVC(random_state=0))],
                                verbose=True),
             n_jobs=-1,
             param_grid={'svc__C': [0.001, 0.01, 0.1, 1, 10, 100],
                         'svc__gamma': [0.001, 0.01, 0.1, 1, 10],
                         'svc__kernel': ['rbf', 'linear']},
             scoring='accuracy')

#####  결과확인

In [8]:
gs.best_params_

{'svc__C': 1, 'svc__gamma': 0.001, 'svc__kernel': 'linear'}

In [9]:
gs.best_score_

0.9735177865612649

In [10]:
import pandas as pd
df = pd.DataFrame(gs.cv_results_)
df.sort_values('rank_test_score').head(10)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_svc__C,param_svc__gamma,param_svc__kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
31,0.001596,0.000489,0.000399,0.000489,1,0.001,linear,"{'svc__C': 1, 'svc__gamma': 0.001, 'svc__kerne...",1.0,0.913043,0.954545,1.0,1.0,0.973518,0.034989,1
33,0.002194,0.000977,0.000599,0.000489,1,0.01,linear,"{'svc__C': 1, 'svc__gamma': 0.01, 'svc__kernel...",1.0,0.913043,0.954545,1.0,1.0,0.973518,0.034989,1
35,0.001396,0.000489,0.000598,0.000489,1,0.1,linear,"{'svc__C': 1, 'svc__gamma': 0.1, 'svc__kernel'...",1.0,0.913043,0.954545,1.0,1.0,0.973518,0.034989,1
37,0.001997,6e-06,0.000199,0.000399,1,1.0,linear,"{'svc__C': 1, 'svc__gamma': 1, 'svc__kernel': ...",1.0,0.913043,0.954545,1.0,1.0,0.973518,0.034989,1
39,0.001197,0.000399,0.000399,0.000489,1,10.0,linear,"{'svc__C': 1, 'svc__gamma': 10, 'svc__kernel':...",1.0,0.913043,0.954545,1.0,1.0,0.973518,0.034989,1
52,0.002194,0.000977,0.000399,0.000488,100,0.01,rbf,"{'svc__C': 100, 'svc__gamma': 0.01, 'svc__kern...",1.0,0.869565,0.954545,1.0,1.0,0.964822,0.050778,6
49,0.001995,0.000631,0.000399,0.000489,10,10.0,linear,"{'svc__C': 10, 'svc__gamma': 10, 'svc__kernel'...",1.0,0.913043,0.909091,1.0,1.0,0.964427,0.043586,7
47,0.001795,0.000399,0.000399,0.000489,10,1.0,linear,"{'svc__C': 10, 'svc__gamma': 1, 'svc__kernel':...",1.0,0.913043,0.909091,1.0,1.0,0.964427,0.043586,7
45,0.002194,0.000746,0.0,0.0,10,0.1,linear,"{'svc__C': 10, 'svc__gamma': 0.1, 'svc__kernel...",1.0,0.913043,0.909091,1.0,1.0,0.964427,0.043586,7
41,0.001396,0.000489,0.000598,0.000489,10,0.001,linear,"{'svc__C': 10, 'svc__gamma': 0.001, 'svc__kern...",1.0,0.913043,0.909091,1.0,1.0,0.964427,0.043586,7


##### test set으로 최종 평가

In [13]:
pred_test = gs.predict(X_test)
accuracy_score(y_test, pred_test)

0.9473684210526315