### 데이터 불러오기

In [2]:
# Load the wine dataset that ships with scikit-learn. The returned object
# exposes the feature matrix as `.data` and the class labels as `.target`.
from sklearn.datasets import load_wine
raw_wine = load_wine()

### 피처, 타깃 데이터 지정

In [3]:
# Separate the loaded dataset into the feature matrix (X) and the target
# vector (y) in a single assignment.
X, y = raw_wine.data, raw_wine.target

### 트레이닝/테스트 데이터 분할

In [5]:
# Hold out part of the data for the final evaluation. The split is seeded
# (random_state=0) so that it is reproducible across runs.
# NOTE(review): no `stratify=` argument is passed, so class proportions in
# the two splits are presumably not guaranteed to match -- confirm whether
# that is intended.
from sklearn.model_selection import train_test_split
X_tn, X_te, y_tn, y_te = train_test_split(X, y, random_state=0)

### 데이터 표준화

In [7]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply
# the very same fitted scaler to the test split so that no test-set
# information influences the transformation.
std_scale = StandardScaler()
X_tn_std = std_scale.fit_transform(X_tn)
X_te_std = std_scale.transform(X_te)

### 그리드 서치

In [9]:
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold, GridSearchCV

# Candidate hyperparameters: 2 kernels x 4 values of C = 8 combinations.
param_grid = {
    'kernel': ('linear', 'rbf'),
    'C': [0.5, 1, 10, 100],
}

# Seeded, shuffled 5-fold stratified splitter; the same object is reused by
# the cross-validation cells further down so the folds are comparable.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)

svc = SVC(random_state=0)

# NOTE(review): X_tn_std was standardized with statistics computed on the
# whole training split, so every validation fold has already influenced the
# scaling. Putting the scaler and the SVC in a Pipeline would remove this
# mild leakage, but would also require changing the downstream cells.
grid_cv = GridSearchCV(
    estimator=svc,
    param_grid=param_grid,
    scoring='accuracy',
    cv=kfold,
)
grid_cv.fit(X_tn_std, y_tn)

GridSearchCV(cv=StratifiedKFold(n_splits=5, random_state=0, shuffle=True),
             estimator=SVC(random_state=0),
             param_grid={'C': [0.5, 1, 10, 100], 'kernel': ('linear', 'rbf')},
             scoring='accuracy')

### 그리드 서치 결과 확인

In [11]:
# Display the raw results dictionary of the search: timing statistics, the
# tried parameter values and the per-candidate scores, one array per key.
grid_cv.cv_results_

{'mean_fit_time': array([0.0011991 , 0.00119915, 0.00059786, 0.00100083, 0.00039992,
        0.0008019 , 0.00059891, 0.00091014]),
 'std_fit_time': array([3.99425417e-04, 3.99764661e-04, 4.88170005e-04, 2.18722247e-06,
        4.89805021e-04, 4.00952278e-04, 4.89009769e-04, 1.79103845e-04]),
 'mean_score_time': array([0.00040145, 0.0004034 , 0.00019999, 0.00039921, 0.00039997,
        0.00039926, 0.0002296 , 0.00060015]),
 'std_score_time': array([0.00049167, 0.00049409, 0.00039997, 0.00048893, 0.00048986,
        0.00048899, 0.00045919, 0.00049002]),
 'param_C': masked_array(data=[0.5, 0.5, 1, 1, 10, 10, 100, 100],
              mask=[False, False, False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'param_kernel': masked_array(data=['linear', 'rbf', 'linear', 'rbf', 'linear', 'rbf',
                    'linear', 'rbf'],
              mask=[False, False, False, False, False, False, False, False],
        fill_value='?',
             dtype=ob

### 그리드 서치 결과 확인(DataFrame)

In [14]:
import numpy as np
import pandas as pd

# Tabular view of the search results, transposed so that each row is one
# cv_results_ key and each column is one of the parameter candidates.
pd.DataFrame(grid_cv.cv_results_).T

Unnamed: 0,0,1,2,3,4,5,6,7
mean_fit_time,0.001199,0.001199,0.000598,0.001001,0.0004,0.000802,0.000599,0.00091
std_fit_time,0.000399,0.0004,0.000488,0.000002,0.00049,0.000401,0.000489,0.000179
mean_score_time,0.000401,0.000403,0.0002,0.000399,0.0004,0.000399,0.00023,0.0006
std_score_time,0.000492,0.000494,0.0004,0.000489,0.00049,0.000489,0.000459,0.00049
param_C,0.5,0.5,1,1,10,10,100,100
param_kernel,linear,rbf,linear,rbf,linear,rbf,linear,rbf
params,"{'C': 0.5, 'kernel': 'linear'}","{'C': 0.5, 'kernel': 'rbf'}","{'C': 1, 'kernel': 'linear'}","{'C': 1, 'kernel': 'rbf'}","{'C': 10, 'kernel': 'linear'}","{'C': 10, 'kernel': 'rbf'}","{'C': 100, 'kernel': 'linear'}","{'C': 100, 'kernel': 'rbf'}"
split0_test_score,0.888889,0.962963,0.888889,0.925926,0.888889,0.925926,0.888889,0.925926
split1_test_score,0.962963,1.0,0.962963,0.962963,0.962963,0.962963,0.962963,0.962963
split2_test_score,0.925926,0.962963,0.925926,0.962963,0.925926,0.962963,0.925926,0.962963


### 베스트 스코어 & 하이퍼파라미터

In [21]:
# Best score found by the grid search (the search was configured with
# scoring='accuracy').
grid_cv.best_score_

0.9774928774928775

In [17]:
# Parameter combination that achieved the best score in the grid search.
grid_cv.best_params_

{'C': 0.5, 'kernel': 'rbf'}

### 최종 모형

In [22]:
# Use the estimator selected by the grid search as the final model; it is
# the object used for cross-validation and prediction in the cells below.
clf = grid_cv.best_estimator_

In [23]:
# Show the final model's configuration.
clf

SVC(C=0.5, random_state=0)

### 크로스 밸리데이션 스코어 확인(1)

In [24]:
from sklearn.model_selection import cross_validate

# Evaluate the final model with several metrics at once, reusing the same
# stratified splitter (kfold) that the grid search used.
metrics = [
    'accuracy',
    'precision_macro',
    'recall_macro',
    'f1_macro',
]
cv_scores = cross_validate(
    estimator=clf,
    X=X_tn_std,
    y=y_tn,
    scoring=metrics,
    cv=kfold,
)
cv_scores

{'fit_time': array([0.00200033, 0.00094652, 0.00099635, 0.00099754, 0.00100589]),
 'score_time': array([0.00205231, 0.00200367, 0.0019989 , 0.00200248, 0.00100017]),
 'test_accuracy': array([0.96296296, 1.        , 0.96296296, 0.96153846, 1.        ]),
 'test_precision_macro': array([0.96296296, 1.        , 0.96969697, 0.96969697, 1.        ]),
 'test_recall_macro': array([0.96666667, 1.        , 0.96296296, 0.95833333, 1.        ]),
 'test_f1_macro': array([0.9628483 , 1.        , 0.96451914, 0.96190476, 1.        ])}

### 크로스 밸리데이션 스코어 확인(2)

In [25]:
from sklearn.model_selection import cross_val_score

# Single-metric variant: per-fold accuracy of the final model on the
# standardized training data, using the shared kfold splitter.
cv_score = cross_val_score(
    estimator=clf,
    X=X_tn_std,
    y=y_tn,
    scoring='accuracy',
    cv=kfold,
)
cv_score

array([0.96296296, 1.        , 0.96296296, 0.96153846, 1.        ])

In [26]:
# Average of the per-fold accuracies computed above.
cv_score.mean()

0.9774928774928775

In [27]:
# Spread of the per-fold accuracies (array `.std()` with its default
# arguments).
cv_score.std()

0.01838434849561446

### 예측

In [29]:
# Predict class labels for the held-out test split. The input must be the
# standardized test features, because the model was fit on standardized data.
pred_svm = clf.predict(X_te_std)
pred_svm

array([0, 2, 1, 0, 1, 1, 0, 2, 1, 1, 2, 2, 0, 1, 2, 1, 0, 0, 1, 0, 1, 0,
       0, 1, 1, 1, 1, 1, 1, 2, 0, 0, 1, 0, 0, 0, 2, 1, 1, 2, 0, 0, 1, 1,
       1])

### 정확도

In [31]:
from sklearn.metrics import accuracy_score

# Fraction of test samples whose predicted label matches the true label.
accuracy = accuracy_score(y_true=y_te, y_pred=pred_svm)
accuracy

1.0

### Confusion Matrix

In [33]:
from sklearn.metrics import confusion_matrix

# Cross-tabulate true labels against predicted labels on the test split.
conf_mat = confusion_matrix(y_true=y_te, y_pred=pred_svm)
conf_mat

array([[16,  0,  0],
       [ 0, 21,  0],
       [ 0,  0,  8]], dtype=int64)

### Classification Report

In [35]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 summary for the test split. The report
# is a preformatted string, so it is printed to keep its column alignment.
class_report = classification_report(y_true=y_te, y_pred=pred_svm)
print(class_report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        16
           1       1.00      1.00      1.00        21
           2       1.00      1.00      1.00         8

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45

