### SVM을 적용한 분류 예측.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.svm import SVC
from sklearn import metrics
from sklearn.datasets import load_iris
warnings.filterwarnings(action='ignore')                  # Turn off the warnings.
%matplotlib inline

#### 1. 데이터를 읽어온다.

In [2]:
# Load the iris dataset via scikit-learn's bundled loader.
data = load_iris()

In [3]:
# Explanatory variables: the feature matrix and the name of each feature column.
X = data['data']
columns = [*data['feature_names']]
print(columns)

['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']


In [4]:
# Response variable: the class codes and the name that each code stands for.
Y = data['target']
labels = [*data['target_names']]
print(labels)

['setosa', 'versicolor', 'virginica']


In [5]:
# Hold out 30% of the samples for testing; the fixed seed keeps the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=1234,
)

#### 2. SVM 하이퍼 파라미터 최적화 (RBF kernel).

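C     : Penalty (regularization) parameter of the error term; a larger C penalizes misclassified training points more heavily. <br>
gamma : Kernel coefficient ($\gamma$) of the RBF kernel, $K(x, x') = \exp(-\gamma \lVert x - x' \rVert^2)$.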

In [7]:
# Exhaustive grid search over C and gamma for the RBF-kernel SVM.
# Candidates: C = 0.02, 0.04, ..., 0.38 and gamma = 0.02, 0.04, ..., 0.98.
C_grid = 0.02*np.arange(1,20)
gamma_grid = 0.02*np.arange(1,50)
parameters = {'C': C_grid, 'gamma' : gamma_grid}
# 19 x 49 candidates x 10 folds is several thousand fits, so actually pass
# n_jobs=-1 ("use all the CPU cores"); the original only mentioned it in a comment.
gridCV = GridSearchCV(SVC(kernel='rbf'), parameters, cv=10, n_jobs=-1)
gridCV.fit(X_train, Y_train)
# Keep the winning hyperparameters for reporting.
best_C = gridCV.best_params_['C']
best_gamma = gridCV.best_params_['gamma']

In [8]:
# Report the hyperparameters selected by the grid search.
print(f"SVM best C : {best_C!s}")
print(f"SVM best gamma : {best_gamma!s}")

SVM best C : 0.2
SVM best gamma : 0.78


In [9]:
# Prediction with the tuned model.
# Alternative: re-create the estimator from the best hyperparameters and fit it by hand.
# SVM_best = SVC(kernel='rbf', C=best_C,gamma=best_gamma)
# SVM_best.fit(X_train, Y_train);
# Here we use the best estimator that the cross-validated search already produced.
SVM_best = gridCV.best_estimator_
Y_pred = SVM_best.predict(X_test)
# Accuracy on the held-out test set, rounded to three decimals.
print(f"SVM best accuracy : {np.round(metrics.accuracy_score(Y_test, Y_pred), 3)!s}")

SVM best accuracy : 0.978


#### 3. SVM 하이퍼 파라미터 최적화 (Polynomial kernel):

In [12]:
# Exhaustive grid search over C and gamma for the polynomial-kernel SVM
# (degree is not set here, so SVC's default degree applies).
# Candidates: C = 0.0001, 0.0002, ..., 0.0029 and gamma = 0.01, 0.02, ..., 0.29.
C_grid = 0.0001*np.arange(1,30)
gamma_grid = 0.01*np.arange(1,30)
parameters = {'C': C_grid, 'gamma' : gamma_grid}
# 29 x 29 candidates x 10 folds is several thousand fits, so actually pass
# n_jobs=-1 ("use all the CPU cores"); the original only mentioned it in a comment.
gridCV = GridSearchCV(SVC(kernel='poly'), parameters, cv=10, n_jobs=-1)
gridCV.fit(X_train, Y_train)
# Keep the winning hyperparameters for reporting.
best_C = gridCV.best_params_['C']
best_gamma = gridCV.best_params_['gamma']

In [13]:
# Report the hyperparameters selected by the grid search.
print(f"SVM best C : {best_C!s}")
print(f"SVM best gamma : {best_gamma!s}")

SVM best C : 0.0007
SVM best gamma : 0.27


In [14]:
# Prediction with the tuned model.
# Alternative: re-create the estimator from the best hyperparameters and fit it by hand.
# SVM_best = SVC(kernel='poly', C=best_C,gamma=best_gamma)
# SVM_best.fit(X_train, Y_train);
# Here we use the best estimator that the cross-validated search already produced.
SVM_best = gridCV.best_estimator_
Y_pred = SVM_best.predict(X_test)
# Accuracy on the held-out test set, rounded to three decimals.
print(f"SVM best accuracy : {np.round(metrics.accuracy_score(Y_test, Y_pred), 3)!s}")

SVM best accuracy : 0.956
