### SVM(Support Vector Machine)

- ML에서 오랫동안 연구되었고 성능이 좋은 모델
- DL보다 성능이 좋은 경우도 있음

In [168]:
from sklearn.svm import SVC
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import numpy as np
import matplotlib.pyplot as plt

### [1] 데이터 준비

In [169]:
# Load the digits dataset that ships with scikit-learn.
digit = load_digits()

In [170]:
# Pull the feature matrix and the label vector out of the loaded dataset.
data, target = digit['data'], digit['target']

In [171]:
# Report the array shapes: data has one row per sample, target one label per sample.
print(f'data : {data.shape}, target : {target.shape}')

data : (1797, 64), target : (1797,)


In [172]:
# Show the first sample in both stored forms: the flat feature vector under
# "data" and the 2-D pixel grid under "images".
print(f'data[0] : {digit["data"][0]}')
print(f'images[0] : {digit["images"][0]}')

data[0] : [ 0.  0.  5. 13.  9.  1.  0.  0.  0.  0. 13. 15. 10. 15.  5.  0.  0.  3.
 15.  2.  0. 11.  8.  0.  0.  4. 12.  0.  0.  8.  8.  0.  0.  5.  8.  0.
  0.  9.  8.  0.  0.  4. 11.  0.  1. 12.  7.  0.  0.  2. 14.  5. 10. 12.
  0.  0.  0.  0.  6. 13. 10.  0.  0.  0.]
images[0] : [[ 0.  0.  5. 13.  9.  1.  0.  0.]
 [ 0.  0. 13. 15. 10. 15.  5.  0.]
 [ 0.  3. 15.  2.  0. 11.  8.  0.]
 [ 0.  4. 12.  0.  0.  8.  8.  0.]
 [ 0.  5.  8.  0.  0.  9.  8.  0.]
 [ 0.  4. 11.  0.  1. 12.  7.  0.]
 [ 0.  2. 14.  5. 10. 12.  0.  0.]
 [ 0.  0.  6. 13. 10.  0.  0.  0.]]


### [2] 학습용 & 테스트용 데이터 분리

In [173]:
# Hold out part of the samples for evaluation. random_state pins the shuffle
# so the same split is produced on every run; no test_size is given, so the
# library's default split ratio applies.
train_input, test_input, train_target, test_target = train_test_split(
    data, target, random_state=42
)

### [3] - 1 모델 생성 (로지스틱)

In [174]:
# Logistic-regression baseline; the solver is allowed up to 5000 iterations.
lr = LogisticRegression(max_iter=5000)

In [175]:
# Fit the baseline on the (unscaled) training split.
lr.fit(train_input, train_target)

LogisticRegression(max_iter=5000)

In [176]:
# Accuracy on the training split only.
# NOTE(review): no held-out score is computed for this model in this cell —
# confirm whether a test-split evaluation is wanted for the comparison.
lr.score(train_input, train_target)

1.0

### [4] - 1 예측 (로지스틱)

In [177]:
# Predict the class of a single sample; it is wrapped in a list so that it is
# passed as a one-row batch.
# NOTE(review): the sample is taken from the full dataset, so it may belong to
# the training split.
print(lr.predict([digit['data'][1]]))

[1]


### [3] - 2 모델 생성 (SVC)

In [178]:
# Train a support vector classifier with its default hyperparameters on the
# unscaled training split, then display the fitted estimator.
model = SVC()
model.fit(train_input, train_target)
model

SVC()

In [179]:
# Accuracy of the default SVC on the training split.
model.score(train_input, train_target)

0.9962880475129918

In [180]:
# Accuracy of the default SVC on the held-out test split.
model.score(test_input, test_target)

0.9866666666666667

### [4] - 2 예측 (SVC)

In [181]:
# Predict the class of a single sample, passed as a one-row batch.
# NOTE(review): the sample is taken from the full dataset, so it may belong to
# the training split.
print(model.predict([digit['data'][2]]))

[2]


### [5] 성능 튜닝 (스케일링)

In [182]:
from sklearn.preprocessing import MinMaxScaler

In [183]:
# Fit the min-max scaler on the training split only, so that the scaling
# parameters are not influenced by the test split.
scaler = MinMaxScaler().fit(train_input)

# Apply the same fitted transformation to the training and test splits.
train_input_scaled, test_input_scaled = (
    scaler.transform(split) for split in (train_input, test_input)
)

In [184]:
# Retrain the SVC on the scaled features with explicit C and gamma values.
model = SVC(C=5, gamma=0.1)
model.fit(train_input_scaled, train_target)

# Evaluate on both scaled splits and print each accuracy with three decimals.
print("훈련 세트 정확도: {:.3f}".format(model.score(train_input_scaled, train_target)))
print("테스트 세트 정확도: {:.3f}".format(model.score(test_input_scaled, test_target)))

훈련 세트 정확도: 0.999
테스트 세트 정확도: 0.991


### [6] 모델 예측 (predict)

In [185]:
# The current model was trained on scaled features, so the sample is passed
# through the already-fitted scaler before prediction.
print(model.predict(scaler.transform([digit['data'][7]])))

[7]


### C = 5, gamma = 0.1일 때 테스트 정확도가 가장 높음 (0.991; 아래 출력에서는 gamma = 0.25, C ≥ 5 조합도 같은 테스트 정확도를 보임)

In [167]:
# Grid search over the RBF-kernel SVM hyperparameters C and gamma.
# Each (gamma, C) pair is fitted on the scaled training split and scored on
# both the scaled training split and the scaled test split.
# NOTE(review): the test split is used here to compare settings, so the best
# test score is an optimistic estimate — consider a separate validation split
# or cross-validation for the selection itself.
for thisGamma in [.1, .25, .5, 1]:
    for thisC in [1, 5, 10, 20, 40, 100]:
        model3 = SVC(kernel="rbf", C=thisC, gamma=thisGamma)
        model3.fit(train_input_scaled, train_target)
        m3train = model3.score(train_input_scaled, train_target)
        m3test = model3.score(test_input_scaled, test_target)
        # Six decimals are requested explicitly with ".6f"; a bare width such
        # as "2f" only sets a minimum field width and does not limit precision.
        print("RBF SVM : C:{}, gamma:{},training score:{:.6f},test score:{:.6f} \n".format(thisC, thisGamma, m3train, m3test))

RBF SVM : C:1, gamma:0.1,training score:0.996288,test score:0.986667 

RBF SVM : C:5, gamma:0.1,training score:0.999258,test score:0.991111 

RBF SVM : C:10, gamma:0.1,training score:1.000000,test score:0.988889 

RBF SVM : C:20, gamma:0.1,training score:1.000000,test score:0.988889 

RBF SVM : C:40, gamma:0.1,training score:1.000000,test score:0.988889 

RBF SVM : C:100, gamma:0.1,training score:1.000000,test score:0.988889 

RBF SVM : C:1, gamma:0.25,training score:0.997773,test score:0.988889 

RBF SVM : C:5, gamma:0.25,training score:1.000000,test score:0.991111 

RBF SVM : C:10, gamma:0.25,training score:1.000000,test score:0.991111 

RBF SVM : C:20, gamma:0.25,training score:1.000000,test score:0.991111 

RBF SVM : C:40, gamma:0.25,training score:1.000000,test score:0.991111 

RBF SVM : C:100, gamma:0.25,training score:1.000000,test score:0.991111 

RBF SVM : C:1, gamma:0.5,training score:1.000000,test score:0.988889 

RBF SVM : C:5, gamma:0.5,training score:1.000000,test score:0