In [2]:
import numpy as np
import pandas as pd

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, GridSearchCV

from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

In [6]:
X,y = load_breast_cancer(return_X_y = True)
X_train, X_test, y_train,y_test = train_test_split(X, y, stratify = y, random_state = 1)

In [4]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [12]:
svc = SVC(kernel = 'linear', random_state = 1, C = 200)
svc.fit(X_train_scaled, y_train)

SVC(C=200, kernel='linear', random_state=1)

In [8]:
# C: 0.01
pred_train = svc.predict(X_train_scaled)
pred_test = svc.predict(X_test_scaled)

accuracy_score(y_train, pred_train), accuracy_score(y_test, pred_test)

(0.9741784037558685, 0.972027972027972)

In [10]:
#C:1

pred_train = svc.predict(X_train_scaled)
pred_test = svc.predict(X_test_scaled)

accuracy_score(y_train, pred_train), accuracy_score(y_test, pred_test)

(0.9906103286384976, 0.965034965034965)

In [13]:
#C:200

pred_train = svc.predict(X_train_scaled)
pred_test = svc.predict(X_test_scaled)

accuracy_score(y_train, pred_train), accuracy_score(y_test, pred_test)

(0.9976525821596244, 0.965034965034965)

## 커널 서포트 백터 머신 : 비선형 데이터셋에 svm 적용 

In [14]:
from sklearn.svm import SVC

In [29]:
rbf_svc = SVC(kernel = 'rbf',
              C = 1,
              gamma = 0.01,
             random_state = 1,
             probability = True)

In [30]:
rbf_svc.fit(X_train_scaled, y_train)

SVC(C=1, gamma=0.01, probability=True, random_state=1)

In [31]:
pred_train = rbf_svc.predict(X_train_scaled)
pred_test = rbf_svc.predict(X_test_scaled)

In [32]:
accuracy_score(y_train, pred_train), accuracy_score(y_test, pred_test)

(0.9765258215962441, 0.965034965034965)

In [34]:
from sklearn.metrics import recall_score, precision_score
recall_score(y_test,pred_test), precision_score(y_test, pred_test)

(1.0, 0.9473684210526315)

In [38]:
from sklearn.metrics import roc_auc_score, average_precision_score

pos_proba= rbf_svc.predict_proba(X_train_scaled)[:,1]
pos_proba

roc_auc_score(y_train, pos_proba), average_precision_score(y_train, pos_proba)

(0.996019127034603, 0.9969714508587535)

In [42]:
#그리드서치
param = {
    'kernel':['rbf','linear'],
    'C':[0.001,0.01,0.1,1,10,100],
    'gamma':[0.001,0.01,0.1,1,10,100]
}

svc = SVC(random_state = 1, probability = True)
gs_svc = GridSearchCV(svc,
                     param_grid = param,
                     cv = 3,
                     scoring = ['accuracy', 'roc_auc'],
                     refit = 'accuracy',
                     n_jobs = -1)

In [43]:
gs_svc.fit(X_train_scaled,y_train)

GridSearchCV(cv=3, estimator=SVC(probability=True, random_state=1), n_jobs=-1,
             param_grid={'C': [0.001, 0.01, 0.1, 1, 10, 100],
                         'gamma': [0.001, 0.01, 0.1, 1, 10, 100],
                         'kernel': ['rbf', 'linear']},
             refit='accuracy', scoring=['accuracy', 'roc_auc'])

In [44]:
gs_svc.best_params_

{'C': 10, 'gamma': 0.01, 'kernel': 'rbf'}

In [45]:
pd.DataFrame(gs_svc.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_gamma,param_kernel,params,split0_test_accuracy,split1_test_accuracy,split2_test_accuracy,mean_test_accuracy,std_test_accuracy,rank_test_accuracy,split0_test_roc_auc,split1_test_roc_auc,split2_test_roc_auc,mean_test_roc_auc,std_test_roc_auc,rank_test_roc_auc
0,0.043297,0.005226,0.011944,0.000377,0.001,0.001,rbf,"{'C': 0.001, 'gamma': 0.001, 'kernel': 'rbf'}",0.626761,0.626761,0.626761,0.626761,0.00000,51,0.983676,0.988128,0.987492,0.986432,0.001966,41
1,0.023360,0.001178,0.006991,0.000830,0.001,0.001,linear,"{'C': 0.001, 'gamma': 0.001, 'kernel': 'linear'}",0.901408,0.929577,0.922535,0.917840,0.01197,42,0.986220,0.989824,0.989188,0.988411,0.001571,35
2,0.036687,0.003498,0.011995,0.004019,0.001,0.01,rbf,"{'C': 0.001, 'gamma': 0.01, 'kernel': 'rbf'}",0.626761,0.626761,0.626761,0.626761,0.00000,51,0.979224,0.988552,0.987492,0.985089,0.004170,50
3,0.027142,0.002936,0.006732,0.000422,0.001,0.01,linear,"{'C': 0.001, 'gamma': 0.01, 'kernel': 'linear'}",0.901408,0.929577,0.922535,0.917840,0.01197,42,0.986220,0.989824,0.989188,0.988411,0.001571,35
4,0.045193,0.003728,0.013442,0.002111,0.001,0.1,rbf,"{'C': 0.001, 'gamma': 0.1, 'kernel': 'rbf'}",0.626761,0.626761,0.626761,0.626761,0.00000,51,0.984524,0.982404,0.986008,0.984312,0.001479,54
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67,0.034296,0.004501,0.004602,0.000403,100,1,linear,"{'C': 100, 'gamma': 1, 'kernel': 'linear'}",0.971831,0.943662,0.957746,0.957746,0.01150,25,0.989612,0.996184,0.972228,0.986008,0.010107,44
68,0.080449,0.004079,0.014992,0.000816,100,10,rbf,"{'C': 100, 'gamma': 10, 'kernel': 'rbf'}",0.626761,0.626761,0.626761,0.626761,0.00000,51,0.762879,0.762985,0.735637,0.753834,0.012867,61
69,0.028671,0.011005,0.005078,0.000170,100,10,linear,"{'C': 100, 'gamma': 10, 'kernel': 'linear'}",0.971831,0.943662,0.957746,0.957746,0.01150,25,0.989612,0.996184,0.972228,0.986008,0.010107,44
70,0.107558,0.001697,0.016198,0.001088,100,100,rbf,"{'C': 100, 'gamma': 100, 'kernel': 'rbf'}",0.626761,0.626761,0.626761,0.626761,0.00000,51,0.500000,0.500000,0.500000,0.500000,0.000000,67


In [46]:
#교차검증
from sklearn.model_selection import cross_val_score
svc2 = SVC(C= 10, gamma = 0.01)
result = cross_val_score(svc2,
                        X_train_scaled,
                        y_train,
                        scoring = 'accuracy',
                        cv = 3)

In [47]:
result

array([0.97887324, 0.99295775, 0.97887324])

In [48]:
np.mean(result)

0.983568075117371