# 乳癌資料庫預測SVM分類
>使用scikit-learn 機器學習套件裡的SVR演算法

* (一)引入函式庫及內建乳癌資料集<br>
引入之函式庫如下<br>
sklearn.datasets: 用來匯入內建之乳癌資料集`datasets.load_breast_cancer()`<br>
sklearn.SVR: 支持向量機回歸分析之演算法<br>
matplotlib.pyplot: 用來繪製影像

In [1]:
from sklearn import svm
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

## Step1. 下載資料

In [2]:
breast_cancer=datasets.load_breast_cancer()

In [3]:
breast_cancer.keys()

dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names', 'filename'])

In [4]:
#import pandas as pd
#x = pd.DataFrame(breast_cancer['data'],columns=breast_cancer['feature_names'])
#y = pd.DataFrame(breast_cancer['target'], columns=['target'])

In [5]:
x = breast_cancer.data
x

array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
        1.189e-01],
       [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
        8.902e-02],
       [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
        8.758e-02],
       ...,
       [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
        7.820e-02],
       [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
        1.240e-01],
       [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
        7.039e-02]])

In [6]:
y = breast_cancer.target
y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,
       0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,
       1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0,

## Step2. 區分訓練集與測試集

In [7]:
x_train, x_test, y_train, y_test = train_test_split(x, y,test_size=0.3)

In [8]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler().fit(x_train)  ##注意StandardScaler要()
x_train_std = sc.transform(x_train)
sc = StandardScaler().fit(x_test)
x_test_std = sc.transform(x_test)

## Step3. 建模

In [13]:
clf = svm.SVR(kernel='rbf')  ##用SVC比較好
clf.fit(x_train_std,y_train)

SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
    gamma='auto_deprecated', kernel='rbf', max_iter=-1, shrinking=True,
    tol=0.001, verbose=False)

#     Step4. 預測

```

```


In [14]:
clf.predict(x_test_std)

array([ 0.89397517,  1.02862261,  0.91000053,  0.57563705,  1.13131186,
        1.09238172,  0.90750992,  0.86136737,  0.12759189,  0.92659816,
       -0.12443107,  0.10659193,  1.1794774 ,  0.18026584,  1.11049469,
       -0.19274666,  1.03455911,  0.09980868,  0.79142773,  0.32779859,
        0.89878132,  1.07288326,  0.83197057,  1.17125517,  1.05241106,
        0.97333848,  1.00042635,  0.57006468,  0.35795392,  0.09712401,
       -0.04851627,  0.06860115,  0.71830606,  0.2137721 ,  1.04782541,
        0.63727114,  0.85491896,  0.84081989,  0.72571831,  0.83038937,
        0.962619  ,  0.70437533,  0.97588615,  0.96493724,  1.11578709,
        0.14631652,  0.04791028,  0.2232224 ,  0.13842719,  0.93576926,
        0.5631174 ,  0.19745715,  0.98922978,  0.28616739,  0.10142319,
        0.07795303,  0.81694046,  1.05090497,  1.12228525,  1.11135414,
       -0.0303576 ,  1.00239144,  0.2350096 ,  0.84991389,  0.90175028,
        1.09083387,  1.12547782, -0.08170198,  0.83401902,  0.70

In [24]:
y_test

array([1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1,
       1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1,
       0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0,
       1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0,
       1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
       1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1])

In [26]:
print(clf.predict(x_test_std).shape)
print(y_test.shape)

(171,)
(171,)


## Step5. 準確度分析

In [30]:
predictions2 = clf.predict(x_train_std)
predictions = clf.predict(x_test_std)
error_index2=np.where(predictions2!=y_train)[0]
print('train accuracy:',1-len(error_index2)/len(x_train))
error_index=np.where(predictions!=y_test)[0]
print('test accuracy:',1-len(error_index)/len(x_test))
##得出的結果 非0,1

train accuracy: 0.0
test accuracy: 0.0


In [15]:
clf.score(x_train_std,y_train)

0.916679036184159

In [16]:
clf.score(x_test_std,y_test)

0.849144715965429