# 一、导入数据

In [None]:
import numpy as np
import pandas as pd

In [None]:
data = pd.read_excel("input/data_.xlsx",index_col=0)

In [None]:
data_ = data.copy()
data_

In [None]:
data_.info()

# 二、设置变量

In [None]:
features = data_.drop(columns=['Class','Thickness_class','Cell Size'])
features

In [None]:
target = data_.Class.replace(to_replace = [2, 4], value = [0,1])
target

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
transfer = StandardScaler()

In [None]:
features_ = transfer.fit_transform(features)

In [None]:
features_

# 三、拆分数据集

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 145 rows for testing; the fixed seed makes the split repeatable.
train_features, test_features, train_target, test_target = train_test_split(
    features, target,
    test_size=145, random_state=99)

# The standardised matrix built earlier is never passed to any model, so
# every SVC below would otherwise be trained on raw values.  Standardise
# here instead: fit the scaler on the training rows only, then reuse those
# statistics on the test rows so no test-set information leaks into training.
train_features = transfer.fit_transform(train_features)
test_features = transfer.transform(test_features)

In [None]:
# Sanity-check the split: one shape per line, in the order
# train X, train y, test X, test y.
print(
    train_features.shape,
    train_target.shape,
    test_features.shape,
    test_target.shape,
    sep="\n",
)

# 四、建立模型

In [None]:
from sklearn.svm import SVC

In [None]:
svc_linear = SVC(kernel = "linear")

In [None]:
svc_linear.fit(train_features, train_target)

In [None]:
svc_linear.coef_

In [None]:
svc_linear.intercept_

In [None]:
test_predict = svc_linear.predict(test_features)
test_predict

In [None]:
svc_linear.score(test_features,test_target)

In [None]:
svc_poly = SVC(kernel="poly",degree=3,coef0=0.2)

In [None]:
svc_poly.fit(train_features, train_target)

In [None]:
svc_poly.coef_

In [None]:
svc_poly.intercept_

In [None]:
test_predict = svc_poly.predict(test_features)
test_predict

In [None]:
svc_poly.score(test_features,test_target)

In [None]:
svc_rbf = SVC(kernel="rbf")

In [None]:
svc_rbf.fit(train_features, train_target)

In [None]:
svc_rbf.coef_

In [None]:
svc_rbf.intercept_

In [None]:
test_predict = svc_rbf.predict(test_features)
test_predict

In [None]:
svc_rbf.score(test_features,test_target)

# 五、查看模型效能

In [None]:
from sklearn.metrics import confusion_matrix, roc_curve,accuracy_score

In [None]:
accuracy_score(test_target,test_predict)

In [None]:
confusion_matrix(test_target,test_predict)

In [None]:
import seaborn as sns

In [None]:
sns.heatmap(confusion_matrix(test_target,test_predict),
           xticklabels=['B','M'],
           yticklabels=['B','M'])

In [None]:
y_score = svc_rbf.decision_function(test_features)

In [None]:
fpr, tpr, thresholds = roc_curve(test_target, y_score)

In [None]:
import matplotlib.pyplot as plt
plt.figure()
plt.plot(fpr, tpr)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC curve')
plt.show()

In [None]:
from sklearn.metrics import plot_confusion_matrix, plot_roc_curve, plot_precision_recall_curve

In [None]:
plot_confusion_matrix(svc_rbf, test_features, test_target)

In [None]:
plot_roc_curve(svc_rbf, test_features, test_target)

In [None]:
plot_precision_recall_curve(svc_rbf, test_features, test_target)

# 六、知识拓展

In [None]:
Cs = np.logspace(-3,2,200)

In [None]:
acc_=[]
for C_ in Cs:
    svc_rbf = SVC(kernel="rbf",C=C_)
    svc_rbf.fit(train_features, train_target)
    acc_.append(svc_rbf.score(test_features, test_target))

In [None]:
acc_

In [None]:
len(acc_)

In [None]:
plt.plot(Cs, acc_)
plt.xscale('log')
plt.xlabel('C')
plt.ylabel('acc')
plt.show()

In [None]:
from sklearn.metrics import auc

In [None]:
Cs = np.logspace(-3,2,200)
aucs_=[]
for C_ in Cs:
    svc_rbf = SVC(kernel="rbf",C=C_)
    svc_rbf.fit(train_features, train_target)
    y_score = svc_rbf.decision_function(test_features)
    fpr, tpr, thresholds = roc_curve(test_target, y_score)
    auc_ = auc(fpr, tpr)
    aucs_.append(auc_)

In [None]:
aucs_

In [None]:
len(aucs_)

In [None]:
plt.plot(Cs, aucs_)
plt.xscale('log')
plt.xlabel('C')
plt.ylabel('auc')
plt.show()