# 软投票和硬投票

## 导入相关的包

In [10]:
import pandas as pd

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score

from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import VotingClassifier


In [2]:
# Experimental data: the breast-cancer dataset bundled with scikit-learn.

from sklearn.datasets import load_breast_cancer

bc = load_breast_cancer()
y = bc.target
X = pd.DataFrame.from_records(data=bc.data, columns=bc.feature_names)

# Combined frame (features + label) for inspection.
# `assign` returns a NEW DataFrame, so X stays a pure feature matrix.
# A plain `df = X` would only alias X, and adding the 'target' column
# through that alias would silently put the label into the features.
df = X.assign(target=y)

In [5]:
# Restrict the model inputs to the first three features only.
cols = ['mean radius', 'mean texture', 'mean perimeter']

# Hold out 30% of the rows as a test set. The split is stratified on y
# and uses a fixed seed so that re-running the cell gives the same split.
split_options = dict(test_size=0.3, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(X[cols], y,
                                                    **split_options)

In [11]:
# Display names, listed in the same order as the base estimators below.
clf_labels = ['KNN', 'Logistic', 'Decision tree']

# The three base learners.
clf1 = KNeighborsClassifier(n_neighbors=1)
clf2 = LogisticRegression(solver='liblinear', C=0.05, random_state=42)
clf3 = DecisionTreeClassifier(max_depth=1, random_state=42)

# KNN and logistic regression are wrapped in a pipeline with a
# StandardScaler in front; the decision tree is used on the raw features.
pipe1 = Pipeline(steps=[['sc', StandardScaler()], ['clf', clf1]])
pipe2 = Pipeline(steps=[['sc', StandardScaler()], ['clf', clf2]])

# Soft-voting ensemble over the three estimators, all weighted equally.
base_estimators = [('knn', pipe1), ('lr', pipe2), ('dc', clf3)]
voter = VotingClassifier(estimators=base_estimators,
                         voting='soft',
                         weights=[1, 1, 1])

In [12]:
# 10-fold cross-validated ROC AUC on the training split, reported for each
# base model and for the voting ensemble.
# Note: '{:.2}' formats to two significant digits (not two decimals),
# which is why a standard deviation such as 0.035 prints with three
# decimal places.
report_line = "AUC:{:.2} (+/- {:.2}) [{}]"
models = [pipe1, pipe2, clf3, voter]
model_names = clf_labels + ['Soft Voting']

for clf, label in zip(models, model_names):
    scores = cross_val_score(estimator=clf,
                             X=X_train,
                             y=y_train,
                             scoring='roc_auc',
                             cv=10)
    print(report_line.format(scores.mean(), scores.std(), label))

AUC:0.87 (+/- 0.035) [KNN]
AUC:0.95 (+/- 0.031) [Logistic]
AUC:0.86 (+/- 0.049) [Decision tree]
AUC:0.95 (+/- 0.024) [Soft Voting]
