In [None]:
import numpy as np
import matplotlib.pyplot as plt

# 原理
## 加载数据集

In [None]:
from sklearn.datasets import make_moons

# Synthetic two-class "moons" data set; the fixed seed keeps the sample reproducible.
X, y = make_moons(n_samples=500, noise=0.3, random_state=42)

# One scatter call per class so each class is drawn in its own default colour.
for label in (0, 1):
    members = y == label
    plt.scatter(X[members, 0], X[members, 1])
plt.show()

## 处理数据集

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Hold out a test set; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=666)

# Standardize: the scaler is fitted on the training split only, and that same
# fitted transform is then applied to both the training and the test split.
std_scaler = StandardScaler().fit(X_train, y_train)
X_train, X_test = std_scaler.transform(X_train), std_scaler.transform(X_test)


def fit(model):
    """Train ``model`` on the training split and report its test-set score.

    Fits on the notebook-level ``X_train`` / ``y_train``, scores on
    ``X_test`` / ``y_test``, prints ``<model> : <score>`` and returns the
    same model object so the call can be assigned directly.
    """
    model.fit(X_train, y_train)
    test_score = model.score(X_test, y_test)
    print(model, ':', test_score)
    return model

## 训练模型

In [None]:
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression

# Train each base classifier on the shared training split; fit() prints the
# test score of every model as it goes and hands the model back.
svm_clf, log_clf, dt_clf = [
    fit(estimator)
    for estimator in (
        SVC(),
        LogisticRegression(),
        DecisionTreeClassifier(random_state=666),
    )
]

## 投票预测

In [None]:
# Class predictions of each base model on the held-out test set
# (1 = SVM, 2 = logistic regression, 3 = decision tree).
y_predict1, y_predict2, y_predict3 = (
    clf.predict(X_test) for clf in (svm_clf, log_clf, dt_clf)
)

In [None]:
# Majority vote over the three 0/1 predictions: a sample is labelled 1 when at
# least two of the three models predict 1, otherwise 0.
votes_for_one = np.sum([y_predict1, y_predict2, y_predict3], axis=0)
y_predict = (votes_for_one >= 2).astype('int')

In [None]:
""" 分类问题评价指标:
(1) 准确度： accuracy_score
(2) 精确度： precision_score   <--- 下面3个指标，用于偏斜数据集
(3) 召回率： recall_score
(4) F1值 ：  f1_score
"""
from sklearn.metrics import accuracy_score

# Accuracy of each base model followed by the hand-rolled majority vote.
# Original author's note on the last row: the ensemble's accuracy improved.
for label, predictions in (
    ('支持向量机:', y_predict1),
    ('逻辑回归  :', y_predict2),
    ('决策树    :', y_predict3),
    ('集成学习  :', y_predict),
):
    print(label, accuracy_score(y_test, predictions))

# Voting
- 多个不同算法的模型各自在同一份数据上训练，然后对它们的预测结果进行投票表决

In [None]:
from sklearn.ensemble import VotingClassifier

In [None]:
def VotingClassifierTest(voting):
    """Fit a three-model VotingClassifier and print its test-set score.

    The ensemble combines a fresh SVC with the notebook-level ``log_clf``
    and ``dt_clf``. It is trained on ``X_train`` / ``y_train`` and scored on
    ``X_test`` / ``y_test``. Per the scikit-learn documentation,
    VotingClassifier fits clones of the estimators it is given, so the
    already-fitted global models are used only as templates.

    Parameters
    ----------
    voting : str
        ``'hard'`` for majority voting on predicted labels, or ``'soft'``
        for voting on predicted class probabilities.

    Raises
    ------
    ValueError
        If ``voting`` is neither ``'hard'`` nor ``'soft'``. Previously any
        other value silently fell back to hard voting.
    """
    if voting not in ('hard', 'soft'):
        raise ValueError(
            "voting must be 'hard' or 'soft', got {!r}".format(voting)
        )

    # Soft voting needs predict_proba, which SVC only provides when built with
    # probability=True. The probability calibration shuffles data internally,
    # so it is seeded to keep the score reproducible across runs; the seed is
    # ignored by SVC when probability=False.
    use_soft = voting == 'soft'
    svm_clf = SVC(probability=use_soft, random_state=666)

    # 1. Build the ensemble classifier.
    voting_clf = VotingClassifier(estimators=[
        ('支持向量机:', svm_clf),
        ('逻辑回归:', log_clf),
        ('决策树:', dt_clf)
    ], voting=voting)

    # 2. Train.
    voting_clf.fit(X_train, y_train)

    # 3. Evaluate on the held-out test set.
    print(voting_clf.score(X_test, y_test))

## Hard Voting
- **投票:** 少数服从多数

In [None]:
# Evaluate the ensemble with hard (majority-label) voting; the test score is printed.
VotingClassifierTest(voting='hard')

## Soft Voting
- **投票:** 以各模型预测的类别概率作为权重（对概率取平均后选择概率最大的类别），要求每个`模型能计算概率`（即支持 `predict_proba`）

In [None]:
# Evaluate the ensemble with soft (probability-based) voting; the test score is printed.
VotingClassifierTest(voting='soft')