# ***模型融合***

In [1]:
import pandas as pd

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC, SVR
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import VotingClassifier, VotingRegressor

In [2]:
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error

## **分类模型融合——基于Voting**

In [3]:
# Load the iris dataset from the local Excel workbook.
# A forward-slash path is used so the cell runs on Windows, Linux and macOS alike
# (a backslash is a path separator on Windows only).
iris = pd.read_excel("data/iris.xlsx")
# Every column except "class" is a feature; "class" is the target label.
X = iris.drop("class", axis=1)
y = iris['class']

# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [4]:
# Initialise the base classifiers.
# probability=True makes SVC expose predict_proba, which soft voting needs.
# random_state is pinned on the estimators that use randomness (the SVC
# probability calibration and the decision tree) so reruns give the same scores.
svc = SVC(kernel='linear', probability=True, random_state=42)
dtc = DecisionTreeClassifier(random_state=42)
gnb = GaussianNB()
knn = KNeighborsClassifier(n_neighbors=3)
lr = LogisticRegression()

# Build the ensemble; voting='soft' averages the predicted class probabilities
# of the base classifiers and picks the class with the highest mean probability.
eclf = VotingClassifier(estimators=[
    ('svc', svc),
    ('dtc', dtc),
    ('gnb', gnb),
    ('knn', knn),
    ('lr', lr)],
    voting='soft')

# Fit every base model and the ensemble on the training split, keep each
# model's test-set predictions, and print its test accuracy
# (order: svc, dtc, gnb, knn, lr, ensemble).
predictions = []
for model in [svc, dtc, gnb, knn, lr, eclf]:
    y_pred = model.fit(X_train, y_train).predict(X_test)
    predictions.append(y_pred)
    print(accuracy_score(y_test, y_pred))

1.0
1.0
0.9777777777777777
1.0
1.0
1.0


## **回归模型融合——基于Voting**

In [5]:
# Load the Boston house-price dataset from the local Excel workbook.
# A forward-slash path is used so the cell runs on Windows, Linux and macOS alike
# (a backslash is a path separator on Windows only).
boston = pd.read_excel("data/boston_house_prices.xlsx")
# Every column except "MEDV" is a feature; "MEDV" is the regression target.
X = boston.drop("MEDV", axis=1)
y = boston['MEDV']

# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


In [6]:
# Initialise the base regressors.
# The decision tree's random_state is pinned so its score (and therefore the
# ensemble's) is the same on every run.
svm = SVR()
dtc = DecisionTreeRegressor(random_state=42)
knn = KNeighborsRegressor(n_neighbors=3)
lr = LinearRegression()

# Build the ensemble. VotingRegressor has no hard/soft voting mode: its
# prediction is the average of the base regressors' predictions.
ereg = VotingRegressor(estimators=[
    ('svm', svm),
    ('dtc', dtc),
    ('knn', knn),
    ('lr', lr)])

# Fit every base model and the ensemble on the training split, keep each
# model's test-set predictions, and print its test mean squared error
# (order: svm, dtc, knn, lr, ensemble).
predictions = []
for model in [svm, dtc, knn, lr, ereg]:
    y_pred = model.fit(X_train, y_train).predict(X_test)
    predictions.append(y_pred)
    print(mean_squared_error(y_test, y_pred))

53.50370874750568
16.723157894736843
28.149334795321632
21.51744423117709
15.779652809696751
