## 9.1 模型融合示例

In [8]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
import lightgbm as lgb
from sklearn.metrics import mean_absolute_error, mean_squared_error
import pandas as pd

In [1]:
# Predictions from three models (four values each)
predict1, predict2, predict3 = (
    [1.2, 3.2, 2.1, 6.2],
    [0.9, 3.1, 2.0, 5.9],
    [1.1, 2.9, 2.2, 6.0],
)

In [2]:
# Ground-truth target values that the model predictions are scored against
y_true = [1, 3, 2, 6]

In [5]:
# Report the mean absolute error of each individual model
for tag, preds in (("predict1", predict1), ("predict2", predict2), ("predict3", predict3)):
    print(f"{tag} MAE：", mean_absolute_error(y_true, preds))

predict1 MAE： 0.1750000000000001
predict2 MAE： 0.07499999999999993
predict3 MAE： 0.10000000000000009


In [6]:
# Weighted fusion of three models' predictions
def weighted_meathod(predict1, predict2, predict3, w=None):
    """Combine three prediction sequences into one weighted sum.

    Args:
        predict1: Predictions of the first model; wrapped in ``pd.Series``.
        predict2: Predictions of the second model; wrapped in ``pd.Series``.
        predict3: Predictions of the third model; wrapped in ``pd.Series``.
        w: Indexable holding three weights, applied to ``predict1``,
            ``predict2`` and ``predict3`` in that order. When ``None``
            (the default), equal weights of 1/3 each are used.

    Returns:
        A ``pd.Series`` equal to
        ``w[0]*predict1 + w[1]*predict2 + w[2]*predict3``. For plain lists of
        equal length this is an element-wise weighted sum.
    """
    if w is None:
        # Build the default per call instead of sharing one mutable list
        # object across every invocation of the function.
        w = (1/3, 1/3, 1/3)
    res = w[0]*pd.Series(predict1) + w[1]*pd.Series(predict2) + w[2]*pd.Series(predict3)
    return res

In [9]:
# Equal weights: a plain average of the three models
w = [1/3, 1/3, 1/3]
weighted_predict = weighted_meathod(predict1, predict2, predict3, w=w)
weighted_mae = mean_absolute_error(y_true, weighted_predict)
print("predict weighted MAE：", weighted_mae)

predict weighted MAE： 0.0666666666666666


In [10]:
# Unequal weights, with the largest weight (0.4) on predict2
w = [0.3, 0.4, 0.3]
weighted_predict = weighted_meathod(predict1, predict2, predict3, w=w)
weighted_mae = mean_absolute_error(y_true, weighted_predict)
print("predict weighted MAE：", weighted_mae)

predict weighted MAE： 0.05750000000000027


In [11]:
# Report the mean squared error of each individual model
for tag, preds in (("predict1", predict1), ("predict2", predict2), ("predict3", predict3)):
    print(f"{tag} MSE：", mean_squared_error(y_true, preds))

predict1 MSE： 0.032500000000000036
predict2 MSE： 0.007499999999999985
predict3 MSE： 0.015000000000000027


In [12]:
# Score the 0.3 / 0.4 / 0.3 weighted blend with mean squared error
w = [0.3, 0.4, 0.3]
weighted_predict = weighted_meathod(predict1, predict2, predict3, w=w)
weighted_mse = mean_squared_error(y_true, weighted_predict)
print("predict weighted MSE：", weighted_mse)

predict weighted MSE： 0.00397500000000003


**三个臭皮匠，顶个诸葛亮**

## 9.2 iris分类模型融合 

#### 1 加载数据

In [25]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.model_selection import cross_val_score

In [15]:
# Load the iris dataset and pull out its feature matrix and target vector
iris = load_iris()
x, y = iris.data, iris.target

In [16]:
# Display the dimensions of the features and the targets
(x.shape, y.shape)

((150, 4), (150,))

#### 2 数据集切分

In [19]:
# Hold out 30% of the samples as a test set. A fixed random_state makes the
# shuffle, and therefore the split, identical on every run of the notebook.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=42
)

#### 3 定义三种基模型

In [33]:
# Three base classifiers that are later combined by voting
model1 = LogisticRegression(C=0.1)
# probability=True is required if the model is used for soft voting;
# random_state fixes the randomness involved in the probability estimates.
model2 = SVC(C=0.1, probability=True, random_state=42)
# A fixed random_state makes the forest, and the scores reported for it,
# reproducible across runs.
model3 = RandomForestClassifier(n_estimators=10, max_depth=2, random_state=42)

#### 4 硬投票

In [24]:
# Hard-voting ensemble over the three base models
# (voting="hard" is the VotingClassifier default, spelled out here for clarity)
base_estimators = [("LR", model1), ("SVC", model2), ("RF", model3)]
ensemble_model = VotingClassifier(estimators=base_estimators, voting="hard")

In [32]:
# 5-fold cross-validated accuracy of each base model and of the voting ensemble
for model, label in zip([model1, model2, model3, ensemble_model], ["LR", "SVC", "RF", "Voting"]):
    scores = cross_val_score(model, x, y, cv=5, scoring="accuracy")
    # Mean accuracy over the folds, with the standard deviation as the spread
    print("Accuracy: {:.2f} (+/- {:.2f}) [{}]".format(scores.mean(), scores.std(), label))

Accuarcy: 0.95 (+/- 0.05) [LR]
Accuarcy: 0.92 (+/- 0.03) [SVC]
Accuarcy: 0.95 (+/- 0.03) [RF]
Accuarcy: 0.95 (+/- 0.03) [Voting]
