# Chapter 7 앙상블 학습과 랜덤 포레스트

In [1]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

In [2]:
# Three different base learners, each created with scikit-learn's default
# hyperparameters; the voting ensemble built later combines them.
log_clf, rnd_clf, svm_clf = LogisticRegression(), RandomForestClassifier(), SVC()

In [6]:

# Hard voting: every base classifier casts one vote per sample and the
# majority class becomes the ensemble's prediction.
voting_clf = VotingClassifier(
    voting='hard',
    estimators=[
        ('lr', log_clf),
        ('rf', rnd_clf),
        ('svc', svm_clf),
    ],
)

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_moons

# Synthetic two-class "moons" dataset. Both the generator and the split use a
# fixed seed so the data and the train/test partition are reproducible.
X, y = make_moons(random_state=42, noise=0.30, n_samples=500)
# No test_size is given, so train_test_split falls back to its default split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
)

In [7]:
# Train the hard-voting ensemble on the training split. The call is left as a
# bare expression so the notebook displays the fitted estimator.
voting_clf.fit(X_train, y_train)

VotingClassifier(estimators=[('lr', LogisticRegression()),
                             ('rf', RandomForestClassifier()), ('svc', SVC())])

In [11]:
from sklearn.metrics import accuracy_score

In [13]:
# Fit each individual classifier and the voting ensemble on the same training
# split, then print the class name with its accuracy on the test split.
for clf in [log_clf, rnd_clf, svm_clf, voting_clf]:
    y_pred = clf.fit(X_train, y_train).predict(X_test)
    print(type(clf).__name__, accuracy_score(y_test, y_pred))

LogisticRegression 0.864
RandomForestClassifier 0.888
SVC 0.896
VotingClassifier 0.912


### 투표 기반 분류기(0.912)가 개별 분류기(0.864~0.896)보다 성능이 높음을 확인할 수 있습니다.

## 7.2.1 사이킷런의 배깅과 페이스팅

In [14]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

In [26]:
# Bagging ensemble of 500 decision trees. Each tree is trained on 100 instances
# drawn with replacement (bootstrap=True); oob_score=True requests an
# out-of-bag evaluation, and n_jobs=-1 uses every available CPU core.
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=100,
    bootstrap=True,
    oob_score=True,
    n_jobs=-1,
)



In [27]:
# Train the bagging ensemble, then predict labels for the held-out test split.
# fit() returns the estimator itself, so the two calls can be chained.
y_pred = bag_clf.fit(X_train, y_train).predict(X_test)

In [29]:
# Display the out-of-bag score of the fitted bagging ensemble; the attribute
# is available because the ensemble was created with oob_score=True.
bag_clf.oob_score_

0.9253333333333333

## 7.4.2 특성 중요도

In [31]:
from sklearn.datasets import load_iris

# 500-tree random forest used below to rank the iris features by importance;
# n_jobs=-1 trains the trees on all available CPU cores.
rnd_clf = RandomForestClassifier(n_jobs=-1, n_estimators=500)
iris = load_iris()


In [34]:
# Fit the forest on the complete iris data (no train/test split is made here);
# the bare expression lets the notebook display the fitted estimator.
rnd_clf.fit(iris["data"],iris["target"])

RandomForestClassifier(n_estimators=500, n_jobs=-1)

In [37]:
# Print every feature name next to the importance the fitted forest assigned
# to it (names and importances are paired positionally).
for name, score in zip(iris.feature_names, rnd_clf.feature_importances_):
    print(name, score)

sepal length (cm) 0.10119840775131471
sepal width (cm) 0.02401539689142437
petal length (cm) 0.4298269006247967
petal width (cm) 0.44495929473246415


## 7.5.2 그래디언트 부스팅

In [48]:
# NumPy must be imported here: when the file is run top to bottom this cell
# executes before the later cell that imports it, which raised a NameError.
import numpy as np

# Noisy quadratic training set: y = 3 * x^2 + Gaussian noise, with the single
# feature x drawn uniformly from [-0.5, 0.5).
np.random.seed(42)  # fixed seed so the generated data are reproducible
X = np.random.rand(100, 1) - 0.5
y = 3 * X[:, 0] ** 2 + 0.05 * np.random.randn(100)

In [38]:
from sklearn.tree import DecisionTreeRegressor


In [49]:
# First stage of the hand-built boosting chain: a depth-2 regression tree
# fitted directly on the targets y. The bare fit() expression is what the
# notebook displays as the cell output.
tree_reg1 = DecisionTreeRegressor(max_depth=2)
tree_reg1.fit(X,y)

DecisionTreeRegressor(max_depth=2)

In [50]:
# Second stage: a depth-2 tree trained on the residual errors left by the
# first tree (target minus the first tree's prediction).
tree_reg2 = DecisionTreeRegressor(max_depth=2)
y2 = y - tree_reg1.predict(X)
tree_reg2.fit(X, y2)

DecisionTreeRegressor(max_depth=2)

In [51]:
# Third stage: a depth-2 tree trained on what is still unexplained after the
# second tree (previous residuals minus the second tree's prediction).
tree_reg3 = DecisionTreeRegressor(max_depth=2)
y3 = y2 - tree_reg2.predict(X)
tree_reg3.fit(X, y3)

DecisionTreeRegressor(max_depth=2)

In [52]:
import numpy as np

# A single query point with one feature, stored as a 2-D array of shape (1, 1).
X_new = np.array([0.8]).reshape(1, 1)

In [53]:
# Ensemble prediction: the first tree's prediction plus the residual
# corrections predicted by the second and third trees.
y_pred = (
    tree_reg1.predict(X_new)
    + tree_reg2.predict(X_new)
    + tree_reg3.predict(X_new)
)

In [54]:
# Display the summed prediction of the three trees for X_new.
y_pred

array([0.75026781])