# Ensemble Learning and Random Forests

In [1]:
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_moons

# Synthetic two-class "moons" data set; both the generator and the split are
# seeded so the same samples land in the same partitions on every run.
X, y = make_moons(
    n_samples=500,
    noise=0.30,
    random_state=42,
)
# No test_size is passed, so train_test_split falls back to its default ratio.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
)

In [2]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

In [3]:
# Three heterogeneous base learners that the voting ensembles below combine.
log_clf = LogisticRegression()
# The forest draws bootstrap samples and random feature subsets, so it is
# seeded (with the same value used for the data above) to make the reported
# accuracies repeatable across kernel restarts.
rnd_clf = RandomForestClassifier(random_state=42)
svm_clf = SVC()

In [4]:
# Majority-vote ("hard") ensemble over the three base learners.
named_estimators = [('lr', log_clf), ('rf', rnd_clf), ('svc', svm_clf)]
voting_clf = VotingClassifier(estimators=named_estimators, voting='hard')

# Left as the cell's final expression so the fitted estimator is displayed.
voting_clf.fit(X_train, y_train)

In [5]:
from sklearn.metrics import accuracy_score

# Fit each base learner and the hard-voting ensemble on the training split,
# then print the class name next to its accuracy on the held-out test split.
models = (log_clf, rnd_clf, svm_clf, voting_clf)
for clf in models:
    # fit() returns the estimator itself, so predict() can be chained onto it.
    y_pred = clf.fit(X_train, y_train).predict(X_test)
    print(type(clf).__name__, accuracy_score(y_test, y_pred))

LogisticRegression 0.864
RandomForestClassifier 0.896
SVC 0.896
VotingClassifier 0.904


In [6]:
# probability=True makes the SVC expose class-probability estimates, which the
# soft-voting ensemble needs. Those estimates come from an internal
# cross-validation that shuffles the data, so the estimator is seeded to keep
# the results repeatable.
svm_clf_prob = SVC(probability=True, random_state=42)

In [7]:
# Soft voting averages the class probabilities predicted by the base learners,
# hence the probability-enabled SVC in place of the plain one.
voting_clf_prob = VotingClassifier(
    estimators=[('lr', log_clf), ('rf', rnd_clf), ('svc', svm_clf_prob)],
    voting='soft')

# Fit the soft-voting ensemble built in this cell; kept as the final expression
# so the fitted estimator is displayed.
voting_clf_prob.fit(X_train, y_train)

In [8]:
from sklearn.metrics import accuracy_score

# Same comparison as for hard voting, now with the probability-enabled SVC and
# the soft-voting ensemble: fit on the training split, score on the test split.
models = (log_clf, rnd_clf, svm_clf_prob, voting_clf_prob)
for clf in models:
    # fit() returns the estimator itself, so predict() can be chained onto it.
    y_pred = clf.fit(X_train, y_train).predict(X_test)
    print(type(clf).__name__, accuracy_score(y_test, y_pred))

LogisticRegression 0.864
RandomForestClassifier 0.896
SVC 0.896
VotingClassifier 0.912


# Bagging and Pasting

In [9]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

# Bagging: 500 decision trees, each trained on 100 training instances drawn
# with replacement (bootstrap=True); n_jobs=-1 spreads the work over all
# available CPU cores. random_state pins the resampling so that the fitted
# ensemble, and therefore its predictions, are repeatable.
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(), n_estimators=500,
    max_samples=100, bootstrap=True, n_jobs=-1, random_state=42)

bag_clf.fit(X_train, y_train)
y_pred = bag_clf.predict(X_test)

In [10]:
# Same bagging setup, rebuilt with oob_score=True so that, after fitting, the
# ensemble also reports a score computed on the instances each tree did not
# see during training. random_state pins the bootstrap draws so the
# out-of-bag score and the test accuracy are repeatable.
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(), n_estimators=500,
    max_samples=100, bootstrap=True, n_jobs=-1, oob_score=True,
    random_state=42)

In [12]:
# Train the bagging ensemble on the training split; as the cell's last
# expression, the fitted estimator is also displayed.
bag_clf.fit(X_train, y_train)

In [15]:
# Out-of-bag score of the fitted ensemble (populated because the classifier
# was constructed with oob_score=True).
bag_clf.oob_score_

0.92

In [16]:
from sklearn.metrics import accuracy_score
# Predict labels for the held-out test split with the fitted bagging ensemble.
y_pred = bag_clf.predict(X_test)

In [17]:
# Test-set accuracy of the bagging ensemble, for comparison with the
# out-of-bag score shown earlier.
accuracy_score(y_test, y_pred)

0.904

In [20]:
# Out-of-bag decision function of the fitted ensemble; in the output below each
# row holds two values that sum to 1 (presumably the per-class probabilities
# for one training instance — confirm against the BaggingClassifier docs).
bag_clf.oob_decision_function_

array([[0.36411609, 0.63588391],
       [0.4296875 , 0.5703125 ],
       [0.99742268, 0.00257732],
       [0.00787402, 0.99212598],
       [0.0234375 , 0.9765625 ],
       [0.11345646, 0.88654354],
       [0.38242894, 0.61757106],
       [0.05555556, 0.94444444],
       [0.9343832 , 0.0656168 ],
       [0.83244681, 0.16755319],
       [0.56763926, 0.43236074],
       [0.02425876, 0.97574124],
       [0.7486911 , 0.2513089 ],
       [0.82170543, 0.17829457],
       [0.91161616, 0.08838384],
       [0.08205128, 0.91794872],
       [0.02791878, 0.97208122],
       [0.92525773, 0.07474227],
       [0.65796345, 0.34203655],
       [0.95490716, 0.04509284],
       [0.04155844, 0.95844156],
       [0.24164524, 0.75835476],
       [0.86563307, 0.13436693],
       [0.98123324, 0.01876676],
       [0.96124031, 0.03875969],
       [0.00539084, 0.99460916],
       [0.95360825, 0.04639175],
       [0.99744246, 0.00255754],
       [0.02133333, 0.97866667],
       [0.70914127, 0.29085873],
       [0.