# Voting

In [5]:
from sklearn.datasets import make_moons
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

In [6]:
# Noisy make_moons dataset (500 samples), then a seeded train/test split so
# every later cell sees the same partition.
X, y = make_moons(
    n_samples=500,
    noise=0.30,
    random_state=42,
)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42
)

In [3]:
# Voting ensemble over three different model families, each seeded with 42.
# `voting` is not set here, so the classifier uses its default voting mode.
voting_clf = VotingClassifier(
    estimators=[
        ('lr', LogisticRegression(random_state=42)),
        ('rf', RandomForestClassifier(random_state=42)),
        ('svc', SVC(random_state=42)),
    ],
)

In [4]:
# Train the voting ensemble on the training split.
voting_clf.fit(X_train, y_train)

In [7]:
# Test-set score of every fitted sub-estimator, printed as "<name> = <score>".
for est_name, fitted_est in voting_clf.named_estimators_.items():
    test_score = fitted_est.score(X_test, y_test)
    print(est_name, '=', test_score)

lr = 0.864
rf = 0.896
svc = 0.896


In [8]:
# Ensemble prediction for the first test sample only (slice X_test[:1]).
voting_clf.predict(X_test[:1])

array([1])

In [12]:
# What each fitted sub-estimator predicts for that same first test sample.
[
    fitted_est.predict(X_test[:1])
    for fitted_est in voting_clf.estimators_
]

[array([1]), array([1]), array([0])]

In [13]:
# Test-set score of the voting ensemble as a whole.
voting_clf.score(X_test, y_test)

0.912

In [14]:
# Switch the existing ensemble to soft voting. The SVC must be told to
# produce class probabilities, so flip its `probability` flag through the
# nested `svc__probability` parameter, then refit and re-score.
# Note: this mutates `voting_clf` in place, so earlier cells re-run after
# this one will see the soft-voting configuration.
voting_clf.set_params(voting='soft', svc__probability=True)
voting_clf.fit(X_train, y_train)
voting_clf.score(X_test, y_test)

0.92

# Bagging and Pasting

In [15]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

In [16]:
# Bagging ensemble: 500 decision trees, each fitted on 100 sampled training
# instances, trained in parallel (n_jobs=-1) with a fixed seed.
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=100,
    n_jobs=-1,
    random_state=42,
)

In [17]:
# Fit the bagging ensemble on the training split.
bag_clf.fit(X_train, y_train)

In [18]:
# Same bagging configuration as before, rebuilt with oob_score=True so the
# fitted ensemble also exposes an out-of-bag evaluation.
# Note: this rebinds `bag_clf`, replacing the previously fitted ensemble.
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=100,
    n_jobs=-1,
    random_state=42,
    oob_score=True,
)
bag_clf.fit(X_train, y_train)

In [19]:
# Out-of-bag score, available because the ensemble was built with
# oob_score=True.
bag_clf.oob_score_

0.9253333333333333

In [20]:
from sklearn.metrics import accuracy_score
# Accuracy of the bagging ensemble on the held-out test split.
y_pred = bag_clf.predict(X_test)
accuracy_score(y_test, y_pred)

0.904

In [21]:
# First three rows of the out-of-bag decision function.
bag_clf.oob_decision_function_[:3]

array([[0.35579515, 0.64420485],
       [0.43513514, 0.56486486],
       [1.        , 0.        ]])

# Random Forest

In [22]:
from sklearn.ensemble import RandomForestClassifier

In [24]:
# Random forest of 500 trees, each limited to 16 leaf nodes, trained in
# parallel on the moons training split; predictions kept in y_pred_rf.
rnd_clf = RandomForestClassifier(
    n_estimators=500,
    max_leaf_nodes=16,
    n_jobs=-1,
    random_state=42,
)
rnd_clf.fit(X_train, y_train)
y_pred_rf = rnd_clf.predict(X_test)

## Using Random Forests for Feature Importances

In [25]:
from sklearn.datasets import load_iris

In [26]:
# Fit a 500-tree forest on the full iris frame and print each feature's
# importance (rounded to 3 decimals) followed by the feature name.
# Note: this rebinds `rnd_clf`, replacing the forest fitted on the moons data.
iris = load_iris(as_frame=True)
rnd_clf = RandomForestClassifier(
    n_estimators=500,
    random_state=42,
)
rnd_clf.fit(iris.data, iris.target)
for name, score in zip(iris.data.columns, rnd_clf.feature_importances_):
    print(round(score, 3), name)

0.112 sepal length (cm)
0.023 sepal width (cm)
0.441 petal length (cm)
0.423 petal width (cm)


# Boosting

## AdaBoost

In [27]:
from sklearn.ensemble import AdaBoostClassifier

In [28]:
# AdaBoost over 30 depth-1 decision trees with learning rate 0.5, fitted on
# the moons training split.
ada_clf = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=1),
    n_estimators=30,
    learning_rate=0.5,
    random_state=42,
)
ada_clf.fit(X_train, y_train)

## Gradient Boost

In [32]:
import numpy as np
from sklearn.tree import DecisionTreeRegressor

In [30]:
# Synthetic regression data: 100 points with x in [-0.5, 0.5) and
# y = 3 * x**2 plus Gaussian noise (std 0.05). The global seed makes the
# draw reproducible.
# Note: this rebinds `X` and `y`, which previously held the moons dataset.
np.random.seed(42)
X = np.random.rand(100, 1) - 0.5
noise = 0.05 * np.random.randn(100)
y = 3 * X[:, 0] ** 2 + noise

In [33]:
# First tree: a depth-2 regression tree fitted directly on the targets y.
tree_reg1 = DecisionTreeRegressor(max_depth=2, random_state=42)
tree_reg1.fit(X, y)

In [34]:
# Residual errors left by the first tree; used as the second tree's targets.
y2 = y - tree_reg1.predict(X)

In [35]:
# Second tree: same configuration, fitted on the first tree's residuals.
tree_reg2 = DecisionTreeRegressor(max_depth=2, random_state=42)
tree_reg2.fit(X, y2)

In [36]:
# Residual errors left after the second tree; used as the third tree's targets.
y3 = y2 - tree_reg2.predict(X)

In [37]:
# Third tree: same configuration, fitted on the second tree's residuals.
tree_reg3 = DecisionTreeRegressor(max_depth=2, random_state=42)
tree_reg3.fit(X, y3)

In [38]:
# Predict for three new points by summing the three trees' predictions:
# the first tree's estimate plus the two successive residual corrections.
X_new = np.array([[-0.4], [0.0], [0.5]])
sum(tree.predict(X_new) for tree in (tree_reg1, tree_reg2, tree_reg3))

array([0.49484029, 0.04021166, 0.75026781])

In [39]:
from sklearn.ensemble import GradientBoostingRegressor

In [40]:
# Gradient boosting with three depth-2 trees and learning_rate=1, matching
# the shape of the three-tree ensemble built by hand from tree_reg1..3.
gbrt = GradientBoostingRegressor(
    max_depth=2,
    n_estimators=3,
    learning_rate=1,
    random_state=42,
)

In [41]:
# Fit the gradient boosting regressor on the synthetic data (X, y).
gbrt.fit(X, y)

In [44]:
# Gradient boosting with a budget of 500 depth-2 trees and a smaller
# learning rate; n_iter_no_change=10 lets training stop early.
# Note: this rebinds `gbrt`, replacing the three-tree model.
gbrt = GradientBoostingRegressor(
    max_depth=2,
    n_estimators=500,
    learning_rate=0.05,
    n_iter_no_change=10,
    random_state=42,
)
gbrt.fit(X, y)

In [45]:
# Number of trees actually fitted; with n_iter_no_change set, this can be
# lower than the n_estimators budget of 500.
gbrt.n_estimators_

92

## Histogram-Based Gradient Boost

In [46]:
from sklearn.pipeline import make_pipeline
from sklearn.compose import make_column_transformer
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.preprocessing import OrdinalEncoder

In [None]:
# Pipeline: ordinal-encode 'ocean_proximity', pass every other column
# through unchanged, then fit a histogram-based gradient boosting regressor.
# The encoded column is listed first in the column transformer, which is why
# it is flagged as categorical via categorical_features=[0].
# NOTE(review): `housing` and `housing_labels` are not defined anywhere in
# the visible notebook and this cell has no execution count -- confirm where
# they are loaded before relying on this cell.
hgb_reg = make_pipeline(
    make_column_transformer(
        (OrdinalEncoder(), ['ocean_proximity']),
        remainder='passthrough',
    ),
    HistGradientBoostingRegressor(
        categorical_features=[0],
        random_state=42,
    ),
)
hgb_reg.fit(housing, housing_labels)

# Stacking

In [4]:
from sklearn.ensemble import StackingClassifier, RandomForestClassifier

In [3]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

In [7]:
# Stacking ensemble: three seeded base classifiers whose cross-validated
# (cv=5) predictions feed a random-forest final estimator, fitted on the
# moons training split.
stacking_clf = StackingClassifier(
    estimators=[
        ('lr', LogisticRegression(random_state=42)),
        ('rf', RandomForestClassifier(random_state=42)),
        ('svc', SVC(probability=True, random_state=42))
    ],
    # The seed must be passed by keyword: RandomForestClassifier's first
    # positional parameter is n_estimators, so a bare `43` would request
    # 43 trees and leave the final estimator unseeded (non-reproducible).
    final_estimator=RandomForestClassifier(random_state=43),
    cv=5
)
stacking_clf.fit(X_train, y_train)

In [8]:
# Test-set score of the stacking ensemble.
stacking_clf.score(X_test, y_test)

0.912