# Voting Classifier

In [12]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.datasets import make_moons

In [13]:
# Build a noisy two-moons toy dataset and hold out a test split; both steps
# are seeded so every run works with the same samples.
X, y = make_moons(n_samples=500, noise=0.30, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
)

In [14]:
# Voting ensemble over three different model families (linear model, random
# forest, SVM). Every member gets a fixed seed so refits are reproducible.
voting_clf = VotingClassifier(
    estimators=[
        ("lr", LogisticRegression(random_state=42)),
        ("rf", RandomForestClassifier(random_state=42)),
        ("svc", SVC(random_state=42)),
    ]
)

In [15]:
# Fit the voting ensemble on the training split.
voting_clf.fit(X_train, y_train)

In [16]:
# Print the test-set score of each fitted base estimator, keyed by the name
# it was registered under in the ensemble.
for est_name, fitted_est in voting_clf.named_estimators_.items():
    print(est_name, "=", fitted_est.score(X_test, y_test))

lr = 0.864
rf = 0.896
svc = 0.896


In [17]:
# Ensemble prediction for the first test sample (slice keeps the 2-D shape).
voting_clf.predict(X_test[:1])

array([1], dtype=int64)

In [18]:
# Individual predictions of each fitted base estimator for that same first
# test sample, to compare against the ensemble's answer.
[fitted_est.predict(X_test[:1]) for fitted_est in voting_clf.estimators_]

[array([1], dtype=int64), array([1], dtype=int64), array([0], dtype=int64)]

In [19]:
# Test-set score of the whole voting ensemble.
voting_clf.score(X_test, y_test)

0.912

In [20]:
# Switch the ensemble to soft voting. The SVC member must be built with
# probability=True for that, so both parameters are updated in one call
# before the ensemble is refitted and re-scored on the test split.
voting_clf.set_params(voting="soft", svc__probability=True)
voting_clf.fit(X_train, y_train)
voting_clf.score(X_test, y_test)

0.92

# Bagging and Pasting

In [21]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

In [27]:
# Ensemble of 500 decision trees, each trained on 100 sampled training
# instances, fitted on all available cores. oob_score=True additionally
# requests an out-of-bag evaluation during fit.
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=100,
    oob_score=True,
    n_jobs=-1,
    random_state=42,
)

In [28]:
# Fit the bagging ensemble on the training split.
bag_clf.fit(X_train, y_train)

In [29]:
# Test-set score of the bagging ensemble.
bag_clf.score(X_test, y_test)

0.904

In [30]:
from sklearn.ensemble import RandomForestClassifier

In [31]:
# Random forest of 500 trees, each limited to 16 leaf nodes and trained on
# 100 sampled instances; seeded for reproducibility.
rf_clf = RandomForestClassifier(
    n_estimators=500,
    max_leaf_nodes=16,
    max_samples=100,
    random_state=42,
)

In [32]:
# Fit the random forest on the training split.
rf_clf.fit(X_train, y_train)

In [33]:
# Test-set score of the random forest.
rf_clf.score(X_test, y_test)

0.92

In [34]:
from sklearn.datasets import load_iris

In [35]:
# Load the iris dataset with pandas containers, so the feature names are
# available as iris.data.columns further down.
iris = load_iris(as_frame=True)

In [38]:
# Train a 500-tree random forest on the full iris dataset; it is only used
# below to read off feature importances, so no train/test split is made.
rnd_clf = RandomForestClassifier(n_estimators=500, random_state=42)
rnd_clf.fit(iris.data, iris.target)

In [39]:
# Print each feature's importance (rounded to two decimals) next to its
# column name.
for importance, feature in zip(rnd_clf.feature_importances_, iris.data.columns):
    print(round(importance, 2), feature)

0.11 sepal length (cm)
0.02 sepal width (cm)
0.44 petal length (cm)
0.42 petal width (cm)


# Boosting

In [86]:
from sklearn.tree import DecisionTreeRegressor
import numpy as np
import matplotlib.pyplot as plt

In [87]:
# Synthetic 1-D regression problem: 100 inputs drawn uniformly from
# [-0.5, 0.5) and a quadratic target 3*x^2 with Gaussian noise (std 0.05).
# The global NumPy RNG is seeded first; the uniform draw must stay ahead of
# the normal draw so the generated values do not change.
np.random.seed(42)
X = np.random.rand(100, 1) - 0.5
noise = 0.05 * np.random.randn(100)
y = 3 * X[:, 0] ** 2 + noise

In [88]:
# First stage of the hand-rolled boosting chain: a shallow (depth-2) tree
# fitted directly on the targets.
tree_reg1 = DecisionTreeRegressor(max_depth=2, random_state=42)
tree_reg1.fit(X, y)

In [89]:
# Residual errors left by the first tree on the training data; these become
# the targets for the second tree.
y2 = y - tree_reg1.predict(X)

In [90]:
# Second stage: another depth-2 tree (different seed) fitted on the first
# tree's residuals.
tree_reg2 = DecisionTreeRegressor(max_depth=2, random_state=43)
tree_reg2.fit(X, y2)

In [91]:
# Residual errors still left after the second tree; targets for the third.
y3 = y2 - tree_reg2.predict(X)

In [92]:
# Third stage: depth-2 tree (again a different seed) fitted on the residuals
# remaining after the second tree.
tree_reg3 = DecisionTreeRegressor(max_depth=2, random_state=44)
tree_reg3.fit(X, y3)

In [93]:
# Predict on three new points by adding up the outputs of the three chained
# trees (each later tree corrects the residual of the ones before it).
X_new = np.array([[-0.4], [0.0], [0.5]])
sum(reg.predict(X_new) for reg in (tree_reg1, tree_reg2, tree_reg3))

array([0.49484029, 0.04021166, 0.75026781])

In [94]:
from sklearn.ensemble import GradientBoostingRegressor

In [95]:
# Same setup as the manual chain above, expressed with scikit-learn's
# gradient boosting: three depth-2 trees and a learning rate of 1.0.
grbt = GradientBoostingRegressor(
    n_estimators=3,
    max_depth=2,
    learning_rate=1.0,
    random_state=42,
)

In [96]:
# Fit the 3-tree gradient boosting model on the synthetic regression data.
grbt.fit(X, y)

In [97]:
# Predictions on the same three new points used for the manual tree chain,
# for a side-by-side comparison.
grbt.predict(X_new)

array([0.49484029, 0.04021166, 0.75026781])

In [101]:
# Finding the best tree count: allow up to 500 trees, but pass
# n_iter_no_change=10 so that training can stop early; the number of trees
# actually kept is read from n_estimators_ after fitting.
grbt = GradientBoostingRegressor(
    n_estimators=500,
    max_depth=2,
    learning_rate=1.0,
    n_iter_no_change=10,
    random_state=42,
)

In [102]:
# Fit the early-stopping gradient boosting model on the regression data.
grbt.fit(X, y)

In [103]:
# Number of trees the fitted model ended up with (out of the 500 allowed).
grbt.n_estimators_

14

# Stacking

In [105]:
from sklearn.ensemble import StackingClassifier

# Stacked ensemble: the same three base model families as the voting
# classifier (SVC with probability=True), blended by a random forest final
# estimator, with cv=5.
stacking_clf = StackingClassifier(
    estimators=[
        ("lr", LogisticRegression(random_state=42)),
        ("rf", RandomForestClassifier(random_state=42)),
        ("svc", SVC(probability=True, random_state=42)),
    ],
    final_estimator=RandomForestClassifier(random_state=43),
    cv=5,
)

In [106]:
# Fit the stacking ensemble on the two-moons training split (X_train and
# y_train come from the first section, not the regression data above).
stacking_clf.fit(X_train, y_train)