In [84]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

# sklearn
from sklearn import set_config
set_config(display="diagram")

from sklearn.datasets import make_moons
from sklearn.datasets import load_iris

from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import StackingClassifier

from sklearn.tree import DecisionTreeClassifier

from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.model_selection import GridSearchCV

from sklearn.linear_model import LogisticRegression

from sklearn.svm import SVC


In [6]:
# Synthetic two-class "moons" data set: 500 noisy points, seeded so the
# notebook regenerates the same samples on every run.
X, y = make_moons(random_state=29, noise=0.30, n_samples=500)

# Hold out 20% of the samples for testing; the fixed seed keeps the split stable.
xtrain, xtest, ytrain, ytest = train_test_split(
    X,
    y,
    random_state=29,
    test_size=0.2,
)

# Voting

In [9]:
# Voting ensemble over three different model families. No `voting` argument is
# passed, so VotingClassifier falls back to its default voting mode.
voting_clf = VotingClassifier(
    estimators=[
        ("lr", LogisticRegression(random_state=29)),
        ("rf", RandomForestClassifier(random_state=29)),
        ("sv", SVC(random_state=29)),
    ]
)

# Keep `fit` as the cell's final expression so the fitted estimator is rendered.
voting_clf.fit(xtrain, ytrain)

In [19]:
# Fitted sub-estimators, keyed by the names passed to VotingClassifier.
voting_clf.named_estimators_

{'lr': LogisticRegression(random_state=29),
 'rf': RandomForestClassifier(random_state=29),
 'sv': SVC(random_state=29)}

In [22]:
# Report each fitted member's own test-set score so it can be compared with
# the score of the combined ensemble.
for label in voting_clf.named_estimators_:
    member = voting_clf.named_estimators_[label]
    member_score = member.score(xtest, ytest)
    print(f"{label} = {member_score}")

lr = 0.82
rf = 0.88
sv = 0.86


In [23]:
# Test-set score of the combined voting ensemble.
voting_clf.score(xtest, ytest)

0.86

# Bagging

In [57]:
# Bagging ensemble of 500 depth-limited decision trees. Each tree is trained on
# a random draw sized at 60% of the training set.
bag_clf = BaggingClassifier(
    estimator=DecisionTreeClassifier(random_state=29, max_depth=5),
    n_estimators=500,
    max_samples=0.6,
    bootstrap_features=True,  # features are drawn with replacement
    oob_score=True,  # keep an out-of-bag estimate, exposed later as `oob_score_`
    n_jobs=-1,  # use every available core
    random_state=29,
)

# Keep `fit` as the cell's final expression so the fitted estimator is rendered.
bag_clf.fit(xtrain, ytrain)

In [58]:
# Test-set score of the bagging ensemble.
bag_clf.score(xtest, ytest)

0.89

In [59]:
# Cross-validated accuracy of the bagging ensemble, computed on the training
# split only. No `cv` argument is given, so the default splitting is used.
score = cross_val_score(
    estimator=bag_clf,
    X=xtrain,
    y=ytrain,
    scoring="accuracy",
)
score

array([0.9375, 0.925 , 0.925 , 0.925 , 0.875 ])

In [60]:
# Out-of-bag score recorded during fit (available because oob_score=True was set).
bag_clf.oob_score_

0.925

# Random Forests

In [64]:
# Random forest on the moons training split: 500 trees, each capped at
# 15 leaf nodes, built in parallel on all cores.
rf_clf = RandomForestClassifier(
    n_estimators=500,
    max_leaf_nodes=15,
    max_features="sqrt",
    random_state=29,
    n_jobs=-1,
)

# Keep `fit` as the cell's final expression so the fitted estimator is rendered.
rf_clf.fit(xtrain, ytrain)

In [65]:
# Test-set score of the random forest.
rf_clf.score(xtest, ytest)

0.88

In [67]:
# Iris data set loaded as pandas objects (as_frame=True), so estimators fitted
# on `iris.data` see named feature columns. `load_iris` is imported with the
# other dependencies in the notebook's top import cell.
iris = load_iris(as_frame=True)

In [68]:
# Random forest fitted on the full iris data set; it is used here to inspect
# feature importances rather than to score a held-out split.
rnd_clf = RandomForestClassifier(
    n_estimators=500,
    max_leaf_nodes=15,
    max_features="sqrt",
    random_state=29,
    n_jobs=-1,
)

# Keep `fit` as the cell's final expression so the fitted estimator is rendered.
rnd_clf.fit(iris.data, iris.target)

In [74]:
# Pair every importance score with its feature name, then rank the features
# from most to least important. The parenthesised chain stays the cell's last
# expression so the resulting table is displayed.
(
    pd.DataFrame(
        data=zip(rnd_clf.feature_importances_, rnd_clf.feature_names_in_),
        columns=["importance", "feature"],
    )
    .sort_values(by="importance", ascending=False)
)

Unnamed: 0,importance,feature
3,0.441105,petal width (cm)
2,0.433965,petal length (cm)
0,0.101977,sepal length (cm)
1,0.022953,sepal width (cm)


# Boosting

In [77]:
# AdaBoost over depth-5 decision trees: 30 boosting rounds with a 0.5 learning rate.
ada_clf = AdaBoostClassifier(
    estimator=DecisionTreeClassifier(max_depth=5),
    learning_rate=0.5,
    n_estimators=30,
    random_state=29,
)

# Keep `fit` as the cell's final expression so the fitted estimator is rendered.
ada_clf.fit(xtrain, ytrain)

In [78]:
# Test-set score of the AdaBoost ensemble.
ada_clf.score(xtest, ytest)

0.86

In [82]:
# Gradient-boosted trees: up to 300 stages of depth-5 trees at a 0.05 learning
# rate. n_iter_no_change=10 turns on early stopping, so fewer stages may be fitted.
gbct = GradientBoostingClassifier(
    n_estimators=300,
    learning_rate=0.05,
    max_depth=5,
    max_features="sqrt",
    n_iter_no_change=10,
    random_state=29,
)

# Keep `fit` as the cell's final expression so the fitted estimator is rendered.
gbct.fit(xtrain, ytrain)

In [83]:
# Test-set score of the gradient boosting model.
gbct.score(xtest, ytest)

0.87

# Stacking

In [85]:
# Stacked ensemble: three base learners feed a random-forest final estimator.
# The SVC is built with probability=True so it can provide class probabilities.
stclf = StackingClassifier(
    estimators=[
        ("lr", LogisticRegression(random_state=29)),
        ("rf", RandomForestClassifier(random_state=29)),
        ("svc", SVC(random_state=29, probability=True)),
    ],
    final_estimator=RandomForestClassifier(random_state=29),
    n_jobs=-1,  # use every available core
)

# Keep `fit` as the cell's final expression so the fitted estimator is rendered.
stclf.fit(xtrain, ytrain)

In [86]:
# Test-set score of the stacked ensemble.
stclf.score(xtest, ytest)

0.88