In [88]:
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split

# Two interleaving half-moons with noise added. The generator is seeded so the
# data set is identical on every Restart & Run All.
dataset = make_moons(n_samples=5000, noise=0.3, random_state=42)

# Hold out 20% of the samples for evaluation; the split is seeded as well so
# the accuracy figures reported further down are reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    dataset[0], dataset[1], test_size=0.2, random_state=42
)

In [42]:
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

# Base learners, each seeded so that refitting yields the same model.
log_clf = LogisticRegression(solver="lbfgs", random_state=42)
rnd_clf = RandomForestClassifier(n_estimators=100, random_state=42)
# probability=True lets the SVM expose predict_proba, which soft voting needs.
svm_clf = SVC(gamma="scale", probability=True, random_state=42)

# Soft-voting ensemble over the random forest and the SVM only; the logistic
# regression model is deliberately not part of the vote.
voting_clf = VotingClassifier(
    estimators=[("rf", rnd_clf), ("svc", svm_clf)],
    voting="soft",
)

In [43]:
from sklearn.metrics import accuracy_score

# Fit each model on the training split and print its class name followed by
# its accuracy on the held-out test split.
for clf in (log_clf, rnd_clf, svm_clf, voting_clf):
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    test_accuracy = accuracy_score(y_test, y_pred)
    print(type(clf).__name__, test_accuracy)

LogisticRegression 0.8573
RandomForestClassifier 0.9055
SVC 0.9178
VotingClassifier 0.9146


In [91]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

# Bagging ensemble of 500 decision trees. Each tree is trained on a bootstrap
# sample of 100 training instances (max_samples is an absolute count here).
# oob_score=True additionally scores the ensemble on the instances each tree
# did not see, exposed afterwards as bag_clf.oob_score_.
# random_state pins the bootstrap draws so the scores are reproducible.
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=100,
    bootstrap=True,
    oob_score=True,
    n_jobs=-1,
    random_state=42,
)
bag_clf.fit(X_train, y_train)
y_pred = bag_clf.predict(X_test)

# Accuracy on the held-out test split (displayed as the cell output).
accuracy_score(y_test, y_pred)

0.918

In [71]:
# Out-of-bag score of the fitted bagging ensemble; this attribute only exists
# when the ensemble was constructed with oob_score=True and has been fitted.
bag_clf.oob_score_

0.910325

In [90]:
from sklearn.ensemble import RandomForestClassifier
# Random forest of 500 trees, each limited to 16 leaf nodes. Seeded (like the
# forest defined earlier in the notebook) so the reported accuracy is
# reproducible across runs.
rnd_clf = RandomForestClassifier(
    n_estimators=500,
    max_leaf_nodes=16,
    n_jobs=-1,
    random_state=42,
)
rnd_clf.fit(X_train, y_train)

y_pred = rnd_clf.predict(X_test)
# Accuracy on the held-out test split (displayed as the cell output).
accuracy_score(y_test, y_pred)

0.914

In [80]:
# Bagging ensemble that mirrors a RandomForestClassifier with 500 trees and
# max_leaf_nodes=16. A random forest searches for the best split among a random
# subset of features at each node, which is what max_features="sqrt" gives the
# base tree. splitter="random" would instead draw random thresholds, which is
# the Extra-Trees strategy rather than the random-forest one.
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(max_features="sqrt", max_leaf_nodes=16),
    n_estimators=500,
    max_samples=1.0,
    bootstrap=True,
    n_jobs=-1,
)


In [94]:
from sklearn.ensemble import AdaBoostClassifier

# AdaBoost over 500 decision stumps (depth-1 trees) with a shrinkage of 0.3.
# The `algorithm` argument is intentionally left at the library default:
# "SAMME.R" is deprecated in recent scikit-learn releases and no longer
# accepted by the newest ones, while older releases already default to it,
# so omitting the argument keeps this cell runnable across versions.
# random_state makes the boosted ensemble reproducible.
ada_clf = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=1),
    n_estimators=500,
    learning_rate=0.3,
    random_state=42,
)
ada_clf.fit(X_train, y_train)

y_pred = ada_clf.predict(X_test)
# Accuracy on the held-out test split (displayed as the cell output).
accuracy_score(y_test, y_pred)

0.912

In [92]:
from sklearn.model_selection import GridSearchCV
# Grid of AdaBoost settings to try: 100..600 boosting rounds in steps of 100,
# crossed with learning rates from 0.1 to 0.8.
param_grid = [
    {
        "n_estimators": list(range(100, 601, 100)),
        "learning_rate": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8],
    }
]
# Exhaustive search with 5-fold cross-validation on the training split,
# parallelised across all available cores.
gs = GridSearchCV(estimator=ada_clf, param_grid=param_grid, cv=5, n_jobs=-1)
gs.fit(X_train, y_train)

In [93]:
# Parameter combination that achieved the best mean cross-validated score in
# the grid search above.
gs.best_params_

{'learning_rate': 0.3, 'n_estimators': 500}

In [None]:
# Gradient Boosting
import numpy as np

# Synthetic 1-D regression problem: a parabola y = 3x^2 observed with Gaussian
# noise, for 100 points drawn uniformly from [-0.5, 0.5).
np.random.seed(42)  # seed the legacy global RNG so the sample is repeatable
X = np.random.rand(100, 1) - 0.5
noise = 0.05 * np.random.randn(100)
y = 3 * X[:, 0] ** 2 + noise


from sklearn.tree import DecisionTreeRegressor

# First regressor of the sequence: a shallow (depth-2) tree fitted directly on
# the targets. Seeded so the fitted tree is reproducible.
tree_reg1 = DecisionTreeRegressor(
    random_state=42,
    max_depth=2,
)
tree_reg1.fit(X, y)