In [3]:
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
# Seed the data generator as well as the split: without random_state on
# make_moons the dataset itself changes on every Restart & Run All, so none of
# the accuracy figures reported further down could be reproduced.
X, y = make_moons(n_samples=10000, noise=0.4, random_state=42)
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

### VotingClassifier

In [25]:
from sklearn.ensemble import RandomForestClassifier , VotingClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression

# Three heterogeneous base learners that will vote together.
rnd_clf = RandomForestClassifier()
log_clf = LogisticRegression()
# probability=True gives SVC a predict_proba, which soft voting averages;
# a plain SVC() is enough when the ensemble uses voting="hard" instead.
svc_clf = SVC(probability=True)

# Soft voting: average the predicted class probabilities of the three models.
voting_clf = VotingClassifier(
    estimators=[
        ("rnd", rnd_clf),
        ("svc", svc_clf),
        ("log", log_clf),
    ],
    voting="soft",
)
voting_clf.fit(x_train, y_train)


In [26]:
from sklearn.metrics import accuracy_score
# Refit each individual model and the ensemble on the training split, then
# print one "<ClassName> <test accuracy>" line per model.
for clf in (log_clf, rnd_clf, svc_clf, voting_clf):
    # fit() returns the estimator itself, so training and prediction chain.
    y_pred = clf.fit(x_train, y_train).predict(x_test)
    test_accuracy = accuracy_score(y_test, y_pred)
    print(type(clf).__name__, test_accuracy)



LogisticRegression 0.8276
RandomForestClassifier 0.8452
SVC 0.8584
VotingClassifier 0.856


### BaggingClassifier

In [50]:
# bagging 
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
# Bag 500 SVCs, each trained on a bootstrap sample of 100 training rows.
# oob_score=True additionally scores the ensemble on the rows each sample
# left out, giving a validation estimate without touching the test split.
bag_clf = BaggingClassifier(
    SVC(),
    n_estimators=500,
    max_samples=100,
    bootstrap=True,
    oob_score=True,
    n_jobs=-1,
)
bag_clf.fit(x_train, y_train)
bag_clf.oob_score_

0.8550666666666666

In [51]:
# Test-split accuracy of the bagged ensemble, to compare with its OOB score.
y_preds = bag_clf.predict(x_test)
accuracy_score(y_true=y_test, y_pred=y_preds)

0.85

### RandomForestClassifier

In [2]:
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
# Fit a 500-tree forest on the whole iris dataset purely to read off how much
# each of the four measurements contributes to the trees' decisions.
iris = load_iris()
rnd_clf = RandomForestClassifier(n_estimators=500, n_jobs=-1)
rnd_clf.fit(iris.data, iris.target)

# One "<feature name> <importance>" line per feature.
for name, score in zip(iris.feature_names, rnd_clf.feature_importances_):
    print(name, score)
    

sepal length (cm) 0.09023069023172905
sepal width (cm) 0.02301541655738096
petal length (cm) 0.4247040853078087
petal width (cm) 0.4620498079030814


### Boosting
- AdaBoost
- Gradient Boosting

### AdaBoost

In [4]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
# AdaBoost over 200 decision stumps (depth-1 trees) with a 0.5 learning rate.
# NOTE(review): algorithm="SAMME.R" is deprecated in recent scikit-learn
# releases; confirm it is still accepted by the installed version.
stump = DecisionTreeClassifier(max_depth=1)
ada_clf = AdaBoostClassifier(
    stump,
    n_estimators=200,
    learning_rate=0.5,
    algorithm="SAMME.R",
)
ada_clf.fit(x_train, y_train)



In [6]:
from sklearn.metrics import accuracy_score
# Test-split accuracy of the boosted stumps.
ada_ypreds = ada_clf.predict(x_test)
accuracy_score(y_true=y_test, y_pred=ada_ypreds)

0.8556

### Gradient Boosting

In [None]:
# from sklearn.ensemble import GradientBoostingRegressor
# gbrt = GradientBoostingRegressor(max_depth=2, n_estimators=3, learning_rate=1.0)
# gbrt.fit(X, y)

### XGBoost

In [None]:
from xgboost import XGBRegressor

# Baseline: gradient-boosted regression trees fit directly on the 0/1 labels.
xgb_reg = XGBRegressor()
xgb_reg.fit(x_train, y_train)
xgb_ypred = xgb_reg.predict(x_test)
# A regressor returns continuous scores, and accuracy_score raises ValueError
# when binary labels are compared with continuous predictions, so threshold
# the scores at 0.5 to obtain class labels first. The result is printed
# because a mid-cell expression is otherwise silently discarded.
xgb_accuracy = accuracy_score(y_test, (xgb_ypred >= 0.5).astype(int))
print("XGBRegressor", xgb_accuracy)

# XGBoost also offers several nice features, such as automatically taking care
# of early stopping. early_stopping_rounds is set on the estimator: passing it
# to fit() has been deprecated since xgboost 1.6 and is rejected by newer
# releases (NOTE(review): confirm against the installed xgboost version).
# Stopping is monitored on a validation split carved out of the training data
# so the test set is not used to pick the number of trees.
x_fit, x_val, y_fit, y_val = train_test_split(
    x_train, y_train, test_size=0.2, random_state=42
)
xgb_reg.set_params(early_stopping_rounds=2)
xgb_reg.fit(x_fit, y_fit, eval_set=[(x_val, y_val)])
y_pred = xgb_reg.predict(x_test)