In [83]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
import numpy as np

In [63]:
log_clf = LogisticRegression()
rnd_clf = RandomForestClassifier()
svm_clf = SVC(probability=True)

In [64]:
X, y = make_moons(n_samples=10000, noise=0.3)

In [65]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [66]:
voting_clf = VotingClassifier(
    estimators=[('lr', log_clf), ('rf', rnd_clf), ('svm', svm_clf)], voting='soft'
)

voting_clf.fit(X_train, y_train)

VotingClassifier(estimators=[('lr', LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)), ('rf', RandomF...',
  max_iter=-1, probability=True, random_state=None, shrinking=True,
  tol=0.001, verbose=False))],
         flatten_transform=None, n_jobs=1, voting='soft', weights=None)

In [67]:
from sklearn.metrics import accuracy_score

In [68]:
for clf in (log_clf, rnd_clf, svm_clf, voting_clf):
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(clf.__class__.__name__, accuracy_score(y_test, y_pred))

LogisticRegression 0.8575
RandomForestClassifier 0.8985
SVC 0.9135
VotingClassifier 0.907


In [69]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

In [70]:
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(), n_estimators=500, max_samples=100, bootstrap=True, n_jobs=-1, oob_score=True
)

In [71]:
bag_clf.fit(X_train, y_train)

BaggingClassifier(base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best'),
         bootstrap=True, bootstrap_features=False, max_features=1.0,
         max_samples=100, n_estimators=500, n_jobs=-1, oob_score=True,
         random_state=None, verbose=0, warm_start=False)

In [72]:
bag_clf.oob_score_

0.91187499999999999

In [73]:
Y_pred = bag_clf.predict(X_test)
accuracy_score(y_test, Y_pred)

0.91200000000000003

In [74]:
from sklearn.datasets import fetch_mldata

In [111]:
mnist = fetch_mldata('MNIST original')

X = mnist['data']
y = mnist['target']

In [126]:
X_train, X_test, y_train, y_test = X[:60000], X[60000:], y[:60000], y[60000:]

In [127]:
shuffle_index = np.random.permutation(40000)

In [128]:
X_train, y_train = X[shuffle_index], y_train[shuffle_index]

In [133]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import SGDClassifier
scaler = StandardScaler()


forest_clf = RandomForestClassifier(n_estimators=500, max_leaf_nodes=16, n_jobs=-1)


forest_clf.fit(X_train, y_train)



y_pred = forest_clf.predict(X_test)

accuracy_score(y_test, y_pred)

0.62419999999999998

In [131]:
from sklearn.ensemble import ExtraTreesClassifier


extra_pipeline = Pipeline((
    ('scaler', StandardScaler()),
    ('extra_clf', ExtraTreesClassifier())
))

extra_pipeline.fit(X_train.astype(np.float64), y_train)

y_pred = extra_pipeline.predict(X_test)

accuracy_score(y_test, y_pred)



0.67920000000000003

In [132]:
from sklearn.svm import SVC


svc_pipeline = Pipeline((
    ('scaler', StandardScaler()),
    ('svc_clf', SVC())
))



svc_pipeline.fit(X_train.astype(np.float64), y_train)

y_pred = extra_pipeline.predict(X_test)

accuracy_score(y_test, y_pred)



0.67920000000000003