In [78]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

# Load MNIST from OpenML. as_frame=False pins the result to NumPy arrays:
# newer scikit-learn releases otherwise return pandas objects for this
# dataset, which changes how the data is indexed by the code that follows.
mnist = fetch_openml('mnist_784', version=1, cache=True, as_frame=False)
# Cast the labels to int8 so the classifiers work with integer classes.
mnist.target = mnist.target.astype(np.int8)
# sort_by_target is defined outside this cell; presumably it reorders the
# samples in place (its return value is ignored here) -- TODO confirm.
sort_by_target(mnist)

# Hold out 10,000 samples as the test set, then 10,000 of the remainder as
# the validation set. Fixed seeds keep both splits reproducible.
X_train_val, X_test, y_train_val, y_test = train_test_split(
    mnist.data, mnist.target, test_size=10000, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val, y_train_val, test_size=10000, random_state=42)

In [79]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.svm import LinearSVC

# Build and fit the three base classifiers on the training split.
# Every model uses the same seed; fit() returns the estimator itself,
# so construction and training are chained per model.
forest_clf = RandomForestClassifier(random_state=42).fit(X_train, y_train)
extra_clf = ExtraTreesClassifier(random_state=42).fit(X_train, y_train)
svc_clf = LinearSVC(random_state=42).fit(X_train, y_train)



In [80]:
# Validation accuracy of each individually fitted base classifier.
[clf.score(X_val, y_val) for clf in (forest_clf, extra_clf, svc_clf)]

[0.97, 0.9719, 0.8327]

In [81]:
from sklearn.ensemble import VotingClassifier

# Majority-vote ensemble over the three base classifiers.
voting_clf = VotingClassifier(
    estimators=[
        ('forest', forest_clf),
        ('extra', extra_clf),
        ('svc', svc_clf),
    ],
    voting='hard',
)
voting_clf.fit(X_train, y_train)

# Ensemble accuracy on the validation split (displayed as the cell output).
voting_clf.score(X_val, y_val)



0.9703

In [82]:
# Test accuracy of each fitted sub-estimator held by the voting ensemble.
[fitted.score(X_test, y_test) for fitted in voting_clf.estimators_]

[0.969, 0.9698, 0.8396]

In [83]:
# Accuracy of the hard-voting ensemble on the held-out test split.
voting_clf.score(X_test, y_test)

0.969

In [84]:
# Validation accuracy of the ensemble, re-displayed here (presumably to
# compare it against the test score).
voting_clf.score(X_val, y_val)

0.9703

In [95]:
# Feature matrix for the blender: one row per validation sample, one column
# per fitted sub-estimator, holding that estimator's predicted class.
X_val_predictions = np.column_stack(
    [fitted.predict(X_val) for fitted in voting_clf.estimators_]
).astype(np.float32)

In [96]:
# Blender: a random forest trained on the base estimators' validation-set
# predictions. oob_score=True requests an out-of-bag accuracy estimate.
forest_blendy = RandomForestClassifier(
    n_estimators=200,
    oob_score=True,
    random_state=42,
)

forest_blendy.fit(X_val_predictions, y_val)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=200,
                       n_jobs=None, oob_score=True, random_state=42, verbose=0,
                       warm_start=False)

In [97]:
# Out-of-bag accuracy estimate of the blender (available because the forest
# was built with oob_score=True).
forest_blendy.oob_score_

0.9717

In [101]:
# Blender inputs for the test split: one row per test sample, one column per
# fitted sub-estimator, holding that estimator's predicted class.
# The matrix is built directly from the X_test predictions so its row count
# always matches X_test; sizing it from len(X_val) was only correct because
# both splits happen to contain the same number of samples.
X_test_predictions = np.column_stack(
    [estimator.predict(X_test) for estimator in voting_clf.estimators_]
).astype(np.float32)

In [102]:
# Accuracy of the blender on the test split, using the base estimators'
# test-set predictions as its input features.
forest_blendy.score(X_test_predictions, y_test)

0.969