In [48]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import fetch_openml

# Download MNIST from OpenML as plain NumPy arrays (as_frame=False).
mnist = fetch_openml('mnist_784', as_frame=False, parser='auto')
X, y = mnist.data, mnist.target

# Positional split: first 50,000 rows for training, the next 10,000 for
# validation, and everything from row 60,000 onward for testing.
X_train, y_train = X[:50000], y[:50000]
X_val, y_val = X[50000:60000], y[50000:60000]
X_test, y_test = X[60000:], y[60000:]

In [49]:
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.neural_network import MLPClassifier

# Base learners: two tree ensembles and a multi-layer perceptron.
rnd_fst_clf = RandomForestClassifier(n_estimators=100)
extra_tree_clf = ExtraTreesClassifier(n_estimators=100)
mlp_clf = MLPClassifier()
estimators = [rnd_fst_clf, extra_tree_clf, mlp_clf]

# NOTE(review): this buffer is allocated but never filled or read in the
# visible cells; kept in case a later cell relies on the name.
blender_train_set = np.empty(shape=(len(X_val), len(estimators)), dtype=object)

# Fit every base learner on the training split only, so the validation split
# stays unseen.
for estimator in estimators:
    print('Training estimator: ', estimator)
    estimator.fit(X_train, y_train)

# Validation accuracy of each base learner, in the same order as `estimators`.
[clf.score(X_val, y_val) for clf in estimators]

Training estimator:  RandomForestClassifier()
Training estimator:  ExtraTreesClassifier()
Training estimator:  MLPClassifier()


[0.9719, 0.9745, 0.9658]

In [50]:
from sklearn.ensemble import StackingClassifier, VotingClassifier

# Named (label, estimator) pairs, as required by the ensemble meta-estimators.
est = [
    ('rnd_fst', rnd_fst_clf),
    ('extra', extra_tree_clf),
    ('mlp', mlp_clf),
]

# Voting ensemble over the three base learners (default voting settings).
voting_clf = VotingClassifier(est)
voting_clf.fit(X_train, y_train)

# The fitted sub-estimators in `estimators_` work on label-encoded targets, so
# they must be scored against encoded labels. Use the ensemble's own fitted
# LabelEncoder (`le_`) rather than `astype(np.int64)`: the cast is only right
# when each string label happens to equal its encoded index (true for the
# digits '0'..'9', but not for label sets in general).
y_val_encoded = voting_clf.le_.transform(y_val)

# Displayed result: (ensemble accuracy, [accuracy of each fitted sub-estimator]).
voting_clf.score(X_val, y_val), [clf.score(X_val, y_val_encoded) for clf in voting_clf.estimators_]

0.9765

In [52]:
# Encode the test labels once with the ensemble's fitted LabelEncoder. The
# sub-estimators in `estimators_` predict encoded labels, and `le_` gives the
# correct mapping for any label set, whereas `astype(np.int64)` only works
# because the MNIST labels are the digit strings '0'..'9'.
y_test_encoded = voting_clf.le_.transform(y_test)

# Displayed result: (ensemble test accuracy, [test accuracy of each sub-estimator]).
voting_clf.score(X_test, y_test), [clf.score(X_test, y_test_encoded) for clf in voting_clf.estimators_]

(0.9735, [0.9684, 0.9715, 0.964])

In [56]:
# Blender training features: one column per base estimator, holding that
# estimator's predicted label for every validation sample. Stored as an object
# array so the predicted labels are kept as-is.
blender_pred_set = np.column_stack(
    [clf.predict(X_val) for clf in estimators]
).astype(object)

blender_pred_set

array([['3', '3', '3'],
       ['8', '8', '8'],
       ['6', '6', '6'],
       ...,
       ['5', '5', '5'],
       ['6', '6', '6'],
       ['8', '8', '8']], dtype=object)

In [57]:
# Blender: a random forest trained on the base estimators' validation
# predictions. oob_score=True yields an out-of-bag accuracy estimate without
# needing another held-out set.
rnd_forest_blender = RandomForestClassifier(
    n_estimators=200,
    oob_score=True,
).fit(blender_pred_set, y_val)

rnd_forest_blender.oob_score_

0.9707

In [58]:
from sklearn.metrics import accuracy_score

# Blender test features: one column of predicted labels per base estimator,
# laid out the same way as the features the blender was trained on.
blender_test_pred = np.column_stack(
    [clf.predict(X_test) for clf in estimators]
).astype(object)

# Feed the stacked predictions to the blender and measure its test accuracy.
y_pred = rnd_forest_blender.predict(blender_test_pred)
accuracy_score(y_test, y_pred)

0.9682

In [59]:
# Train + validation rows together (the first 60,000 samples).
X_train_full = X[:60_000]
y_train_full = y[:60_000]

# Same base learners and blender, but handed to scikit-learn's
# StackingClassifier instead of the hand-built blending cells.
stack_clf = StackingClassifier(estimators=est, final_estimator=rnd_forest_blender)
stack_clf.fit(X_train_full, y_train_full)
stack_clf.score(X_test, y_test)

0.9773