In [2]:
from sklearn.datasets import fetch_openml
# Download the MNIST dataset from OpenML; as_frame=False asks for arrays
# rather than pandas objects.
mnist = fetch_openml(name="mnist_784", version=1, as_frame=False)

In [4]:
# Pull the feature matrix and the labels out of the fetched dataset.
X = mnist["data"]
y = mnist["target"]

In [7]:
from sklearn.model_selection import train_test_split
# Hold out 15% of the data as the test set, then 15% of the remainder as the
# validation set. random_state pins the shuffle so the splits -- and every
# score computed from them -- are identical on each Restart & Run All.
X_dev, X_test, y_dev, y_test = train_test_split(
    X, y, test_size=0.15, random_state=42
)
X_train, X_val, y_train, y_val = train_test_split(
    X_dev, y_dev, test_size=0.15, random_state=42
)

In [8]:
# Sanity-check the split sizes: dev, train, validation, test (one per line).
print(X_dev.shape, X_train.shape, X_val.shape, X_test.shape, sep="\n")

(59500, 784)
(50575, 784)
(8925, 784)
(10500, 784)


In [9]:
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.svm import LinearSVC
from sklearn.neural_network import MLPClassifier

In [10]:
# The four base classifiers, each seeded with random_state=42.
random_forest_clf = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)
extra_trees_clf = ExtraTreesClassifier(
    n_estimators=100,
    random_state=42,
)
# NOTE(review): tol=20 with max_iter=100 is a very loose stopping criterion,
# presumably chosen to keep training fast -- confirm this is intended.
svm_clf = LinearSVC(
    max_iter=100,
    tol=20,
    random_state=42,
)
mlp_clf = MLPClassifier(random_state=42)

In [12]:
# Fit every base classifier on the training split, announcing each one first.
estimators = [
    random_forest_clf,
    extra_trees_clf,
    svm_clf,
    mlp_clf,
]
for clf in estimators:
    print("training the ", clf)
    clf.fit(X_train, y_train)
    

training the  RandomForestClassifier(random_state=42)
training the  ExtraTreesClassifier(random_state=42)
training the  LinearSVC(max_iter=100, random_state=42, tol=20)
training the  MLPClassifier(random_state=42)


In [13]:
# `score` returns (it does not print) each classifier's score on the
# validation split; results are kept in the same order as `estimators`.
scores = [
    clf.score(X_val, y_val)
    for clf in estimators
]

In [14]:
# Validation scores, listed in the same order as `estimators`.
print(scores)

[0.9710924369747899, 0.9738935574229692, 0.8787675070028012, 0.9631372549019608]


In [15]:
from sklearn.ensemble import VotingClassifier
# Pair each classifier with a name: a list of (name, estimator) tuples,
# which is the form passed to VotingClassifier.
named_estimators = list(zip(
    ("random_forest_clf", "extra_trees_clf", "svm_clf", "mlp_clf"),
    (random_forest_clf, extra_trees_clf, svm_clf, mlp_clf),
))

In [16]:
# Combine the named classifiers into a voting ensemble (voting mode left at
# its default) and fit it on the training split. The fit call stays last so
# the notebook shows the fitted estimator's repr.
voting_clf = VotingClassifier(estimators=named_estimators)
voting_clf.fit(X_train, y_train)

VotingClassifier(estimators=[('random_forest_clf',
                              RandomForestClassifier(random_state=42)),
                             ('extra_trees_clf',
                              ExtraTreesClassifier(random_state=42)),
                             ('svm_clf',
                              LinearSVC(max_iter=100, random_state=42, tol=20)),
                             ('mlp_clf', MLPClassifier(random_state=42))])

In [19]:
import numpy as np

# Score of the voting ensemble on the validation split. Jupyter only displays
# the value of a cell's last expression, so the score call must come after
# the import; in the original order the result was silently discarded.
voting_clf.score(X_val, y_val)

In [20]:
# Build the blender's training features: one row per validation sample and
# one column per base classifier, holding that classifier's predicted label.
X_val_predictions = np.empty(
    (X_val.shape[0], len(estimators)),
    dtype=np.float32,
)
for col, clf in enumerate(estimators):
    X_val_predictions[:, col] = clf.predict(X_val)

In [21]:
# Display the validation predictions (rows: samples, columns: one per base estimator).
X_val_predictions

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [5., 5., 5., 5.],
       ...,
       [6., 6., 6., 6.],
       [0., 0., 0., 0.],
       [3., 3., 5., 3.]], dtype=float32)

In [22]:
# Blender: a random forest trained on the base classifiers' validation-set
# predictions. oob_score=True requests an out-of-bag score during fitting.
rnd_forest_blender = RandomForestClassifier(
    n_estimators=200,
    oob_score=True,
    random_state=42,
)
rnd_forest_blender.fit(X_val_predictions, y_val)

RandomForestClassifier(n_estimators=200, oob_score=True, random_state=42)

In [23]:
# Out-of-bag score of the blender (available because it was built with oob_score=True).
rnd_forest_blender.oob_score_

0.9733333333333334

In [29]:
# Allocate the blender's test-set input: one row per test sample, one column
# per base classifier (filled in by the next cell).
X_test_predictions = np.empty(
    (X_test.shape[0], len(estimators)),
    dtype=np.float32,
)

In [30]:
# Fill each column with the corresponding base classifier's test-set predictions.
for col, clf in enumerate(estimators):
    X_test_predictions[:, col] = clf.predict(X_test)

In [31]:
# Display the test predictions (rows: samples, columns: one per base estimator).
X_test_predictions

array([[9., 9., 9., 9.],
       [7., 7., 7., 7.],
       [2., 2., 4., 2.],
       ...,
       [3., 3., 3., 3.],
       [0., 0., 0., 0.],
       [8., 8., 8., 8.]], dtype=float32)

In [32]:
# Final stacked prediction: the blender turns the base classifiers'
# test-set predictions into one label per test sample.
y_pred = rnd_forest_blender.predict(X_test_predictions)

In [33]:
from sklearn.metrics import accuracy_score
# Accuracy of the stacked ensemble on the held-out test split.
print(accuracy_score(y_true=y_test, y_pred=y_pred))

0.9689523809523809
