In [1]:
import numpy as np

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [3]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.svm import LinearSVC
from sklearn.neural_network import MLPClassifier

In [4]:
# Seed NumPy's global random generator so that NumPy-driven randomness is repeatable across runs.
np.random.seed(42)

In [5]:
from sklearn.datasets import fetch_openml

# Fetch version 1 of the 'mnist_784' dataset from OpenML; as_frame=False asks for
# array-style data rather than a pandas DataFrame.
mnist = fetch_openml('mnist_784', version=1, as_frame=False)
# Store the labels as unsigned 8-bit integers.
# NOTE(review): presumably the labels arrive from OpenML as strings — confirm.
mnist.target = mnist.target.astype(np.uint8)

In [6]:
# Bind the feature matrix and the label vector to short names.
X, y = mnist['data'], mnist['target']

In [7]:
# Sanity check: the feature matrix should be 2-D (samples x features).
X.shape

(70000, 784)

In [8]:
# Sanity check: there should be exactly one label per row of X.
y.shape

(70000,)

Applying an ensemble of models to the MNIST data, then comparing the individual classifiers' performance to that of the ensemble

In [9]:
# Split the X / y arrays bound above (the same objects as mnist.data / mnist.target)
# in two stages: first hold out 10,000 samples for the final test, then hold out a
# further 10,000 for validation. The remaining samples form the training set.
# Both splits use a fixed random_state so the partitions are reproducible.
X_train_val, X_test, y_train_val, y_test = train_test_split(
    X, y, test_size=10000, random_state=42
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val, y_train_val, test_size=10000, random_state=42
)

In [10]:
# Initialize the four base classifiers (random forest, extra-trees, linear SVM, MLP),
# each with the same fixed random_state for reproducibility.
rf_clf = RandomForestClassifier(n_estimators=100, random_state=42)
ext_clf = ExtraTreesClassifier(n_estimators=100, random_state=42)
# NOTE(review): tol=20 combined with max_iter=100 is an unusually loose stopping rule —
# presumably a training-speed trade-off; confirm it is intentional.
svc_clf = LinearSVC(max_iter = 100, tol=20, random_state=42)
mlp_clf = MLPClassifier(random_state=42)

In [11]:
from sklearn.calibration import CalibratedClassifierCV

# Wrap the linear SVC in a sigmoid calibrator so that it can output class probabilities.
# NOTE(review): presumably needed because LinearSVC itself has no predict_proba — confirm.
calibrated_linear_svc = CalibratedClassifierCV(svc_clf, method='sigmoid')

In [12]:
# Fit every base classifier on the training split only; the validation and test
# splits are not used here. The calibrated SVC wrapper is not part of this list,
# so this loop does not fit it.
estimators = [rf_clf, ext_clf, svc_clf, mlp_clf]
for estimator in estimators:
    # Printing the estimator's repr doubles as a simple progress log.
    print('Training the estimator: ', estimator)
    estimator.fit(X_train, y_train)

Training the estimator:  RandomForestClassifier(random_state=42)
Training the estimator:  ExtraTreesClassifier(random_state=42)
Training the estimator:  LinearSVC(max_iter=100, random_state=42, tol=20)
Training the estimator:  MLPClassifier(random_state=42)


In [13]:
# Score each fitted classifier on the validation split; results are in the same
# order as `estimators` (random forest, extra-trees, linear SVC, MLP).
[estimator.score(X_val, y_val) for estimator in estimators]

[0.9692, 0.9715, 0.859, 0.9577]

Clearly, the linear SVC is the least accurate of the four classifiers on the validation set (0.859).

Let's try hard voting.

In [15]:
from sklearn.ensemble import VotingClassifier

In [16]:
# Combine the four base classifiers into one ensemble; voting='hard' selects
# majority voting on the predicted class labels.
voting_clf = VotingClassifier(estimators = [('rf', rf_clf), ('ext', ext_clf), ('svc', svc_clf), ('mlp', mlp_clf)], voting='hard')

In [17]:
# Fit the hard-voting ensemble on the training split.
voting_clf.fit(X_train, y_train)

VotingClassifier(estimators=[('rf', RandomForestClassifier(random_state=42)),
                             ('ext', ExtraTreesClassifier(random_state=42)),
                             ('svc',
                              LinearSVC(max_iter=100, random_state=42, tol=20)),
                             ('mlp', MLPClassifier(random_state=42))])

In [18]:
# Validation score of the hard-voting ensemble, to compare with the individual classifiers.
voting_clf.score(X_val, y_val)

0.9703

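Hard voting (0.9703) scores better than the random forest, the linear SVC and the MLP, but slightly below the best single estimator, the extra-trees classifier (0.9715).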

In [19]:
# Soft-voting ensemble: the calibrated SVC wrapper takes the place of the raw
# LinearSVC so that every member can supply class probabilities.
voting_clf_soft = VotingClassifier(estimators = [('rf', rf_clf), ('ext', ext_clf), ('svc_calib', calibrated_linear_svc), ('mlp', mlp_clf)], voting='soft')
voting_clf_soft.fit(X_train, y_train)
# Validation score of the soft-voting ensemble.
voting_clf_soft.score(X_val, y_val)

0.9685

Soft voting (0.9685) performs worse than hard voting (0.9703), and even worse than the random forest or the extra-trees classifier on their own.

Let's remove the Linear SVC and see if the hard voting can perform better

In [20]:
# Set the 'svc' entry of the ensemble's estimator list to None to mark it as removed.
# This only updates the configuration; the ensemble is not refitted here.
# NOTE(review): recent scikit-learn releases document 'drop' (rather than None) for
# this purpose — confirm against the installed version.
voting_clf.set_params(svc=None)

VotingClassifier(estimators=[('rf', RandomForestClassifier(random_state=42)),
                             ('ext', ExtraTreesClassifier(random_state=42)),
                             ('svc', None),
                             ('mlp', MLPClassifier(random_state=42))])

In [21]:
# Display the ensemble to confirm that the 'svc' slot is now None.
voting_clf

VotingClassifier(estimators=[('rf', RandomForestClassifier(random_state=42)),
                             ('ext', ExtraTreesClassifier(random_state=42)),
                             ('svc', None),
                             ('mlp', MLPClassifier(random_state=42))])

In [22]:
# Remove the fitted LinearSVC from the ensemble's fitted estimators so that it no
# longer takes part in voting when the ensemble is scored (no refit is performed).
# Filtering by type instead of deleting index 2 keeps this cell idempotent: running
# it a second time leaves the list unchanged rather than silently removing the MLP.
voting_clf.estimators_ = [
    fitted
    for fitted in voting_clf.estimators_
    if not isinstance(fitted, LinearSVC)
]

In [23]:
# Validation score of the hard-voting ensemble now that the SVC no longer votes.
voting_clf.score(X_val, y_val)

0.9727

The performance is slightly better than before (0.9727 vs. 0.9703).

Let's see again the soft voting without Linear SVC model

In [24]:
# Switch the already-fitted ensemble to soft voting without refitting it.
# NOTE(review): this relies on every remaining fitted estimator providing
# predict_proba — confirm.
voting_clf.voting='soft'

In [25]:
# Validation score with soft voting over the remaining estimators.
voting_clf.score(X_val, y_val)

0.9658

With the (calibrated) linear SVC included, the previous soft-voting ensemble scored better (0.9685) than soft voting without it (0.9658).

In [26]:
# Switch back to hard voting (the better of the two on the validation split)
# and score the ensemble on the held-out test split.
voting_clf.voting = "hard"
voting_clf.score(X_test, y_test)

0.971

In [27]:
# Test score of each fitted estimator still inside the ensemble, for comparison
# with the ensemble's own test score.
[estimator.score(X_test, y_test) for estimator in voting_clf.estimators_]

[0.9645, 0.9691, 0.9608]

On the test set, hard voting (0.971) is slightly better than the best individual estimator, which in this case is the extra-trees classifier (0.9691).

Training a blender: together with the base classifiers, it forms a stacking ensemble

Let's build a blender and stacking ensemble

In [28]:
# Predicted labels of every base classifier on the validation split; these
# predictions later serve as the blender's input features.
prediction_estimators = [estimator.predict(X_val) for estimator in estimators]

In [29]:
# Inspect the raw predictions: one 1-D label array per base classifier.
prediction_estimators

[array([5, 8, 2, ..., 7, 6, 7], dtype=uint8),
 array([5, 8, 2, ..., 7, 6, 7], dtype=uint8),
 array([5, 8, 3, ..., 7, 6, 7], dtype=uint8),
 array([5, 8, 2, ..., 7, 6, 7], dtype=uint8)]

In [30]:
# Lay the predictions out with one row per validation sample and one column
# per base classifier (the blender's feature matrix).
prediction_estimators_trans = np.column_stack(prediction_estimators)
prediction_estimators_trans

array([[5, 5, 5, 5],
       [8, 8, 8, 8],
       [2, 2, 3, 2],
       ...,
       [7, 7, 7, 7],
       [6, 6, 6, 6],
       [7, 7, 7, 7]], dtype=uint8)

In [31]:
# Mean squared error between the true validation labels and each classifier's
# predicted labels, in the same order as `estimators`.
# NOTE(review): the labels are digit classes, so the squared distance between two
# labels has no natural meaning (confusing 1 with 9 is penalised far more than
# confusing 1 with 2) — accuracy may be the more appropriate comparison.
mse = [mean_squared_error(y_val, y_pred) for y_pred in prediction_estimators]
mse

[0.5287, 0.4827, 2.2303, 0.8029]

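We can see that the lowest mean squared error belongs to the extra-trees classifier. Note that the labels are digit classes, so the MSE is only a rough indicator here; accuracy is the more natural metric.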

In [32]:
# Train the blender on the base classifiers' validation-set predictions; the base
# classifiers were fitted on the training split, so they have not seen these samples.
# bootstrap=True together with oob_score=True makes an out-of-bag score available
# after fitting.
# NOTE(review): the variable is called `rnd_forest_blender`, but the model is an
# ExtraTreesClassifier.
rnd_forest_blender = ExtraTreesClassifier(n_estimators=190, bootstrap=True, oob_score=True, random_state=42)
rnd_forest_blender.fit(prediction_estimators_trans, y_val)

ExtraTreesClassifier(bootstrap=True, n_estimators=190, oob_score=True,
                     random_state=42)

In [33]:
# Out-of-bag score of the blender, computed during fitting.
rnd_forest_blender.oob_score_

0.9678

Let's evaluate the blender on the test set

In [34]:
# Collect every base classifier's predictions on the test split, then arrange them
# as (samples, classifiers) — the same layout the blender was trained on.
prediction_test = [estimator.predict(X_test) for estimator in estimators]
prediction_test_trans = np.column_stack(prediction_test)
prediction_test_trans

array([[8, 8, 8, 8],
       [4, 4, 4, 4],
       [8, 8, 8, 8],
       ...,
       [3, 3, 3, 3],
       [8, 8, 3, 8],
       [3, 3, 3, 3]], dtype=uint8)

In [35]:
# Final stacked prediction: the blender maps the base classifiers' test-set
# predictions to a single label per sample.
y_pred_test =  rnd_forest_blender.predict(prediction_test_trans)

y_pred_test

array([8, 4, 8, ..., 3, 8, 3], dtype=uint8)

In [36]:
from sklearn.metrics import accuracy_score

# Test accuracy of the stacking ensemble. accuracy_score expects the ground-truth
# labels first and the predictions second; the value is the same either way, but
# the documented order avoids surprises if the metric is later swapped for an
# asymmetric one (e.g. precision or recall).
accuracy_score(y_test, y_pred_test)

0.9684

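The stacking ensemble (0.9684 on the test set) doesn't perform as well as the hard-voting classifier (0.971) or the best individual model, the extra-trees classifier (0.9691), although it does beat the random forest (0.9645) and the MLP (0.9608).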