# Mnist Prediction - Ensemble Models

In [24]:
import sklearn.datasets as dataset
import sklearn.svm as svm
import sklearn.ensemble as ensemble
import sklearn.model_selection as ms
import sklearn.metrics as m
import sklearn.neural_network as nn
import numpy as np

In [2]:
mnist = dataset.fetch_openml('mnist_784', version=1)

In [3]:
X = mnist.data
y = mnist.target

X_train_val, X_test, y_train_val, y_test = ms.train_test_split(X, y, test_size=10000, random_state=42)
X_train, X_validation, y_train, y_validation = ms.train_test_split(X_train_val, y_train_val, test_size=10000, random_state=42)

In [4]:
rf_clf = ensemble.RandomForestClassifier(random_state=42)
ext_clf = ensemble.ExtraTreesClassifier(random_state=42)
svm_clf = svm.LinearSVC(random_state=42)
mlp_clf = nn.MLPClassifier(random_state=42)

In [5]:
# FITTING THE MODEL
rf_clf.fit(X_train, y_train)
ext_clf.fit(X_train, y_train)
svm_clf.fit(X_train, y_train)
mlp_clf.fit(X_train, y_train)

MLPClassifier(random_state=42)

In [16]:
estimators = [rf_clf, ext_clf, svm_clf, mlp_clf]

print('Individual Model Accuracy on Validation DataSet')
for model in estimators:
    print(f'{model.__class__.__name__} : {model.score(X_validation, y_validation) * 100}')

Individual Model Accuracy on Validation DataSet
RandomForestClassifier : 96.92
ExtraTreesClassifier : 97.15
LinearSVC : 86.95
MLPClassifier : 96.0


In [7]:
# ENSEMBLE 

voting_classifier = ensemble.VotingClassifier(estimators=[
                                                ('ran_clf', ensemble.RandomForestClassifier(random_state=42)),
                                                ('ext_clf', ensemble.ExtraTreesClassifier(random_state=42)),
                                                ('svm', svm.LinearSVC(random_state=42)), 
                                                ('mlp_clf', nn.MLPClassifier(random_state=42))
                                                ], voting='hard')
voting_classifier.fit(X_train, y_train)

VotingClassifier(estimators=[('ran_clf',
                              RandomForestClassifier(random_state=42)),
                             ('ext_clf', ExtraTreesClassifier(random_state=42)),
                             ('svm', LinearSVC(random_state=42)),
                             ('mlp_clf', MLPClassifier(random_state=42))])

In [14]:
print('Voting Classifier Model Accuracy on Validation DataSet')
voting_classifier.score(X_validation, y_validation) * 100

Voting Classifier Model Accuracy on Validation DataSet


97.09

In [15]:
print('Voting Classifier Model Accuracy on Testing DataSet')
voting_classifier.score(X_test, y_test) * 100

Voting Classifier Model Accuracy on Testing DataSet


96.82

In [18]:
print('Individual Model Accuracy on Testing DataSet')
for model in estimators:
    print(f'{model.__class__.__name__} : {model.score(X_test, y_test) * 100}')

Individual Model Accuracy on Testing DataSet
RandomForestClassifier : 96.45
ExtraTreesClassifier : 96.91
LinearSVC : 87.07000000000001
MLPClassifier : 96.14


In [19]:
# Lets remove SVM from the voting classifier model and re-train the whole model 
# ENSEMBLE 

voting_classifier_new = ensemble.VotingClassifier(estimators=[
                                                ('ran_clf', ensemble.RandomForestClassifier(random_state=42)),
                                                ('ext_clf', ensemble.ExtraTreesClassifier(random_state=42)),
                                                ('mlp_clf', nn.MLPClassifier(random_state=42))
                                                ], voting='hard')
voting_classifier_new.fit(X_train, y_train)

VotingClassifier(estimators=[('ran_clf',
                              RandomForestClassifier(random_state=42)),
                             ('ext_clf', ExtraTreesClassifier(random_state=42)),
                             ('mlp_clf', MLPClassifier(random_state=42))])

In [22]:
print('New Voting Classiifer Model Accuracy on Validation Dataset')
voting_classifier_new.score(X_validation, y_validation)

New Voting Classiifer Model Accuracy on Validation Dataset


0.9731

In [23]:
print('New Voting Classiifer Model Accuracy on Testing Dataset')
voting_classifier_new.score(X_test, y_test)

New Voting Classiifer Model Accuracy on Testing Dataset


0.9713

# Stacking Ensemble

In [44]:
row_lenght_X_validation = len(X_validation)
col_lenght = len(estimators)

X_validation_predictions = np.empty((row_lenght_X_validation, col_lenght), dtype=np.float32)
print(X_validation_predictions.shape)
print(X_validation_predictions)

(10000, 4)
[[-6.4579905e+34  7.0905702e-43  6.0616614e-31  7.1045832e-43]
 [ 9.8989125e-41  0.0000000e+00            nan            nan]
 [ 3.1949605e-43  0.0000000e+00  0.0000000e+00  0.0000000e+00]
 ...
 [ 1.4012985e-45  0.0000000e+00  1.4012985e-45  0.0000000e+00]
 [ 1.4012985e-45  0.0000000e+00  1.4012985e-45  0.0000000e+00]
 [ 1.4012985e-45  0.0000000e+00  1.4012985e-45  0.0000000e+00]]


In [46]:
estimators

[RandomForestClassifier(random_state=42),
 ExtraTreesClassifier(random_state=42),
 LinearSVC(random_state=42),
 MLPClassifier(random_state=42)]

In [64]:
for index, estimator in enumerate(estimators):
    X_validation_predictions[:, index] = estimator.predict(X_validation) # storing the prediction of the validation dataset

In [65]:
X_validation_predictions

array([[5., 5., 5., 5.],
       [8., 8., 8., 8.],
       [2., 2., 2., 2.],
       ...,
       [7., 7., 7., 7.],
       [6., 6., 6., 6.],
       [7., 7., 7., 7.]], dtype=float32)

In [66]:
rnd_forest_blender = ensemble.RandomForestClassifier(random_state=42, n_estimators=200, oob_score=True)
rnd_forest_blender.fit(X_validation_predictions, y_validation)

RandomForestClassifier(n_estimators=200, oob_score=True, random_state=42)

In [67]:
rnd_forest_blender.oob_score_

0.9689

In [68]:
lenght_X_test = len(X_test)

X_test_prediction = np.empty((lenght_X_test, col_lenght), dtype=np.float32)

for index, estimator in enumerate(estimators):
    X_test_prediction[:, index] = estimator.predict(X_test)

In [69]:
y_prediction = rnd_forest_blender.predict(X_test_prediction)

In [71]:
m.accuracy_score(y_test, y_prediction) * 100

96.76