In [1]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, VotingClassifier

In [2]:
mnist = fetch_openml('mnist_784', version = 1)

In [3]:
X, y = mnist["data"], mnist["target"]

In [4]:
X_train, X_rest, y_train, y_rest = train_test_split(X, y, test_size = 0.2857, random_state = 42)

In [5]:
X_train.shape

(50001, 784)

In [6]:
X_val, X_test, y_val, y_test = train_test_split(X_rest, y_rest, test_size = 0.5, random_state = 42)

In [7]:
X_val.shape

(9999, 784)

In [8]:
X_test.shape

(10000, 784)

In [9]:
rForest = RandomForestClassifier(n_jobs = -1, n_estimators = 200)
extraTrees = ExtraTreesClassifier(n_estimators=200, n_jobs = -1)
svc = SVC(kernel = 'rbf', C = 1.0, probability= True)

In [10]:
classifiers = {rForest: "RandomForestClassifier", extraTrees: "ExtraTreesClassifier", svc: "SVC"}

In [11]:
estimators = [('rf', rForest), ('extree', extraTrees), ('svc', svc)]

In [12]:
trainedClassifiers = []
for estimator in estimators:
    estimator[1].fit(X_train, y_train)
    print("Validation Score of ", estimator[0], " is: ", estimator[1].score(X_val, y_val))
    print("Test Score of ", estimator[0], " is: ", estimator[1].score(X_test, y_test))
    trainedClassifiers.append(estimator[1])

Validation Score of  rf  is:  0.9682968296829683
Test Score of  rf  is:  0.9674
Validation Score of  extree  is:  0.9705970597059705
Test Score of  extree  is:  0.9711
Validation Score of  svc  is:  0.9767976797679768
Test Score of  svc  is:  0.9761


In [13]:
voteEnsemble = VotingClassifier(estimators=estimators, voting='soft')

In [14]:
voteEnsemble.fit(X_train, y_train)

VotingClassifier(estimators=[('rf',
                              RandomForestClassifier(n_estimators=200,
                                                     n_jobs=-1)),
                             ('extree',
                              ExtraTreesClassifier(n_estimators=200,
                                                   n_jobs=-1)),
                             ('svc', SVC(probability=True))],
                 voting='soft')

In [15]:
voteEnsemble.score(X_val, y_val)

0.9777977797779778

In [16]:
voteEnsemble.score(X_test, y_test)

0.9774