In [1]:
from sklearn.datasets import fetch_openml

# Download the 784-feature MNIST dataset (version 1) from OpenML.
# as_frame=True is requested explicitly: the split cell further down calls
# .to_numpy() on the partitions, which only exists on pandas objects, so the
# notebook should not depend on whatever the installed scikit-learn's default
# container type happens to be.
mnist = fetch_openml('mnist_784', version=1, as_frame=True)
# Show which fields the returned bunch exposes.
mnist.keys()

dict_keys(['data', 'target', 'frame', 'categories', 'feature_names', 'target_names', 'DESCR', 'details', 'url'])

In [2]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Pull the feature table and the labels out of the dataset bunch; the labels
# are cast to unsigned 8-bit integers in the same step.
X, y = mnist['data'], mnist['target'].astype(np.uint8)
# Sanity check: report (n_samples, n_features).
print(X.shape)

(70000, 784)


In [3]:
# Carve the data into train / validation / test partitions: 10,000 samples are
# held out for testing first, then another 10,000 of the remainder are held out
# for validation.
# random_state is fixed on both calls so that a Restart & Run All yields the
# same partitions (and therefore comparable accuracy figures) every time.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=10000, shuffle=True, random_state=42
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=10000, shuffle=True, random_state=42
)

# Convert every partition to a plain NumPy array. np.asarray yields the same
# array as .to_numpy() for pandas inputs and is a no-op for inputs that are
# already ndarrays, so this cell does not break if X / y were loaded as arrays.
X_train, y_train = np.asarray(X_train), np.asarray(y_train)
X_val, y_val = np.asarray(X_val), np.asarray(y_val)
X_test, y_test = np.asarray(X_test), np.asarray(y_test)

In [4]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.svm import SVC

# Three individual classifiers to compare before ensembling.
# Both tree ensembles draw random bootstrap samples / split candidates, so
# they are seeded to make the validation scores repeatable across runs.
forest = RandomForestClassifier(n_estimators=100, n_jobs=-1, random_state=42)
xtree = ExtraTreesClassifier(n_estimators=100, n_jobs=-1, random_state=42)
svc = SVC()

# Train every model on the training partition only; the validation and test
# partitions stay untouched for evaluation.
forest.fit(X_train, y_train)
xtree.fit(X_train, y_train)
svc.fit(X_train, y_train)

SVC()

In [5]:
# Report validation accuracy for each individually trained classifier, one
# value per line, in the order: random forest, extra trees, SVC.
for model in [forest, xtree, svc]:
    y_pred = model.predict(X_val)
    val_accuracy = accuracy_score(y_val, y_pred)
    print(val_accuracy)

0.9665
0.9687
0.9747


In [13]:
from sklearn.ensemble import VotingClassifier

# Fresh, unfitted base estimators for the hard-voting ensemble (70 trees each
# for the two tree ensembles). The tree ensembles are seeded so the ensemble's
# validation accuracy is repeatable across runs.
forest2 = RandomForestClassifier(n_estimators=70, n_jobs=-1, random_state=42)
xtree2 = ExtraTreesClassifier(n_estimators=70, n_jobs=-1, random_state=42)
svc2 = SVC()

# Hard voting: each base estimator casts one vote (its predicted class) and
# the majority class wins.
ensemble = VotingClassifier(
    estimators=[('forest', forest2), ('Extra tree', xtree2), ('support vector', svc2)],
    voting='hard',
    n_jobs=-1
)

# Fit on the training partition and score on the validation partition.
ensemble.fit(X_train, y_train)
y_pred = ensemble.predict(X_val)
accuracy_score(y_val, y_pred)

0.9716

In [16]:
from sklearn.ensemble import VotingClassifier

# Fresh, unfitted base estimators for the soft-voting ensemble (100 trees each
# for the two tree ensembles). All three are seeded: the tree ensembles are
# randomised, and SVC with probability=True shuffles the data internally when
# calibrating its probability estimates, so its output is only repeatable
# with a fixed random_state.
forest2 = RandomForestClassifier(n_estimators=100, n_jobs=-1, random_state=42)
xtree2 = ExtraTreesClassifier(n_estimators=100, n_jobs=-1, random_state=42)
# probability=True is required so the SVC exposes predict_proba, which soft
# voting needs from every base estimator.
svc2 = SVC(probability=True, random_state=42)

# Soft voting: the predicted class probabilities of the base estimators are
# averaged and the class with the highest mean probability wins.
ensemble = VotingClassifier(
    estimators=[('forest', forest2), ('Extra tree', xtree2), ('support vector', svc2)],
    voting='soft',
    n_jobs=-1
)

# Fit on the training partition and score on the validation partition.
ensemble.fit(X_train, y_train)
y_pred = ensemble.predict(X_val)
accuracy_score(y_val, y_pred)

0.9747