In [1]:
from sklearn.datasets import fetch_openml
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
X,y = fetch_openml('mnist_784',version=1,return_X_y=True, )

In [3]:
X.shape

(70000, 784)

In [4]:
TRAIN_SET = 50000
VAL_SET = 10000
TEST_SET = 10000

np.random.seed = 42
random_indices = np.random.permutation(len(X))

X = X.iloc[random_indices]
y = y.iloc[random_indices]

X_train = X.iloc[:TRAIN_SET]
y_train = y.iloc[:TRAIN_SET]
X_val = X.iloc[TRAIN_SET:TRAIN_SET+VAL_SET]
y_val = y.iloc[TRAIN_SET:TRAIN_SET+VAL_SET]
X_test = X.iloc[-TEST_SET:]
y_test = y.iloc[-TEST_SET:]

print(len(X_train), len(X_val), len(X_test))

50000 10000 10000


In [5]:
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.svm import SVC

forest = RandomForestClassifier(random_state=42)
extra = ExtraTreesClassifier(random_state=42)
svm = SVC(random_state=42, probability=True)

forest.fit(X_train, y_train)
extra.fit(X_train, y_train)
svm.fit(X_train, y_train)

print("Forest: ", forest.score(X_val, y_val))
print("Extra Trees: ", extra.score(X_val, y_val))
print("SVM: ", svm.score(X_val, y_val))

Forest:  0.9698
Extra Trees:  0.9716
SVM:  0.9788


In [6]:
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score, recall_score, precision_score

voting = VotingClassifier(
    estimators=[('forest', forest), ('extra', extra), ('svm', svm)],
    voting='hard'
)

voting.fit(X_train, y_train)
print("Hard Voting: ", voting.score(X_val, y_val))

y_pred = voting.predict(X_test)
print("Precision: ", precision_score(y_test, y_pred, average="macro"))
print("Recall: ", recall_score(y_test, y_pred, average="macro"))
print("Accuracy: ", accuracy_score(y_test, y_pred))

Hard Voting:  0.9744
Precision:  0.9716650493958658
Recall:  0.9717022272239453
Accuracy:  0.9719


In [7]:
soft_voting = VotingClassifier(
    estimators=[('forest', forest), ('extra', extra), ('svm', svm)],
    voting='soft'
)

soft_voting.fit(X_train, y_train)
print("Soft Voting: ", voting.score(X_val, y_val))

y_pred = soft_voting.predict(X_test)
print("Precision: ", precision_score(y_test, y_pred, average="macro"))
print("Recall: ", recall_score(y_test, y_pred, average="macro"))
print("Accuracy: ", accuracy_score(y_test, y_pred))

Soft Voting:  0.9744
Precision:  0.977991765893132
Recall:  0.9779256318151104
Accuracy:  0.9781
