In [13]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml

# Fetch the 'mnist_784' dataset from OpenML. Despite its name, `df` is the
# object returned by fetch_openml (requested with as_frame=False), not a
# pandas DataFrame; features and labels are read from .data / .target.
df = fetch_openml('mnist_784', as_frame=False)

X = df.data
y = df.target

# Positional split into training, validation and test sets:
#   rows [0, 50000)      -> training   (50,000 samples)
#   rows [50000, 60000)  -> validation (10,000 samples)
#   rows [60000, end)    -> test       (10,000 samples per the original note;
#                                       assumes the dataset has 70,000 rows)
X_train, y_train = X[:50000], y[:50000]
X_valid, y_valid = X[50000:60000], y[50000:60000]
X_test, y_test = X[60000:], y[60000:]

In [14]:
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.svm import SVC

# Build and train the three base classifiers on the training split.
#
# n_estimators is the hyperparameter for the number of decision trees.
# Increasing it improves the classifier's performance, but it also
# increases the computational cost.
#
# fit() returns the estimator itself, so construction and training are
# chained for the two tree ensembles.
randomForest_clf = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

extraTrees_clf = ExtraTreesClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# The SVM is fitted as the final bare expression so the cell still displays
# the fitted estimator as its output.
svm_clf = SVC(kernel='rbf', gamma='scale')
svm_clf.fit(X_train, y_train)

In [23]:
# Measure the accuracy of each individual classifier.
from sklearn.metrics import accuracy_score

# Predictions are kept under their own names so later cells can reuse them.
y_pred_rf = randomForest_clf.predict(X_test)
y_pred_et = extraTrees_clf.predict(X_test)
y_pred_svm = svm_clf.predict(X_test)

# NOTE(review): the scores below are computed on the test split, while the
# validation split (X_valid / y_valid) is not used in this cell. Comparing
# models is conventionally done on validation data -- confirm this is intended.
for _label, _pred in (
    ('Random Forest accuracy:', y_pred_rf),
    ('Extra Trees accuracy:', y_pred_et),
    ('SVM accuracy:', y_pred_svm),
):
    print(_label, accuracy_score(y_test, _pred))

# Looking at these results, the SVM is the most accurate of the three:
# Random Forest: 0.968
# Extra Trees: 0.9703
# SVM: 0.9785

Random Forest accuracy: 0.968
Extra Trees accuracy: 0.9703
SVM accuracy: 0.9785


In [24]:
from sklearn.ensemble import VotingClassifier

# Soft voting was tried and fails with:
#   "predict_proba is not available when  probability=False"
# (svm_clf was created without probability=True), so hard voting is used.

# The (name, estimator) pairs that make up the ensemble.
ensemble_members = [
    ('rf', randomForest_clf),
    ('et', extraTrees_clf),
    ('svm', svm_clf),
]

# Combine the three classifiers into a hard-voting (majority vote) ensemble.
voting_clf = VotingClassifier(estimators=ensemble_members, voting='hard')

# Train the ensemble on the training split.
# NOTE(review): per the scikit-learn docs, VotingClassifier.fit fits clones of
# the given estimators, so the earlier individual fits are presumably not
# reused and all three models are trained again here -- confirm.
voting_clf.fit(X_train, y_train)

In [25]:
# Result of the model we ensembled with hard voting.
y_pred_voting = voting_clf.predict(X_test)
voting_accuracy = accuracy_score(y_test, y_pred_voting)

print('Voting accuracy:', voting_accuracy)

# Oddly, when the same comparison was run on the validation split, the SVM
# also came out best and the ensemble was worse.

# Looking at this result, the SVM alone did better than the ensemble?
# Voting: 0.9737
#
# NOTE(review): a possible explanation -- with hard voting over three models,
# the two tree ensembles can out-vote the SVM whenever they agree, and their
# mistakes are presumably correlated. Not verified here.

Voting accuracy: 0.9737
