In [1]:
from sklearn.datasets import fetch_openml

# This library contains images of 28x28 pixels.
mnist = fetch_openml('mnist_784', version=1)
mnist.keys()

dict_keys(['data', 'target', 'frame', 'categories', 'feature_names', 'target_names', 'DESCR', 'details', 'url'])

In [4]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(mnist['data'], mnist['target'], test_size=0.15, random_state=42)


In [9]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier

knn_clf = KNeighborsClassifier(n_neighbors=10)
knn_clf.fit(X_train, y_train)

rnd_clf = RandomForestClassifier()
rnd_clf.fit(X_train, y_train)

tree_clf = DecisionTreeClassifier(max_depth=10)
tree_clf.fit(X_train, y_train)

DecisionTreeClassifier(max_depth=10)

In [11]:
# Accuracy Score for above models
from sklearn.metrics import accuracy_score

for clf in (knn_clf, rnd_clf, tree_clf):
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(clf.__class__.__name__, accuracy_score(y_test, y_pred))

KNeighborsClassifier 0.9656190476190476
RandomForestClassifier 0.9659047619047619
DecisionTreeClassifier 0.8556190476190476


In [10]:
from sklearn.ensemble import VotingClassifier

voting_hard_clf = VotingClassifier(
    estimators=[('knn', knn_clf), ('rnd', rnd_clf), ('tree', tree_clf)],
    voting='hard'
)

voting_soft_clf = VotingClassifier(
    estimators=[('knn', knn_clf), ('rnd', rnd_clf), ('tree', tree_clf)],
    voting='soft'
)

In [None]:
# Accuracy score for voting models
from sklearn.metrics import accuracy_score

for clf in (voting_hard_clf, voting_soft_clf):
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(clf.__class__.__name__, accuracy_score(y_test, y_pred))

In [None]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

bag_clf = BaggingClassifier(
    DecisionTreeClassifier(), n_estimators=500,
    max_samples=100, bootstrap=True, n_jobs=1
)
bag_clf.fit(X_train, y_train)

pas_clf = BaggingClassifier(
    DecisionTreeClassifier(), n_estimators=500,
    max_samples=100, bootstrap=False, n_jobs=1
)
pas_clf.fit(X_train, y_train)

rnd_clf = RandomForestClassifier()
rnd_clf.fit(X_train, y_train)

In [None]:
# Accuracy score for above classifier
from sklearn.metrics import accuracy_score

for clf in (bag_clf, pas_clf, rnd_clf):
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(clf.__class__.__name__, accuracy_score(y_test, y_pred))

In [None]:
# AdaBoost classifier
from sklearn.ensemble import AdaBoostClassifier

ada_clf = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=1), n_estimators=200,
    algorithm="SAMME.R", learning_rate=0.5, random_state=42)
ada_clf.fit(X_train, y_train)