## Chapter 7 - Ensemble Learning and Random Forests

### Question 8 - ensemble of different classifiers

Loading the MNIST data.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
from sklearn.datasets import fetch_openml

# Fetch the 'mnist_784' dataset (version 1) from OpenML.
mnist = fetch_openml('mnist_784', version=1)

# OpenML delivers the digit labels as strings ('0'..'9'). Cast them to small
# integers once, here, so that every downstream consumer sees numeric labels.
# This matters for the blender in Question 9, which feeds *predicted labels*
# back in as features and would otherwise rely on implicit string->float coercion.
X, y = mnist["data"], mnist["target"].astype(np.uint8)

Splitting the data into training, validation, and test sets.

In [3]:
from sklearn.model_selection import train_test_split

# Stage 1: keep 60,000 samples for model development; everything left over
# becomes the held-out test set.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=60000,
    random_state=42,
)

# Stage 2: carve the 60,000 development samples into 50,000 for training and
# the remainder for validation. The same seed keeps the split reproducible.
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    train_size=50000,
    random_state=42,
)

Training the individual classifiers.

In [4]:
from sklearn.ensemble import RandomForestClassifier , ExtraTreesClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression

In [5]:
# The three base learners, each left at its default hyperparameters apart
# from a fixed seed so that repeated runs give the same models.
rand_clf, extra_clf, svm_clf = (
    RandomForestClassifier(random_state=42),
    ExtraTreesClassifier(random_state=42),
    SVC(random_state=42),
)

In [6]:
# Fit the two tree ensembles on the training split.
for tree_ensemble in (rand_clf, extra_clf):
    tree_ensemble.fit(X_train, y_train)

# The SVM is fitted last as a bare expression so the cell still displays
# the fitted estimator as its output.
svm_clf.fit(X_train, y_train)

SVC(random_state=42)

In [7]:
# Predict the validation labels with each base classifier, in the order
# random forest, extra-trees, SVM.
y_rand_pred, y_extra_pred, y_svm_pred = (
    model.predict(X_val) for model in (rand_clf, extra_clf, svm_clf)
)

In [8]:
from sklearn.metrics import accuracy_score

# Report validation accuracy for each base classifier.
# accuracy_score's documented signature is (y_true, y_pred); the ground truth
# goes first. Accuracy happens to be symmetric, so the numbers are unchanged,
# but the documented order keeps the call correct if another metric is
# swapped in later.
for clf, val_pred in (
    (rand_clf, y_rand_pred),
    (extra_clf, y_extra_pred),
    (svm_clf, y_svm_pred),
):
    print(clf, accuracy_score(y_val, val_pred))

RandomForestClassifier(random_state=42) 0.9692
ExtraTreesClassifier(random_state=42) 0.9715
SVC(random_state=42) 0.9788


Building a hard-voting ensemble classifier from the three classifiers above.

In [9]:
from sklearn.ensemble import VotingClassifier

# Combine the three base learners by hard voting, i.e. a majority vote over
# their predicted class labels.
voting_clf = VotingClassifier(
    estimators=[('rf', rand_clf), ('ex', extra_clf), ('svm', svm_clf)],
    voting='hard',
)

# NOTE: per the scikit-learn docs, fit() trains fresh clones of the listed
# estimators, so this repeats the training already done for the base models.
voting_clf.fit(X_train, y_train)
y_voting_pred = voting_clf.predict(X_val)

# Ground truth first, predictions second, matching accuracy_score(y_true, y_pred).
print(voting_clf, accuracy_score(y_val, y_voting_pred))

VotingClassifier(estimators=[('rf', RandomForestClassifier(random_state=42)),
                             ('ex', ExtraTreesClassifier(random_state=42)),
                             ('svm', SVC(random_state=42))]) 0.9744


In [10]:
# Predict the test labels with every model: the three base classifiers
# followed by the voting ensemble.
y_rand_pred_test, y_extra_pred_test, y_svm_pred_test, y_voting_pred_test = (
    model.predict(X_test)
    for model in (rand_clf, extra_clf, svm_clf, voting_clf)
)

Evaluating the classifiers on the test set: the SVM has the best accuracy, and the voting classifier comes second.

In [11]:
# Report test-set accuracy for the base classifiers and the voting ensemble.
# Arguments follow the documented accuracy_score(y_true, y_pred) order.
for clf, test_pred in (
    (rand_clf, y_rand_pred_test),
    (extra_clf, y_extra_pred_test),
    (svm_clf, y_svm_pred_test),
    (voting_clf, y_voting_pred_test),
):
    print(clf, accuracy_score(y_test, test_pred))

RandomForestClassifier(random_state=42) 0.9645
ExtraTreesClassifier(random_state=42) 0.9691
SVC(random_state=42) 0.976
VotingClassifier(estimators=[('rf', RandomForestClassifier(random_state=42)),
                             ('ex', ExtraTreesClassifier(random_state=42)),
                             ('svm', SVC(random_state=42))]) 0.9713


### Question 9 - building a blender

Creating a new training set whose features are the predictions that the classifiers from the previous question made on the validation set (one row per validation instance, one column per classifier).

In [12]:
# Stack the three validation-prediction vectors as rows, then flip the axes
# so that each row is one validation sample and each column one classifier.
new_training_set = np.array(
    [y_rand_pred, y_extra_pred, y_svm_pred]
).T

Fitting a blender (a random forest classifier) on the new training set.

In [13]:
# The blender is a seeded random forest that learns to map the base
# classifiers' validation predictions to the true validation labels.
rand_blender_clf = RandomForestClassifier(random_state=42)

# Left as a bare expression so the notebook shows the fitted estimator.
rand_blender_clf.fit(X=new_training_set, y=y_val)

RandomForestClassifier(random_state=42)

Evaluating on the test set: the SVM is still first with the best accuracy, the voting classifier is second, and the blender comes third.

In [14]:
# Build the blender's test features the same way as its training features:
# one column per base classifier, holding that classifier's test predictions.
new_test_set = np.array(
    [y_rand_pred_test, y_extra_pred_test, y_svm_pred_test]
).T

# Let the blender turn the stacked base predictions into final labels.
y_blender_pred = rand_blender_clf.predict(new_test_set)

In [15]:
# Final test-set comparison: the base classifiers, the voting ensemble and
# the blender. Arguments follow the documented accuracy_score(y_true, y_pred)
# order.
for clf, test_pred in (
    (rand_clf, y_rand_pred_test),
    (extra_clf, y_extra_pred_test),
    (svm_clf, y_svm_pred_test),
    (voting_clf, y_voting_pred_test),
    (rand_blender_clf, y_blender_pred),
):
    print(clf, accuracy_score(y_test, test_pred))

RandomForestClassifier(random_state=42) 0.9645
ExtraTreesClassifier(random_state=42) 0.9691
SVC(random_state=42) 0.976
VotingClassifier(estimators=[('rf', RandomForestClassifier(random_state=42)),
                             ('ex', ExtraTreesClassifier(random_state=42)),
                             ('svm', SVC(random_state=42))]) 0.9713
RandomForestClassifier(random_state=42) 0.9706
