# Ensemble Learning and Random Forests

## Exercise 8

In [3]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.svm import SVC, LinearSVC
import numpy as np


# Load MNIST. The primary source is OpenML; a legacy loader is kept as a
# best-effort fallback for old scikit-learn installs.
try:
    mnist = fetch_openml('mnist_784', version=1, as_frame=False)
except Exception as openml_error:
    # Catch Exception (not a bare except) so that interrupting the download
    # with Ctrl-C / kernel interrupt is not mistaken for a load failure.
    try:
        # Only available in old scikit-learn releases.
        from sklearn.datasets import fetch_mldata
    except ImportError:
        # No fallback loader: surface the real reason the OpenML fetch failed
        # rather than an unrelated ImportError.
        raise openml_error from None
    mnist = fetch_mldata('MNIST original')

# Normalise the labels to integers whichever loader succeeded, so every
# downstream cell sees the same target dtype.
mnist.target = mnist.target.astype(np.int64)
    


In [4]:
# Carve out two hold-out sets of equal size: first the test set, then a
# validation set taken from what is left. The remainder is the training set.
HOLDOUT_SIZE = 10000
SPLIT_SEED = 42

X_train_val, X_test, y_train_val, y_test = train_test_split(
    mnist.data, mnist.target,
    test_size=HOLDOUT_SIZE, random_state=SPLIT_SEED,
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val, y_train_val,
    test_size=HOLDOUT_SIZE, random_state=SPLIT_SEED,
)

In [5]:
# Three base learners, all seeded for reproducibility.
svm_clf = LinearSVC(random_state=42)
extra_trees_clf = ExtraTreesClassifier(random_state=42, n_estimators=10)
random_forest_clf = RandomForestClassifier(random_state=42, n_estimators=10)

# Order matters: later cells index predictions/scores by position in this list.
estimators = [
    random_forest_clf,
    extra_trees_clf,
    svm_clf,
]

In [6]:
# Fit each base learner on the training set, announcing it before it starts.
for clf in estimators:
    print('Training the ', clf)
    clf.fit(X_train, y_train)

Training the  RandomForestClassifier(n_estimators=10, random_state=42)
Training the  ExtraTreesClassifier(n_estimators=10, random_state=42)
Training the  LinearSVC(random_state=42)




In [7]:
# Validation-set score of every base learner, in the same order as `estimators`.
[clf.score(X_val, y_val) for clf in estimators]

[0.9469, 0.9492, 0.8695]

In [8]:
# Combine the three base learners into a single hard-voting ensemble.
from sklearn.ensemble import VotingClassifier

# (name, estimator) pairs, as expected by VotingClassifier.
name_estimators = list(zip(
    ('random_forest', 'extra_tree', 'svm_clf'),
    (random_forest_clf, extra_trees_clf, svm_clf),
))

voting_clf = VotingClassifier(estimators=name_estimators, voting='hard', n_jobs=-1)
voting_clf.fit(X_train, y_train)

VotingClassifier(estimators=[('random_forest',
                              RandomForestClassifier(n_estimators=10,
                                                     random_state=42)),
                             ('extra_tree',
                              ExtraTreesClassifier(n_estimators=10,
                                                   random_state=42)),
                             ('svm_clf', LinearSVC(random_state=42))],
                 n_jobs=-1)

In [9]:
# Validation-set score of the voting ensemble, for comparison with the
# individual scores above.
voting_clf.score(X=X_val, y=y_val)

0.9511

## Exercise 9

In [11]:
# Build the blender's training set: one row per validation instance, one
# column per base learner (same order as `estimators`), each entry being that
# learner's predicted class stored as float32.
X_val_predicts = np.column_stack(
    [clf.predict(X_val) for clf in estimators]
).astype(np.float32)

X_val_predicts

array([[5., 5., 5.],
       [8., 8., 8.],
       [2., 2., 2.],
       ...,
       [7., 7., 7.],
       [6., 6., 6.],
       [7., 7., 7.]], dtype=float32)

In [12]:
# Blender: a random forest trained on the base learners' validation-set
# predictions. oob_score=True makes an out-of-bag score available after fitting.
rnd_forest_blender = RandomForestClassifier(
    random_state=42,
    oob_score=True,
    n_estimators=200,
)
rnd_forest_blender.fit(X_val_predicts, y_val)

RandomForestClassifier(n_estimators=200, oob_score=True, random_state=42)

In [13]:
# Out-of-bag score of the blender (available because it was built with
# oob_score=True); it is computed from the data the blender was fitted on,
# i.e. the validation-set predictions, not from a separate hold-out set.
rnd_forest_blender.oob_score_

0.951