## Chap 7 Ex

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl

In [2]:
from sklearn.model_selection import train_test_split


In [3]:
X = pd.read_feather('../data/mnist_784_X.feather')
y = pd.read_feather('../data/mnist_784_y.feather')
X = X.to_numpy()
y = y.to_numpy()

In [4]:
X_train_val, X_test, y_train_val, y_test = train_test_split(X,y,
                                                            test_size=10000,
                                                            random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X,y,
                                                  test_size=10000,
                                                  random_state=42)


In [5]:
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.svm import LinearSVC
from sklearn.neural_network import MLPClassifier

In [6]:
random_forest_clf = RandomForestClassifier(n_estimators=100,random_state=42)
extra_trees_clf = ExtraTreesClassifier(n_estimators=100,random_state=42)
svm_clf = LinearSVC(max_iter=100,tol=20,random_state=42)
mlp_clf = MLPClassifier(random_state=42)

In [7]:
estimators = [random_forest_clf, extra_trees_clf,svm_clf,mlp_clf]

for estimator in estimators:
    print(f"Training the {estimator}")
    estimator.fit(X_train,y_train)

Training the RandomForestClassifier(random_state=42)


  estimator.fit(X_train,y_train)


Training the ExtraTreesClassifier(random_state=42)


  estimator.fit(X_train,y_train)


Training the LinearSVC(max_iter=100, random_state=42, tol=20)


  y = column_or_1d(y, warn=True)


Training the MLPClassifier(random_state=42)


  y = column_or_1d(y, warn=True)


In [8]:
[estimator.score(X_val,y_val) for estimator in estimators]

[0.9674, 0.9682, 0.8416, 0.9596]

In [9]:
from sklearn.ensemble import VotingClassifier

In [10]:
named_estimators = [
                    ("random_forest_clf", random_forest_clf),
                    ("extra_trees_clf", extra_trees_clf),
                     ("svm_clf", svm_clf),
                    ("mlp_clf", mlp_clf)]


In [11]:
voting_clf = VotingClassifier(named_estimators)

In [12]:
voting_clf.fit(X_train, y_train)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


In [None]:
voting_clf.score(X_val, y_val)

0.9676

In [None]:
[estimator.score(X_val,y_val) for estimator in voting_clf.estimators_]

[0.9674, 0.9682, 0.8416, 0.9596]

In [None]:
# Remove svm
voting_clf.set_params(svm_clf=None)


VotingClassifier(estimators=[('random_forest_clf',
                              RandomForestClassifier(random_state=42)),
                             ('extra_trees_clf',
                              ExtraTreesClassifier(random_state=42)),
                             ('svm_clf', None),
                             ('mlp_clf', MLPClassifier(random_state=42))])

In [None]:
voting_clf.estimators

[('random_forest_clf', RandomForestClassifier(random_state=42)),
 ('extra_trees_clf', ExtraTreesClassifier(random_state=42)),
 ('svm_clf', None),
 ('mlp_clf', MLPClassifier(random_state=42))]

In [None]:
del voting_clf.estimators_[2]

In [None]:
voting_clf.estimators

[('random_forest_clf', RandomForestClassifier(random_state=42)),
 ('extra_trees_clf', ExtraTreesClassifier(random_state=42)),
 ('svm_clf', None),
 ('mlp_clf', MLPClassifier(random_state=42))]

In [None]:
voting_clf.score(X_val, y_val)

0.971

In [None]:
voting_clf.voting="soft"
voting_clf.score(X_val,y_val)

0.9669

In [None]:
voting_clf.voting = "hard"
voting_clf.score(X_val, y_val)


0.971

In [None]:
[estimator.score(X_test, y_test) for estimator in voting_clf.estimators_]

[0.9674, 0.9682, 0.9596]

## 9 Stacking Ensemble  

In [None]:
X_val

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [None]:
len(X_val),len(estimators)

(10000, 4)

In [None]:
X_val_predictions = np.empty((len(X_val), len(estimators)),dtype=np.float32)
for index,estimator in enumerate(estimators):
    X_val_predictions[:,index] = estimator.predict(X_val)
X_val_predictions

array([[8., 8., 8., 8.],
       [4., 4., 4., 4.],
       [8., 8., 8., 8.],
       ...,
       [3., 3., 3., 3.],
       [8., 8., 5., 8.],
       [3., 3., 3., 3.]], dtype=float32)

In [None]:
rnd_forest_blender = RandomForestClassifier(n_estimators=200,
                                            oob_score=True,
                                            random_state=42)
rnd_forest_blender.fit(X_val_predictions,y_val)

  rnd_forest_blender.fit(X_val_predictions,y_val)


RandomForestClassifier(n_estimators=200, oob_score=True, random_state=42)

In [None]:
rnd_forest_blender.oob_score_

0.9665

In [None]:
X_test_predictions = np.empty((len(X_test), len(estimators)), dtype=np.float32)

for index, estimator in enumerate(estimators):
    X_test_predictions[:, index] = estimator.predict(X_test)

In [None]:
y_pred = rnd_forest_blender.predict(X_test_predictions)

In [None]:
from sklearn.metrics import accuracy_score

In [None]:
accuracy_score(y_test, y_pred)

0.9819