# Ensemble Learning 

## Data Loading

In [1]:
from keras.datasets import mnist

# Load MNIST: the first tuple is kept as `x`/`y`, the second as the test set.
(x, y), (x_test, y_test) = mnist.load_data()

# Flatten each 2-D image into a single feature vector: (n, h, w) -> (n, h * w).
# Passing -1 lets NumPy infer the flattened length from the remaining axes.
x = x.reshape(len(x), -1)
x_test = x_test.reshape(len(x_test), -1)

Using TensorFlow backend.


In [2]:
from sklearn.model_selection import train_test_split

# Hold out 10,000 samples of `x`/`y` as a validation set.
# A fixed random_state makes the shuffle deterministic, so the split (and every
# metric computed on it below) is the same after Restart Kernel -> Run All.
x_train, x_val, y_train, y_val = train_test_split(
    x, y, test_size=10_000, random_state=42
)

print(x_train.shape, y_train.shape)

(50000, 784) (50000,)


## Learning

In [3]:
from sklearn.metrics import classification_report

### Random Forest

In [4]:
%%time
from sklearn.ensemble import RandomForestClassifier

# Fit a 20-tree random forest on the training split. random_state pins the
# forest's internal randomness so the scores printed below can be reproduced
# on a re-run instead of drifting between executions.
rf_model = RandomForestClassifier(n_estimators=20, random_state=42)
rf_model.fit(x_train, y_train)

# Hard labels feed the report; class probabilities are kept for soft voting.
rf_val_pred = rf_model.predict(x_val)
rf_val_proba = rf_model.predict_proba(x_val)

print(classification_report(y_val, rf_val_pred))

              precision    recall  f1-score   support

           0       0.97      0.99      0.98       939
           1       0.98      0.99      0.98      1119
           2       0.94      0.95      0.95       988
           3       0.95      0.93      0.94       998
           4       0.96      0.95      0.96      1010
           5       0.96      0.95      0.95       875
           6       0.96      0.98      0.97      1018
           7       0.97      0.96      0.97      1082
           8       0.94      0.93      0.94       974
           9       0.95      0.94      0.94       997

   micro avg       0.96      0.96      0.96     10000
   macro avg       0.96      0.96      0.96     10000
weighted avg       0.96      0.96      0.96     10000

Wall time: 11.6 s


### Extra Trees

In [5]:
%%time

from sklearn.ensemble import ExtraTreesClassifier

# Fit a 20-tree extra-trees ensemble on the training split. random_state pins
# the randomized tree construction so the scores printed below can be
# reproduced on a re-run instead of drifting between executions.
et_model = ExtraTreesClassifier(n_estimators=20, random_state=42)
et_model.fit(x_train, y_train)

# Hard labels feed the report; class probabilities are kept for soft voting.
et_val_pred = et_model.predict(x_val)
et_val_proba = et_model.predict_proba(x_val)

print(classification_report(y_val, et_val_pred))

              precision    recall  f1-score   support

           0       0.97      0.99      0.98       939
           1       0.98      0.99      0.99      1119
           2       0.94      0.97      0.95       988
           3       0.96      0.94      0.95       998
           4       0.96      0.95      0.95      1010
           5       0.94      0.96      0.95       875
           6       0.97      0.97      0.97      1018
           7       0.98      0.96      0.97      1082
           8       0.96      0.93      0.94       974
           9       0.94      0.95      0.94       997

   micro avg       0.96      0.96      0.96     10000
   macro avg       0.96      0.96      0.96     10000
weighted avg       0.96      0.96      0.96     10000

Wall time: 12 s


### Logistic Regression

In [6]:
%%time

from sklearn.linear_model import LogisticRegression

# Logistic-regression baseline: L-BFGS solver, capped at 100 iterations,
# with C=0.1. `fit` returns the estimator itself, so construction and
# training are chained into one expression.
log_model = LogisticRegression(
    solver='lbfgs',
    multi_class='auto',
    max_iter=100,
    C=0.1,
).fit(x_train, y_train)

# Class probabilities and hard labels for the validation split.
log_val_proba = log_model.predict_proba(x_val)
log_val_pred = log_model.predict(x_val)

print(classification_report(y_val, log_val_pred))

              precision    recall  f1-score   support

           0       0.94      0.97      0.95       939
           1       0.95      0.97      0.96      1119
           2       0.90      0.90      0.90       988
           3       0.90      0.90      0.90       998
           4       0.92      0.92      0.92      1010
           5       0.88      0.86      0.87       875
           6       0.96      0.95      0.96      1018
           7       0.95      0.93      0.94      1082
           8       0.90      0.88      0.89       974
           9       0.89      0.90      0.89       997

   micro avg       0.92      0.92      0.92     10000
   macro avg       0.92      0.92      0.92     10000
weighted avg       0.92      0.92      0.92     10000

Wall time: 22.2 s




### Ensemble

In [7]:
# Soft voting: average the per-class probabilities of the random forest and
# the extra-trees model, then pick the most probable class for each sample.
# predict_proba columns follow each model's `classes_` order, so both models
# must agree on that order before their outputs can be averaged element-wise.
assert list(rf_model.classes_) == list(et_model.classes_), \
    "models were fit on different label sets; probabilities are not aligned"

val_proba = (rf_val_proba + et_val_proba) / 2

# argmax yields a column index; translate it to the actual class label rather
# than relying on the labels happening to be 0..n-1.
val_pred = rf_model.classes_[val_proba.argmax(axis=1)]

print('Soft Voting')
print(classification_report(y_val, val_pred))

Soft Voting
              precision    recall  f1-score   support

           0       0.97      0.99      0.98       939
           1       0.98      0.99      0.99      1119
           2       0.94      0.97      0.96       988
           3       0.97      0.95      0.96       998
           4       0.97      0.95      0.96      1010
           5       0.96      0.97      0.96       875
           6       0.97      0.98      0.98      1018
           7       0.98      0.96      0.97      1082
           8       0.96      0.94      0.95       974
           9       0.94      0.95      0.95       997

   micro avg       0.97      0.97      0.97     10000
   macro avg       0.97      0.97      0.97     10000
weighted avg       0.97      0.97      0.97     10000

