# Ensembling models
Importing data

In [28]:
import tensorflow as tf
mnist = tf.keras.datasets.mnist
_, (validation_data, validation_labels) = mnist.load_data()
validation_data = validation_data / 255
validation_data = validation_data.reshape(-1, 28, 28, 1)

Importing trained models

In [99]:
model_V4_3 = tf.keras.models.load_model("CNN_MNIST_V4_3.h5")
model_V4_5= tf.keras.models.load_model("CNN_MNIST_V4_5.h5")
model_V4_7 = tf.keras.models.load_model("CNN_MNIST_V4_7.h5")

In [100]:
print("Model V4_3 Accuracy")
model_V4_3.evaluate(validation_data, validation_labels)
print("Model V4_5 Accuracy")
model_V4_5.evaluate(validation_data, validation_labels)
print("Model V4_7 Accuracy")
model_V4_7.evaluate(validation_data, validation_labels)

Model V4_3 Accuracy
Model V4_5 Accuracy
Model V4_7 Accuracy


[0.013782843016088009, 0.9965000152587891]

Generating prediction tables for all models

In [101]:
model_V4_3_predict = model_V4_3.predict(validation_data)
model_V4_5_predict = model_V4_5.predict(validation_data)
model_V4_7_predict = model_V4_7.predict(validation_data)

Creating ensemble and computing ensemble accuracy. Note the methodology used to compute the accuracy is to take the majority agreement, but if there is a three-way-tie then take the prediction for which all models are collectively the most confident in by summing confidence.

In [102]:
import numpy as np
num_examples = len(validation_data)
count = 0
incorrect = []
for i in range(num_examples):
    predict_3, predict_5, predict_7 = np.argmax(model_V4_3_predict[i]), np.argmax(model_V4_5_predict[i]), np.argmax(model_V4_7_predict[i])
    ensemble_predict = 0
    if predict_3 == predict_5 or predict_3 == predict_7:
        ensemble_predict = predict_3
    elif predict_5 == predict_7:
        ensemble_predict = predict_5
    else:
        ensemble_predict = np.argmax(model_V4_3_predict[i] + model_V4_5_predict[i] + model_V4_7_predict[i])

    if ensemble_predict == validation_labels[i]:
        count += 1
    else:
        incorrect.append(i)
print(count / num_examples)

0.9973


Printing examples that ensemble incorrectly predicted

In [103]:
print(incorrect)

[193, 247, 445, 447, 582, 659, 1112, 1393, 1737, 1901, 2040, 2130, 2462, 2771, 3073, 3422, 4201, 4443, 4699, 4740, 4761, 6569, 6576, 6625, 8279, 8527, 9729]


In [104]:
for i in incorrect:
    print(f"Example = {i},\tactual = {validation_labels[i]}, " +
          f"model_4_3 = {np.argmax(model_V4_3.predict(validation_data[i][np.newaxis]))}, " +
          f"model_4_5 = {np.argmax(model_V4_5.predict(validation_data[i][np.newaxis]))}, " +
          f"model_4_7 = {np.argmax(model_V4_7.predict(validation_data[i][np.newaxis]))}")

Example = 193,	actual = 9, model_4_3 = 8, model_4_5 = 8, model_4_7 = 8
Example = 247,	actual = 4, model_4_3 = 4, model_4_5 = 2, model_4_7 = 2
Example = 445,	actual = 6, model_4_3 = 6, model_4_5 = 0, model_4_7 = 0
Example = 447,	actual = 4, model_4_3 = 9, model_4_5 = 9, model_4_7 = 4
Example = 582,	actual = 8, model_4_3 = 8, model_4_5 = 2, model_4_7 = 2
Example = 659,	actual = 2, model_4_3 = 2, model_4_5 = 7, model_4_7 = 7
Example = 1112,	actual = 4, model_4_3 = 6, model_4_5 = 6, model_4_7 = 6
Example = 1393,	actual = 5, model_4_3 = 7, model_4_5 = 7, model_4_7 = 7
Example = 1737,	actual = 5, model_4_3 = 5, model_4_5 = 3, model_4_7 = 3
Example = 1901,	actual = 9, model_4_3 = 4, model_4_5 = 4, model_4_7 = 4
Example = 2040,	actual = 5, model_4_3 = 6, model_4_5 = 6, model_4_7 = 6
Example = 2130,	actual = 4, model_4_3 = 9, model_4_5 = 9, model_4_7 = 9
Example = 2462,	actual = 2, model_4_3 = 2, model_4_5 = 0, model_4_7 = 0
Example = 2771,	actual = 4, model_4_3 = 9, model_4_5 = 9, model_4_7 = 