# MNIST CNN

In [1]:
import tensorflow as tf
import numpy as np
import os
from scipy.special import softmax

In [2]:
import tensorflow.keras.datasets.mnist as mnist

In [3]:
import vbranch

In [4]:
# When True, a checkpoint of the trained model is written after the training loop.
save = True
# Index of this ensemble member; it is embedded in the checkpoint directory name
# and in the variable scope the model is built under.
model_id = 5

## Load Data

In [5]:
# Load the MNIST train/test splits through the Keras dataset helper.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

In [6]:
# Number of target classes; sets the one-hot label width and is passed to the model constructor.
num_classes = 10

In [7]:
# Give the images a trailing channel axis: (N, 28, 28) -> (N, 28, 28, 1).
# Reshaping to "first three dims + one channel" (rather than indexing with
# np.newaxis) keeps this cell idempotent: running it a second time leaves the
# shape unchanged instead of appending yet another axis.
X_train = X_train.reshape(X_train.shape[:3] + (1,))
X_test = X_test.reshape(X_test.shape[:3] + (1,))

# One-hot encode the integer class labels.
y_train_one_hot = tf.keras.utils.to_categorical(y_train, num_classes)
y_test_one_hot = tf.keras.utils.to_categorical(y_test, num_classes)

In [8]:
# Sanity check: the image array should now carry a trailing channel axis.
X_train.shape

(60000, 28, 28, 1)

## Build Model

In [9]:
# Training hyper-parameters.
BATCH_SIZE = 32
EPOCHS = 10
# Optimiser steps per "epoch"; 100 batches of 32 is far less than a full pass over the data.
STEPS_PER_EPOCH = 100

# Directory that holds this model's checkpoint files.
model_path = './models/mnist-cnn_{}'.format(model_id)

In [10]:
tf.reset_default_graph()

train_data = (X_train.astype('float32'), y_train_one_hot)
test_data = (X_test.astype('float32'), y_test_one_hot)

# The batch size is fed whenever an iterator initializer runs, so the same graph
# serves mini-batch training and single-batch evaluation of the whole test set.
batch_size = tf.placeholder('int64', name='batch_size')

# Shuffle individual examples over the whole training set *before* batching.
# Shuffling after batch()/repeat() with a small buffer only permutes whole batches
# inside a short window at the start of the data; because the training loop
# re-initialises the iterator every epoch, training would then only ever see the
# first few thousand images. Note the buffer is refilled on each initialisation.
train_dataset = tf.data.Dataset.from_tensor_slices(train_data).\
    shuffle(buffer_size=len(X_train)).\
    batch(batch_size).repeat()

test_dataset = tf.data.Dataset.from_tensor_slices(test_data).\
    batch(batch_size).repeat()

# A single re-initialisable iterator is shared by both datasets; running one of
# the initializers below selects which dataset feeds `inputs`/`labels_one_hot`.
iter_ = tf.data.Iterator.from_structure(train_dataset.output_types,
                                       train_dataset.output_shapes)
inputs, labels_one_hot = iter_.get_next()

train_init_op = iter_.make_initializer(train_dataset)
# Explicitly named so it can be run by name from a restored meta graph.
test_init_op = iter_.make_initializer(test_dataset, name='test_init_op')

In [11]:
# Build the network inside a per-model variable scope so that all of its variables
# are prefixed with 'model_<id>/'.
# NOTE(review): 16 and 32 look like the filter counts of the two conv stages
# (the summary output shows 16- and 32-channel conv layers) — confirm against vbranch.
with tf.variable_scope('model_' + str(model_id)):
    model = vbranch.models.simple_cnn(inputs, num_classes, 16, 32)

In [12]:
# Print the layer-by-layer table of output shapes and parameter counts.
model.summary()

i   Layer name          Output shape        Parameters                    Num param 
------------------------------------------------------------------------------------
    Input               [None,28,28,1]                                              
------------------------------------------------------------------------------------
0   conv2d_1_1          [None,26,26,16]     (3,3,1,16) (16,)              160       
------------------------------------------------------------------------------------
1   bn_1_1              [None,26,26,16]     (16,) (16,)                   32        
------------------------------------------------------------------------------------
2   relu_1_1            [None,26,26,16]                                   0         
------------------------------------------------------------------------------------
3   conv2d_1_2          [None,24,24,16]     (3,3,16,16) (16,)             2320      
-----------------------------------------------------------------

In [13]:
# List the trainable variables to confirm they live under the 'model_<id>/' scope.
tf.trainable_variables()

[<tf.Variable 'model_5/conv2d_1_1_f:0' shape=(3, 3, 1, 16) dtype=float32_ref>,
 <tf.Variable 'model_5/conv2d_1_1_b:0' shape=(16,) dtype=float32_ref>,
 <tf.Variable 'model_5/bn_1_1_scale:0' shape=(16,) dtype=float32_ref>,
 <tf.Variable 'model_5/bn_1_1_beta:0' shape=(16,) dtype=float32_ref>,
 <tf.Variable 'model_5/conv2d_1_2_f:0' shape=(3, 3, 16, 16) dtype=float32_ref>,
 <tf.Variable 'model_5/conv2d_1_2_b:0' shape=(16,) dtype=float32_ref>,
 <tf.Variable 'model_5/bn_1_2_scale:0' shape=(16,) dtype=float32_ref>,
 <tf.Variable 'model_5/bn_1_2_beta:0' shape=(16,) dtype=float32_ref>,
 <tf.Variable 'model_5/conv2d_2_1_f:0' shape=(3, 3, 16, 32) dtype=float32_ref>,
 <tf.Variable 'model_5/conv2d_2_1_b:0' shape=(32,) dtype=float32_ref>,
 <tf.Variable 'model_5/bn_2_1_scale:0' shape=(32,) dtype=float32_ref>,
 <tf.Variable 'model_5/bn_2_1_beta:0' shape=(32,) dtype=float32_ref>,
 <tf.Variable 'model_5/conv2d_2_2_f:0' shape=(3, 3, 32, 32) dtype=float32_ref>,
 <tf.Variable 'model_5/conv2d_2_2_b:0' shape=

In [14]:
# Softmax cross-entropy between the one-hot labels and the model output, averaged
# over the batch. The mean is named 'loss' so it can be fetched by name later.
per_example_xent = tf.nn.softmax_cross_entropy_with_logits_v2(
    labels=labels_one_hot, logits=model.output)
loss = tf.reduce_mean(per_example_xent, name='loss')

# Adam with a fixed learning rate minimises that loss.
optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
train_op = optimizer.minimize(loss)

In [15]:
# Class probabilities (named 'pred') and batch accuracy (named 'acc').
pred = tf.nn.softmax(model.output, name='pred')
# One-hot encoding of the arg-max class for every row.
pred_max = tf.one_hot(tf.argmax(pred, axis=-1), depth=num_classes)
# Multiplying by the one-hot labels and summing per row gives 1 for a hit, 0 for a miss.
correct = tf.reduce_sum(labels_one_hot * pred_max, axis=1)
acc = tf.reduce_mean(correct, name='acc')

In [16]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for e in range(EPOCHS):
        print("Epoch {}/{}".format(e + 1, EPOCHS))
        progbar = tf.keras.utils.Progbar(STEPS_PER_EPOCH)

        # Point the shared iterator at the training data. This is needed every
        # epoch because the validation step below switches it to the test data.
        sess.run(train_init_op, feed_dict={batch_size: BATCH_SIZE})

        for i in range(STEPS_PER_EPOCH):
            _, loss_value, acc_value = sess.run([train_op, loss, acc])
            metrics = [("loss", loss_value), ("acc", acc_value)]

            if i == STEPS_PER_EPOCH - 1:
                # Last step of the epoch: evaluate on the whole test set as one batch.
                sess.run(test_init_op, feed_dict={batch_size: len(X_test)})
                val_loss, val_acc = sess.run([loss, acc])
                metrics += [("val_loss", val_loss), ("val_acc", val_acc)]

            progbar.update(i + 1, values=metrics)

    if save:
        # Saver.save() does not create missing parent directories; make sure the
        # target directory exists so a finished training run is not lost.
        if not os.path.isdir(model_path):
            os.makedirs(model_path)
        saver = tf.train.Saver()
        path = os.path.join(model_path, 'ckpt')
        saver.save(sess, path)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## Model Ensemble

In [17]:
test_outputs = []
test_losses = []
test_accs = []

num_models = 5
# Each saved model is restored into its own graph/session so that identically
# named ops ('loss', 'acc', 'test_init_op', ...) do not collide.
graphs = [tf.Graph() for _ in range(num_models)]
sessions = [tf.Session(graph=g) for g in graphs]

for i in range(len(graphs)):
    with graphs[i].as_default():
        # Local names are deliberately distinct from `model_path`, `loss` and `acc`
        # used by the training cells, so running this cell does not overwrite them.
        ckpt_dir = './models/mnist-cnn_' + str(i + 1)
        meta_path = os.path.join(ckpt_dir, 'ckpt.meta')
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        if ckpt is None or not ckpt.model_checkpoint_path:
            # get_checkpoint_state returns None when the directory holds no checkpoint.
            raise IOError('No checkpoint found in ' + ckpt_dir)

        imported_graph = tf.train.import_meta_graph(meta_path)
        imported_graph.restore(sessions[i], ckpt.model_checkpoint_path)

        # Feed the whole test set as a single batch through the restored iterator.
        sessions[i].run('test_init_op', feed_dict={'batch_size:0': len(X_test)})

        # Fetch the model output, loss and accuracy tensors by their saved names.
        test_output, test_loss, test_acc = sessions[i].run(
            ['model_%d' % (i + 1) + '/' + 'output:0', 'loss:0', 'acc:0'])
        test_outputs.append(test_output)
        test_losses.append(test_loss)
        test_accs.append(test_acc)

INFO:tensorflow:Restoring parameters from ./models/mnist-cnn_1/ckpt
INFO:tensorflow:Restoring parameters from ./models/mnist-cnn_2/ckpt
INFO:tensorflow:Restoring parameters from ./models/mnist-cnn_3/ckpt
INFO:tensorflow:Restoring parameters from ./models/mnist-cnn_4/ckpt
INFO:tensorflow:Restoring parameters from ./models/mnist-cnn_5/ckpt


In [18]:
# Test accuracy of each individually restored model.
test_accs

[0.9686, 0.9704, 0.9643, 0.9701, 0.9683]

In [19]:
def compute_acc(pred, labels_one_hot):
    """Return the fraction of rows whose arg-max prediction matches the one-hot label.

    Args:
        pred: array of per-class scores or probabilities, classes on the last axis.
        labels_one_hot: one-hot labels with the same shape as `pred`.

    Returns:
        The mean of the per-row hit indicator (1 for a correct row, 0 otherwise).
        For float32 labels the result is a float32 scalar.
    """
    labels_one_hot = np.asarray(labels_one_hot)
    # Take the class count from the labels instead of a notebook-level global, and
    # build the one-hot rows with NumPy so the helper does not need TensorFlow.
    n_classes = labels_one_hot.shape[-1]
    # float32 matches the dtype the previous one-hot encoding produced, so the
    # returned accuracy keeps the same dtype as before.
    pred_max = np.eye(n_classes, dtype='float32')[np.argmax(pred, axis=-1)]
    return np.mean(np.sum(labels_one_hot * pred_max, axis=1))

In [20]:
# Cross-check: the accuracy recomputed in NumPy from each model's raw outputs must
# agree with the accuracy reported by the restored graph. The two values come from
# different float32 reductions, so compare with a tolerance instead of exact `==`.
# rtol=1e-5 is well below the effect of a single test example (1 / 10000), so a
# real disagreement is still caught.
for i in range(num_models):
    np.testing.assert_allclose(
        compute_acc(softmax(test_outputs[i], axis=-1), y_test_one_hot),
        test_accs[i], rtol=1e-5)

### Average Predictions Before Softmax

In [21]:
# Stack the per-model outputs along a new leading axis and average across models.
before_mean_output = np.mean(np.stack(test_outputs), axis=0)

In [22]:
# The averaged pre-softmax outputs should have one row per test image and one column per class.
before_mean_output.shape

(10000, 10)

In [23]:
# Turn the averaged pre-softmax outputs into probabilities, then score the ensemble.
before_mean_pred = softmax(before_mean_output, axis=-1)
before_mean_acc = compute_acc(before_mean_pred, y_test_one_hot)

In [24]:
# Ensemble accuracy when the models' outputs are averaged before the softmax.
print(before_mean_acc)

0.9783


### Average Predictions After Softmax

In [25]:
# Convert every model's outputs to probabilities first, then average those
# probabilities across the model axis (axis 0) and score the result.
per_model_probs = softmax(np.stack(test_outputs), axis=-1)
after_mean_output = np.mean(per_model_probs, axis=0)
after_mean_acc = compute_acc(after_mean_output, y_test_one_hot)

In [26]:
# Ensemble accuracy when each model's softmax probabilities are averaged.
print(after_mean_acc)

0.9781
