# Baseline MNIST FCN

In [27]:
import tensorflow as tf
import numpy as np
import os
from scipy.special import softmax

In [2]:
import tensorflow.keras.datasets.mnist as mnist

In [3]:
import vbranch

In [4]:
# When True, the training cell writes a checkpoint after the last epoch.
save = True
# Numeric suffix used in the checkpoint directory name and the variable scope.
model_id = 1

## Load Data

In [5]:
# Load the MNIST train and test splits as (images, labels) pairs.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

In [6]:
# Length of one flattened image vector (MNIST images are 28 x 28 = 784 pixels).
input_dim = 784
# Number of target classes; sets the one-hot width and the model's output size.
num_classes = 10

In [7]:
# Flatten every image into a single row of `input_dim` features.
X_train_flat, X_test_flat = (
    images.reshape((-1, input_dim)) for images in (X_train, X_test)
)

# Convert the integer class labels into one-hot rows of width `num_classes`.
y_train_one_hot, y_test_one_hot = (
    tf.keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test)
)

## Build Model

In [8]:
# Training hyper-parameters.
BATCH_SIZE = 32        # examples per training batch
EPOCHS = 10            # iterations of the outer training loop
STEPS_PER_EPOCH = 100  # optimizer steps taken per epoch

# Directory that holds this model's checkpoint files.
model_path = './models/mnist_{}'.format(model_id)

In [9]:
# Start from an empty default graph so this cell can be re-run cleanly.
tf.reset_default_graph()

train_data = (X_train_flat.astype('float32'), y_train_one_hot)
test_data = (X_test_flat.astype('float32'), y_test_one_hot)

# Fed whenever an iterator initializer runs, so the same graph can serve
# small training batches and a single full-size evaluation batch.
batch_size = tf.placeholder('int64', name='batch_size')

# Shuffle individual examples over the whole training set *before* repeating
# and batching. The training loop re-initializes the iterator every epoch and
# only draws STEPS_PER_EPOCH batches; with the previous
# batch -> repeat -> shuffle(4 * BATCH_SIZE) ordering each epoch restarted at
# example 0 and could only ever reach the first few hundred batches, and whole
# batches (not examples) were being shuffled.
train_dataset = tf.data.Dataset.from_tensor_slices(train_data).\
    shuffle(buffer_size=len(X_train_flat)).\
    repeat().\
    batch(batch_size)

# Evaluation data keeps its original order.
test_dataset = tf.data.Dataset.from_tensor_slices(test_data).\
    batch(batch_size).repeat()

# One re-initializable iterator shared by both datasets: the model reads from
# `inputs` / `labels_one_hot`, and the two init ops choose the data source.
iter_ = tf.data.Iterator.from_structure(train_dataset.output_types, 
                                       train_dataset.output_shapes)
inputs, labels_one_hot = iter_.get_next()

train_init_op = iter_.make_initializer(train_dataset)
# Explicitly named because the ensemble cell runs this op by name
# ('test_init_op') after importing the saved meta graph.
test_init_op = iter_.make_initializer(test_dataset, name='test_init_op')

In [10]:
# Build the network inside a per-model variable scope so that its variables
# and output tensor are named with a 'model_<id>/' prefix.
scope_name = 'model_{}'.format(model_id)
with tf.variable_scope(scope_name):
    model = vbranch.models.simple_fcn(inputs, 128, num_classes)

In [11]:
# Print a per-layer table of output shapes and parameter counts.
model.summary()

i   Layer name          Output shape        Parameters                    Num param 
------------------------------------------------------------------------------------
    Input               [None,784]                                                  
------------------------------------------------------------------------------------
0   fc1                 [None,128]          (784,128) (128,)              100480    
------------------------------------------------------------------------------------
1   bn1                 [None,128]          (128,) (128,)                 256       
------------------------------------------------------------------------------------
2   relu                [None,128]                                        0         
------------------------------------------------------------------------------------
3   fc2                 [None,10]           (128,10) (10,)                1290      
-----------------------------------------------------------------

In [12]:
# List the variables currently registered in the default graph.
tf.global_variables()

[<tf.Variable 'model_1/fc1_w:0' shape=(784, 128) dtype=float32_ref>,
 <tf.Variable 'model_1/fc1_b:0' shape=(128,) dtype=float32_ref>,
 <tf.Variable 'model_1/bn1_scale:0' shape=(128,) dtype=float32_ref>,
 <tf.Variable 'model_1/bn1_beta:0' shape=(128,) dtype=float32_ref>,
 <tf.Variable 'model_1/fc2_w:0' shape=(128, 10) dtype=float32_ref>,
 <tf.Variable 'model_1/fc2_b:0' shape=(10,) dtype=float32_ref>,
 <tf.Variable 'model_1/bn2_scale:0' shape=(10,) dtype=float32_ref>,
 <tf.Variable 'model_1/bn2_beta:0' shape=(10,) dtype=float32_ref>]

In [13]:
# Per-example softmax cross-entropy between the one-hot labels and the
# model's raw outputs (treated as logits).
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
    labels=labels_one_hot,
    logits=model.output,
)
# Batch-mean loss; the explicit name lets it be fetched as 'loss:0' from a
# restored graph.
loss = tf.reduce_mean(cross_entropy, name='loss')

optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
train_op = optimizer.minimize(loss)

In [14]:
# Class probabilities from the model's raw outputs.
pred = tf.nn.softmax(model.output, name='pred')

# One-hot encoding of the most probable class for each example.
predicted_class = tf.argmax(pred, axis=-1)
pred_max = tf.one_hot(predicted_class, num_classes)

# The label/prediction product sums to 1 for a correct example and 0
# otherwise, so the batch mean is the accuracy. Named so it can be fetched as
# 'acc:0' from a restored graph.
correct = tf.reduce_sum(labels_one_hot * pred_max, axis=[1])
acc = tf.reduce_mean(correct, name='acc')

In [15]:
# Train for EPOCHS x STEPS_PER_EPOCH optimizer steps, evaluate on the full test
# set at the end of every epoch, and optionally write a checkpoint.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for epoch in range(EPOCHS):
        print("Epoch {}/{}".format(epoch + 1, EPOCHS))
        progbar = tf.keras.utils.Progbar(STEPS_PER_EPOCH)

        # Point the shared iterator at the training data. This has to be
        # repeated every epoch because the evaluation below switches the
        # iterator over to the test data.
        sess.run(train_init_op, feed_dict={batch_size: BATCH_SIZE})

        for step in range(STEPS_PER_EPOCH):
            _, loss_value, acc_value = sess.run([train_op, loss, acc])
            metrics = [("loss", loss_value), ("acc", acc_value)]

            if step == STEPS_PER_EPOCH - 1:
                # Last step of the epoch: evaluate on the whole test set as a
                # single batch and report it alongside the training metrics.
                sess.run(test_init_op, feed_dict={batch_size: len(X_test_flat)})
                val_loss, val_acc = sess.run([loss, acc])
                metrics += [("val_loss", val_loss), ("val_acc", val_acc)]

            progbar.update(step + 1, values=metrics)

    if save:
        # Make sure the checkpoint directory exists before saving into it, so
        # a fresh checkout without ./models does not fail at the very end of
        # training.
        if not os.path.isdir(model_path):
            os.makedirs(model_path)
        saver = tf.train.Saver()
        path = os.path.join(model_path, 'ckpt')
        saver.save(sess, path)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## Model Ensemble

In [22]:
# Restore each saved model into its own graph/session, evaluate it on the full
# test set, and collect its raw outputs, loss and accuracy.
test_outputs = []
test_losses = []
test_accs = []

num_models = 1
graphs = [tf.Graph() for _ in range(num_models)]
sessions = [tf.Session(graph=g) for g in graphs]

try:
    for i, (graph, session) in enumerate(zip(graphs, sessions)):
        with graph.as_default():
            # Local names are deliberately distinct from the notebook-level
            # `model_path`, `loss` and `acc`, so running this cell does not
            # clobber the tensors the training cell depends on.
            ckpt_dir = './models/mnist_' + str(i + 1)
            meta_path = os.path.join(ckpt_dir, 'ckpt.meta')
            ckpt = tf.train.get_checkpoint_state(ckpt_dir)
            if ckpt is None:
                raise IOError('No checkpoint found in ' + ckpt_dir)

            # import_meta_graph rebuilds the saved graph and returns a Saver
            # that can restore its variables.
            restorer = tf.train.import_meta_graph(meta_path)
            restorer.restore(session, ckpt.model_checkpoint_path)

            # Feed the entire test set through the restored graph as one batch.
            session.run('test_init_op', feed_dict={'batch_size:0': len(X_test_flat)})

            model_output, model_loss, model_acc = session.run(
                ['model_%d'%(i+1)+'/'+'output:0', 'loss:0', 'acc:0'])
            test_outputs.append(model_output)
            test_losses.append(model_loss)
            test_accs.append(model_acc)
finally:
    # Release every session even if restoring or evaluating a model fails.
    for session in sessions:
        session.close()

INFO:tensorflow:Restoring parameters from ./models/mnist_1/ckpt


In [23]:
# Test accuracy of each restored model, in model order.
test_accs

[0.9415]

In [24]:
def compute_acc(pred, labels_one_hot):
    """Return the fraction of rows whose arg-max class matches the label.

    pred: array of per-class scores; the class is taken along the last axis.
    labels_one_hot: one-hot label array with one row per prediction.
    """
    predicted_classes = np.argmax(pred, axis=-1)
    predicted_one_hot = tf.keras.utils.to_categorical(predicted_classes, num_classes)
    # The product is 1 only at the position where prediction and label agree,
    # so each row sums to 1 for a hit and 0 for a miss.
    hits = (labels_one_hot * predicted_one_hot).sum(axis=1)
    return hits.mean()

In [25]:
# Sanity check: the accuracy recomputed in NumPy from each model's raw outputs
# must match the accuracy reported by that model's TensorFlow graph. np.isclose
# is used rather than == so that a last-bit float difference between the two
# implementations does not trip the check.
for i in range(num_models):
    assert np.isclose(compute_acc(test_outputs[i], y_test_one_hot), test_accs[i])

### Average Predictions Before Softmax

In [28]:
# Ensemble by averaging the raw model outputs across models first, then
# turning the averaged outputs into probabilities.
stacked_outputs = np.array(test_outputs)
before_mean_output = np.mean(stacked_outputs, axis=0)
before_mean_probs = softmax(before_mean_output, axis=-1)
before_mean_acc = compute_acc(before_mean_probs, y_test_one_hot)

In [29]:
# Ensemble accuracy when the raw outputs are averaged before the softmax.
print(before_mean_acc)

0.9415


### Average Predictions After Softmax

In [30]:
# Ensemble by converting each model's raw outputs to probabilities first and
# then averaging those probabilities across models.
after_mean_output = softmax(np.array(test_outputs), axis=-1).mean(axis=0)
# `after_mean_output` already holds probabilities, so it is scored directly;
# running it through softmax a second time would only distort the averaged
# distribution.
after_mean_acc = compute_acc(after_mean_output, y_test_one_hot)

In [31]:
# Ensemble accuracy when probabilities are averaged after the softmax.
print(after_mean_acc)

0.9415
