In [1]:
import tensorflow as tf
from seblock import squeeze_and_excitation_block
from utils import conv2d_layer, max_pool, dense_layer

# Load the MNIST dataset through Keras. `train` and `test` are each unpacked
# further down as an (images, labels) pair.
train, test = tf.keras.datasets.mnist.load_data()

In [2]:
# Unpack the raw (images, labels) pairs of both splits in one step.
(X_train, Y_train), (X_test, Y_test) = train, test

# A short-lived session evaluates the TF ops below and hands back concrete arrays.
with tf.Session() as sess:
    # Labels -> one-hot rows with 10 columns.
    Y_train, Y_test = sess.run([
        tf.one_hot(Y_train, depth=10),
        tf.one_hot(Y_test, depth=10),
    ])

    # Images -> 4-D layout with an explicit single channel: (-1, 28, 28, 1).
    X_train, X_test = sess.run([
        tf.reshape(X_train, [-1, 28, 28, 1]),
        tf.reshape(X_test, [-1, 28, 28, 1]),
    ])

## The Model

In [3]:
def model(X_train, Y_train, X_test, Y_test, lr=0.001, batch_size=32, num_epochs=5):
    """Build, train and evaluate a small CNN with squeeze-and-excitation blocks.

    The default graph is reset and rebuilt on every call. The network is trained
    with Adam on shuffled minibatches of the training data, a running cost and
    accuracy are printed once per epoch, and the final accuracy on the full
    training and test sets is printed at the end.

    Args:
        X_train: training images, a 4-D array whose shape unpacks as
            (examples, height, width, channels).
        Y_train: one-hot training labels, a 2-D array (examples, classes).
        X_test: test images, fed to the same placeholder as `X_train`.
        Y_test: one-hot test labels, fed to the same placeholder as `Y_train`.
        lr: learning rate passed to the Adam optimizer.
        batch_size: number of examples per minibatch.
        num_epochs: number of passes over the training set.

    Returns:
        None. All results are reported through `print`.
    """
    tf.reset_default_graph()

    m, n_H, n_W, n_C_in = X_train.shape
    n_classes = Y_train.shape[1]
    # Floor division yields 0 when m < batch_size, which would break both the
    # running averages and the modulo check in the training loop; clamp to 1.
    num_minibatches_per_epoch = max(1, m // batch_size)

    ### PLACEHOLDERS
    with tf.name_scope('placeholders'):
        X = tf.placeholder(tf.float32, shape=[None, n_H, n_W, n_C_in], name='X_placeholder')
        Y = tf.placeholder(tf.float32, shape=[None, n_classes], name='Y_placeholder')

    ### FORWARD PROPAGATION
    with tf.name_scope('forward-prop'):
        Z1 = conv2d_layer(X, n_filters=8, size=[5, 5], padding='SAME', name='conv2d-layer-1')
        P1 = max_pool(Z1, size=[2, 2], strides=[1, 2, 2, 1], name='max-pool-layer-1')

        S2 = squeeze_and_excitation_block(P1, out_dim=8, reduction_ratio=16, layer_name='SE-block-2')

        Z3 = conv2d_layer(S2, n_filters=16, size=[9, 9], padding='SAME', name='conv2d-layer-3')
        P3 = max_pool(Z3, size=[2, 2], strides=[1, 2, 2, 1], name='max-pool-layer-3')

        S4 = squeeze_and_excitation_block(P3, out_dim=16, reduction_ratio=16, layer_name='SE-block-4')

        F5 = tf.layers.flatten(S4, name='flatten-5')

        A6 = dense_layer(F5, units=100, activation=tf.nn.relu, name='dense-layer-6')
        # The logits must have one column per class so they line up with Y.
        Z = dense_layer(A6, units=n_classes, activation=None, name='dense-layer-7')

    ### COMPUTE THE COST
    with tf.name_scope('cost'):
        # With its default reduction this loss is already a scalar, so no
        # additional reduce_mean is required.
        cost = tf.losses.softmax_cross_entropy(logits=Z, onehot_labels=Y)

    ### OPTIMIZER
    with tf.name_scope('optimizer'):
        train_op = tf.train.AdamOptimizer(learning_rate=lr).minimize(cost)

    ### INPUT SEQUENCE
    with tf.name_scope('train-dataset'):
        train_dataset = tf.data.Dataset.from_tensor_slices((X_train, Y_train))
        # Shuffle *before* repeating: each epoch is then one reshuffled pass over
        # every example. Repeating first would blend examples from different
        # epochs and need a shuffle buffer num_epochs times larger.
        train_dataset = train_dataset.shuffle(m).repeat(num_epochs).batch(batch_size)

        trainset_iterator = train_dataset.make_one_shot_iterator()
        next_minibatch = trainset_iterator.get_next()

    # ACCURACY
    with tf.name_scope('accuracy'):
        # Compare the predicted class with the labelled class per example.
        predict_op = tf.argmax(Z, 1)
        correct_prediction = tf.equal(predict_op, tf.argmax(Y, 1))

        # Fraction of correct predictions in whatever batch is fed.
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))

    ### INITIALIZER
    init = tf.global_variables_initializer()

    ### RUNNING THE COMPUTATION GRAPH WITH `tf.Session`
    with tf.Session() as sess:
        sess.run(init)

        k = 0
        epoch = 0
        epoch_cost = 0.
        epoch_acc = 0.

        while True:
            # The one-shot iterator signals exhaustion by raising OutOfRangeError.
            try:
                X_mini, Y_mini = sess.run(next_minibatch)
            except tf.errors.OutOfRangeError:
                break

            _, tmp_cost, tmp_acc = sess.run(
                (train_op, cost, accuracy),
                feed_dict={X: X_mini, Y: Y_mini}
            )

            # Accumulate the per-epoch averages incrementally.
            epoch_cost += tmp_cost/num_minibatches_per_epoch
            epoch_acc += tmp_acc/num_minibatches_per_epoch
            k += 1

            if k % num_minibatches_per_epoch == 0:
                print(f'EPOCH {epoch}     |||     COST: {epoch_cost}     |||     ACCURACY: {epoch_acc}')
                epoch += 1
                epoch_cost = 0.
                epoch_acc = 0.

        train_acc = accuracy.eval(feed_dict={X: X_train, Y: Y_train})
        test_acc = accuracy.eval(feed_dict={X: X_test, Y: Y_test})
        print()
        print(f'TRAIN ACCURACY: {train_acc}')
        print(f'TEST ACCURACY: {test_acc}')

In [4]:
# Train and evaluate, with the hyperparameters spelled out one per line.
model(
    X_train,
    Y_train,
    X_test,
    Y_test,
    lr=0.001,
    batch_size=32,
    num_epochs=5,
)

EPOCH 0     |||     COST: 0.2623580956148599     |||     ACCURACY: 0.9400666666666864
EPOCH 1     |||     COST: 0.08667279607782852     |||     ACCURACY: 0.9753166666666966
EPOCH 2     |||     COST: 0.07273989441257726     |||     ACCURACY: 0.9793666666667015
EPOCH 3     |||     COST: 0.057125596673074784     |||     ACCURACY: 0.9833333333333708
EPOCH 4     |||     COST: 0.049462226644020696     |||     ACCURACY: 0.9856833333333724

TRAIN ACCURACY: 0.9886999726295471
TEST ACCURACY: 0.9843999743461609


In [5]:
# Dump the current default graph so it can be inspected in TensorBoard.
writer = tf.summary.FileWriter('./boards/1')
try:
    writer.add_graph(tf.get_default_graph())
finally:
    # close() flushes pending events and releases the events file, which a
    # bare flush() would leave open.
    writer.close()

In [6]:
# Serve the graph written to ./boards/1. The command runs in the foreground,
# so the cell keeps the kernel busy until it is interrupted (CTRL+C).
!tensorboard --logdir ./boards/1

TensorBoard 1.12.2 at http://Victors-MacBook-Pro.local:6006 (Press CTRL+C to quit)
W0210 12:32:55.687920 Reloader tf_logging.py:120] Found more than one graph event per run, or there was a metagraph containing a graph_def, as well as one or more graph events.  Overwriting the graph with the newest event.
W0210 12:32:55.687920 123145356095488 tf_logging.py:120] Found more than one graph event per run, or there was a metagraph containing a graph_def, as well as one or more graph events.  Overwriting the graph with the newest event.
^C
