In [1]:
import tensorflow as tf
import numpy as np
import math
import os
import skimage as ski
import skimage.io

from tensorflow.contrib.layers import xavier_initializer_conv2d as xavier_conv2d
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

tf.reset_default_graph()

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


<h1> CONFIG FILE </h1>

In [2]:
config = {
    "max_epochs" : 20,
    "batch_size" : 50,
    "lr_policy" : 1e-4,
    "num_examples" : mnist.train.images.shape[0],
    "weight_decay" : 1e-3,
    "SAVE_DIR" : "output_mnist_conv/"
}

OUTPUT_SHAPE = 10

<h1> Utility methods </h1>

In [3]:
def evaluate(session, name, imgs, labels):
    print("\nRunning evaluation: ", name)
    batch_size = config['batch_size']
    num_examples = imgs.shape[0]
    assert num_examples % batch_size == 0
    num_batches = num_examples // batch_size
    cnt_correct = 0
    loss_avg = 0

    for i in range(num_batches):
        batch_x = imgs[i*batch_size:(i+1)*batch_size, :]
        batch_y = labels[i*batch_size:(i+1)*batch_size, :]
        
        learning_rate = float((num_batches - i)/(num_batches))
        if (learning_rate <= 0.0):
            learning_rate = 0.01
            
        data_dict = {x: batch_x, y: batch_y, weight_decay: config["weight_decay"],
                     is_training: False, lr: learning_rate}
        loss_val, predicted = session.run([loss, logits], feed_dict=data_dict)

        yp = np.argmax(predicted, 1)
        yt = np.argmax(batch_y, 1)
        cnt_correct += (yp == yt).sum()
        loss_avg += loss_val
        
    acc = (cnt_correct / num_examples * 100)
    loss_avg /= num_batches
    print(name + " accuracy = %.4f" % acc)
    print(name + " avg loss = %.4f\n" % loss_avg)

    
def draw_conv_filters(epoch, step, weights, save_dir):
  # kxkxCxn_filters
  k, k, C, num_filters = weights.shape

  w = weights.copy().swapaxes(0, 3).swapaxes(1,2)
  w = w.reshape(num_filters, C, k, k)
  w -= w.min()
  w /= w.max()

  border = 1
  cols = 8
  rows = math.ceil(num_filters / cols)
  width = cols * k + (cols-1) * border
  height = rows * k + (rows-1) * border

  for i in range(1):
    img = np.zeros([height, width])
    for j in range(num_filters):
      r = int(j / cols) * (k + border)
      c = int(j % cols) * (k + border)
      img[r:r+k,c:c+k] = w[j,i]
    filename = 'epoch_%02d_step_%06d_input_%03d.png' % (epoch, step, i)
    ski.io.imsave(os.path.join(save_dir, filename), img)
    
    
def batch_norm(inputs, is_training, dims, decay=0.999, epsilon=1e-3):

        scale = tf.Variable(tf.ones([inputs.get_shape()[-1]]))
        print("Scale: ", scale.get_shape())
        beta = tf.Variable(tf.zeros([inputs.get_shape()[-1]]))
        print("Beta: ",beta.get_shape())
        pop_mean = tf.Variable(tf.zeros([inputs.get_shape()[-1]]), trainable=False)
        print("Pop mean: ",pop_mean.get_shape())
        pop_var = tf.Variable(tf.ones([inputs.get_shape()[-1]]), trainable=False)
        print("Pop var: ",pop_var.get_shape())

        def if_true():
            batch_mean, batch_var = tf.nn.moments(inputs, dims)
            print("Batch mean/var", batch_mean.get_shape(), batch_var.get_shape())
            train_mean = tf.assign(pop_mean,
                                   pop_mean * decay + batch_mean * (1 - decay))
            train_var = tf.assign(pop_var,
                                  pop_var * decay + batch_var * (1 - decay))
            with tf.control_dependencies([train_mean, train_var]):
                return tf.nn.batch_normalization(inputs,
                                                 batch_mean, batch_var, beta, scale, epsilon)

        def if_false():
            return tf.nn.batch_normalization(inputs,
                                             pop_mean, pop_var, beta, scale, epsilon)

        result = tf.cond(is_training, if_true, if_false)
        return result

    
def conv_2d(tensor, filters, biases, is_training=None, dims=None,strides=1, activation=tf.nn.relu, padding='SAME'):
    h1 = tf.nn.conv2d(tensor, filters, strides=[1, strides, strides, 1], padding=padding)
    h1 = tf.nn.bias_add(h1, biases)
    return activation(batch_norm(h1, is_training, dims))


def max_pool_2d(tensor, k_size=2, stride=2, padding='SAME'):
    return tf.nn.max_pool(tensor, ksize=[1, k_size, k_size, 1], strides=[1, stride, stride, 1], padding=padding)


def dropout(tensor, use_dropout, rate=0.5):
    return tf.layers.dropout(tensor, rate=rate, training=use_dropout)


# FC layer
def dense(tensor, filters, biases, is_training=None, dims=None, activation=None):
    tensor = tf.reshape(tensor, [-1, filters.get_shape().as_list()[0]])
    res = tf.matmul(tensor, filters) + biases
    if activation:
        return activation(batch_norm(res, is_training, dims))
    return res

<h1> Weights & Biases </h1>

In [4]:
weights = {
    'conv1': tf.get_variable('w_conv1', [5, 5, 1, 16], initializer=xavier_conv2d()),
    'conv2': tf.get_variable('w_conv2', [5, 5, 16, 32], initializer=xavier_conv2d()),

    'fc3': tf.get_variable('w_fc3', [7 * 7 * 32, 512], initializer=xavier_conv2d()),
    'fc4': tf.get_variable('w_fc4', [512, OUTPUT_SHAPE], initializer=xavier_conv2d())
}

biases = {
    'conv1': tf.Variable(tf.zeros([16]), name='b_conv1'),
    'conv2': tf.Variable(tf.zeros([32]), name='b_conv2'),
    'fc3': tf.Variable(tf.zeros([512]), name='b_fc3'),
    'fc4': tf.Variable(tf.zeros([OUTPUT_SHAPE]), name='b_fc4')
}

<h1> LAYERS </h1>

In [5]:
x = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
y = tf.placeholder(tf.float32, shape=[None, OUTPUT_SHAPE])
weight_decay = tf.placeholder(tf.float32)
is_training = tf.placeholder(tf.bool)

#################### 1ST LAYER ####################
input_x = tf.reshape(x, [-1, 28, 28, 1])

# h1 is [batch_size, 28, 28, 16]
h1 = conv_2d(input_x, weights["conv1"], biases["conv1"], is_training=is_training, dims=[0, 1, 2])
print("H1:", h1.get_shape())

# max pool convolved layer [batch_size, 14, 14, 16]
h1_pooled = max_pool_2d(h1, k_size=2)
print("H1 pooled:", h1_pooled.get_shape())

#################### 2ND LAYER ####################
# h2 is [batch_size, 14, 14, 32]
h2 = conv_2d(h1_pooled, weights["conv2"], biases["conv2"], is_training=is_training, dims=[0, 1, 2])
print("H2:", h2.get_shape())

# max pool convolved layer [batch_size, 7, 7, 32]
h2_pooled = max_pool_2d(h2, k_size=2)
print("H2 pooled:", h2_pooled.get_shape())

#################### FC LAYER ####################
fc1 = dense(h2_pooled, weights['fc3'],  biases['fc3'], is_training=is_training, dims=[0, 1], activation=tf.nn.relu)
print("FC1:", fc1.get_shape())

#################### FC LAYER ####################
# logits is [batch_size, 10]
logits = dense(fc1, weights['fc4'],  biases['fc4'], is_training=is_training, activation=None)
print("Logits:", logits.get_shape())

Scale:  (16,)
Beta:  (16,)
Pop mean:  (16,)
Pop var:  (16,)
Batch mean/var (16,) (16,)
H1: (?, 28, 28, 16)
H1 pooled: (?, 14, 14, 16)
Scale:  (32,)
Beta:  (32,)
Pop mean:  (32,)
Pop var:  (32,)
Batch mean/var (32,) (32,)
H2: (?, 14, 14, 32)
H2 pooled: (?, 7, 7, 32)
Scale:  (512,)
Beta:  (512,)
Pop mean:  (512,)
Pop var:  (512,)
Batch mean/var () ()
FC1: (?, 512)
Logits: (?, 10)


<h1> Loss & Train step </h1>

In [6]:
regularizers = 0
for w in weights.values():
    regularizers += tf.nn.l2_loss(w)

loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits))

loss = loss + weight_decay*regularizers

lr = tf.placeholder(tf.float32)
train_step = tf.train.GradientDescentOptimizer(lr).minimize(loss)

<h1> MAIN CODE </h1>

In [7]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    max_epochs = config["max_epochs"]
    batch_size = config["batch_size"]
    lr_policy = config["lr_policy"]
    num_examples = mnist.train.images.shape[0]
    num_batches = num_examples // batch_size

    train_x = mnist.train.images
    train_x = train_x.reshape([-1, 28, 28, 1])
    train_y = mnist.train.labels

    valid_x = mnist.validation.images
    valid_x = valid_x.reshape([-1, 28, 28, 1])
    valid_y = mnist.validation.labels

    test_x = mnist.test.images
    test_x = test_x.reshape([-1, 28, 28, 1])
    test_y = mnist.test.labels

    train_mean = train_x.mean()
    train_x -= train_mean
    valid_x -= train_mean
    test_x -= train_mean

    for epoch in range(1, max_epochs + 1):
        cnt_correct = 0

        permutation_idx = np.random.permutation(num_examples)
        train_x = train_x[permutation_idx]
        train_y = train_y[permutation_idx]

        for i in range(num_batches):
            # store mini-batch to ndarray
            batch_x = train_x[i * batch_size:(i + 1) * batch_size, :]
            batch_y = train_y[i * batch_size:(i + 1) * batch_size, :]

            learning_rate = float((num_batches - i - 100)/(num_batches))
            if (learning_rate <= 0.05):
                learning_rate = 0.001
                            
            data_dict = {x: batch_x, y: batch_y, lr: learning_rate,
                         weight_decay: config["weight_decay"], is_training: True}
            loss_val, predicted, _ = sess.run([loss, logits, train_step], feed_dict=data_dict)
            
            yp = np.argmax(predicted, 1)
            yt = np.argmax(batch_y, 1)
            cnt_correct += (yp == yt).sum()
            acc = (cnt_correct / ((i+1)*batch_size) * 100)

            if (i+1) % 100 == 0:
                w = sess.run(weights['conv1'])
                print("epoch %d, step %d/%d, batch loss = %.4f, train acc = %.4f, lr=%.3f" %
                      (epoch, i+1, num_batches, loss_val, acc, learning_rate))
                
                draw_conv_filters(epoch, i * batch_size, w, config["SAVE_DIR"])

        evaluate(sess, "Validation", valid_x, valid_y)
        
    # TESTING
    evaluate(sess, "Test", test_x, test_y)

epoch 1, step 100/1100, batch loss = 0.8433, train acc = 63.1400, lr=0.819


  .format(dtypeobj_in, dtypeobj_out))


epoch 1, step 200/1100, batch loss = 0.6799, train acc = 77.8600, lr=0.728
epoch 1, step 300/1100, batch loss = 0.5857, train acc = 83.8200, lr=0.637
epoch 1, step 400/1100, batch loss = 0.5253, train acc = 86.9200, lr=0.546
epoch 1, step 500/1100, batch loss = 0.4330, train acc = 88.9280, lr=0.455
epoch 1, step 600/1100, batch loss = 0.4020, train acc = 90.2833, lr=0.365
epoch 1, step 700/1100, batch loss = 0.3633, train acc = 91.3543, lr=0.274
epoch 1, step 800/1100, batch loss = 0.3831, train acc = 92.1800, lr=0.183
epoch 1, step 900/1100, batch loss = 0.3313, train acc = 92.8578, lr=0.092
epoch 1, step 1000/1100, batch loss = 0.3890, train acc = 93.4080, lr=0.001
epoch 1, step 1100/1100, batch loss = 0.3572, train acc = 93.8364, lr=0.001

Running evaluation:  Validation
Validation accuracy = 97.0000
Validation avg loss = 0.5987

epoch 2, step 100/1100, batch loss = 0.4701, train acc = 91.4800, lr=0.819
epoch 2, step 200/1100, batch loss = 0.3334, train acc = 94.0100, lr=0.728
epoch

epoch 10, step 100/1100, batch loss = 0.1749, train acc = 93.5400, lr=0.819
epoch 10, step 200/1100, batch loss = 0.1450, train acc = 95.7000, lr=0.728
epoch 10, step 300/1100, batch loss = 0.1123, train acc = 96.4933, lr=0.637
epoch 10, step 400/1100, batch loss = 0.1418, train acc = 96.9750, lr=0.546
epoch 10, step 500/1100, batch loss = 0.0953, train acc = 97.2840, lr=0.455
epoch 10, step 600/1100, batch loss = 0.1197, train acc = 97.5533, lr=0.365
epoch 10, step 700/1100, batch loss = 0.2062, train acc = 97.7371, lr=0.274
epoch 10, step 800/1100, batch loss = 0.0789, train acc = 97.9525, lr=0.183
epoch 10, step 900/1100, batch loss = 0.0805, train acc = 98.0844, lr=0.092
epoch 10, step 1000/1100, batch loss = 0.1011, train acc = 98.2060, lr=0.001
epoch 10, step 1100/1100, batch loss = 0.1529, train acc = 98.3091, lr=0.001

Running evaluation:  Validation
Validation accuracy = 98.9600
Validation avg loss = 0.1170

epoch 11, step 100/1100, batch loss = 22.7695, train acc = 32.9400, l

epoch 18, step 1100/1100, batch loss = 0.2750, train acc = 95.7018, lr=0.001

Running evaluation:  Validation
Validation accuracy = 93.2400
Validation avg loss = 0.4655

epoch 19, step 100/1100, batch loss = 0.5935, train acc = 82.9200, lr=0.819
epoch 19, step 200/1100, batch loss = 0.2318, train acc = 89.1600, lr=0.728
epoch 19, step 300/1100, batch loss = 0.3390, train acc = 91.5533, lr=0.637
epoch 19, step 400/1100, batch loss = 0.2483, train acc = 92.9550, lr=0.546
epoch 19, step 500/1100, batch loss = 0.2181, train acc = 93.6600, lr=0.455
epoch 19, step 600/1100, batch loss = 0.1599, train acc = 94.2600, lr=0.365
epoch 19, step 700/1100, batch loss = 0.1689, train acc = 94.8086, lr=0.274
epoch 19, step 800/1100, batch loss = 0.1550, train acc = 95.2075, lr=0.183
epoch 19, step 900/1100, batch loss = 0.1771, train acc = 95.5689, lr=0.092
epoch 19, step 1000/1100, batch loss = 0.1326, train acc = 95.8540, lr=0.001
epoch 19, step 1100/1100, batch loss = 0.1386, train acc = 96.0509, l