In [1]:
import os
import time
from datetime import timedelta
from layers_tf import *
from utils import show_graph

# Load the train / validation / test splits in one call.
# NOTE(review): get_CIFAR10_data is not imported by name here; it presumably
# arrives through `from layers_tf import *` -- confirm.
(X_train, y_train,
 X_val, y_val,
 X_test, y_test) = get_CIFAR10_data()

In [2]:
# --- input geometry ---------------------------------------------------------
img_size = 32                       # images are img_size x img_size
img_shape = (img_size, img_size)
# Pixel count of ONE channel plane; num_channels is not folded in here.
img_size_flat = img_size ** 2
num_channels = 3
num_classes = 10

# --- network architecture ---------------------------------------------------
# Four convolutional stages share one kernel size and double their filter
# count at every stage.
filter_size1 = filter_size2 = filter_size3 = filter_size4 = 3
num_filters1, num_filters2, num_filters3, num_filters4 = 16, 32, 64, 128

# Widths of the two fully connected layers.
fc_size1 = fc_size2 = 256

# NOTE(review): not used in the cells visible here; presumably the logging
# interval of the training loop -- confirm.
print_every = 1000

In [3]:
# Start from an empty default graph so that re-running this cell does not
# collide with variables created by an earlier run.
tf.reset_default_graph()

# Graph inputs. The image placeholder is sized from the configuration
# constants (img_size, num_channels) instead of repeating the literals 32 / 3,
# so the two cannot drift apart.
x_image = tf.placeholder(tf.float32, [None, img_size, img_size, num_channels], name='x_image')
# Integer class ids, one per example (converted to one-hot only inside the loss).
y_true_cls = tf.placeholder(tf.int64, [None], name='y_true_cls')
# Boolean switch handed to every block below.
# NOTE(review): presumably selects training vs. inference behaviour of batch
# normalisation inside the helpers -- confirm in layers_tf.
phase = tf.placeholder(tf.bool, name='phase')

# Weight initializer shared by every layer.
init = tf.contrib.layers.xavier_initializer()

# L2 weight penalty shared by every layer; each regularised kernel adds one
# scalar term to tf.GraphKeys.REGULARIZATION_LOSSES.
reg = tf.contrib.layers.l2_regularizer(scale=0.05)

# Convolutional stack.
# NOTE(review): conv_norm_relu_pool / dense_norm_relu are not defined in this
# notebook (presumably from layers_tf). The positional `2` is passed through
# unchanged; it looks like the pooling size/stride -- confirm.
conv1 = conv_norm_relu_pool(x_image, num_filters1, filter_size1, 2, phase, init, reg, 'conv1')
conv2 = conv_norm_relu_pool(conv1,   num_filters2, filter_size2, 2, phase, init, reg, 'conv2')
conv3 = conv_norm_relu_pool(conv2,   num_filters3, filter_size3, 2, phase, init, reg, 'conv3')
conv4 = conv_norm_relu_pool(conv3,   num_filters4, filter_size4, 2, phase, init, reg, 'conv4')

# Collapse the feature maps to one vector per example for the dense layers.
flat = tf.contrib.layers.flatten(conv4, scope='flat')
fc1 = dense_norm_relu(flat, fc_size1, phase, init, reg, 'fc1')
fc2 = dense_norm_relu(fc1,  fc_size2, phase, init, reg, 'fc2')

# Final linear layer: raw class scores, no activation (softmax is applied in
# the loss section).
logits = tf.layers.dense(fc2, num_classes, kernel_initializer=init, kernel_regularizer=reg, name='logits')

with tf.name_scope('loss'):
    # Per-class probabilities (softmax output, not a one-hot vector) and the
    # most likely class index for each example.
    y_pred = tf.nn.softmax(logits)
    # `axis` replaces the deprecated `dimension` keyword of tf.argmax.
    y_pred_cls = tf.argmax(y_pred, axis=1)

    # Data term: mean softmax cross-entropy over the batch. The one-hot depth
    # follows num_classes so it always matches the width of `logits`.
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        logits=logits, labels=tf.one_hot(y_true_cls, num_classes))
    base_loss = tf.reduce_mean(cross_entropy)

    # tf.get_collection returns a Python *list* of scalar tensors (one per
    # regularised kernel). Adding that list to base_loss directly would pack it
    # into a vector and broadcast, giving a vector-valued loss in which the
    # cross-entropy is counted once per regulariser. Sum the terms to a single
    # scalar first.
    reg_loss = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))

    # Scalar objective handed to the optimizer.
    loss = tf.add(base_loss, reg_loss)

with tf.name_scope('accuracy'):
    # Element-wise match between predicted and true class ids ...
    correct_prediction = tf.equal(x=y_pred_cls, y=y_true_cls)
    # ... averaged over the batch after casting the booleans to 0.0 / 1.0.
    accuracy = tf.reduce_mean(
        input_tensor=tf.cast(x=correct_prediction, dtype=tf.float32))

In [4]:
# Learning-rate schedule: start at learning_rate_init and multiply by 0.95
# once every decay_steps optimizer steps (staircase=True makes the decay
# step-wise rather than continuous).
learning_rate_init = 1e-3
decay_steps = 1000
# Non-trainable step counter; minimize() below increments it on every update.
global_step = tf.Variable(initial_value=0, trainable=False)
learning_rate = tf.train.exponential_decay(
    learning_rate=learning_rate_init,
    global_step=global_step,
    decay_steps=decay_steps,
    decay_rate=0.95,
    staircase=True,
)

# Ops registered under UPDATE_OPS (needed by batch normalization) are not run
# automatically; making the train op depend on them runs them on every step.
extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(extra_update_ops):
    # Despite its name, `optimizer` holds the training op returned by minimize().
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(
        loss=loss, global_step=global_step)

In [5]:
# Render the computation graph built above.
graph_def = tf.get_default_graph().as_graph_def()
show_graph(graph_def)

In [6]:
# Names of every variable in the graph, including optimizer slots and the
# step counter.
[var.name for var in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)]

['conv1/conv/kernel:0',
 'conv1/conv/bias:0',
 'conv1/bn/beta:0',
 'conv1/bn/gamma:0',
 'conv1/bn/moving_mean:0',
 'conv1/bn/moving_variance:0',
 'conv2/conv/kernel:0',
 'conv2/conv/bias:0',
 'conv2/bn/beta:0',
 'conv2/bn/gamma:0',
 'conv2/bn/moving_mean:0',
 'conv2/bn/moving_variance:0',
 'conv3/conv/kernel:0',
 'conv3/conv/bias:0',
 'conv3/bn/beta:0',
 'conv3/bn/gamma:0',
 'conv3/bn/moving_mean:0',
 'conv3/bn/moving_variance:0',
 'conv4/conv/kernel:0',
 'conv4/conv/bias:0',
 'conv4/bn/beta:0',
 'conv4/bn/gamma:0',
 'conv4/bn/moving_mean:0',
 'conv4/bn/moving_variance:0',
 'fc1/dense/kernel:0',
 'fc1/dense/bias:0',
 'fc1/bn/beta:0',
 'fc1/bn/gamma:0',
 'fc1/bn/moving_mean:0',
 'fc1/bn/moving_variance:0',
 'fc2/dense/kernel:0',
 'fc2/dense/bias:0',
 'fc2/bn/beta:0',
 'fc2/bn/gamma:0',
 'fc2/bn/moving_mean:0',
 'fc2/bn/moving_variance:0',
 'logits/kernel:0',
 'logits/bias:0',
 'Variable:0',
 'beta1_power:0',
 'beta2_power:0',
 'conv1/conv/kernel/Adam:0',
 'conv1/conv/kernel/Adam_1:0',
 

In [7]:
# Names of the variables the optimizer is allowed to update.
[var.name for var in tf.trainable_variables()]

['conv1/conv/kernel:0',
 'conv1/conv/bias:0',
 'conv1/bn/beta:0',
 'conv1/bn/gamma:0',
 'conv2/conv/kernel:0',
 'conv2/conv/bias:0',
 'conv2/bn/beta:0',
 'conv2/bn/gamma:0',
 'conv3/conv/kernel:0',
 'conv3/conv/bias:0',
 'conv3/bn/beta:0',
 'conv3/bn/gamma:0',
 'conv4/conv/kernel:0',
 'conv4/conv/bias:0',
 'conv4/bn/beta:0',
 'conv4/bn/gamma:0',
 'fc1/dense/kernel:0',
 'fc1/dense/bias:0',
 'fc1/bn/beta:0',
 'fc1/bn/gamma:0',
 'fc2/dense/kernel:0',
 'fc2/dense/bias:0',
 'fc2/bn/beta:0',
 'fc2/bn/gamma:0',
 'logits/kernel:0',
 'logits/bias:0']

In [8]:
# Individual regularization terms registered by the layers (a Python list of
# scalar tensors, one per regularised kernel).
tf.get_default_graph().get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)

[<tf.Tensor 'conv1/conv/kernel/Regularizer/l2_regularizer:0' shape=() dtype=float32>,
 <tf.Tensor 'conv2/conv/kernel/Regularizer/l2_regularizer:0' shape=() dtype=float32>,
 <tf.Tensor 'conv3/conv/kernel/Regularizer/l2_regularizer:0' shape=() dtype=float32>,
 <tf.Tensor 'conv4/conv/kernel/Regularizer/l2_regularizer:0' shape=() dtype=float32>,
 <tf.Tensor 'fc1/dense/kernel/Regularizer/l2_regularizer:0' shape=() dtype=float32>,
 <tf.Tensor 'fc2/dense/kernel/Regularizer/l2_regularizer:0' shape=() dtype=float32>,
 <tf.Tensor 'logits/kernel/Regularizer/l2_regularizer:0' shape=() dtype=float32>]