# ResNet on MNIST

In [1]:
%matplotlib inline
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import cPickle
import sys
import os
import gzip

In [2]:
def log(log_file_path, string):
    '''
    Append one line to the log file and echo the same text on screen.
        log_file_path: Path of log file (opened in append mode).
        string:        String to write in log file; a newline is added.
    '''
    with open(log_file_path, 'a+') as log_file:
        # Leaving the `with` block closes the file, which also flushes it.
        log_file.write(string + '\n')
    print(string)

## Dataset functions

In [3]:
# Directory that holds the gzipped MNIST IDX files. The MNIST_DIR environment
# variable overrides the default so the notebook is not tied to one machine.
mnist_dir = os.environ.get('MNIST_DIR', '/scratch/f1fan/ResNet/data/')
# File names of the training split (images, then labels).
train_image_f = 'train-images-idx3-ubyte.gz'
train_label_f = 'train-labels-idx1-ubyte.gz'
# File names of the test split (images, then labels).
test_image_f = 't10k-images-idx3-ubyte.gz'
test_label_f = 't10k-labels-idx1-ubyte.gz'

In [4]:
def _read32(bytestream):
    dt = np.dtype(np.uint32).newbyteorder('>')
    return np.frombuffer(bytestream.read(4), dtype=dt)[0]

In [5]:
def load_train_data():
    '''
    Load the training images and labels from the gzipped IDX files
    `train_image_f` and `train_label_f` located under `mnist_dir`.

    Returns:
        images: uint8 array of shape (image_cnt, rows, cols, 1).
        labels: uint8 array with one entry per image.
    Raises:
        ValueError: If a magic number is wrong, the two files disagree on
                    the number of samples, or a file holds less data than
                    its header announces.
    '''
    img_f = os.path.join(mnist_dir, train_image_f)
    lbl_f = os.path.join(mnist_dir, train_label_f)

    with gzip.open(img_f) as img_bytestream, gzip.open(lbl_f) as lbl_bytestream:
        # Check magic number (2051 for the image file, 2049 for the label file)
        magic_img, magic_lbl = _read32(img_bytestream), _read32(lbl_bytestream)
        if magic_img != 2051 or magic_lbl != 2049:
            raise ValueError('Invalid magic number')

        # Read shape. _read32 yields 32-bit numpy scalars; converting them to
        # Python ints keeps the byte-count product below from wrapping around.
        image_cnt, label_cnt = int(_read32(img_bytestream)), int(_read32(lbl_bytestream))
        rows = int(_read32(img_bytestream))
        cols = int(_read32(img_bytestream))
        if image_cnt != label_cnt:
            # Mismatched files would silently pair images with wrong labels.
            raise ValueError('Image count {0} does not match label count {1}'.format(
                image_cnt, label_cnt))

        # Read label
        label_buf = lbl_bytestream.read(label_cnt)
        if len(label_buf) != label_cnt:
            raise ValueError('Label file is truncated')
        labels = np.frombuffer(label_buf, dtype=np.uint8)

        # Read image
        image_bytes = rows * cols * image_cnt
        image_buf = img_bytestream.read(image_bytes)
        if len(image_buf) != image_bytes:
            raise ValueError('Image file is truncated')
        images = np.frombuffer(image_buf, dtype=np.uint8)
        # Add a trailing channel axis of size 1.
        images = images.reshape(image_cnt, rows, cols, 1)

        return images, labels


In [6]:
def load_test_data():
    '''
    Load the test images and labels from the gzipped IDX files
    `test_image_f` and `test_label_f` located under `mnist_dir`.

    Returns:
        images: uint8 array of shape (image_cnt, rows, cols, 1).
        labels: uint8 array with one entry per image.
    Raises:
        ValueError: If a magic number is wrong, the two files disagree on
                    the number of samples, or a file holds less data than
                    its header announces.
    '''
    img_f = os.path.join(mnist_dir, test_image_f)
    lbl_f = os.path.join(mnist_dir, test_label_f)

    with gzip.open(img_f) as img_bytestream, gzip.open(lbl_f) as lbl_bytestream:
        # Check magic number (2051 for the image file, 2049 for the label file)
        magic_img, magic_lbl = _read32(img_bytestream), _read32(lbl_bytestream)
        if magic_img != 2051 or magic_lbl != 2049:
            raise ValueError('Invalid magic number')

        # Read shape. _read32 yields 32-bit numpy scalars; converting them to
        # Python ints keeps the byte-count product below from wrapping around.
        image_cnt, label_cnt = int(_read32(img_bytestream)), int(_read32(lbl_bytestream))
        rows = int(_read32(img_bytestream))
        cols = int(_read32(img_bytestream))
        if image_cnt != label_cnt:
            # Mismatched files would silently pair images with wrong labels.
            raise ValueError('Image count {0} does not match label count {1}'.format(
                image_cnt, label_cnt))

        # Read label
        label_buf = lbl_bytestream.read(label_cnt)
        if len(label_buf) != label_cnt:
            raise ValueError('Label file is truncated')
        labels = np.frombuffer(label_buf, dtype=np.uint8)

        # Read image
        image_bytes = rows * cols * image_cnt
        image_buf = img_bytestream.read(image_bytes)
        if len(image_buf) != image_bytes:
            raise ValueError('Image file is truncated')
        images = np.frombuffer(image_buf, dtype=np.uint8)
        # Add a trailing channel axis of size 1.
        images = images.reshape(image_cnt, rows, cols, 1)

        return images, labels

In [7]:
def get_per_pixel_mean(train_images, test_images):
    '''
    Compute the mean image over the training and test sets combined.
        train_images: Array of training images, stacked along axis 0.
        test_images:  Array of test images, stacked along axis 0.
    Returns the element-wise mean taken over axis 0 of the joined array.
    '''
    all_images = np.concatenate([train_images, test_images], axis=0)
    return all_images.mean(axis=0)

## Load data

In [8]:
def _rescale_pixels(pixels):
    '''Map pixel values from the range [0, 255] to floats in [-1, 1].'''
    return 1.0 * pixels / 255 * 2.0 - 1.0


log_file_path = os.path.join('/scratch/f1fan/ResNet', "log.txt")
log(log_file_path, "Training stage 1: Load training and testing images...")
train_images, train_labels = load_train_data()
test_images, test_labels = load_test_data()

# No per-pixel mean is subtracted; the images are only rescaled.
train_images = _rescale_pixels(train_images)
test_images = _rescale_pixels(test_images)

Training stage 1: Load training and testing images...


## Hyperparameters

In [9]:
# Residual units per stage; build_network() passes this to resnet().
num_units = 5
# Number of passes over the training set made by train().
epoch = 40
# Samples per mini-batch; also the fixed batch dimension of the placeholders.
batch_size = 100
# Whole mini-batches per epoch (a trailing partial batch is dropped).
iteration_per_epoch = len(train_images) // batch_size
# NOTE(review): not read by any visible cell; get_optimizers() hard-codes its
# own piecewise schedule. Confirm whether this is still needed.
learning_rate = 0.1
# Coefficient applied to the L2 penalty built in w_decay().
decay_rate = 2e-4

## Layer functions

In [10]:
def conv2d(input_layer, output_channels, filter_size, strides, scope):
    '''
    Square 2-D convolution with 'SAME' padding followed by a bias add.
        input_layer:     Input tensor; its last dimension is taken as the
                         number of input channels.
        output_channels: Number of output channels.
        filter_size:     Height and width of the square kernel.
        strides:         Strides handed unchanged to tf.nn.conv2d.
        scope:           Variable scope holding the 'filter' and 'bias' variables.
    Returns the convolution output with the bias added.
    '''
    with tf.variable_scope(scope):
        num_inputs = input_layer.get_shape().as_list()[-1]
        kernel_shape = [filter_size, filter_size, num_inputs, output_channels]

        # Kernel: truncated-normal init, with an L2 regularizer attached.
        kernel = tf.get_variable(
            name='filter',
            shape=kernel_shape,
            initializer=tf.truncated_normal_initializer(stddev=0.02),
            regularizer=tf.contrib.layers.l2_regularizer(scale=0.0002))
        convolved = tf.nn.conv2d(input_layer, kernel, strides=strides, padding='SAME')

        # Per-channel bias, initialised to zero.
        offset = tf.get_variable(name='bias',
                                 shape=[output_channels],
                                 initializer=tf.constant_initializer(0.0))
        biased = tf.nn.bias_add(convolved, offset)

        # Reshape to the static shape of the convolution output.
        return tf.reshape(biased, convolved.get_shape())

In [11]:
def lrelu(input_layer, leak=0.2):
    '''
    Activation used throughout the network.
    Despite the name, this applies a plain ReLU; `leak` is accepted but
    ignored. The leaky form, tf.maximum(x, leak * x), is disabled.
    '''
    activated = tf.nn.relu(input_layer)
    return activated

In [12]:
def fully_connected(input_layer, output_dim, scope):
    '''
    Dense layer: input_layer x weight + bias, with no activation.
        input_layer: Rank-2 tensor [batch, features].
        output_dim:  Number of output units.
        scope:       Variable scope holding the 'weight' and 'bias' variables.
    Returns the [batch, output_dim] result.
    '''
    # The two-way unpacking fails unless the input has exactly two dimensions.
    _, num_features = input_layer.get_shape().as_list()

    with tf.variable_scope(scope):
        # Weight matrix: normal init, with an L2 regularizer attached.
        w = tf.get_variable(
            name='weight',
            shape=[num_features, output_dim],
            dtype=tf.float32,
            initializer=tf.random_normal_initializer(stddev=0.02),
            regularizer=tf.contrib.layers.l2_regularizer(0.0002))

        # Bias vector, initialised to zero.
        b = tf.get_variable(name="bias",
                            shape=[output_dim],
                            initializer=tf.constant_initializer(0.0))

        projected = tf.matmul(input_layer, w)
        return projected + b

In [13]:
def batch_norm(input_layer, is_training, scope, reuse):
    '''
    Thin wrapper around tf.contrib.layers.batch_norm with fixed settings.
        input_layer: Tensor to normalise.
        is_training: Forwarded to the layer unchanged.
        scope:       Variable scope for the layer's variables.
        reuse:       Forwarded to the layer unchanged.
    Returns the layer's output.
    '''
    bn_options = dict(
        decay=0.9,
        # None is passed so the layer does not defer its updates to a
        # collection (see the tf.contrib.layers.batch_norm documentation).
        updates_collections=None,
        epsilon=1e-5,
        scale=True,
        is_training=is_training,
        reuse=reuse,
        scope=scope,
    )
    return tf.contrib.layers.batch_norm(input_layer, **bn_options)

In [14]:
def avg_pool(input_layer, strides, scope):
    '''
    Average pooling whose window size equals its stride, 'VALID' padding.
        input_layer: Tensor to pool.
        strides:     Used both as the window size (ksize) and as the strides.
        scope:       Variable scope the op is created under.
    Returns the pooled tensor.
    '''
    with tf.variable_scope(scope):
        pooled = tf.nn.avg_pool(input_layer,
                                ksize=strides,
                                strides=strides,
                                padding='VALID')
    return pooled

In [15]:
def w_decay():
    '''
    Build the L2 weight-decay term over the convolution filters.

    Sums tf.nn.l2_loss over every trainable variable whose op name contains
    'filter' and scales the sum by the global `decay_rate`.

    Returns a scalar tensor; a constant 0.0 when no such variable exists.
    '''
    # `in` also matches a name that starts with 'filter', which the previous
    # `find(...) > 0` test skipped.
    penalties = [tf.nn.l2_loss(var)
                 for var in tf.trainable_variables()
                 if 'filter' in var.op.name]
    if not penalties:
        # tf.add_n cannot sum an empty list.
        return tf.constant(0.0)
    # Plain `*` replaces tf.mul, which was removed in TensorFlow 1.0.
    return decay_rate * tf.add_n(penalties)

In [16]:
def residual(input_layer, increase_dim, first, scope):
    '''
    One residual unit: (BN-ReLU) -> conv -> BN-ReLU -> conv, plus a shortcut.
        input_layer:  Input tensor; its last dimension is the channel count.
        increase_dim: If True, double the channels and use stride 2 in the
                      first convolution; the shortcut is then average-pooled
                      and zero-padded along the channel axis.
        first:        If True, skip the leading BN-ReLU on the input.
        scope:        Variable scope for the unit.
    Returns the sum of the second convolution and the shortcut.
    Note: every batch-norm layer here is built with is_training=True.
    '''
    in_channels = input_layer.get_shape().as_list()[-1]
    out_channels = in_channels * 2 if increase_dim else in_channels
    first_strides = [1, 2, 2, 1] if increase_dim else [1, 1, 1, 1]

    with tf.variable_scope(scope):
        if first:
            pre_act = input_layer
        else:
            pre_act = lrelu(batch_norm(input_layer, is_training=True, scope='h0_bn', reuse=False))

        conv_a = conv2d(pre_act, out_channels, filter_size=3, strides=first_strides, scope='h1_conv')
        act_a = lrelu(batch_norm(conv_a, is_training=True, scope='h1_bn', reuse=False))

        conv_b = conv2d(act_a, out_channels, filter_size=3, strides=[1, 1, 1, 1], scope='h2_conv')

        if not increase_dim:
            shortcut = input_layer
        else:
            # Pool the input, then pad in_channels // 2 zero channels on each
            # side of the channel axis.
            pad = in_channels // 2
            shortcut = avg_pool(input_layer, strides=[1, 2, 2, 1], scope='l_pool')
            shortcut = tf.pad(shortcut, [[0, 0], [0, 0], [0, 0], [pad, pad]])

        return tf.add(conv_b, shortcut)

## ResNet model

In [17]:
def resnet(images, num_units):
    '''
    Build the residual network under the 'ResNet' variable scope.
        images:    Input tensor; its first static dimension is used as the
                   batch size when flattening before the dense layer.
        num_units: Residual units per stage. Each of the three stages always
                   has its unit 0; units 1 .. num_units - 1 follow it.
    Returns (probabilities, logits): the softmax of the 10-way dense output
    and the dense output itself.
    Note: every batch-norm layer here is built with is_training=True.
    '''
    with tf.variable_scope('ResNet', reuse=False):
        init_dim = 16
        batch_size = images.get_shape().as_list()[0]

        r0_conv = conv2d(images, init_dim, filter_size=3, strides=[1, 1, 1, 1], scope='r0_conv')
        r0 = lrelu(batch_norm(r0_conv, is_training=True, scope='r0_bn', reuse=False))

        # Three stages with scopes 'res<stage>.<unit>'. Unit 0 of stages 2
        # and 3 increases the dimension; unit 0 of stage 1 is the only one
        # built with first=True. `range` works on both Python 2 and 3.
        net = r0
        for stage in (1, 2, 3):
            net = residual(net, increase_dim=(stage > 1), first=(stage == 1),
                           scope='res{0}.0'.format(stage))
            for k in range(1, num_units):
                net = residual(net, increase_dim=False, first=False,
                               scope='res{0}.{1}'.format(stage, k))

        r4 = lrelu(batch_norm(net, is_training=True, scope='r4_bn', reuse=False))

        # Global average pooling over axes 1 and 2.
        axis = [1, 2]
        r5 = tf.reduce_mean(r4, axis, name='global_pool')

        fc = fully_connected(tf.reshape(r5, [batch_size, -1]), output_dim=10, scope='fully_connected')
        return tf.nn.softmax(fc), fc

In [18]:
def build_network(batch_shape):
    '''
    Create the placeholders, the network, and the loss and accuracy tensors.
        batch_shape: Four-element shape [batch_size, height, width, channels]
                     of one image batch.
    Returns [loss, var, accuracy, images, labels]:
        loss:     Mean sparse softmax cross-entropy plus the w_decay() term.
        var:      Trainable variables whose name contains 'ResNet'.
        accuracy: Fraction of the batch whose arg-max prediction equals the label.
        images:   float32 placeholder of shape batch_shape.
        labels:   int32 placeholder of shape [batch_size].
    '''
    # The four-way unpacking fails unless batch_shape has exactly four entries.
    batch_size, _, _, _ = batch_shape

    images = tf.placeholder(tf.float32, shape=batch_shape, name='images')
    labels = tf.placeholder(tf.int32, shape=[batch_size], name='labels')

    # The global `num_units` sets the depth of the network.
    probability, logits = resnet(images, num_units)

    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                                   labels=labels)
    loss = tf.reduce_mean(cross_entropy) + w_decay()

    predicted = tf.cast(tf.argmax(probability, axis=1), tf.int32)
    is_correct = tf.equal(predicted, labels)
    accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

    var = [v for v in tf.trainable_variables() if 'ResNet' in v.name]
    return [loss, var, accuracy, images, labels]

In [19]:
def get_optimizers(loss, var):
    '''
    Create the momentum optimizer and its step counter.
        loss: Scalar tensor to minimise.
        var:  List of variables the optimizer may update.
    Returns (optimizer, global_step_op, global_step):
        optimizer:      Op performing one minimisation step.
        global_step_op: Op adding 1 to the step counter. The counter is not
                        passed to minimize(), so it advances only when this
                        op is run.
        global_step:    The non-trainable step counter, starting at 0.
    '''
    with tf.variable_scope('optimizer'):
        step = tf.Variable(initial_value=0, trainable=False)
        increment_step = step.assign_add(1)

        # Learning rate is 0.1, then 0.01, 0.001 and 0.0002, switching at
        # steps 2400, 4000 and 8000.
        schedule = tf.train.piecewise_constant(step,
                                               boundaries=[2400, 4000, 8000],
                                               values=[0.1, 0.01, 0.001, 0.0002])
        momentum_opt = tf.train.MomentumOptimizer(learning_rate=schedule, momentum=0.9)
        train_op = momentum_opt.minimize(loss=loss, var_list=var)
        return train_op, increment_step, step

In [None]:
def train(sess):
    '''
    Build the network, then train it and evaluate it after every epoch.
        sess: TensorFlow session used to initialise and run the graph.

    Reads the globals `batch_size`, `epoch`, `iteration_per_epoch`,
    `train_images`/`train_labels`, `test_images`/`test_labels` and
    `log_file_path`. Progress is written through log(); nothing is returned.

    NOTE(review): evaluation reuses the training graph, whose batch-norm
    layers are all built with is_training=True.
    '''
    log(log_file_path,
        "Training stage 2: Build network and initialize...")
    # Build network. The per-image shape comes from the loaded data, so the
    # placeholder always matches what is fed.
    batch_shape = [batch_size] + list(train_images.shape[1:])
    r_loss, r_var, r_accuracy, images, labels = build_network(batch_shape)
    r_opt, global_step_op, global_step = get_optimizers(r_loss, r_var)

    # Show a list of global variables.
    global_variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='')
    log(log_file_path, 'Global variables:')
    for i, var in enumerate(global_variables):
        log(log_file_path, "{0} {1}".format(i, var.name))

    # Initialize all variables
    all_initializer_op = tf.global_variables_initializer()
    sess.run(all_initializer_op)

    log(log_file_path, "Training stage 3: Epoch training...")
    for i in range(epoch):
        # Shuffle training set
        shuffle = np.random.permutation(train_images.shape[0])
        for j in range(iteration_per_epoch):
            # Get current batch of image
            batch_idx = shuffle[j * batch_size : (j + 1) * batch_size]
            feed = {images: train_images[batch_idx], labels: train_labels[batch_idx]}

            sess.run(r_opt, feed_dict = feed)
            if j % 50 == 0:
                # Loss and accuracy are only used for this log line, so the
                # extra forward pass is skipped on all other iterations.
                batch_loss, batch_accuracy = sess.run([r_loss, r_accuracy],
                                                      feed_dict = feed)
                log(log_file_path, "Training epoch {0}, iteration {1}, global_step {2}, batch_loss {3}, batch_accuracy {4}".format(
                    i, j, sess.run(global_step), batch_loss, batch_accuracy))
            # The step counter drives the learning-rate schedule and is
            # advanced by hand after every update.
            sess.run(global_step_op)

        # Evaluate on whole test batches; a trailing partial batch is dropped.
        test_batch_count = test_images.shape[0] // batch_size
        test_loss = 0.0
        test_accuracy = 0.0
        for k in range(test_batch_count):
            # Get current batch of image
            batch_images = test_images[k * batch_size : (k + 1) * batch_size]
            batch_labels = test_labels[k * batch_size : (k + 1) * batch_size]

            batch_loss, batch_accuracy = sess.run([r_loss, r_accuracy],
                                                  feed_dict = {images: batch_images, labels:batch_labels})
            test_loss += batch_loss
            test_accuracy += batch_accuracy

        log(log_file_path, 'Testing epoch {0}, loss {1}, error {2}'.format(
                i, test_loss / test_batch_count, 1.0 - (test_accuracy / test_batch_count)))
            

In [None]:
# Session options: soft device placement and on-demand GPU memory growth.
config = tf.ConfigProto(allow_soft_placement=True)
config.gpu_options.allow_growth = True

# Index of the GPU the graph is pinned to.
gpu_number = 0

# Build everything in a fresh graph, pinned to the chosen GPU, and train
# inside a session bound to that graph.
graph = tf.Graph()
with graph.as_default(), tf.device("/gpu:{0}".format(gpu_number)):
    with tf.Session(config=config) as sess:
        train(sess)

Training stage 2: Build network and initialize...
Global variables:
0 ResNet/r0_conv/filter:0
1 ResNet/r0_conv/bias:0
2 ResNet/r0_bn/beta:0
3 ResNet/r0_bn/gamma:0
4 ResNet/r0_bn/moving_mean:0
5 ResNet/r0_bn/moving_variance:0
6 ResNet/res1.0/h1_conv/filter:0
7 ResNet/res1.0/h1_conv/bias:0
8 ResNet/res1.0/h1_bn/beta:0
9 ResNet/res1.0/h1_bn/gamma:0
10 ResNet/res1.0/h1_bn/moving_mean:0
11 ResNet/res1.0/h1_bn/moving_variance:0
12 ResNet/res1.0/h2_conv/filter:0
13 ResNet/res1.0/h2_conv/bias:0
14 ResNet/res1.1/h0_bn/beta:0
15 ResNet/res1.1/h0_bn/gamma:0
16 ResNet/res1.1/h0_bn/moving_mean:0
17 ResNet/res1.1/h0_bn/moving_variance:0
18 ResNet/res1.1/h1_conv/filter:0
19 ResNet/res1.1/h1_conv/bias:0
20 ResNet/res1.1/h1_bn/beta:0
21 ResNet/res1.1/h1_bn/gamma:0
22 ResNet/res1.1/h1_bn/moving_mean:0
23 ResNet/res1.1/h1_bn/moving_variance:0
24 ResNet/res1.1/h2_conv/filter:0
25 ResNet/res1.1/h2_conv/bias:0
26 ResNet/res1.2/h0_bn/beta:0
27 ResNet/res1.2/h0_bn/gamma:0
28 ResNet/res1.2/h0_bn/moving_mean:0
