In [1]:
import numpy as np
import tensorflow as tf
# import _pickle as cPickle
import pickle
import os.path as osp

In [2]:


def unpickle(file):
    """Load one pickled CIFAR batch (or metadata) file and return its dict.

    The file is decoded with ``encoding='latin1'``. If the loaded dict has a
    ``'data'`` entry, it is converted from channel-major rows (reshaped as
    ``(-1, 3, 32, 32)``) to flattened height x width x channel rows of
    length ``32*32*3`` and divided by 256.

    Args:
        file: path of the pickle file to read.

    Returns:
        The unpickled dict, with ``'data'`` replaced as described above when
        that key is present.

    Note:
        ``pickle`` can execute arbitrary code while loading; only use this on
        trusted dataset files.
    """
    # The context manager closes the handle even if unpickling raises.
    with open(file, 'rb') as fo:
        contents = pickle.load(fo, encoding='latin1')

    if 'data' in contents:
        # (N, C, H, W) -> (N, H, W, C); same permutation as
        # swapaxes(1, 3) followed by swapaxes(1, 2).
        images = contents['data'].reshape((-1, 3, 32, 32)).transpose(0, 2, 3, 1)
        contents['data'] = images.reshape(-1, 32*32*3) / 256.

    return contents



def load_data_one(f):
    """Read a single batch file and return its ``(data, labels)`` pair.

    Prints the file path together with the number of rows that were loaded.
    """
    contents = unpickle(f)
    images, targets = contents['data'], contents['labels']
    print("Loading %s: %d" % (f, len(images)))
    return images, targets

def load_data(files, data_dir, label_count):
    """Load several batch files and return stacked data with one-hot labels.

    Args:
        files: non-empty sequence of file names located inside ``data_dir``.
        data_dir: directory containing the batch files.
        label_count: number of classes, i.e. the width of the one-hot rows.

    Returns:
        ``(data, labels)`` where ``data`` stacks the rows of every file in
        order and ``labels`` is a float array of shape
        ``(len(data), label_count)`` with a 1.0 in the column of each label.

    Raises:
        IndexError: if ``files`` is empty.
    """
    if len(files) == 0:
        raise IndexError("load_data needs at least one file name")

    data_parts, label_parts = [], []
    for f in files:
        data_n, labels_n = load_data_one(data_dir + '/' + f)
        data_parts.append(data_n)
        label_parts.append(labels_n)

    # Concatenate once instead of re-copying the growing array per file.
    data = np.concatenate(data_parts, axis=0)
    labels = np.concatenate(label_parts, axis=0)

    # Row r gets 1.0 where the column index equals labels[r], else 0.0.
    one_hot = (labels[:, None] == np.arange(label_count)).astype(float)
    return data, one_hot

# Directory holding the pickled CIFAR-10 batch files.
data_dir = "/home/tom/datasets/cifar-10-batches-py"

# Square RGB images: image_dim is the length of one flattened image.
image_size = 32
image_dim = 3 * image_size * image_size

# Class names (and therefore the class count) come from the metadata file.
meta = unpickle(data_dir + '/batches.meta')
label_names = meta['label_names']
label_count = len(label_names)

# Training set: five batch files, shuffled once with a random permutation.
train_files = ['data_batch_%d' % d for d in (1, 2, 3, 4, 5)]
train_data, train_labels = load_data(train_files, data_dir, label_count)
pi = np.random.permutation(len(train_data))
train_data = train_data[pi]
train_labels = train_labels[pi]

# Test set: a single batch file, kept in file order.
test_data, test_labels = load_data(['test_batch'], data_dir, label_count)



Loading /home/tom/datasets/cifar-10-batches-py/data_batch_1: 10000
Loading /home/tom/datasets/cifar-10-batches-py/data_batch_2: 10000
Loading /home/tom/datasets/cifar-10-batches-py/data_batch_3: 10000
Loading /home/tom/datasets/cifar-10-batches-py/data_batch_4: 10000
Loading /home/tom/datasets/cifar-10-batches-py/data_batch_5: 10000
Loading /home/tom/datasets/cifar-10-batches-py/test_batch: 10000


In [3]:
# Bundle the splits so later cells can look them up by name.
data = {
    'train_data': train_data,
    'train_labels': train_labels,
    'test_data': test_data,
    'test_labels': test_labels,
}

batch_size = 64
learning_rate = 0.1

train_data = data['train_data']
train_labels = data['train_labels']

# Only whole batches are used; the trailing remainder rows are dropped.
batch_count = len(train_data) // batch_size
_usable_rows = batch_count * batch_size
batches_data = np.split(train_data[:_usable_rows], batch_count)
batches_labels = np.split(train_labels[:_usable_rows], batch_count)

In [4]:
# Sanity check: initial learning rate and the shapes of both splits.
print(learning_rate)
print("Train:", train_data.shape, train_labels.shape)
print("Test:", test_data.shape, test_labels.shape)

0.1
Train: (50000, 3072) (50000, 10)
Test: (10000, 3072) (10000, 10)


In [None]:
def run_in_batch_avg(session, tensors, batch_placeholders, feed_dict=None, batch_size=200):
    """Evaluate ``tensors`` in mini-batches and return their weighted averages.

    The values fed for every placeholder in ``batch_placeholders`` are sliced
    into consecutive chunks of at most ``batch_size`` rows. ``session.run`` is
    called once per chunk, each result is weighted by the chunk length, and
    the weighted sum is divided by the total number of rows.

    Args:
        session: object exposing ``run(tensors, feed_dict=...)``.
        tensors: list of fetches; each must evaluate to a value that can be
            multiplied by an int and summed (e.g. a per-batch mean).
        batch_placeholders: keys of ``feed_dict`` whose values are sliced per
            batch. The first one determines the total row count; all of them
            are assumed to have the same length.
        feed_dict: full feed dict. Entries not in ``batch_placeholders`` are
            passed through unchanged on every call. The caller's dict is
            never modified.
        batch_size: maximum number of rows per ``session.run`` call.

    Returns:
        A list with one averaged value per entry of ``tensors``.

    Raises:
        ZeroDivisionError: if the batched inputs contain no rows.
    """
    # Work on a copy so neither the caller's dict nor a shared default is
    # overwritten with batch slices.
    feed = dict(feed_dict) if feed_dict is not None else {}
    batched = [(placeholder, feed[placeholder]) for placeholder in batch_placeholders]
    total_size = len(batched[0][1])
    batch_count = (total_size + batch_size - 1) // batch_size  # ceiling division

    totals = [0] * len(tensors)
    for batch_idx in range(batch_count):
        start, stop = batch_idx * batch_size, (batch_idx + 1) * batch_size
        current_batch_size = None
        for placeholder, values in batched:
            chunk = values[start:stop]
            current_batch_size = len(chunk)
            feed[placeholder] = chunk
        results = session.run(tensors, feed_dict=feed)
        # Weight by chunk length so a shorter final chunk is not over-counted.
        totals = [acc + value * current_batch_size for (acc, value) in zip(totals, results)]
    return [acc / float(total_size) for acc in totals]

def variable_summaries(var):
    """Register mean, stddev, max, min and histogram summaries for ``var``.

    All summaries are created under the ``summaries`` name scope so they
    group together in TensorBoard.
    """
    with tf.name_scope('summaries'):
        average = tf.reduce_mean(var)
        tf.summary.scalar('mean', average)
        with tf.name_scope('stddev'):
            deviation = var - average
            spread = tf.sqrt(tf.reduce_mean(tf.square(deviation)))
        tf.summary.scalar('stddev', spread)
        largest = tf.reduce_max(var)
        tf.summary.scalar('max', largest)
        smallest = tf.reduce_min(var)
        tf.summary.scalar('min', smallest)
        tf.summary.histogram('histogram', var)

def weight_variable(shape):
    """Create a weight variable of ``shape`` drawn from a truncated normal (stddev 0.01)."""
    with tf.name_scope('weights'):
        # Summaries are disabled; call variable_summaries() on the result to
        # re-enable them.
        return tf.Variable(tf.truncated_normal(shape, stddev=0.01))

def bias_variable(shape):
    """Create a bias variable of ``shape`` with every entry initialised to 0.01."""
    with tf.name_scope('biases'):
        # Summaries are disabled; call variable_summaries() on the result to
        # re-enable them.
        return tf.Variable(tf.constant(0.01, shape=shape))
    
def conv2d(input, in_features, out_features, kernel_size, with_bias=False):
    """Apply a stride-1, SAME-padded square convolution with a new kernel.

    A ``kernel_size`` x ``kernel_size`` kernel mapping ``in_features`` to
    ``out_features`` channels is created; a bias of length ``out_features``
    is added only when ``with_bias`` is true.
    """
    kernel_shape = [kernel_size, kernel_size, in_features, out_features]
    kernel = weight_variable(kernel_shape)
    output = tf.nn.conv2d(input, kernel, [1, 1, 1, 1], padding='SAME')
    if not with_bias:
        return output
    return output + bias_variable([out_features])

def batch_activ_conv(current, in_features, out_features, kernel_size, is_training, keep_prob):
    """Chain batch norm -> ReLU -> bias-free convolution -> dropout.

    ``is_training`` is forwarded to batch norm and ``keep_prob`` to dropout.
    """
    normalized = tf.contrib.layers.batch_norm(
        current, scale=True, is_training=is_training, updates_collections=None)
    activated = tf.nn.relu(normalized)
    convolved = conv2d(activated, in_features, out_features, kernel_size)
    return tf.nn.dropout(convolved, keep_prob)

def block(input, layers, in_features, growth, is_training, keep_prob):
    """Build a densely connected block of ``layers`` 3x3 conv units.

    Each unit produces ``growth`` new feature maps that are concatenated onto
    the running tensor along axis 3.

    Returns:
        ``(tensor, features)``: the concatenated output and its channel count,
        ``in_features + layers * growth``.
    """
    stacked, width = input, in_features
    for _ in range(layers):
        new_maps = batch_activ_conv(stacked, width, growth, 3, is_training, keep_prob)
        stacked = tf.concat((stacked, new_maps), 3)
        width = width + growth
    return stacked, width

def avg_pool(input, s):
    """Average-pool with an ``s`` x ``s`` window, stride ``s`` and VALID padding."""
    window = [1, s, s, 1]
    stride = [1, s, s, 1]
    return tf.nn.avg_pool(input, window, stride, 'VALID')

# Hyper-parameters and graph definition for the densely connected network.
# Three dense blocks are built below; with depth = 40 each one gets
# (40 - 4) / 3 = 12 layers.
depth = 40
weight_decay = 1e-4
layers = int((depth - 4) / 3)
graph = tf.Graph()

print("Creating graph...")
with graph.as_default():
    # Inputs: flattened images, one-hot labels, and per-step scalars
    # (learning rate, dropout keep probability, train/eval switch).
    xs = tf.placeholder("float", shape=[None, image_dim])
    ys = tf.placeholder("float", shape=[None, label_count])
    lr = tf.placeholder("float", shape=[])
    keep_prob = tf.placeholder(tf.float32)
    is_training = tf.placeholder("bool", shape=[])


    # Back to 32x32x3 images, then an initial 3x3 convolution (3 -> 16
    # channels, no bias since with_bias defaults to False).
    current = tf.reshape(xs, [ -1, 32, 32, 3 ])
    current = conv2d(current, 3, 16, 3)

    # Dense block 1 (growth 12 per layer), then a transition: a 1x1
    # batch_activ_conv that keeps the channel count, and 2x2 average pooling.
    current, features = block(current, layers, 16, 12, is_training, keep_prob)
    current = batch_activ_conv(current, features, features, 1, is_training, keep_prob)
    current = avg_pool(current, 2)
    # Dense block 2 and the second transition.
    current, features = block(current, layers, features, 12, is_training, keep_prob)
    current = batch_activ_conv(current, features, features, 1, is_training, keep_prob)
    current = avg_pool(current, 2)
    # Dense block 3 (no transition afterwards).
    current, features = block(current, layers, features, 12, is_training, keep_prob)

    # Head: batch norm, ReLU, then 8x8 average pooling. The two 2x2 pools
    # above reduce 32x32 to 8x8, so this pooling leaves one value per channel.
    current = tf.contrib.layers.batch_norm(current, scale=True, is_training=is_training, updates_collections=None)
    current = tf.nn.relu(current)
    current = avg_pool(current, 8)
    final_dim = features
    current = tf.reshape(current, [ -1, final_dim ])
    # Fully connected softmax classifier over the pooled features.
    Wfc = weight_variable([ final_dim, label_count ])
    bfc = bias_variable([ label_count ])
    ys_ = tf.nn.softmax( tf.matmul(current, Wfc) + bfc )

    # Cross-entropy on the softmax output; 1e-12 keeps log() away from 0.
    # NOTE(review): reduce_mean has no axis, so it averages over classes as
    # well as examples, i.e. the per-example cross-entropy is scaled by
    # 1 / label_count. Changing this alters the balance against the L2 term
    # below - confirm before "fixing".
    cross_entropy = -tf.reduce_mean(ys * tf.log(ys_ + 1e-12))
    # L2 penalty summed over every trainable variable in the graph.
    l2 = tf.add_n([tf.nn.l2_loss(var) for var in tf.trainable_variables()])
    loss = cross_entropy + l2 * weight_decay

    # Nesterov momentum (0.9); the learning rate is fed through `lr`, and
    # global_step is incremented by every train_step run.
    global_step = tf.Variable(0, name='global_step', trainable=False)
    train_step = tf.train.MomentumOptimizer(lr, 0.9, use_nesterov=True).minimize(loss, global_step=global_step)
    # Accuracy: fraction of rows whose arg-max prediction matches the label.
    correct_prediction = tf.equal(tf.argmax(ys_, 1), tf.argmax(ys, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    
    # Tensorboard
    with tf.name_scope('losses'):
        tf.summary.scalar("cross_entropy", cross_entropy)
        tf.summary.scalar("accuracy", accuracy)
        tf.summary.scalar("l2", l2)
        tf.summary.scalar("loss", loss)
    # Single op that evaluates every summary registered in this graph.
    summary_op =  tf.summary.merge_all()
    

print("Built graph")

# Training driver: 300 epochs over the pre-split mini-batches, writing
# TensorBoard summaries every step, saving a checkpoint and reporting test
# accuracy after every epoch.
run_name = "vanilla-nosum"
logs_path = osp.expanduser('~/tb/densenet_laurent/%s' % run_name)
summary_writer = tf.summary.FileWriter(logs_path, graph)

# Keep the schedule in a local copy so the notebook-level `learning_rate`
# is not overwritten; re-running this cell then starts from the configured
# value instead of the last decayed one.
current_lr = learning_rate

try:
    with tf.Session(graph=graph) as session:
        session.run(tf.global_variables_initializer())
        saver = tf.train.Saver()

        print("Batch per epoch: ", batch_count)
        for epoch in range(1, 1+300):
            # Step decay: 0.01 from epoch 150, 0.001 from epoch 225.
            if epoch == 150: current_lr = 0.01
            if epoch == 225: current_lr = 0.001
            for batch_idx in range(batch_count):
                # Distinct names so the graph-level `ys_` tensor (softmax
                # output) is not rebound to a NumPy label batch.
                batch_xs, batch_ys = batches_data[batch_idx], batches_labels[batch_idx]
                _, train_acc, summary, step = session.run([train_step, accuracy, summary_op, global_step],
                    feed_dict = { xs: batch_xs,
                                 ys: batch_ys,
                                 lr: current_lr,
                                 is_training: True,
                                 keep_prob: 0.8 })
                summary_writer.add_summary(summary, step)

                if batch_idx % 100 == 0:
                    print(epoch, batch_idx, "acc: %.2f" % (train_acc))

            save_path = saver.save(session, '/tmp/densenet_%d.ckpt' % epoch)
            print("Saved checkpoint to %s" % save_path)

            # Evaluate in inference mode: batch norm not training, no dropout.
            test_acc = run_in_batch_avg(session, [accuracy ], [ xs, ys ],
                    feed_dict = { xs: data['test_data'],
                                 ys: data['test_labels'],
                                 is_training: False,
                                 keep_prob: 1. })

            print("test_results: %s" % str(test_acc))
finally:
    # Flush pending events and release the event file even when training is
    # interrupted or fails.
    summary_writer.close()


Creating graph...
Built graph
Batch per epoch:  781
1 0 acc: 0.11
